elasticity 1.5 → 2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +2 -1
- data/.rvmrc +1 -1
- data/HISTORY.md +47 -24
- data/LICENSE +1 -1
- data/README.md +165 -317
- data/Rakefile +4 -3
- data/elasticity.gemspec +3 -5
- data/lib/elasticity.rb +10 -5
- data/lib/elasticity/aws_request.rb +81 -20
- data/lib/elasticity/custom_jar_step.rb +33 -0
- data/lib/elasticity/emr.rb +45 -117
- data/lib/elasticity/hadoop_bootstrap_action.rb +27 -0
- data/lib/elasticity/hive_step.rb +57 -0
- data/lib/elasticity/job_flow.rb +109 -39
- data/lib/elasticity/job_flow_status.rb +53 -0
- data/lib/elasticity/job_flow_status_step.rb +35 -0
- data/lib/elasticity/job_flow_step.rb +17 -25
- data/lib/elasticity/pig_step.rb +82 -0
- data/lib/elasticity/support/conditional_raise.rb +23 -0
- data/lib/elasticity/version.rb +1 -1
- data/spec/lib/elasticity/aws_request_spec.rb +159 -51
- data/spec/lib/elasticity/custom_jar_step_spec.rb +59 -0
- data/spec/lib/elasticity/emr_spec.rb +231 -762
- data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +26 -0
- data/spec/lib/elasticity/hive_step_spec.rb +74 -0
- data/spec/lib/elasticity/job_flow_integration_spec.rb +197 -0
- data/spec/lib/elasticity/job_flow_spec.rb +369 -138
- data/spec/lib/elasticity/job_flow_status_spec.rb +147 -0
- data/spec/lib/elasticity/job_flow_status_step_spec.rb +73 -0
- data/spec/lib/elasticity/job_flow_step_spec.rb +26 -64
- data/spec/lib/elasticity/pig_step_spec.rb +104 -0
- data/spec/lib/elasticity/support/conditional_raise_spec.rb +35 -0
- data/spec/spec_helper.rb +1 -50
- data/spec/support/be_a_hash_including_matcher.rb +35 -0
- metadata +101 -119
- data/.autotest +0 -2
- data/lib/elasticity/custom_jar_job.rb +0 -38
- data/lib/elasticity/hive_job.rb +0 -69
- data/lib/elasticity/pig_job.rb +0 -109
- data/lib/elasticity/simple_job.rb +0 -51
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +0 -44
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +0 -41
- data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +0 -266
- data/spec/fixtures/vcr_cassettes/custom_jar_job/cloudburst.yml +0 -41
- data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +0 -75
- data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +0 -38
- data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +0 -41
- data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +0 -38
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +0 -41
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +0 -41
- data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +0 -41
- data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +0 -41
- data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +0 -38
- data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +0 -38
- data/spec/lib/elasticity/custom_jar_job_spec.rb +0 -118
- data/spec/lib/elasticity/hive_job_spec.rb +0 -90
- data/spec/lib/elasticity/pig_job_spec.rb +0 -226
@@ -1,38 +0,0 @@
|
|
1
|
-
---
|
2
|
-
http_interactions:
|
3
|
-
- request:
|
4
|
-
method: get
|
5
|
-
uri: !ruby/regexp /^https/
|
6
|
-
body:
|
7
|
-
encoding: US-ASCII
|
8
|
-
string: ""
|
9
|
-
headers:
|
10
|
-
Accept:
|
11
|
-
- "*/*; q=0.5, application/xml"
|
12
|
-
Accept-Encoding:
|
13
|
-
- gzip, deflate
|
14
|
-
response:
|
15
|
-
status:
|
16
|
-
code: 200
|
17
|
-
message: OK
|
18
|
-
headers:
|
19
|
-
Content-Length:
|
20
|
-
- "225"
|
21
|
-
Date:
|
22
|
-
- Sun, 10 Apr 2011 23:01:48 GMT
|
23
|
-
Content-Type:
|
24
|
-
- text/xml
|
25
|
-
X-Amzn-Requestid:
|
26
|
-
- 83e0154d-63c6-11e0-bc41-ababd98a870b
|
27
|
-
body:
|
28
|
-
encoding: US-ASCII
|
29
|
-
string: |
|
30
|
-
<TerminateJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
|
31
|
-
<ResponseMetadata>
|
32
|
-
<RequestId>83e0154d-63c6-11e0-bc41-ababd98a870b</RequestId>
|
33
|
-
</ResponseMetadata>
|
34
|
-
</TerminateJobFlowsResponse>
|
35
|
-
|
36
|
-
http_version: "1.1"
|
37
|
-
recorded_at: Sat, 03 Mar 2012 23:00:44 GMT
|
38
|
-
recorded_with: VCR 2.0.0
|
@@ -1,118 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Elasticity::CustomJarJob do
|
4
|
-
|
5
|
-
describe ".new" do
|
6
|
-
|
7
|
-
it "should have good defaults" do
|
8
|
-
custom_jar = Elasticity::CustomJarJob.new("access", "secret")
|
9
|
-
custom_jar.aws_access_key_id.should == "access"
|
10
|
-
custom_jar.aws_secret_access_key.should == "secret"
|
11
|
-
custom_jar.ec2_key_name.should == "default"
|
12
|
-
custom_jar.hadoop_version.should == "0.20"
|
13
|
-
custom_jar.instance_count.should == 2
|
14
|
-
custom_jar.master_instance_type.should == "m1.small"
|
15
|
-
custom_jar.name.should == "Elasticity Custom Jar Job"
|
16
|
-
custom_jar.slave_instance_type.should == "m1.small"
|
17
|
-
custom_jar.action_on_failure.should == "TERMINATE_JOB_FLOW"
|
18
|
-
custom_jar.log_uri.should == nil
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
describe "#run" do
|
24
|
-
|
25
|
-
context "when there are arguments provided" do
|
26
|
-
it "should run the script with the specified variables and return the jobflow_id" do
|
27
|
-
aws = Elasticity::EMR.new("", "")
|
28
|
-
aws.should_receive(:run_job_flow).with(
|
29
|
-
{
|
30
|
-
:name => "Elasticity Custom Jar Job",
|
31
|
-
:log_uri => "s3n://slif-test/output/logs",
|
32
|
-
:instances => {
|
33
|
-
:ec2_key_name => "default",
|
34
|
-
:hadoop_version => "0.20",
|
35
|
-
:instance_count => 2,
|
36
|
-
:master_instance_type => "m1.small",
|
37
|
-
:slave_instance_type => "m1.small",
|
38
|
-
},
|
39
|
-
:steps => [
|
40
|
-
{
|
41
|
-
:action_on_failure => "TERMINATE_JOB_FLOW",
|
42
|
-
:hadoop_jar_step => {
|
43
|
-
:jar => "s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar",
|
44
|
-
:args => [
|
45
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
|
46
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
|
47
|
-
"s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
|
48
|
-
],
|
49
|
-
},
|
50
|
-
:name => "Execute Custom Jar"
|
51
|
-
}
|
52
|
-
]
|
53
|
-
}).and_return("new_jobflow_id")
|
54
|
-
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
55
|
-
|
56
|
-
custom_jar = Elasticity::CustomJarJob.new("access", "secret")
|
57
|
-
custom_jar.log_uri = "s3n://slif-test/output/logs"
|
58
|
-
custom_jar.action_on_failure = "TERMINATE_JOB_FLOW"
|
59
|
-
jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar', [
|
60
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
|
61
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
|
62
|
-
"s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
|
63
|
-
])
|
64
|
-
jobflow_id.should == "new_jobflow_id"
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
context "when there are no arguments provided" do
|
69
|
-
it "should run the script with the specified variables and return the jobflow_id" do
|
70
|
-
aws = Elasticity::EMR.new("", "")
|
71
|
-
aws.should_receive(:run_job_flow).with(
|
72
|
-
{
|
73
|
-
:name => "Elasticity Custom Jar Job",
|
74
|
-
:log_uri => "s3n://slif-test/output/logs",
|
75
|
-
:instances => {
|
76
|
-
:ec2_key_name => "default",
|
77
|
-
:hadoop_version => "0.20",
|
78
|
-
:instance_count => 2,
|
79
|
-
:master_instance_type => "m1.small",
|
80
|
-
:slave_instance_type => "m1.small",
|
81
|
-
},
|
82
|
-
:steps => [
|
83
|
-
{
|
84
|
-
:action_on_failure => "TERMINATE_JOB_FLOW",
|
85
|
-
:hadoop_jar_step => {
|
86
|
-
:jar => "s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar"
|
87
|
-
},
|
88
|
-
:name => "Execute Custom Jar"
|
89
|
-
}
|
90
|
-
]
|
91
|
-
}).and_return("new_jobflow_id")
|
92
|
-
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
93
|
-
|
94
|
-
custom_jar = Elasticity::CustomJarJob.new("access", "secret")
|
95
|
-
custom_jar.log_uri = "s3n://slif-test/output/logs"
|
96
|
-
custom_jar.action_on_failure = "TERMINATE_JOB_FLOW"
|
97
|
-
jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar')
|
98
|
-
jobflow_id.should == "new_jobflow_id"
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
end
|
103
|
-
|
104
|
-
describe "integration happy path" do
|
105
|
-
use_vcr_cassette "custom_jar_job/cloudburst", :record => :none
|
106
|
-
it "should kick off the sample Amazion EMR Hive application" do
|
107
|
-
custom_jar = Elasticity::CustomJarJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
|
108
|
-
custom_jar.ec2_key_name = "sharethrough_dev"
|
109
|
-
jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar', [
|
110
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
|
111
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
|
112
|
-
"s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
|
113
|
-
])
|
114
|
-
jobflow_id.should == "j-1IU6NM8OUPS9I"
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
end
|
@@ -1,90 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Elasticity::HiveJob do
|
4
|
-
|
5
|
-
describe ".new" do
|
6
|
-
|
7
|
-
it "should have good defaults" do
|
8
|
-
hive = Elasticity::HiveJob.new("access", "secret")
|
9
|
-
hive.aws_access_key_id.should == "access"
|
10
|
-
hive.aws_secret_access_key.should == "secret"
|
11
|
-
hive.ec2_key_name.should == "default"
|
12
|
-
hive.hadoop_version.should == "0.20"
|
13
|
-
hive.instance_count.should == 2
|
14
|
-
hive.master_instance_type.should == "m1.small"
|
15
|
-
hive.name.should == "Elasticity Hive Job"
|
16
|
-
hive.slave_instance_type.should == "m1.small"
|
17
|
-
hive.action_on_failure.should == "TERMINATE_JOB_FLOW"
|
18
|
-
hive.log_uri.should == nil
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
describe "#run" do
|
24
|
-
|
25
|
-
it "should run the script with the specified variables and return the jobflow_id" do
|
26
|
-
aws = Elasticity::EMR.new("", "")
|
27
|
-
aws.should_receive(:run_job_flow).with({
|
28
|
-
:name => "Elasticity Hive Job",
|
29
|
-
:log_uri => "s3n://slif-test/output/logs",
|
30
|
-
:instances => {
|
31
|
-
:ec2_key_name => "default",
|
32
|
-
:hadoop_version => "0.20",
|
33
|
-
:instance_count => 2,
|
34
|
-
:master_instance_type => "m1.small",
|
35
|
-
:slave_instance_type => "m1.small",
|
36
|
-
},
|
37
|
-
:steps => [
|
38
|
-
{
|
39
|
-
:action_on_failure => "TERMINATE_JOB_FLOW",
|
40
|
-
:hadoop_jar_step => {
|
41
|
-
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
42
|
-
:args => [
|
43
|
-
"s3://elasticmapreduce/libs/hive/hive-script",
|
44
|
-
"--base-path",
|
45
|
-
"s3://elasticmapreduce/libs/hive/",
|
46
|
-
"--install-hive"
|
47
|
-
],
|
48
|
-
},
|
49
|
-
:name => "Setup Hive"
|
50
|
-
},
|
51
|
-
{
|
52
|
-
:action_on_failure => "CONTINUE",
|
53
|
-
:hadoop_jar_step => {
|
54
|
-
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
55
|
-
:args => [
|
56
|
-
"s3://elasticmapreduce/libs/hive/hive-script",
|
57
|
-
"--run-hive-script",
|
58
|
-
"--args",
|
59
|
-
"-f", "s3n://slif-hive/test.q",
|
60
|
-
"-d", "OUTPUT=s3n://slif-test/output"
|
61
|
-
],
|
62
|
-
},
|
63
|
-
:name => "Run Hive Script"
|
64
|
-
}
|
65
|
-
]
|
66
|
-
}).and_return("new_jobflow_id")
|
67
|
-
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
68
|
-
|
69
|
-
hive = Elasticity::HiveJob.new("access", "secret")
|
70
|
-
hive.log_uri = "s3n://slif-test/output/logs"
|
71
|
-
hive.action_on_failure = "CONTINUE"
|
72
|
-
jobflow_id = hive.run('s3n://slif-hive/test.q', {
|
73
|
-
'OUTPUT' => 's3n://slif-test/output'
|
74
|
-
})
|
75
|
-
jobflow_id.should == "new_jobflow_id"
|
76
|
-
end
|
77
|
-
|
78
|
-
end
|
79
|
-
|
80
|
-
describe "integration happy path" do
|
81
|
-
use_vcr_cassette "hive_job/hive_ads", :record => :none
|
82
|
-
it "should kick off the sample Amazion EMR Hive application" do
|
83
|
-
hive = Elasticity::HiveJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
|
84
|
-
hive.ec2_key_name = "sharethrough_dev"
|
85
|
-
jobflow_id = hive.run("s3n://elasticmapreduce/samples/hive-ads/libs/model-build.q")
|
86
|
-
jobflow_id.should == "j-2I4HV6S3SDGD9"
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
end
|
@@ -1,226 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Elasticity::PigJob do
|
4
|
-
|
5
|
-
describe ".new" do
|
6
|
-
it "should have good defaults" do
|
7
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
8
|
-
pig.aws_access_key_id.should == "access"
|
9
|
-
pig.aws_secret_access_key.should == "secret"
|
10
|
-
pig.ec2_key_name.should == "default"
|
11
|
-
pig.hadoop_version.should == "0.20"
|
12
|
-
pig.instance_count.should == 2
|
13
|
-
pig.master_instance_type.should == "m1.small"
|
14
|
-
pig.name.should == "Elasticity Pig Job"
|
15
|
-
pig.slave_instance_type.should == "m1.small"
|
16
|
-
pig.action_on_failure.should == "TERMINATE_JOB_FLOW"
|
17
|
-
pig.log_uri.should == nil
|
18
|
-
pig.parallels.should == 1
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
describe "#instance_count=" do
|
23
|
-
it "should not allow instances to be set less than 2" do
|
24
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
25
|
-
lambda {
|
26
|
-
pig.instance_count = 1
|
27
|
-
}.should raise_error(ArgumentError, "Instance count cannot be set to less than 2 (requested 1)")
|
28
|
-
end
|
29
|
-
|
30
|
-
it "should recalculate @parallels" do
|
31
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
32
|
-
lambda {
|
33
|
-
pig.instance_count = 10
|
34
|
-
}.should change(pig, :parallels)
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
describe "#slave_instance_type=" do
|
39
|
-
it "should recalculate @parallels" do
|
40
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
41
|
-
lambda {
|
42
|
-
pig.slave_instance_type = "c1.xlarge"
|
43
|
-
}.should change(pig, :parallels)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
describe "calculated value of parallels" do
|
48
|
-
|
49
|
-
before do
|
50
|
-
@pig = Elasticity::PigJob.new("access", "secret")
|
51
|
-
@pig.instance_count = 8
|
52
|
-
end
|
53
|
-
|
54
|
-
context "when slave is m1.small" do
|
55
|
-
it "should be 7" do
|
56
|
-
@pig.slave_instance_type = "m1.small"
|
57
|
-
@pig.parallels.should == 7
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
context "when slave is m1.large" do
|
62
|
-
it "should be 13" do
|
63
|
-
@pig.slave_instance_type = "m1.large"
|
64
|
-
@pig.parallels.should == 13
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
context "when slave is c1.medium" do
|
69
|
-
it "should be 13" do
|
70
|
-
@pig.slave_instance_type = "c1.medium"
|
71
|
-
@pig.parallels.should == 13
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
context "when slave is m1.xlarge" do
|
76
|
-
it "should be 26" do
|
77
|
-
@pig.slave_instance_type = "m1.xlarge"
|
78
|
-
@pig.parallels.should == 26
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
context "when slave is c1.xlarge" do
|
83
|
-
it "should be 26" do
|
84
|
-
@pig.slave_instance_type = "c1.xlarge"
|
85
|
-
@pig.parallels.should == 26
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
context "when slave is any other type" do
|
90
|
-
it "should be 1" do
|
91
|
-
@pig.slave_instance_type = "foo"
|
92
|
-
@pig.parallels.should == 7
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
end
|
97
|
-
|
98
|
-
describe "#run" do
|
99
|
-
|
100
|
-
context "when no bootstrap actions are specified" do
|
101
|
-
|
102
|
-
it "should run the script with the specified variables and return the jobflow_id" do
|
103
|
-
aws = Elasticity::EMR.new("", "")
|
104
|
-
aws.should_receive(:run_job_flow).with({
|
105
|
-
:name => "Elasticity Pig Job",
|
106
|
-
:log_uri => "s3n://slif-test/output/logs",
|
107
|
-
:instances => {
|
108
|
-
:ec2_key_name => "default",
|
109
|
-
:hadoop_version => "0.20",
|
110
|
-
:instance_count => 8,
|
111
|
-
:master_instance_type => "m1.small",
|
112
|
-
:slave_instance_type => "m1.xlarge",
|
113
|
-
},
|
114
|
-
:steps => [
|
115
|
-
{
|
116
|
-
:action_on_failure => "TERMINATE_JOB_FLOW",
|
117
|
-
:hadoop_jar_step => {
|
118
|
-
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
119
|
-
:args => [
|
120
|
-
"s3://elasticmapreduce/libs/pig/pig-script",
|
121
|
-
"--base-path",
|
122
|
-
"s3://elasticmapreduce/libs/pig/",
|
123
|
-
"--install-pig"
|
124
|
-
],
|
125
|
-
},
|
126
|
-
:name => "Setup Pig"
|
127
|
-
},
|
128
|
-
{
|
129
|
-
:action_on_failure => "CONTINUE",
|
130
|
-
:hadoop_jar_step => {
|
131
|
-
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
132
|
-
:args => [
|
133
|
-
"s3://elasticmapreduce/libs/pig/pig-script",
|
134
|
-
"--run-pig-script",
|
135
|
-
"--args",
|
136
|
-
"-p", "OUTPUT=s3n://slif-pig-test/output",
|
137
|
-
"-p", "XREFS=s3n://slif-pig-test/xrefs",
|
138
|
-
"-p", "E_PARALLELS=26",
|
139
|
-
"s3n://slif-pig-test/test.pig"
|
140
|
-
],
|
141
|
-
},
|
142
|
-
:name => "Run Pig Script"
|
143
|
-
}
|
144
|
-
]
|
145
|
-
}).and_return("new_jobflow_id")
|
146
|
-
|
147
|
-
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
148
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
149
|
-
|
150
|
-
pig.log_uri = "s3n://slif-test/output/logs"
|
151
|
-
pig.action_on_failure = "CONTINUE"
|
152
|
-
pig.instance_count = 8
|
153
|
-
pig.slave_instance_type = "m1.xlarge"
|
154
|
-
|
155
|
-
jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
|
156
|
-
'OUTPUT' => 's3n://slif-pig-test/output',
|
157
|
-
'XREFS' => 's3n://slif-pig-test/xrefs'
|
158
|
-
})
|
159
|
-
jobflow_id.should == "new_jobflow_id"
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
context "when bootstrap actions are specified" do
|
164
|
-
it "should run the script wth the proper job configuration" do
|
165
|
-
aws = Elasticity::EMR.new("", "")
|
166
|
-
aws.should_receive(:run_job_flow).with(hash_including({
|
167
|
-
:bootstrap_actions => [
|
168
|
-
{
|
169
|
-
:name => "Elasticity Bootstrap Action (Configure Hadoop)",
|
170
|
-
:script_bootstrap_action => {
|
171
|
-
:path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
|
172
|
-
:args => ["-m", "foo=111"]
|
173
|
-
}
|
174
|
-
},
|
175
|
-
{
|
176
|
-
:name => "Elasticity Bootstrap Action (Configure Hadoop)",
|
177
|
-
:script_bootstrap_action => {
|
178
|
-
:path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
|
179
|
-
:args => ["-m", "bar=222"]
|
180
|
-
}
|
181
|
-
}
|
182
|
-
],
|
183
|
-
}))
|
184
|
-
|
185
|
-
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
186
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
187
|
-
pig.add_hadoop_bootstrap_action("-m", "foo=111")
|
188
|
-
pig.add_hadoop_bootstrap_action("-m", "bar=222")
|
189
|
-
pig.run('s3n://slif-pig-test/test.pig')
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
|
-
end
|
194
|
-
|
195
|
-
describe "integration happy path" do
|
196
|
-
|
197
|
-
context "with bootstrap actions" do
|
198
|
-
use_vcr_cassette "pig_job/apache_log_reports_with_bootstrap", :record => :none
|
199
|
-
it "should kick off the sample Amazion EMR Pig application" do
|
200
|
-
pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
|
201
|
-
pig.ec2_key_name = "sharethrough_dev"
|
202
|
-
pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
|
203
|
-
jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
|
204
|
-
"INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
|
205
|
-
"OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-10"
|
206
|
-
})
|
207
|
-
jobflow_id.should == "j-1UK43AWRT3QHD"
|
208
|
-
end
|
209
|
-
end
|
210
|
-
|
211
|
-
context "without bootstrap actions" do
|
212
|
-
use_vcr_cassette "pig_job/apache_log_reports", :record => :none
|
213
|
-
it "should kick off the sample Amazion EMR Pig application" do
|
214
|
-
pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
|
215
|
-
pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
|
216
|
-
pig.ec2_key_name = "sharethrough_dev"
|
217
|
-
jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
|
218
|
-
"INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
|
219
|
-
"OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
|
220
|
-
})
|
221
|
-
jobflow_id.should == "j-1HB7A3TBRT3VS"
|
222
|
-
end
|
223
|
-
end
|
224
|
-
end
|
225
|
-
|
226
|
-
end
|