elasticity 1.5 → 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rspec +2 -1
- data/.rvmrc +1 -1
- data/HISTORY.md +47 -24
- data/LICENSE +1 -1
- data/README.md +165 -317
- data/Rakefile +4 -3
- data/elasticity.gemspec +3 -5
- data/lib/elasticity.rb +10 -5
- data/lib/elasticity/aws_request.rb +81 -20
- data/lib/elasticity/custom_jar_step.rb +33 -0
- data/lib/elasticity/emr.rb +45 -117
- data/lib/elasticity/hadoop_bootstrap_action.rb +27 -0
- data/lib/elasticity/hive_step.rb +57 -0
- data/lib/elasticity/job_flow.rb +109 -39
- data/lib/elasticity/job_flow_status.rb +53 -0
- data/lib/elasticity/job_flow_status_step.rb +35 -0
- data/lib/elasticity/job_flow_step.rb +17 -25
- data/lib/elasticity/pig_step.rb +82 -0
- data/lib/elasticity/support/conditional_raise.rb +23 -0
- data/lib/elasticity/version.rb +1 -1
- data/spec/lib/elasticity/aws_request_spec.rb +159 -51
- data/spec/lib/elasticity/custom_jar_step_spec.rb +59 -0
- data/spec/lib/elasticity/emr_spec.rb +231 -762
- data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +26 -0
- data/spec/lib/elasticity/hive_step_spec.rb +74 -0
- data/spec/lib/elasticity/job_flow_integration_spec.rb +197 -0
- data/spec/lib/elasticity/job_flow_spec.rb +369 -138
- data/spec/lib/elasticity/job_flow_status_spec.rb +147 -0
- data/spec/lib/elasticity/job_flow_status_step_spec.rb +73 -0
- data/spec/lib/elasticity/job_flow_step_spec.rb +26 -64
- data/spec/lib/elasticity/pig_step_spec.rb +104 -0
- data/spec/lib/elasticity/support/conditional_raise_spec.rb +35 -0
- data/spec/spec_helper.rb +1 -50
- data/spec/support/be_a_hash_including_matcher.rb +35 -0
- metadata +101 -119
- data/.autotest +0 -2
- data/lib/elasticity/custom_jar_job.rb +0 -38
- data/lib/elasticity/hive_job.rb +0 -69
- data/lib/elasticity/pig_job.rb +0 -109
- data/lib/elasticity/simple_job.rb +0 -51
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +0 -44
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +0 -41
- data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +0 -266
- data/spec/fixtures/vcr_cassettes/custom_jar_job/cloudburst.yml +0 -41
- data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +0 -75
- data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +0 -38
- data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +0 -41
- data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +0 -38
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +0 -41
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +0 -41
- data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +0 -41
- data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +0 -41
- data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +0 -38
- data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +0 -38
- data/spec/lib/elasticity/custom_jar_job_spec.rb +0 -118
- data/spec/lib/elasticity/hive_job_spec.rb +0 -90
- data/spec/lib/elasticity/pig_job_spec.rb +0 -226
@@ -1,38 +0,0 @@
|
|
1
|
-
---
|
2
|
-
http_interactions:
|
3
|
-
- request:
|
4
|
-
method: get
|
5
|
-
uri: !ruby/regexp /^https/
|
6
|
-
body:
|
7
|
-
encoding: US-ASCII
|
8
|
-
string: ""
|
9
|
-
headers:
|
10
|
-
Accept:
|
11
|
-
- "*/*; q=0.5, application/xml"
|
12
|
-
Accept-Encoding:
|
13
|
-
- gzip, deflate
|
14
|
-
response:
|
15
|
-
status:
|
16
|
-
code: 200
|
17
|
-
message: OK
|
18
|
-
headers:
|
19
|
-
Content-Length:
|
20
|
-
- "225"
|
21
|
-
Date:
|
22
|
-
- Sun, 10 Apr 2011 23:01:48 GMT
|
23
|
-
Content-Type:
|
24
|
-
- text/xml
|
25
|
-
X-Amzn-Requestid:
|
26
|
-
- 83e0154d-63c6-11e0-bc41-ababd98a870b
|
27
|
-
body:
|
28
|
-
encoding: US-ASCII
|
29
|
-
string: |
|
30
|
-
<TerminateJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
|
31
|
-
<ResponseMetadata>
|
32
|
-
<RequestId>83e0154d-63c6-11e0-bc41-ababd98a870b</RequestId>
|
33
|
-
</ResponseMetadata>
|
34
|
-
</TerminateJobFlowsResponse>
|
35
|
-
|
36
|
-
http_version: "1.1"
|
37
|
-
recorded_at: Sat, 03 Mar 2012 23:00:44 GMT
|
38
|
-
recorded_with: VCR 2.0.0
|
@@ -1,118 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Elasticity::CustomJarJob do
|
4
|
-
|
5
|
-
describe ".new" do
|
6
|
-
|
7
|
-
it "should have good defaults" do
|
8
|
-
custom_jar = Elasticity::CustomJarJob.new("access", "secret")
|
9
|
-
custom_jar.aws_access_key_id.should == "access"
|
10
|
-
custom_jar.aws_secret_access_key.should == "secret"
|
11
|
-
custom_jar.ec2_key_name.should == "default"
|
12
|
-
custom_jar.hadoop_version.should == "0.20"
|
13
|
-
custom_jar.instance_count.should == 2
|
14
|
-
custom_jar.master_instance_type.should == "m1.small"
|
15
|
-
custom_jar.name.should == "Elasticity Custom Jar Job"
|
16
|
-
custom_jar.slave_instance_type.should == "m1.small"
|
17
|
-
custom_jar.action_on_failure.should == "TERMINATE_JOB_FLOW"
|
18
|
-
custom_jar.log_uri.should == nil
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
describe "#run" do
|
24
|
-
|
25
|
-
context "when there are arguments provided" do
|
26
|
-
it "should run the script with the specified variables and return the jobflow_id" do
|
27
|
-
aws = Elasticity::EMR.new("", "")
|
28
|
-
aws.should_receive(:run_job_flow).with(
|
29
|
-
{
|
30
|
-
:name => "Elasticity Custom Jar Job",
|
31
|
-
:log_uri => "s3n://slif-test/output/logs",
|
32
|
-
:instances => {
|
33
|
-
:ec2_key_name => "default",
|
34
|
-
:hadoop_version => "0.20",
|
35
|
-
:instance_count => 2,
|
36
|
-
:master_instance_type => "m1.small",
|
37
|
-
:slave_instance_type => "m1.small",
|
38
|
-
},
|
39
|
-
:steps => [
|
40
|
-
{
|
41
|
-
:action_on_failure => "TERMINATE_JOB_FLOW",
|
42
|
-
:hadoop_jar_step => {
|
43
|
-
:jar => "s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar",
|
44
|
-
:args => [
|
45
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
|
46
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
|
47
|
-
"s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
|
48
|
-
],
|
49
|
-
},
|
50
|
-
:name => "Execute Custom Jar"
|
51
|
-
}
|
52
|
-
]
|
53
|
-
}).and_return("new_jobflow_id")
|
54
|
-
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
55
|
-
|
56
|
-
custom_jar = Elasticity::CustomJarJob.new("access", "secret")
|
57
|
-
custom_jar.log_uri = "s3n://slif-test/output/logs"
|
58
|
-
custom_jar.action_on_failure = "TERMINATE_JOB_FLOW"
|
59
|
-
jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar', [
|
60
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
|
61
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
|
62
|
-
"s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
|
63
|
-
])
|
64
|
-
jobflow_id.should == "new_jobflow_id"
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
context "when there are no arguments provided" do
|
69
|
-
it "should run the script with the specified variables and return the jobflow_id" do
|
70
|
-
aws = Elasticity::EMR.new("", "")
|
71
|
-
aws.should_receive(:run_job_flow).with(
|
72
|
-
{
|
73
|
-
:name => "Elasticity Custom Jar Job",
|
74
|
-
:log_uri => "s3n://slif-test/output/logs",
|
75
|
-
:instances => {
|
76
|
-
:ec2_key_name => "default",
|
77
|
-
:hadoop_version => "0.20",
|
78
|
-
:instance_count => 2,
|
79
|
-
:master_instance_type => "m1.small",
|
80
|
-
:slave_instance_type => "m1.small",
|
81
|
-
},
|
82
|
-
:steps => [
|
83
|
-
{
|
84
|
-
:action_on_failure => "TERMINATE_JOB_FLOW",
|
85
|
-
:hadoop_jar_step => {
|
86
|
-
:jar => "s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar"
|
87
|
-
},
|
88
|
-
:name => "Execute Custom Jar"
|
89
|
-
}
|
90
|
-
]
|
91
|
-
}).and_return("new_jobflow_id")
|
92
|
-
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
93
|
-
|
94
|
-
custom_jar = Elasticity::CustomJarJob.new("access", "secret")
|
95
|
-
custom_jar.log_uri = "s3n://slif-test/output/logs"
|
96
|
-
custom_jar.action_on_failure = "TERMINATE_JOB_FLOW"
|
97
|
-
jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar')
|
98
|
-
jobflow_id.should == "new_jobflow_id"
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
end
|
103
|
-
|
104
|
-
describe "integration happy path" do
|
105
|
-
use_vcr_cassette "custom_jar_job/cloudburst", :record => :none
|
106
|
-
it "should kick off the sample Amazion EMR Hive application" do
|
107
|
-
custom_jar = Elasticity::CustomJarJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
|
108
|
-
custom_jar.ec2_key_name = "sharethrough_dev"
|
109
|
-
jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar', [
|
110
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
|
111
|
-
"s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
|
112
|
-
"s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
|
113
|
-
])
|
114
|
-
jobflow_id.should == "j-1IU6NM8OUPS9I"
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
end
|
@@ -1,90 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Elasticity::HiveJob do
|
4
|
-
|
5
|
-
describe ".new" do
|
6
|
-
|
7
|
-
it "should have good defaults" do
|
8
|
-
hive = Elasticity::HiveJob.new("access", "secret")
|
9
|
-
hive.aws_access_key_id.should == "access"
|
10
|
-
hive.aws_secret_access_key.should == "secret"
|
11
|
-
hive.ec2_key_name.should == "default"
|
12
|
-
hive.hadoop_version.should == "0.20"
|
13
|
-
hive.instance_count.should == 2
|
14
|
-
hive.master_instance_type.should == "m1.small"
|
15
|
-
hive.name.should == "Elasticity Hive Job"
|
16
|
-
hive.slave_instance_type.should == "m1.small"
|
17
|
-
hive.action_on_failure.should == "TERMINATE_JOB_FLOW"
|
18
|
-
hive.log_uri.should == nil
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
|
23
|
-
describe "#run" do
|
24
|
-
|
25
|
-
it "should run the script with the specified variables and return the jobflow_id" do
|
26
|
-
aws = Elasticity::EMR.new("", "")
|
27
|
-
aws.should_receive(:run_job_flow).with({
|
28
|
-
:name => "Elasticity Hive Job",
|
29
|
-
:log_uri => "s3n://slif-test/output/logs",
|
30
|
-
:instances => {
|
31
|
-
:ec2_key_name => "default",
|
32
|
-
:hadoop_version => "0.20",
|
33
|
-
:instance_count => 2,
|
34
|
-
:master_instance_type => "m1.small",
|
35
|
-
:slave_instance_type => "m1.small",
|
36
|
-
},
|
37
|
-
:steps => [
|
38
|
-
{
|
39
|
-
:action_on_failure => "TERMINATE_JOB_FLOW",
|
40
|
-
:hadoop_jar_step => {
|
41
|
-
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
42
|
-
:args => [
|
43
|
-
"s3://elasticmapreduce/libs/hive/hive-script",
|
44
|
-
"--base-path",
|
45
|
-
"s3://elasticmapreduce/libs/hive/",
|
46
|
-
"--install-hive"
|
47
|
-
],
|
48
|
-
},
|
49
|
-
:name => "Setup Hive"
|
50
|
-
},
|
51
|
-
{
|
52
|
-
:action_on_failure => "CONTINUE",
|
53
|
-
:hadoop_jar_step => {
|
54
|
-
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
55
|
-
:args => [
|
56
|
-
"s3://elasticmapreduce/libs/hive/hive-script",
|
57
|
-
"--run-hive-script",
|
58
|
-
"--args",
|
59
|
-
"-f", "s3n://slif-hive/test.q",
|
60
|
-
"-d", "OUTPUT=s3n://slif-test/output"
|
61
|
-
],
|
62
|
-
},
|
63
|
-
:name => "Run Hive Script"
|
64
|
-
}
|
65
|
-
]
|
66
|
-
}).and_return("new_jobflow_id")
|
67
|
-
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
68
|
-
|
69
|
-
hive = Elasticity::HiveJob.new("access", "secret")
|
70
|
-
hive.log_uri = "s3n://slif-test/output/logs"
|
71
|
-
hive.action_on_failure = "CONTINUE"
|
72
|
-
jobflow_id = hive.run('s3n://slif-hive/test.q', {
|
73
|
-
'OUTPUT' => 's3n://slif-test/output'
|
74
|
-
})
|
75
|
-
jobflow_id.should == "new_jobflow_id"
|
76
|
-
end
|
77
|
-
|
78
|
-
end
|
79
|
-
|
80
|
-
describe "integration happy path" do
|
81
|
-
use_vcr_cassette "hive_job/hive_ads", :record => :none
|
82
|
-
it "should kick off the sample Amazion EMR Hive application" do
|
83
|
-
hive = Elasticity::HiveJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
|
84
|
-
hive.ec2_key_name = "sharethrough_dev"
|
85
|
-
jobflow_id = hive.run("s3n://elasticmapreduce/samples/hive-ads/libs/model-build.q")
|
86
|
-
jobflow_id.should == "j-2I4HV6S3SDGD9"
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
end
|
@@ -1,226 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe Elasticity::PigJob do
|
4
|
-
|
5
|
-
describe ".new" do
|
6
|
-
it "should have good defaults" do
|
7
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
8
|
-
pig.aws_access_key_id.should == "access"
|
9
|
-
pig.aws_secret_access_key.should == "secret"
|
10
|
-
pig.ec2_key_name.should == "default"
|
11
|
-
pig.hadoop_version.should == "0.20"
|
12
|
-
pig.instance_count.should == 2
|
13
|
-
pig.master_instance_type.should == "m1.small"
|
14
|
-
pig.name.should == "Elasticity Pig Job"
|
15
|
-
pig.slave_instance_type.should == "m1.small"
|
16
|
-
pig.action_on_failure.should == "TERMINATE_JOB_FLOW"
|
17
|
-
pig.log_uri.should == nil
|
18
|
-
pig.parallels.should == 1
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
describe "#instance_count=" do
|
23
|
-
it "should not allow instances to be set less than 2" do
|
24
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
25
|
-
lambda {
|
26
|
-
pig.instance_count = 1
|
27
|
-
}.should raise_error(ArgumentError, "Instance count cannot be set to less than 2 (requested 1)")
|
28
|
-
end
|
29
|
-
|
30
|
-
it "should recalculate @parallels" do
|
31
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
32
|
-
lambda {
|
33
|
-
pig.instance_count = 10
|
34
|
-
}.should change(pig, :parallels)
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
describe "#slave_instance_type=" do
|
39
|
-
it "should recalculate @parallels" do
|
40
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
41
|
-
lambda {
|
42
|
-
pig.slave_instance_type = "c1.xlarge"
|
43
|
-
}.should change(pig, :parallels)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
describe "calculated value of parallels" do
|
48
|
-
|
49
|
-
before do
|
50
|
-
@pig = Elasticity::PigJob.new("access", "secret")
|
51
|
-
@pig.instance_count = 8
|
52
|
-
end
|
53
|
-
|
54
|
-
context "when slave is m1.small" do
|
55
|
-
it "should be 7" do
|
56
|
-
@pig.slave_instance_type = "m1.small"
|
57
|
-
@pig.parallels.should == 7
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
context "when slave is m1.large" do
|
62
|
-
it "should be 13" do
|
63
|
-
@pig.slave_instance_type = "m1.large"
|
64
|
-
@pig.parallels.should == 13
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
context "when slave is c1.medium" do
|
69
|
-
it "should be 13" do
|
70
|
-
@pig.slave_instance_type = "c1.medium"
|
71
|
-
@pig.parallels.should == 13
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
|
-
context "when slave is m1.xlarge" do
|
76
|
-
it "should be 26" do
|
77
|
-
@pig.slave_instance_type = "m1.xlarge"
|
78
|
-
@pig.parallels.should == 26
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
context "when slave is c1.xlarge" do
|
83
|
-
it "should be 26" do
|
84
|
-
@pig.slave_instance_type = "c1.xlarge"
|
85
|
-
@pig.parallels.should == 26
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
context "when slave is any other type" do
|
90
|
-
it "should be 1" do
|
91
|
-
@pig.slave_instance_type = "foo"
|
92
|
-
@pig.parallels.should == 7
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
end
|
97
|
-
|
98
|
-
describe "#run" do
|
99
|
-
|
100
|
-
context "when no bootstrap actions are specified" do
|
101
|
-
|
102
|
-
it "should run the script with the specified variables and return the jobflow_id" do
|
103
|
-
aws = Elasticity::EMR.new("", "")
|
104
|
-
aws.should_receive(:run_job_flow).with({
|
105
|
-
:name => "Elasticity Pig Job",
|
106
|
-
:log_uri => "s3n://slif-test/output/logs",
|
107
|
-
:instances => {
|
108
|
-
:ec2_key_name => "default",
|
109
|
-
:hadoop_version => "0.20",
|
110
|
-
:instance_count => 8,
|
111
|
-
:master_instance_type => "m1.small",
|
112
|
-
:slave_instance_type => "m1.xlarge",
|
113
|
-
},
|
114
|
-
:steps => [
|
115
|
-
{
|
116
|
-
:action_on_failure => "TERMINATE_JOB_FLOW",
|
117
|
-
:hadoop_jar_step => {
|
118
|
-
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
119
|
-
:args => [
|
120
|
-
"s3://elasticmapreduce/libs/pig/pig-script",
|
121
|
-
"--base-path",
|
122
|
-
"s3://elasticmapreduce/libs/pig/",
|
123
|
-
"--install-pig"
|
124
|
-
],
|
125
|
-
},
|
126
|
-
:name => "Setup Pig"
|
127
|
-
},
|
128
|
-
{
|
129
|
-
:action_on_failure => "CONTINUE",
|
130
|
-
:hadoop_jar_step => {
|
131
|
-
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
132
|
-
:args => [
|
133
|
-
"s3://elasticmapreduce/libs/pig/pig-script",
|
134
|
-
"--run-pig-script",
|
135
|
-
"--args",
|
136
|
-
"-p", "OUTPUT=s3n://slif-pig-test/output",
|
137
|
-
"-p", "XREFS=s3n://slif-pig-test/xrefs",
|
138
|
-
"-p", "E_PARALLELS=26",
|
139
|
-
"s3n://slif-pig-test/test.pig"
|
140
|
-
],
|
141
|
-
},
|
142
|
-
:name => "Run Pig Script"
|
143
|
-
}
|
144
|
-
]
|
145
|
-
}).and_return("new_jobflow_id")
|
146
|
-
|
147
|
-
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
148
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
149
|
-
|
150
|
-
pig.log_uri = "s3n://slif-test/output/logs"
|
151
|
-
pig.action_on_failure = "CONTINUE"
|
152
|
-
pig.instance_count = 8
|
153
|
-
pig.slave_instance_type = "m1.xlarge"
|
154
|
-
|
155
|
-
jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
|
156
|
-
'OUTPUT' => 's3n://slif-pig-test/output',
|
157
|
-
'XREFS' => 's3n://slif-pig-test/xrefs'
|
158
|
-
})
|
159
|
-
jobflow_id.should == "new_jobflow_id"
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
context "when bootstrap actions are specified" do
|
164
|
-
it "should run the script wth the proper job configuration" do
|
165
|
-
aws = Elasticity::EMR.new("", "")
|
166
|
-
aws.should_receive(:run_job_flow).with(hash_including({
|
167
|
-
:bootstrap_actions => [
|
168
|
-
{
|
169
|
-
:name => "Elasticity Bootstrap Action (Configure Hadoop)",
|
170
|
-
:script_bootstrap_action => {
|
171
|
-
:path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
|
172
|
-
:args => ["-m", "foo=111"]
|
173
|
-
}
|
174
|
-
},
|
175
|
-
{
|
176
|
-
:name => "Elasticity Bootstrap Action (Configure Hadoop)",
|
177
|
-
:script_bootstrap_action => {
|
178
|
-
:path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
|
179
|
-
:args => ["-m", "bar=222"]
|
180
|
-
}
|
181
|
-
}
|
182
|
-
],
|
183
|
-
}))
|
184
|
-
|
185
|
-
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
186
|
-
pig = Elasticity::PigJob.new("access", "secret")
|
187
|
-
pig.add_hadoop_bootstrap_action("-m", "foo=111")
|
188
|
-
pig.add_hadoop_bootstrap_action("-m", "bar=222")
|
189
|
-
pig.run('s3n://slif-pig-test/test.pig')
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
|
-
end
|
194
|
-
|
195
|
-
describe "integration happy path" do
|
196
|
-
|
197
|
-
context "with bootstrap actions" do
|
198
|
-
use_vcr_cassette "pig_job/apache_log_reports_with_bootstrap", :record => :none
|
199
|
-
it "should kick off the sample Amazion EMR Pig application" do
|
200
|
-
pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
|
201
|
-
pig.ec2_key_name = "sharethrough_dev"
|
202
|
-
pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
|
203
|
-
jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
|
204
|
-
"INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
|
205
|
-
"OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-10"
|
206
|
-
})
|
207
|
-
jobflow_id.should == "j-1UK43AWRT3QHD"
|
208
|
-
end
|
209
|
-
end
|
210
|
-
|
211
|
-
context "without bootstrap actions" do
|
212
|
-
use_vcr_cassette "pig_job/apache_log_reports", :record => :none
|
213
|
-
it "should kick off the sample Amazion EMR Pig application" do
|
214
|
-
pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
|
215
|
-
pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
|
216
|
-
pig.ec2_key_name = "sharethrough_dev"
|
217
|
-
jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
|
218
|
-
"INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
|
219
|
-
"OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
|
220
|
-
})
|
221
|
-
jobflow_id.should == "j-1HB7A3TBRT3VS"
|
222
|
-
end
|
223
|
-
end
|
224
|
-
end
|
225
|
-
|
226
|
-
end
|