elasticity 1.2.1 → 1.2.2
- data/HISTORY.mediawiki +4 -0
- data/README.mediawiki +11 -1
- data/lib/elasticity/pig_job.rb +1 -0
- data/lib/elasticity/simple_job.rb +18 -0
- data/lib/elasticity/version.rb +1 -1
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +35 -0
- data/spec/lib/elasticity/pig_job_spec.rb +110 -59
- metadata +6 -4
data/HISTORY.mediawiki CHANGED
@@ -1,3 +1,7 @@
+=== 1.2.2 ===
+
+* HiveJob and PigJob now support configuring Hadoop options via .add_hadoop_bootstrap_action().
+
 === 1.2.1 ===
 
 * Shipping up E_PARALLELS Pig variable with each invocation; reasonable default value for PARALLEL based on the number and type of instances configured.
data/README.mediawiki CHANGED
@@ -26,7 +26,17 @@ Elasticity currently provides simplified access to launching Hive and Pig job flows.
   @slave_instance_type = "m1.small"
 </pre>
 
-These are all accessible from HiveJob and PigJob
+These are all accessible from HiveJob and PigJob. See the PigJob description for an example.
+
+=== Bootstrap Actions ===
+
+You can also configure Hadoop options with add_hadoop_bootstrap_action().
+
+<pre>
+pig = Elasticity::PigJob.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
+pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
+...
+</pre>
 
 == Hive ==
 
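The changelog notes that HiveJob picks up the same hook. A minimal sketch under that reading, assuming HiveJob's constructor takes the same two credential arguments as PigJob's:

<pre>
require 'elasticity'

# Assumption: HiveJob mirrors the PigJob example above.
hive = Elasticity::HiveJob.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
hive.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
</pre>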
data/lib/elasticity/simple_job.rb CHANGED
@@ -27,6 +27,24 @@ module Elasticity
       @emr = Elasticity::EMR.new(aws_access_key_id, aws_secret_access_key)
     end
 
+    def add_hadoop_bootstrap_action(option, value)
+      @hadoop_actions ||= []
+      @hadoop_actions << {
+        :name => "Elasticity Bootstrap Action (Configure Hadoop)",
+        :script_bootstrap_action => {
+          :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
+          :args => [option, value]
+        }
+      }
+    end
+
+    private
+
+    def get_bootstrap_actions
+      return {} unless @hadoop_actions && !@hadoop_actions.empty?
+      { :bootstrap_actions => @hadoop_actions }
+    end
+
   end
 
 end
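Note that get_bootstrap_actions returns {} rather than nil, so its result can be merged into a jobflow configuration unconditionally. A standalone sketch of that contract (the free-standing function and the jobflow_config hash are illustrative assumptions, not code from this release):

<pre>
# Illustrative only: a free-standing restatement of the helper's contract.
def get_bootstrap_actions(hadoop_actions)
  return {} if hadoop_actions.nil? || hadoop_actions.empty?
  { :bootstrap_actions => hadoop_actions }
end

# Stand-in jobflow configuration (names assumed for illustration).
jobflow_config = { :name => "Elasticity Pig Job", :steps => [] }

with_actions    = jobflow_config.merge(get_bootstrap_actions([{ :name => "configure-hadoop" }]))
without_actions = jobflow_config.merge(get_bootstrap_actions([]))

p with_actions.key?(:bootstrap_actions)    # => true
p without_actions.key?(:bootstrap_actions) # => false
</pre>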
data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml ADDED
@@ -0,0 +1,35 @@
+---
+- !ruby/struct:VCR::HTTPInteraction
+  request: !ruby/struct:VCR::Request
+    method: :get
+    uri: !ruby/regexp /^http:\/\/elasticmapreduce.amazonaws.com:80\/\?AWSAccessKeyId=AKIAI7HEMMNKGT6VFFSA&BootstrapActions.member.1.Name=Elasticity%20Bootstrap%20Action%20\(Configure%20Hadoop\)&BootstrapActions.member.1.ScriptBootstrapAction.Args.member.1=-m&BootstrapActions.member.1.ScriptBootstrapAction.Args.member.2=mapred.job.reuse.jvm.num.tasks=120&BootstrapActions.member.1.ScriptBootstrapAction.Path=s3n:\/\/elasticmapreduce\/bootstrap-actions\/configure-hadoop&Instances.Ec2KeyName=sharethrough_dev&Instances.HadoopVersion=0.20&Instances.InstanceCount=2&Instances.MasterInstanceType=m1.small&Instances.SlaveInstanceType=m1.small&Name=Elasticity%20Pig%20Job&Operation=RunJobFlow&.*&Steps.member.1.ActionOnFailure=TERMINATE_JOB_FLOW&Steps.member.1.HadoopJarStep.Args.member.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps.member.1.HadoopJarStep.Args.member.2=--base-path&Steps.member.1.HadoopJarStep.Args.member.3=s3:\/\/elasticmapreduce\/libs\/pig\/&Steps.member.1.HadoopJarStep.Args.member.4=--install-pig&Steps.member.1.HadoopJarStep.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runner.jar&Steps.member.1.Name=Setup%20Pig&Steps.member.2.ActionOnFailure=TERMINATE_JOB_FLOW&Steps.member.2.HadoopJarStep.Args.member.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps.member.2.HadoopJarStep.Args.member.10=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/do-reports.pig&Steps.member.2.HadoopJarStep.Args.member.2=--run-pig-script&Steps.member.2.HadoopJarStep.Args.member.3=--args&Steps.member.2.HadoopJarStep.Args.member.4=-p&Steps.member.2.HadoopJarStep.Args.member.5=INPUT=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/input&Steps.member.2.HadoopJarStep.Args.member.6=-p&Steps.member.2.HadoopJarStep.Args.member.7=OUTPUT=s3n:\/\/slif-elasticity\/pig-apache\/output\/2011-05-10&Steps.member.2.HadoopJarStep.Args.member.8=-p&Steps.member.2.HadoopJarStep.Args.member.9=E_PARALLELS=1&Steps.member.2.HadoopJarStep.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runner.jar&Steps.member.2.Name=Run%20Pig%20Script/
+    body:
+    headers:
+      accept:
+      - "*/*; q=0.5, application/xml"
+      accept-encoding:
+      - gzip, deflate
+  response: !ruby/struct:VCR::Response
+    status: !ruby/struct:VCR::ResponseStatus
+      code: 200
+      message: OK
+    headers:
+      x-amzn-requestid:
+      - 10533247-7ad7-11e0-86ad-3b6ddfd3d4b9
+      content-type:
+      - text/xml
+      date:
+      - Tue, 10 May 2011 07:28:12 GMT
+      content-length:
+      - "297"
+    body: |
+      <RunJobFlowResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
+        <RunJobFlowResult>
+          <JobFlowId>j-1UK43AWRT3QHD</JobFlowId>
+        </RunJobFlowResult>
+        <ResponseMetadata>
+          <RequestId>10533247-7ad7-11e0-86ad-3b6ddfd3d4b9</RequestId>
+        </ResponseMetadata>
+      </RunJobFlowResponse>
+
+  http_version: "1.1"
data/spec/lib/elasticity/pig_job_spec.rb CHANGED
@@ -81,80 +81,131 @@ describe Elasticity::PigJob do
 
   describe "#run" do
 
+    context "when no bootstrap actions are specified" do
+
+      it "should run the script with the specified variables and return the jobflow_id" do
+        aws = Elasticity::EMR.new("", "")
+        aws.should_receive(:run_job_flow).with({
+          :name => "Elasticity Pig Job",
+          :log_uri => "s3n://slif-test/output/logs",
+          :instances => {
+            :ec2_key_name => "default",
+            :hadoop_version => "0.20",
+            :instance_count => 8,
+            :master_instance_type => "m1.small",
+            :slave_instance_type => "m1.xlarge",
           },
+          :steps => [
            {
+              :action_on_failure => "TERMINATE_JOB_FLOW",
              :hadoop_jar_step => {
                :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
                :args => [
                  "s3://elasticmapreduce/libs/pig/pig-script",
+                  "--base-path",
+                  "s3://elasticmapreduce/libs/pig/",
+                  "--install-pig"
                ],
              },
+              :name => "Setup Pig"
+            },
+            {
+              :action_on_failure => "CONTINUE",
+              :hadoop_jar_step => {
+                :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
+                :args => [
+                  "s3://elasticmapreduce/libs/pig/pig-script",
+                  "--run-pig-script",
+                  "--args",
+                  "-p", "OUTPUT=s3n://slif-pig-test/output",
+                  "-p", "XREFS=s3n://slif-pig-test/xrefs",
+                  "-p", "E_PARALLELS=26",
+                  "s3n://slif-pig-test/test.pig"
+                ],
+              },
+              :name => "Run Pig Script"
+            }
+          ]
+        }).and_return("new_jobflow_id")
+
+        Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
+        pig = Elasticity::PigJob.new("access", "secret")
+
+        pig.log_uri = "s3n://slif-test/output/logs"
+        pig.action_on_failure = "CONTINUE"
+        pig.instance_count = 8
+        pig.slave_instance_type = "m1.xlarge"
+
+        jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
+          'OUTPUT' => 's3n://slif-pig-test/output',
+          'XREFS' => 's3n://slif-pig-test/xrefs'
+        })
+        jobflow_id.should == "new_jobflow_id"
+      end
+    end
 
+    context "when bootstrap actions are specified" do
+      it "should run the script wth the proper job configuration" do
+        aws = Elasticity::EMR.new("", "")
+        aws.should_receive(:run_job_flow).with(hash_including({
+          :bootstrap_actions => [
+            {
+              :name => "Elasticity Bootstrap Action (Configure Hadoop)",
+              :script_bootstrap_action => {
+                :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
+                :args => ["-m", "foo=111"]
+              }
+            },
+            {
+              :name => "Elasticity Bootstrap Action (Configure Hadoop)",
+              :script_bootstrap_action => {
+                :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
+                :args => ["-m", "bar=222"]
+              }
+            }
+          ],
+        }))
+
+        Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
+        pig = Elasticity::PigJob.new("access", "secret")
+        pig.add_hadoop_bootstrap_action("-m", "foo=111")
+        pig.add_hadoop_bootstrap_action("-m", "bar=222")
+        pig.run('s3n://slif-pig-test/test.pig')
+      end
    end
 
  end
 
  describe "integration happy path" do
+
+    context "with bootstrap actions" do
+      use_vcr_cassette "pig_job/apache_log_reports_with_bootstrap", :record => :none
+      it "should kick off the sample Amazion EMR Pig application" do
+        pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
+        pig.ec2_key_name = "sharethrough_dev"
+        pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
+        jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
+          "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
+          "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-10"
+        })
+        jobflow_id.should == "j-1UK43AWRT3QHD"
+      end
+    end
+
+    context "without bootstrap actions" do
+      use_vcr_cassette "pig_job/apache_log_reports", :record => :none
+      it "should kick off the sample Amazion EMR Pig application" do
+        pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
+        pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
+        pig.ec2_key_name = "sharethrough_dev"
+        jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
+          "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
+          "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
+        })
+        jobflow_id.should == "j-1HB7A3TBRT3VS"
+      end
    end
  end
 
+
 end
metadata CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: elasticity
 version: !ruby/object:Gem::Version
-  hash:
+  hash: 27
   prerelease:
   segments:
   - 1
   - 2
-  - 1
-  version: 1.2.1
+  - 2
+  version: 1.2.2
 platform: ruby
 authors:
 - Robert Slifka
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2011-05-
+date: 2011-05-10 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -186,6 +186,7 @@ files:
 - spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
 - spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
 - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
+- spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml
 - spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
 - spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
 - spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml
@@ -240,6 +241,7 @@ test_files:
 - spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
 - spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
 - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
+- spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml
 - spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
 - spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
 - spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml