elasticity 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.mediawiki +4 -0
- data/README.mediawiki +11 -1
- data/lib/elasticity/pig_job.rb +1 -0
- data/lib/elasticity/simple_job.rb +18 -0
- data/lib/elasticity/version.rb +1 -1
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +35 -0
- data/spec/lib/elasticity/pig_job_spec.rb +110 -59
- metadata +6 -4
data/HISTORY.mediawiki
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
=== 1.2.2 ===
|
|
2
|
+
|
|
3
|
+
* HiveJob and PigJob now support configuring Hadoop options via .add_hadoop_bootstrap_action().
|
|
4
|
+
|
|
1
5
|
=== 1.2.1 ===
|
|
2
6
|
|
|
3
7
|
* Shipping up E_PARALLELS Pig variable with each invocation; reasonable default value for PARALLEL based on the number and type of instances configured.
|
data/README.mediawiki
CHANGED
|
@@ -26,7 +26,17 @@ Elasticity currently provides simplified access to launching Hive and Pig job fl
|
|
|
26
26
|
@slave_instance_type = "m1.small"
|
|
27
27
|
</pre>
|
|
28
28
|
|
|
29
|
-
These are all accessible from HiveJob and PigJob
|
|
29
|
+
These are all accessible from HiveJob and PigJob. See the PigJob description for an example.
|
|
30
|
+
|
|
31
|
+
=== Bootstrap Actions ===
|
|
32
|
+
|
|
33
|
+
You can also configure Hadoop options with add_hadoop_bootstrap_action().
|
|
34
|
+
|
|
35
|
+
<pre>
|
|
36
|
+
pig = Elasticity::PigJob.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
|
|
37
|
+
pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
|
|
38
|
+
...
|
|
39
|
+
</pre>
|
|
30
40
|
|
|
31
41
|
== Hive ==
|
|
32
42
|
|
data/lib/elasticity/simple_job.rb
CHANGED
|
@@ -27,6 +27,24 @@ module Elasticity
|
|
|
27
27
|
@emr = Elasticity::EMR.new(aws_access_key_id, aws_secret_access_key)
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
+
def add_hadoop_bootstrap_action(option, value)
|
|
31
|
+
@hadoop_actions ||= []
|
|
32
|
+
@hadoop_actions << {
|
|
33
|
+
:name => "Elasticity Bootstrap Action (Configure Hadoop)",
|
|
34
|
+
:script_bootstrap_action => {
|
|
35
|
+
:path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
|
|
36
|
+
:args => [option, value]
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
def get_bootstrap_actions
|
|
44
|
+
return {} unless @hadoop_actions && !@hadoop_actions.empty?
|
|
45
|
+
{ :bootstrap_actions => @hadoop_actions }
|
|
46
|
+
end
|
|
47
|
+
|
|
30
48
|
end
|
|
31
49
|
|
|
32
50
|
end
|
data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml
CHANGED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
---
|
|
2
|
+
- !ruby/struct:VCR::HTTPInteraction
|
|
3
|
+
request: !ruby/struct:VCR::Request
|
|
4
|
+
method: :get
|
|
5
|
+
uri: !ruby/regexp /^http:\/\/elasticmapreduce.amazonaws.com:80\/\?AWSAccessKeyId=AKIAI7HEMMNKGT6VFFSA&BootstrapActions.member.1.Name=Elasticity%20Bootstrap%20Action%20\(Configure%20Hadoop\)&BootstrapActions.member.1.ScriptBootstrapAction.Args.member.1=-m&BootstrapActions.member.1.ScriptBootstrapAction.Args.member.2=mapred.job.reuse.jvm.num.tasks=120&BootstrapActions.member.1.ScriptBootstrapAction.Path=s3n:\/\/elasticmapreduce\/bootstrap-actions\/configure-hadoop&Instances.Ec2KeyName=sharethrough_dev&Instances.HadoopVersion=0.20&Instances.InstanceCount=2&Instances.MasterInstanceType=m1.small&Instances.SlaveInstanceType=m1.small&Name=Elasticity%20Pig%20Job&Operation=RunJobFlow&.*&Steps.member.1.ActionOnFailure=TERMINATE_JOB_FLOW&Steps.member.1.HadoopJarStep.Args.member.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps.member.1.HadoopJarStep.Args.member.2=--base-path&Steps.member.1.HadoopJarStep.Args.member.3=s3:\/\/elasticmapreduce\/libs\/pig\/&Steps.member.1.HadoopJarStep.Args.member.4=--install-pig&Steps.member.1.HadoopJarStep.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runner.jar&Steps.member.1.Name=Setup%20Pig&Steps.member.2.ActionOnFailure=TERMINATE_JOB_FLOW&Steps.member.2.HadoopJarStep.Args.member.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps.member.2.HadoopJarStep.Args.member.10=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/do-reports.pig&Steps.member.2.HadoopJarStep.Args.member.2=--run-pig-script&Steps.member.2.HadoopJarStep.Args.member.3=--args&Steps.member.2.HadoopJarStep.Args.member.4=-p&Steps.member.2.HadoopJarStep.Args.member.5=INPUT=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/input&Steps.member.2.HadoopJarStep.Args.member.6=-p&Steps.member.2.HadoopJarStep.Args.member.7=OUTPUT=s3n:\/\/slif-elasticity\/pig-apache\/output\/2011-05-10&Steps.member.2.HadoopJarStep.Args.member.8=-p&Steps.member.2.HadoopJarStep.Args.member.9=E_PARALLELS=1&Steps.member.2.HadoopJarStep.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runner.
jar&Steps.member.2.Name=Run%20Pig%20Script/
|
|
6
|
+
body:
|
|
7
|
+
headers:
|
|
8
|
+
accept:
|
|
9
|
+
- "*/*; q=0.5, application/xml"
|
|
10
|
+
accept-encoding:
|
|
11
|
+
- gzip, deflate
|
|
12
|
+
response: !ruby/struct:VCR::Response
|
|
13
|
+
status: !ruby/struct:VCR::ResponseStatus
|
|
14
|
+
code: 200
|
|
15
|
+
message: OK
|
|
16
|
+
headers:
|
|
17
|
+
x-amzn-requestid:
|
|
18
|
+
- 10533247-7ad7-11e0-86ad-3b6ddfd3d4b9
|
|
19
|
+
content-type:
|
|
20
|
+
- text/xml
|
|
21
|
+
date:
|
|
22
|
+
- Tue, 10 May 2011 07:28:12 GMT
|
|
23
|
+
content-length:
|
|
24
|
+
- "297"
|
|
25
|
+
body: |
|
|
26
|
+
<RunJobFlowResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
|
|
27
|
+
<RunJobFlowResult>
|
|
28
|
+
<JobFlowId>j-1UK43AWRT3QHD</JobFlowId>
|
|
29
|
+
</RunJobFlowResult>
|
|
30
|
+
<ResponseMetadata>
|
|
31
|
+
<RequestId>10533247-7ad7-11e0-86ad-3b6ddfd3d4b9</RequestId>
|
|
32
|
+
</ResponseMetadata>
|
|
33
|
+
</RunJobFlowResponse>
|
|
34
|
+
|
|
35
|
+
http_version: "1.1"
|
|
data/spec/lib/elasticity/pig_job_spec.rb
CHANGED
|
@@ -81,80 +81,131 @@ describe Elasticity::PigJob do
|
|
|
81
81
|
|
|
82
82
|
describe "#run" do
|
|
83
83
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
:
|
|
89
|
-
|
|
90
|
-
:
|
|
91
|
-
:
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
{
|
|
98
|
-
:action_on_failure => "TERMINATE_JOB_FLOW",
|
|
99
|
-
:hadoop_jar_step => {
|
|
100
|
-
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
|
101
|
-
:args => [
|
|
102
|
-
"s3://elasticmapreduce/libs/pig/pig-script",
|
|
103
|
-
"--base-path",
|
|
104
|
-
"s3://elasticmapreduce/libs/pig/",
|
|
105
|
-
"--install-pig"
|
|
106
|
-
],
|
|
107
|
-
},
|
|
108
|
-
:name => "Setup Pig"
|
|
84
|
+
context "when no bootstrap actions are specified" do
|
|
85
|
+
|
|
86
|
+
it "should run the script with the specified variables and return the jobflow_id" do
|
|
87
|
+
aws = Elasticity::EMR.new("", "")
|
|
88
|
+
aws.should_receive(:run_job_flow).with({
|
|
89
|
+
:name => "Elasticity Pig Job",
|
|
90
|
+
:log_uri => "s3n://slif-test/output/logs",
|
|
91
|
+
:instances => {
|
|
92
|
+
:ec2_key_name => "default",
|
|
93
|
+
:hadoop_version => "0.20",
|
|
94
|
+
:instance_count => 8,
|
|
95
|
+
:master_instance_type => "m1.small",
|
|
96
|
+
:slave_instance_type => "m1.xlarge",
|
|
109
97
|
},
|
|
98
|
+
:steps => [
|
|
110
99
|
{
|
|
111
|
-
:action_on_failure => "
|
|
100
|
+
:action_on_failure => "TERMINATE_JOB_FLOW",
|
|
112
101
|
:hadoop_jar_step => {
|
|
113
102
|
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
|
114
103
|
:args => [
|
|
115
104
|
"s3://elasticmapreduce/libs/pig/pig-script",
|
|
116
|
-
"--
|
|
117
|
-
"
|
|
118
|
-
"-
|
|
119
|
-
"-p", "XREFS=s3n://slif-pig-test/xrefs",
|
|
120
|
-
"-p", "E_PARALLELS=26",
|
|
121
|
-
"s3n://slif-pig-test/test.pig"
|
|
105
|
+
"--base-path",
|
|
106
|
+
"s3://elasticmapreduce/libs/pig/",
|
|
107
|
+
"--install-pig"
|
|
122
108
|
],
|
|
123
109
|
},
|
|
124
|
-
:name => "
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
110
|
+
:name => "Setup Pig"
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
:action_on_failure => "CONTINUE",
|
|
114
|
+
:hadoop_jar_step => {
|
|
115
|
+
:jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
|
|
116
|
+
:args => [
|
|
117
|
+
"s3://elasticmapreduce/libs/pig/pig-script",
|
|
118
|
+
"--run-pig-script",
|
|
119
|
+
"--args",
|
|
120
|
+
"-p", "OUTPUT=s3n://slif-pig-test/output",
|
|
121
|
+
"-p", "XREFS=s3n://slif-pig-test/xrefs",
|
|
122
|
+
"-p", "E_PARALLELS=26",
|
|
123
|
+
"s3n://slif-pig-test/test.pig"
|
|
124
|
+
],
|
|
125
|
+
},
|
|
126
|
+
:name => "Run Pig Script"
|
|
127
|
+
}
|
|
128
|
+
]
|
|
129
|
+
}).and_return("new_jobflow_id")
|
|
130
|
+
|
|
131
|
+
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
|
132
|
+
pig = Elasticity::PigJob.new("access", "secret")
|
|
133
|
+
|
|
134
|
+
pig.log_uri = "s3n://slif-test/output/logs"
|
|
135
|
+
pig.action_on_failure = "CONTINUE"
|
|
136
|
+
pig.instance_count = 8
|
|
137
|
+
pig.slave_instance_type = "m1.xlarge"
|
|
138
|
+
|
|
139
|
+
jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
|
|
140
|
+
'OUTPUT' => 's3n://slif-pig-test/output',
|
|
141
|
+
'XREFS' => 's3n://slif-pig-test/xrefs'
|
|
142
|
+
})
|
|
143
|
+
jobflow_id.should == "new_jobflow_id"
|
|
144
|
+
end
|
|
145
|
+
end
|
|
136
146
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
147
|
+
context "when bootstrap actions are specified" do
|
|
148
|
+
it "should run the script wth the proper job configuration" do
|
|
149
|
+
aws = Elasticity::EMR.new("", "")
|
|
150
|
+
aws.should_receive(:run_job_flow).with(hash_including({
|
|
151
|
+
:bootstrap_actions => [
|
|
152
|
+
{
|
|
153
|
+
:name => "Elasticity Bootstrap Action (Configure Hadoop)",
|
|
154
|
+
:script_bootstrap_action => {
|
|
155
|
+
:path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
|
|
156
|
+
:args => ["-m", "foo=111"]
|
|
157
|
+
}
|
|
158
|
+
},
|
|
159
|
+
{
|
|
160
|
+
:name => "Elasticity Bootstrap Action (Configure Hadoop)",
|
|
161
|
+
:script_bootstrap_action => {
|
|
162
|
+
:path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
|
|
163
|
+
:args => ["-m", "bar=222"]
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
],
|
|
167
|
+
}))
|
|
168
|
+
|
|
169
|
+
Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
|
|
170
|
+
pig = Elasticity::PigJob.new("access", "secret")
|
|
171
|
+
pig.add_hadoop_bootstrap_action("-m", "foo=111")
|
|
172
|
+
pig.add_hadoop_bootstrap_action("-m", "bar=222")
|
|
173
|
+
pig.run('s3n://slif-pig-test/test.pig')
|
|
174
|
+
end
|
|
142
175
|
end
|
|
143
176
|
|
|
144
177
|
end
|
|
145
178
|
|
|
146
179
|
describe "integration happy path" do
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
"
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
180
|
+
|
|
181
|
+
context "with bootstrap actions" do
|
|
182
|
+
use_vcr_cassette "pig_job/apache_log_reports_with_bootstrap", :record => :none
|
|
183
|
+
it "should kick off the sample Amazion EMR Pig application" do
|
|
184
|
+
pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
|
|
185
|
+
pig.ec2_key_name = "sharethrough_dev"
|
|
186
|
+
pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
|
|
187
|
+
jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
|
|
188
|
+
"INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
|
|
189
|
+
"OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-10"
|
|
190
|
+
})
|
|
191
|
+
jobflow_id.should == "j-1UK43AWRT3QHD"
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
context "without bootstrap actions" do
|
|
196
|
+
use_vcr_cassette "pig_job/apache_log_reports", :record => :none
|
|
197
|
+
it "should kick off the sample Amazion EMR Pig application" do
|
|
198
|
+
pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
|
|
199
|
+
pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
|
|
200
|
+
pig.ec2_key_name = "sharethrough_dev"
|
|
201
|
+
jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
|
|
202
|
+
"INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
|
|
203
|
+
"OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
|
|
204
|
+
})
|
|
205
|
+
jobflow_id.should == "j-1HB7A3TBRT3VS"
|
|
206
|
+
end
|
|
157
207
|
end
|
|
158
208
|
end
|
|
159
209
|
|
|
210
|
+
|
|
160
211
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: elasticity
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
hash:
|
|
4
|
+
hash: 27
|
|
5
5
|
prerelease:
|
|
6
6
|
segments:
|
|
7
7
|
- 1
|
|
8
8
|
- 2
|
|
9
|
-
-
|
|
10
|
-
version: 1.2.
|
|
9
|
+
- 2
|
|
10
|
+
version: 1.2.2
|
|
11
11
|
platform: ruby
|
|
12
12
|
authors:
|
|
13
13
|
- Robert Slifka
|
|
@@ -15,7 +15,7 @@ autorequire:
|
|
|
15
15
|
bindir: bin
|
|
16
16
|
cert_chain: []
|
|
17
17
|
|
|
18
|
-
date: 2011-05-
|
|
18
|
+
date: 2011-05-10 00:00:00 -07:00
|
|
19
19
|
default_executable:
|
|
20
20
|
dependencies:
|
|
21
21
|
- !ruby/object:Gem::Dependency
|
|
@@ -186,6 +186,7 @@ files:
|
|
|
186
186
|
- spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
|
|
187
187
|
- spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
|
|
188
188
|
- spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
|
|
189
|
+
- spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml
|
|
189
190
|
- spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
|
|
190
191
|
- spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
|
|
191
192
|
- spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml
|
|
@@ -240,6 +241,7 @@ test_files:
|
|
|
240
241
|
- spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
|
|
241
242
|
- spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
|
|
242
243
|
- spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
|
|
244
|
+
- spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml
|
|
243
245
|
- spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
|
|
244
246
|
- spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
|
|
245
247
|
- spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml
|