elasticity 1.2.1 → 1.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/HISTORY.mediawiki CHANGED
@@ -1,3 +1,7 @@
1
+ === 1.2.2 ===
2
+
3
+ * HiveJob and PigJob now support configuring Hadoop options via .add_hadoop_bootstrap_action().
4
+
1
5
  === 1.2.1 ===
2
6
 
3
7
  * The E_PARALLELS Pig variable is now passed with each invocation; it provides a reasonable default value for PARALLEL based on the number and type of instances configured.
data/README.mediawiki CHANGED
@@ -26,7 +26,17 @@ Elasticity currently provides simplified access to launching Hive and Pig job fl
26
26
  @slave_instance_type = "m1.small"
27
27
  </pre>
28
28
 
29
- These are all accessible from HiveJob and PigJob instances. See the PigJob description for an example.
29
+ These are all accessible from HiveJob and PigJob. See the PigJob description for an example.
30
+
31
+ === Bootstrap Actions ===
32
+
33
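+ You can also configure Hadoop options with add_hadoop_bootstrap_action(), which takes the option flag and the value to pass to Amazon's configure-hadoop bootstrap script. Call it once for each setting you want to apply.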
34
+
35
+ <pre>
36
+ pig = Elasticity::PigJob.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
37
+ pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
38
+ ...
39
+ </pre>
30
40
 
31
41
  == Hive ==
32
42
 
@@ -76,6 +76,7 @@ module Elasticity
76
76
  }
77
77
 
78
78
  jobflow_config.merge!(:log_uri => @log_uri) if @log_uri
79
+ jobflow_config.merge!(get_bootstrap_actions)
79
80
 
80
81
  @emr.run_job_flow(jobflow_config)
81
82
  end
@@ -27,6 +27,24 @@ module Elasticity
27
27
  @emr = Elasticity::EMR.new(aws_access_key_id, aws_secret_access_key)
28
28
  end
29
29
 
30
+ def add_hadoop_bootstrap_action(option, value)
31
+ @hadoop_actions ||= []
32
+ @hadoop_actions << {
33
+ :name => "Elasticity Bootstrap Action (Configure Hadoop)",
34
+ :script_bootstrap_action => {
35
+ :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
36
+ :args => [option, value]
37
+ }
38
+ }
39
+ end
40
+
41
+ private
42
+
43
+ def get_bootstrap_actions
44
+ return {} unless @hadoop_actions && !@hadoop_actions.empty?
45
+ { :bootstrap_actions => @hadoop_actions }
46
+ end
47
+
30
48
  end
31
49
 
32
50
  end
@@ -1,3 +1,3 @@
1
1
  module Elasticity
2
- VERSION = "1.2.1"
2
+ VERSION = "1.2.2"
3
3
  end
@@ -0,0 +1,35 @@
1
+ ---
2
+ - !ruby/struct:VCR::HTTPInteraction
3
+ request: !ruby/struct:VCR::Request
4
+ method: :get
5
+ uri: !ruby/regexp /^http:\/\/elasticmapreduce.amazonaws.com:80\/\?AWSAccessKeyId=AKIAI7HEMMNKGT6VFFSA&BootstrapActions.member.1.Name=Elasticity%20Bootstrap%20Action%20\(Configure%20Hadoop\)&BootstrapActions.member.1.ScriptBootstrapAction.Args.member.1=-m&BootstrapActions.member.1.ScriptBootstrapAction.Args.member.2=mapred.job.reuse.jvm.num.tasks=120&BootstrapActions.member.1.ScriptBootstrapAction.Path=s3n:\/\/elasticmapreduce\/bootstrap-actions\/configure-hadoop&Instances.Ec2KeyName=sharethrough_dev&Instances.HadoopVersion=0.20&Instances.InstanceCount=2&Instances.MasterInstanceType=m1.small&Instances.SlaveInstanceType=m1.small&Name=Elasticity%20Pig%20Job&Operation=RunJobFlow&.*&Steps.member.1.ActionOnFailure=TERMINATE_JOB_FLOW&Steps.member.1.HadoopJarStep.Args.member.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps.member.1.HadoopJarStep.Args.member.2=--base-path&Steps.member.1.HadoopJarStep.Args.member.3=s3:\/\/elasticmapreduce\/libs\/pig\/&Steps.member.1.HadoopJarStep.Args.member.4=--install-pig&Steps.member.1.HadoopJarStep.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runner.jar&Steps.member.1.Name=Setup%20Pig&Steps.member.2.ActionOnFailure=TERMINATE_JOB_FLOW&Steps.member.2.HadoopJarStep.Args.member.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps.member.2.HadoopJarStep.Args.member.10=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/do-reports.pig&Steps.member.2.HadoopJarStep.Args.member.2=--run-pig-script&Steps.member.2.HadoopJarStep.Args.member.3=--args&Steps.member.2.HadoopJarStep.Args.member.4=-p&Steps.member.2.HadoopJarStep.Args.member.5=INPUT=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/input&Steps.member.2.HadoopJarStep.Args.member.6=-p&Steps.member.2.HadoopJarStep.Args.member.7=OUTPUT=s3n:\/\/slif-elasticity\/pig-apache\/output\/2011-05-10&Steps.member.2.HadoopJarStep.Args.member.8=-p&Steps.member.2.HadoopJarStep.Args.member.9=E_PARALLELS=1&Steps.member.2.HadoopJarStep.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runne
r.jar&Steps.member.2.Name=Run%20Pig%20Script/
6
+ body:
7
+ headers:
8
+ accept:
9
+ - "*/*; q=0.5, application/xml"
10
+ accept-encoding:
11
+ - gzip, deflate
12
+ response: !ruby/struct:VCR::Response
13
+ status: !ruby/struct:VCR::ResponseStatus
14
+ code: 200
15
+ message: OK
16
+ headers:
17
+ x-amzn-requestid:
18
+ - 10533247-7ad7-11e0-86ad-3b6ddfd3d4b9
19
+ content-type:
20
+ - text/xml
21
+ date:
22
+ - Tue, 10 May 2011 07:28:12 GMT
23
+ content-length:
24
+ - "297"
25
+ body: |
26
+ <RunJobFlowResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
27
+ <RunJobFlowResult>
28
+ <JobFlowId>j-1UK43AWRT3QHD</JobFlowId>
29
+ </RunJobFlowResult>
30
+ <ResponseMetadata>
31
+ <RequestId>10533247-7ad7-11e0-86ad-3b6ddfd3d4b9</RequestId>
32
+ </ResponseMetadata>
33
+ </RunJobFlowResponse>
34
+
35
+ http_version: "1.1"
@@ -81,80 +81,131 @@ describe Elasticity::PigJob do
81
81
 
82
82
  describe "#run" do
83
83
 
84
- it "should run the script with the specified variables and return the jobflow_id" do
85
- aws = Elasticity::EMR.new("", "")
86
- aws.should_receive(:run_job_flow).with({
87
- :name => "Elasticity Pig Job",
88
- :log_uri => "s3n://slif-test/output/logs",
89
- :instances => {
90
- :ec2_key_name => "default",
91
- :hadoop_version => "0.20",
92
- :instance_count => 8,
93
- :master_instance_type => "m1.small",
94
- :slave_instance_type => "m1.xlarge",
95
- },
96
- :steps => [
97
- {
98
- :action_on_failure => "TERMINATE_JOB_FLOW",
99
- :hadoop_jar_step => {
100
- :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
101
- :args => [
102
- "s3://elasticmapreduce/libs/pig/pig-script",
103
- "--base-path",
104
- "s3://elasticmapreduce/libs/pig/",
105
- "--install-pig"
106
- ],
107
- },
108
- :name => "Setup Pig"
84
+ context "when no bootstrap actions are specified" do
85
+
86
+ it "should run the script with the specified variables and return the jobflow_id" do
87
+ aws = Elasticity::EMR.new("", "")
88
+ aws.should_receive(:run_job_flow).with({
89
+ :name => "Elasticity Pig Job",
90
+ :log_uri => "s3n://slif-test/output/logs",
91
+ :instances => {
92
+ :ec2_key_name => "default",
93
+ :hadoop_version => "0.20",
94
+ :instance_count => 8,
95
+ :master_instance_type => "m1.small",
96
+ :slave_instance_type => "m1.xlarge",
109
97
  },
98
+ :steps => [
110
99
  {
111
- :action_on_failure => "CONTINUE",
100
+ :action_on_failure => "TERMINATE_JOB_FLOW",
112
101
  :hadoop_jar_step => {
113
102
  :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
114
103
  :args => [
115
104
  "s3://elasticmapreduce/libs/pig/pig-script",
116
- "--run-pig-script",
117
- "--args",
118
- "-p", "OUTPUT=s3n://slif-pig-test/output",
119
- "-p", "XREFS=s3n://slif-pig-test/xrefs",
120
- "-p", "E_PARALLELS=26",
121
- "s3n://slif-pig-test/test.pig"
105
+ "--base-path",
106
+ "s3://elasticmapreduce/libs/pig/",
107
+ "--install-pig"
122
108
  ],
123
109
  },
124
- :name => "Run Pig Script"
125
- }
126
- ]
127
- }).and_return("new_jobflow_id")
128
-
129
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
130
- pig = Elasticity::PigJob.new("access", "secret")
131
-
132
- pig.log_uri = "s3n://slif-test/output/logs"
133
- pig.action_on_failure = "CONTINUE"
134
- pig.instance_count = 8
135
- pig.slave_instance_type = "m1.xlarge"
110
+ :name => "Setup Pig"
111
+ },
112
+ {
113
+ :action_on_failure => "CONTINUE",
114
+ :hadoop_jar_step => {
115
+ :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
116
+ :args => [
117
+ "s3://elasticmapreduce/libs/pig/pig-script",
118
+ "--run-pig-script",
119
+ "--args",
120
+ "-p", "OUTPUT=s3n://slif-pig-test/output",
121
+ "-p", "XREFS=s3n://slif-pig-test/xrefs",
122
+ "-p", "E_PARALLELS=26",
123
+ "s3n://slif-pig-test/test.pig"
124
+ ],
125
+ },
126
+ :name => "Run Pig Script"
127
+ }
128
+ ]
129
+ }).and_return("new_jobflow_id")
130
+
131
+ Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
132
+ pig = Elasticity::PigJob.new("access", "secret")
133
+
134
+ pig.log_uri = "s3n://slif-test/output/logs"
135
+ pig.action_on_failure = "CONTINUE"
136
+ pig.instance_count = 8
137
+ pig.slave_instance_type = "m1.xlarge"
138
+
139
+ jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
140
+ 'OUTPUT' => 's3n://slif-pig-test/output',
141
+ 'XREFS' => 's3n://slif-pig-test/xrefs'
142
+ })
143
+ jobflow_id.should == "new_jobflow_id"
144
+ end
145
+ end
136
146
 
137
- jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
138
- 'OUTPUT' => 's3n://slif-pig-test/output',
139
- 'XREFS' => 's3n://slif-pig-test/xrefs'
140
- })
141
- jobflow_id.should == "new_jobflow_id"
147
+ context "when bootstrap actions are specified" do
148
+ it "should run the script wth the proper job configuration" do
149
+ aws = Elasticity::EMR.new("", "")
150
+ aws.should_receive(:run_job_flow).with(hash_including({
151
+ :bootstrap_actions => [
152
+ {
153
+ :name => "Elasticity Bootstrap Action (Configure Hadoop)",
154
+ :script_bootstrap_action => {
155
+ :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
156
+ :args => ["-m", "foo=111"]
157
+ }
158
+ },
159
+ {
160
+ :name => "Elasticity Bootstrap Action (Configure Hadoop)",
161
+ :script_bootstrap_action => {
162
+ :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
163
+ :args => ["-m", "bar=222"]
164
+ }
165
+ }
166
+ ],
167
+ }))
168
+
169
+ Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
170
+ pig = Elasticity::PigJob.new("access", "secret")
171
+ pig.add_hadoop_bootstrap_action("-m", "foo=111")
172
+ pig.add_hadoop_bootstrap_action("-m", "bar=222")
173
+ pig.run('s3n://slif-pig-test/test.pig')
174
+ end
142
175
  end
143
176
 
144
177
  end
145
178
 
146
179
  describe "integration happy path" do
147
- use_vcr_cassette "pig_job/apache_log_reports", :record => :none
148
- it "should kick off the sample Amazion EMR Pig application" do
149
- pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
150
- pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
151
- pig.ec2_key_name = "sharethrough_dev"
152
- jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
153
- "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
154
- "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
155
- })
156
- jobflow_id.should == "j-1HB7A3TBRT3VS"
180
+
181
+ context "with bootstrap actions" do
182
+ use_vcr_cassette "pig_job/apache_log_reports_with_bootstrap", :record => :none
183
+ it "should kick off the sample Amazion EMR Pig application" do
184
+ pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
185
+ pig.ec2_key_name = "sharethrough_dev"
186
+ pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
187
+ jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
188
+ "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
189
+ "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-10"
190
+ })
191
+ jobflow_id.should == "j-1UK43AWRT3QHD"
192
+ end
193
+ end
194
+
195
+ context "without bootstrap actions" do
196
+ use_vcr_cassette "pig_job/apache_log_reports", :record => :none
197
+ it "should kick off the sample Amazion EMR Pig application" do
198
+ pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
199
+ pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
200
+ pig.ec2_key_name = "sharethrough_dev"
201
+ jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
202
+ "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
203
+ "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
204
+ })
205
+ jobflow_id.should == "j-1HB7A3TBRT3VS"
206
+ end
157
207
  end
158
208
  end
159
209
 
210
+
160
211
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticity
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 2
9
- - 1
10
- version: 1.2.1
9
+ - 2
10
+ version: 1.2.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Robert Slifka
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-05-07 00:00:00 -07:00
18
+ date: 2011-05-10 00:00:00 -07:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -186,6 +186,7 @@ files:
186
186
  - spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
187
187
  - spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
188
188
  - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
189
+ - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml
189
190
  - spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
190
191
  - spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
191
192
  - spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml
@@ -240,6 +241,7 @@ test_files:
240
241
  - spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
241
242
  - spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
242
243
  - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
244
+ - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml
243
245
  - spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
244
246
  - spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
245
247
  - spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml