elasticity 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/HISTORY.mediawiki CHANGED
@@ -1,3 +1,7 @@
1
+ === 1.2.2 ===
2
+
3
+ * HiveJob and PigJob now support configuring Hadoop options via .add_hadoop_bootstrap_action().
4
+
1
5
  === 1.2.1 ===
2
6
 
3
7
  * The E_PARALLELS Pig variable is now shipped with each invocation; it provides a reasonable default value for PARALLEL based on the number and type of instances configured.
data/README.mediawiki CHANGED
@@ -26,7 +26,17 @@ Elasticity currently provides simplified access to launching Hive and Pig job fl
26
26
  @slave_instance_type = "m1.small"
27
27
  </pre>
28
28
 
29
- These are all accessible from HiveJob and PigJob instances. See the PigJob description for an example.
29
+ These are all accessible from HiveJob and PigJob. See the PigJob description for an example.
30
+
31
+ === Bootstrap Actions ===
32
+
33
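+ You can also configure Hadoop options with add_hadoop_bootstrap_action(option, value). Each call adds one configure-hadoop bootstrap action to the job flow, so call it once per option you want to set.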
34
+
35
+ <pre>
36
+ pig = Elasticity::PigJob.new(ENV["AWS_ACCESS_KEY_ID"], ENV["AWS_SECRET_KEY"])
37
+ pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
38
+ ...
39
+ </pre>
30
40
 
31
41
  == Hive ==
32
42
 
@@ -76,6 +76,7 @@ module Elasticity
76
76
  }
77
77
 
78
78
  jobflow_config.merge!(:log_uri => @log_uri) if @log_uri
79
+ jobflow_config.merge!(get_bootstrap_actions)
79
80
 
80
81
  @emr.run_job_flow(jobflow_config)
81
82
  end
@@ -27,6 +27,24 @@ module Elasticity
27
27
  @emr = Elasticity::EMR.new(aws_access_key_id, aws_secret_access_key)
28
28
  end
29
29
 
30
+ def add_hadoop_bootstrap_action(option, value)
31
+ @hadoop_actions ||= []
32
+ @hadoop_actions << {
33
+ :name => "Elasticity Bootstrap Action (Configure Hadoop)",
34
+ :script_bootstrap_action => {
35
+ :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
36
+ :args => [option, value]
37
+ }
38
+ }
39
+ end
40
+
41
+ private
42
+
43
+ def get_bootstrap_actions
44
+ return {} unless @hadoop_actions && !@hadoop_actions.empty?
45
+ { :bootstrap_actions => @hadoop_actions }
46
+ end
47
+
30
48
  end
31
49
 
32
50
  end
@@ -1,3 +1,3 @@
1
1
  module Elasticity
2
- VERSION = "1.2.1"
2
+ VERSION = "1.2.2"
3
3
  end
@@ -0,0 +1,35 @@
1
+ ---
2
+ - !ruby/struct:VCR::HTTPInteraction
3
+ request: !ruby/struct:VCR::Request
4
+ method: :get
5
+ uri: !ruby/regexp /^http:\/\/elasticmapreduce.amazonaws.com:80\/\?AWSAccessKeyId=AKIAI7HEMMNKGT6VFFSA&BootstrapActions.member.1.Name=Elasticity%20Bootstrap%20Action%20\(Configure%20Hadoop\)&BootstrapActions.member.1.ScriptBootstrapAction.Args.member.1=-m&BootstrapActions.member.1.ScriptBootstrapAction.Args.member.2=mapred.job.reuse.jvm.num.tasks=120&BootstrapActions.member.1.ScriptBootstrapAction.Path=s3n:\/\/elasticmapreduce\/bootstrap-actions\/configure-hadoop&Instances.Ec2KeyName=sharethrough_dev&Instances.HadoopVersion=0.20&Instances.InstanceCount=2&Instances.MasterInstanceType=m1.small&Instances.SlaveInstanceType=m1.small&Name=Elasticity%20Pig%20Job&Operation=RunJobFlow&.*&Steps.member.1.ActionOnFailure=TERMINATE_JOB_FLOW&Steps.member.1.HadoopJarStep.Args.member.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps.member.1.HadoopJarStep.Args.member.2=--base-path&Steps.member.1.HadoopJarStep.Args.member.3=s3:\/\/elasticmapreduce\/libs\/pig\/&Steps.member.1.HadoopJarStep.Args.member.4=--install-pig&Steps.member.1.HadoopJarStep.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runner.jar&Steps.member.1.Name=Setup%20Pig&Steps.member.2.ActionOnFailure=TERMINATE_JOB_FLOW&Steps.member.2.HadoopJarStep.Args.member.1=s3:\/\/elasticmapreduce\/libs\/pig\/pig-script&Steps.member.2.HadoopJarStep.Args.member.10=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/do-reports.pig&Steps.member.2.HadoopJarStep.Args.member.2=--run-pig-script&Steps.member.2.HadoopJarStep.Args.member.3=--args&Steps.member.2.HadoopJarStep.Args.member.4=-p&Steps.member.2.HadoopJarStep.Args.member.5=INPUT=s3n:\/\/elasticmapreduce\/samples\/pig-apache\/input&Steps.member.2.HadoopJarStep.Args.member.6=-p&Steps.member.2.HadoopJarStep.Args.member.7=OUTPUT=s3n:\/\/slif-elasticity\/pig-apache\/output\/2011-05-10&Steps.member.2.HadoopJarStep.Args.member.8=-p&Steps.member.2.HadoopJarStep.Args.member.9=E_PARALLELS=1&Steps.member.2.HadoopJarStep.Jar=s3:\/\/elasticmapreduce\/libs\/script-runner\/script-runne
r.jar&Steps.member.2.Name=Run%20Pig%20Script/
6
+ body:
7
+ headers:
8
+ accept:
9
+ - "*/*; q=0.5, application/xml"
10
+ accept-encoding:
11
+ - gzip, deflate
12
+ response: !ruby/struct:VCR::Response
13
+ status: !ruby/struct:VCR::ResponseStatus
14
+ code: 200
15
+ message: OK
16
+ headers:
17
+ x-amzn-requestid:
18
+ - 10533247-7ad7-11e0-86ad-3b6ddfd3d4b9
19
+ content-type:
20
+ - text/xml
21
+ date:
22
+ - Tue, 10 May 2011 07:28:12 GMT
23
+ content-length:
24
+ - "297"
25
+ body: |
26
+ <RunJobFlowResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
27
+ <RunJobFlowResult>
28
+ <JobFlowId>j-1UK43AWRT3QHD</JobFlowId>
29
+ </RunJobFlowResult>
30
+ <ResponseMetadata>
31
+ <RequestId>10533247-7ad7-11e0-86ad-3b6ddfd3d4b9</RequestId>
32
+ </ResponseMetadata>
33
+ </RunJobFlowResponse>
34
+
35
+ http_version: "1.1"
@@ -81,80 +81,131 @@ describe Elasticity::PigJob do
81
81
 
82
82
  describe "#run" do
83
83
 
84
- it "should run the script with the specified variables and return the jobflow_id" do
85
- aws = Elasticity::EMR.new("", "")
86
- aws.should_receive(:run_job_flow).with({
87
- :name => "Elasticity Pig Job",
88
- :log_uri => "s3n://slif-test/output/logs",
89
- :instances => {
90
- :ec2_key_name => "default",
91
- :hadoop_version => "0.20",
92
- :instance_count => 8,
93
- :master_instance_type => "m1.small",
94
- :slave_instance_type => "m1.xlarge",
95
- },
96
- :steps => [
97
- {
98
- :action_on_failure => "TERMINATE_JOB_FLOW",
99
- :hadoop_jar_step => {
100
- :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
101
- :args => [
102
- "s3://elasticmapreduce/libs/pig/pig-script",
103
- "--base-path",
104
- "s3://elasticmapreduce/libs/pig/",
105
- "--install-pig"
106
- ],
107
- },
108
- :name => "Setup Pig"
84
+ context "when no bootstrap actions are specified" do
85
+
86
+ it "should run the script with the specified variables and return the jobflow_id" do
87
+ aws = Elasticity::EMR.new("", "")
88
+ aws.should_receive(:run_job_flow).with({
89
+ :name => "Elasticity Pig Job",
90
+ :log_uri => "s3n://slif-test/output/logs",
91
+ :instances => {
92
+ :ec2_key_name => "default",
93
+ :hadoop_version => "0.20",
94
+ :instance_count => 8,
95
+ :master_instance_type => "m1.small",
96
+ :slave_instance_type => "m1.xlarge",
109
97
  },
98
+ :steps => [
110
99
  {
111
- :action_on_failure => "CONTINUE",
100
+ :action_on_failure => "TERMINATE_JOB_FLOW",
112
101
  :hadoop_jar_step => {
113
102
  :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
114
103
  :args => [
115
104
  "s3://elasticmapreduce/libs/pig/pig-script",
116
- "--run-pig-script",
117
- "--args",
118
- "-p", "OUTPUT=s3n://slif-pig-test/output",
119
- "-p", "XREFS=s3n://slif-pig-test/xrefs",
120
- "-p", "E_PARALLELS=26",
121
- "s3n://slif-pig-test/test.pig"
105
+ "--base-path",
106
+ "s3://elasticmapreduce/libs/pig/",
107
+ "--install-pig"
122
108
  ],
123
109
  },
124
- :name => "Run Pig Script"
125
- }
126
- ]
127
- }).and_return("new_jobflow_id")
128
-
129
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
130
- pig = Elasticity::PigJob.new("access", "secret")
131
-
132
- pig.log_uri = "s3n://slif-test/output/logs"
133
- pig.action_on_failure = "CONTINUE"
134
- pig.instance_count = 8
135
- pig.slave_instance_type = "m1.xlarge"
110
+ :name => "Setup Pig"
111
+ },
112
+ {
113
+ :action_on_failure => "CONTINUE",
114
+ :hadoop_jar_step => {
115
+ :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
116
+ :args => [
117
+ "s3://elasticmapreduce/libs/pig/pig-script",
118
+ "--run-pig-script",
119
+ "--args",
120
+ "-p", "OUTPUT=s3n://slif-pig-test/output",
121
+ "-p", "XREFS=s3n://slif-pig-test/xrefs",
122
+ "-p", "E_PARALLELS=26",
123
+ "s3n://slif-pig-test/test.pig"
124
+ ],
125
+ },
126
+ :name => "Run Pig Script"
127
+ }
128
+ ]
129
+ }).and_return("new_jobflow_id")
130
+
131
+ Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
132
+ pig = Elasticity::PigJob.new("access", "secret")
133
+
134
+ pig.log_uri = "s3n://slif-test/output/logs"
135
+ pig.action_on_failure = "CONTINUE"
136
+ pig.instance_count = 8
137
+ pig.slave_instance_type = "m1.xlarge"
138
+
139
+ jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
140
+ 'OUTPUT' => 's3n://slif-pig-test/output',
141
+ 'XREFS' => 's3n://slif-pig-test/xrefs'
142
+ })
143
+ jobflow_id.should == "new_jobflow_id"
144
+ end
145
+ end
136
146
 
137
- jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
138
- 'OUTPUT' => 's3n://slif-pig-test/output',
139
- 'XREFS' => 's3n://slif-pig-test/xrefs'
140
- })
141
- jobflow_id.should == "new_jobflow_id"
147
+ context "when bootstrap actions are specified" do
148
+ it "should run the script wth the proper job configuration" do
149
+ aws = Elasticity::EMR.new("", "")
150
+ aws.should_receive(:run_job_flow).with(hash_including({
151
+ :bootstrap_actions => [
152
+ {
153
+ :name => "Elasticity Bootstrap Action (Configure Hadoop)",
154
+ :script_bootstrap_action => {
155
+ :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
156
+ :args => ["-m", "foo=111"]
157
+ }
158
+ },
159
+ {
160
+ :name => "Elasticity Bootstrap Action (Configure Hadoop)",
161
+ :script_bootstrap_action => {
162
+ :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
163
+ :args => ["-m", "bar=222"]
164
+ }
165
+ }
166
+ ],
167
+ }))
168
+
169
+ Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
170
+ pig = Elasticity::PigJob.new("access", "secret")
171
+ pig.add_hadoop_bootstrap_action("-m", "foo=111")
172
+ pig.add_hadoop_bootstrap_action("-m", "bar=222")
173
+ pig.run('s3n://slif-pig-test/test.pig')
174
+ end
142
175
  end
143
176
 
144
177
  end
145
178
 
146
179
  describe "integration happy path" do
147
- use_vcr_cassette "pig_job/apache_log_reports", :record => :none
148
- it "should kick off the sample Amazion EMR Pig application" do
149
- pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
150
- pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
151
- pig.ec2_key_name = "sharethrough_dev"
152
- jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
153
- "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
154
- "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
155
- })
156
- jobflow_id.should == "j-1HB7A3TBRT3VS"
180
+
181
+ context "with bootstrap actions" do
182
+ use_vcr_cassette "pig_job/apache_log_reports_with_bootstrap", :record => :none
183
+ it "should kick off the sample Amazion EMR Pig application" do
184
+ pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
185
+ pig.ec2_key_name = "sharethrough_dev"
186
+ pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
187
+ jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
188
+ "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
189
+ "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-10"
190
+ })
191
+ jobflow_id.should == "j-1UK43AWRT3QHD"
192
+ end
193
+ end
194
+
195
+ context "without bootstrap actions" do
196
+ use_vcr_cassette "pig_job/apache_log_reports", :record => :none
197
+ it "should kick off the sample Amazion EMR Pig application" do
198
+ pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
199
+ pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
200
+ pig.ec2_key_name = "sharethrough_dev"
201
+ jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
202
+ "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
203
+ "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
204
+ })
205
+ jobflow_id.should == "j-1HB7A3TBRT3VS"
206
+ end
157
207
  end
158
208
  end
159
209
 
210
+
160
211
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticity
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 2
9
- - 1
10
- version: 1.2.1
9
+ - 2
10
+ version: 1.2.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Robert Slifka
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-05-07 00:00:00 -07:00
18
+ date: 2011-05-10 00:00:00 -07:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -186,6 +186,7 @@ files:
186
186
  - spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
187
187
  - spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
188
188
  - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
189
+ - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml
189
190
  - spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
190
191
  - spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
191
192
  - spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml
@@ -240,6 +241,7 @@ test_files:
240
241
  - spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml
241
242
  - spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml
242
243
  - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml
244
+ - spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml
243
245
  - spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml
244
246
  - spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml
245
247
  - spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml