wakoopa-elasticity 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. data/.autotest +2 -0
  2. data/.gitignore +5 -0
  3. data/.rspec +2 -0
  4. data/.rvmrc +1 -0
  5. data/Gemfile +4 -0
  6. data/HISTORY.mediawiki +30 -0
  7. data/LICENSE +202 -0
  8. data/README.mediawiki +332 -0
  9. data/Rakefile +11 -0
  10. data/elasticity.gemspec +29 -0
  11. data/lib/elasticity.rb +16 -0
  12. data/lib/elasticity/aws_request.rb +52 -0
  13. data/lib/elasticity/emr.rb +282 -0
  14. data/lib/elasticity/hive_job.rb +71 -0
  15. data/lib/elasticity/job_flow.rb +53 -0
  16. data/lib/elasticity/job_flow_step.rb +36 -0
  17. data/lib/elasticity/pig_job.rb +112 -0
  18. data/lib/elasticity/simple_job.rb +50 -0
  19. data/lib/elasticity/version.rb +3 -0
  20. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +38 -0
  21. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +35 -0
  22. data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +252 -0
  23. data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +69 -0
  24. data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +32 -0
  25. data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +35 -0
  26. data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +32 -0
  27. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +35 -0
  28. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +35 -0
  29. data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +35 -0
  30. data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +35 -0
  31. data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +32 -0
  32. data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +32 -0
  33. data/spec/lib/elasticity/aws_request_spec.rb +62 -0
  34. data/spec/lib/elasticity/emr_spec.rb +794 -0
  35. data/spec/lib/elasticity/hive_job_spec.rb +96 -0
  36. data/spec/lib/elasticity/job_flow_spec.rb +139 -0
  37. data/spec/lib/elasticity/job_flow_step_spec.rb +76 -0
  38. data/spec/lib/elasticity/pig_job_spec.rb +211 -0
  39. data/spec/spec_helper.rb +43 -0
  40. metadata +253 -0
@@ -0,0 +1,794 @@
1
+ require 'spec_helper'
2
+
3
+ describe Elasticity::EMR do
4
+
5
+ describe "#add_instance_groups" do
6
+
7
+ describe "integration happy path" do
8
+
9
+ context "when properly specified" do
10
+ use_vcr_cassette "add_instance_groups/one_group_successful", :record => :none
11
+ it "should add the instance groups" do
12
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
13
+ instance_group_config = {
14
+ :instance_count => 1,
15
+ :instance_role => "TASK",
16
+ :instance_type => "m1.small",
17
+ :market => "ON_DEMAND",
18
+ :name => "Go Canucks Go!"
19
+ }
20
+ instance_group_ids = emr.add_instance_groups("j-OALI7TZTQMHX", [instance_group_config])
21
+ instance_group_ids.should == ["ig-2GOVEN6HVJZID"]
22
+ end
23
+ end
24
+
25
+ context "when improperly specified" do
26
+ use_vcr_cassette "add_instance_groups/one_group_unsuccessful", :record => :none
27
+ it "should add the instance groups" do
28
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
29
+ instance_group_config = {
30
+ :bid_price => 0,
31
+ :instance_count => 1,
32
+ :instance_role => "TASK",
33
+ :instance_type => "m1.small",
34
+ :market => "ON_DEMAND",
35
+ :name => "Go Canucks Go!"
36
+ }
37
+ lambda {
38
+ emr.add_instance_groups("j-19WDDS68ZUENP", [instance_group_config])
39
+ }.should raise_error(ArgumentError, "Task instance group already exists in the job flow, cannot add more task groups")
40
+ end
41
+ end
42
+
43
+ end
44
+
45
+ describe "unit tests" do
46
+
47
+ context "when multiple instance groups are specified" do
48
+ before do
49
+ @add_instance_groups_xml = <<-ADD_GROUPS
50
+ <AddInstanceGroupsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
51
+ <AddInstanceGroupsResult>
52
+ <JobFlowId>j-OALI7TZTQMHX</JobFlowId>
53
+ <InstanceGroupIds>
54
+ <member>ig-1</member>
55
+ <member>ig-2</member>
56
+ <member>ig-3</member>
57
+ </InstanceGroupIds>
58
+ </AddInstanceGroupsResult>
59
+ </AddInstanceGroupsResponse>
60
+ ADD_GROUPS
61
+ end
62
+
63
+ it "should iterate over them and send the correct params to AWS" do
64
+ instance_group_configs = [
65
+ {:instance_type=>"m1.small", :instance_role=>"CORE", :market=>"ON_DEMAND", :instance_count=>1, :name=>"Go Canucks Go!", :bid_price=>0},
66
+ {:instance_type=>"m1.small", :instance_role=>"CORE", :market=>"ON_DEMAND", :instance_count=>1, :name=>"Go Canucks Go!", :bid_price=>0},
67
+ ]
68
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
69
+ aws_request.should_receive(:aws_emr_request).with({
70
+ "Operation" => "AddInstanceGroups",
71
+ "InstanceGroups.member.1.Name"=>"Go Canucks Go!",
72
+ "InstanceGroups.member.1.InstanceRole"=>"CORE",
73
+ "InstanceGroups.member.1.InstanceCount"=>1,
74
+ "InstanceGroups.member.1.BidPrice"=>0,
75
+ "InstanceGroups.member.1.InstanceType"=>"m1.small",
76
+ "InstanceGroups.member.1.Market"=>"ON_DEMAND",
77
+ "InstanceGroups.member.2.Name"=>"Go Canucks Go!",
78
+ "InstanceGroups.member.2.InstanceRole"=>"CORE",
79
+ "InstanceGroups.member.2.InstanceCount"=>1,
80
+ "InstanceGroups.member.2.BidPrice"=>0,
81
+ "InstanceGroups.member.2.InstanceType"=>"m1.small",
82
+ "InstanceGroups.member.2.Market"=>"ON_DEMAND",
83
+ "JobFlowId"=>"j-19WDDS68ZUENP"
84
+ })
85
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
86
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
87
+ emr.add_instance_groups("j-19WDDS68ZUENP", instance_group_configs)
88
+ end
89
+
90
+ it "should return an array of the instance groups created" do
91
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
92
+ aws_request.should_receive(:aws_emr_request).and_return(@add_instance_groups_xml)
93
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
94
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
95
+ emr.add_instance_groups("", []).should == ["ig-1", "ig-2", "ig-3"]
96
+ end
97
+ end
98
+
99
+ context "when a block is provided" do
100
+ it "should yield the XML result" do
101
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
102
+ aws_request.should_receive(:aws_emr_request).and_return("AWS XML")
103
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
104
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
105
+ xml_result = nil
106
+ emr.add_instance_groups("", []) do |xml|
107
+ xml_result = xml
108
+ end
109
+ xml_result.should == "AWS XML"
110
+ end
111
+ end
112
+
113
+ end
114
+
115
+ end
116
+
117
+ describe "#add_jobflow_steps" do
118
+
119
+ describe "integration happy path" do
120
+ use_vcr_cassette "add_jobflow_steps/add_multiple_steps", :record => :none
121
+
122
+ before do
123
+ @setup_pig_step = {
124
+ :action_on_failure => "TERMINATE_JOB_FLOW",
125
+ :hadoop_jar_step => {
126
+ :args => [
127
+ "s3://elasticmapreduce/libs/pig/pig-script",
128
+ "--base-path",
129
+ "s3://elasticmapreduce/libs/pig/",
130
+ "--install-pig"
131
+ ],
132
+ :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar"
133
+ },
134
+ :name => "Setup Pig"
135
+ }
136
+ @emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
137
+ @jobflow_id = @emr.run_job_flow({
138
+ :name => "Elasticity Test Flow (EMR Pig Script)",
139
+ :instances => {
140
+ :ec2_key_name => "sharethrough-dev",
141
+ :instance_count => 2,
142
+ :master_instance_type => "m1.small",
143
+ :slave_instance_type => "m1.small",
144
+ },
145
+ :steps => [@setup_pig_step]
146
+ })
147
+ end
148
+
149
+ it "should add a job flow step to the specified job flow" do
150
+ @emr.add_jobflow_steps(@jobflow_id, {
151
+ :steps => [
152
+ @setup_pig_step.merge(:name => "Setup Pig 2"),
153
+ @setup_pig_step.merge(:name => "Setup Pig 3")
154
+ ]
155
+ })
156
+ jobflow = @emr.describe_jobflows.select { |jf| jf.jobflow_id = @jobflow_id }.first
157
+ jobflow.steps.map(&:name).should == ["Setup Pig", "Setup Pig 2", "Setup Pig 3"]
158
+ end
159
+
160
+ end
161
+
162
+ describe "unit tests" do
163
+
164
+ it "should add the specified steps to the job flow" do
165
+ aws_request = Elasticity::AwsRequest.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
166
+ aws_request.should_receive(:aws_emr_request).with({
167
+ "Operation" => "AddJobFlowSteps",
168
+ "JobFlowId" => "j-1",
169
+ "Steps.member.1.Name" => "Step 1",
170
+ "Steps.member.1.ActionOnFailure" => "TERMINATE_JOB_FLOW",
171
+ "Steps.member.1.HadoopJarStep.Jar" => "jar1",
172
+ "Steps.member.1.HadoopJarStep.Args.member.1" => "arg1-1",
173
+ "Steps.member.1.HadoopJarStep.Args.member.2" => "arg1-2",
174
+ "Steps.member.2.Name" => "Step 2",
175
+ "Steps.member.2.ActionOnFailure" => "CONTINUE",
176
+ "Steps.member.2.HadoopJarStep.Jar" => "jar2",
177
+ "Steps.member.2.HadoopJarStep.Args.member.1" => "arg2-1",
178
+ "Steps.member.2.HadoopJarStep.Args.member.2" => "arg2-2",
179
+ })
180
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
181
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
182
+ emr.add_jobflow_steps("j-1", {
183
+ :steps => [
184
+ {
185
+ :action_on_failure => "TERMINATE_JOB_FLOW",
186
+ :name => "Step 1",
187
+ :hadoop_jar_step => {
188
+ :args => ["arg1-1", "arg1-2"],
189
+ :jar => "jar1",
190
+ }
191
+ },
192
+ {
193
+ :action_on_failure => "CONTINUE",
194
+ :name => "Step 2",
195
+ :hadoop_jar_step => {
196
+ :args => ["arg2-1", "arg2-2"],
197
+ :jar => "jar2",
198
+ }
199
+ }
200
+ ]
201
+ })
202
+ end
203
+
204
+ context "when there is an error" do
205
+ before do
206
+ @error_message = "2 validation errors detected: Value null at 'steps' failed to satisfy constraint: Member must not be null; Value null at 'jobFlowId' failed to satisfy constraint: Member must not be null"
207
+ @error_xml = <<-ERROR
208
+ <ErrorResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
209
+ <Error>
210
+ <Message>#{@error_message}</Message>
211
+ </Error>
212
+ </ErrorResponse>
213
+ ERROR
214
+ end
215
+
216
+ it "should raise an ArgumentError with the error message" do
217
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
218
+ @exception = RestClient::BadRequest.new
219
+ @exception.should_receive(:http_body).and_return(@error_xml)
220
+ aws_request.should_receive(:aws_emr_request).and_raise(@exception)
221
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
222
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
223
+ lambda {
224
+ emr.add_jobflow_steps("", {})
225
+ }.should raise_error(ArgumentError, @error_message)
226
+ end
227
+ end
228
+
229
+ context "when a block is given" do
230
+ it "should yield the XML result" do
231
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
232
+ aws_request.should_receive(:aws_emr_request).and_return("xml_response")
233
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
234
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
235
+ xml_result = nil
236
+ emr.add_jobflow_steps("", {}) do |xml|
237
+ xml_result = xml
238
+ end
239
+ xml_result.should == "xml_response"
240
+ end
241
+ end
242
+
243
+
244
+ end
245
+
246
+ end
247
+
248
+ describe "#describe_jobflows" do
249
+
250
+ describe "integration happy path" do
251
+ use_vcr_cassette "describe_jobflows/all_jobflows", :record => :none
252
+ it "should return the names of all running job flows" do
253
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
254
+ jobflows = emr.describe_jobflows
255
+ jobflows.map(&:name).should == ["WM+RS", "Interactive Audience Hive Test", "Audience (Hive)", "Audience Reporting"]
256
+ jobflows.map(&:jobflow_id).should == ["j-1MZ5TVWFJRSKN", "j-38EU2XZQP9KJ4", "j-2TDCVGEEHOFI9", "j-NKKQ429D858I"]
257
+ jobflows.map(&:state).should == ["TERMINATED", "TERMINATED", "TERMINATED", "TERMINATED"]
258
+ end
259
+ end
260
+
261
+ describe "unit tests" do
262
+ before do
263
+ @describe_jobflows_xml = <<-JOBFLOWS
264
+ <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
265
+ <DescribeJobFlowsResult>
266
+ <JobFlows>
267
+ <member>
268
+ <ExecutionStatusDetail>
269
+ <State>TERMINATED</State>
270
+ </ExecutionStatusDetail>
271
+ <JobFlowId>j-p</JobFlowId>
272
+ <Name>Pig Job</Name>
273
+ </member>
274
+ <member>
275
+ <ExecutionStatusDetail>
276
+ <State>TERMINATED</State>
277
+ </ExecutionStatusDetail>
278
+ <JobFlowId>j-h</JobFlowId>
279
+ <Name>Hive Job</Name>
280
+ </member>
281
+ </JobFlows>
282
+ </DescribeJobFlowsResult>
283
+ </DescribeJobFlowsResponse>
284
+ JOBFLOWS
285
+ end
286
+
287
+ it "should return the names of all running job flows" do
288
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
289
+ aws_request.should_receive(:aws_emr_request).with({"Operation" => "DescribeJobFlows"}).and_return(@describe_jobflows_xml)
290
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
291
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
292
+ jobflows = emr.describe_jobflows
293
+ jobflows.map(&:name).should == ["Pig Job", "Hive Job"]
294
+ end
295
+
296
+ it "should accept additional parameters" do
297
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
298
+ aws_request.should_receive(:aws_emr_request).with({"Operation" => "DescribeJobFlows","CreatedBefore" => "2011-10-04"}).and_return(@describe_jobflows_xml)
299
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
300
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
301
+ emr.describe_jobflows(:CreatedBefore => "2011-10-04")
302
+ end
303
+
304
+ context "when a block is provided" do
305
+ it "should yield the XML result" do
306
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
307
+ aws_request.should_receive(:aws_emr_request).and_return("describe!")
308
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
309
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
310
+ xml_result = nil
311
+ emr.describe_jobflows do |xml|
312
+ xml_result = xml
313
+ end
314
+ xml_result.should == "describe!"
315
+ end
316
+ end
317
+ end
318
+
319
+ end
320
+
321
+ describe "#modify_instance_groups" do
322
+
323
+ describe "integration happy path" do
324
+ context "when the instance group exists" do
325
+ use_vcr_cassette "modify_instance_groups/set_instances_to_3", :record => :none
326
+ it "should terminate the specified jobflow" do
327
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
328
+ instance_group_config = {"ig-2T1HNUO61BG3O" => 2}
329
+ emr.modify_instance_groups(instance_group_config)
330
+ end
331
+ end
332
+ end
333
+
334
+ describe "unit tests" do
335
+
336
+ context "when the instance group exists" do
337
+ it "should modify the specified instance group" do
338
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
339
+ aws_request.should_receive(:aws_emr_request).with({
340
+ "Operation" => "ModifyInstanceGroups",
341
+ "InstanceGroups.member.1.InstanceGroupId" => "ig-1",
342
+ "InstanceGroups.member.1.InstanceCount" => 2
343
+ })
344
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
345
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
346
+ emr.modify_instance_groups({"ig-1" => 2})
347
+ end
348
+ end
349
+
350
+ context "when a block is given" do
351
+ it "should yield the XML result" do
352
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
353
+ aws_request.should_receive(:aws_emr_request).and_return("xml result!")
354
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
355
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
356
+ xml_result = nil
357
+ emr.modify_instance_groups({"ig-1" => 2}) do |xml|
358
+ xml_result = xml
359
+ end
360
+ xml_result.should == "xml result!"
361
+ end
362
+ end
363
+
364
+
365
+ context "when there is an error" do
366
+
367
+ before do
368
+ @error_message = "1 validation error detected: Value null at 'instanceGroups.1.member.instanceCount' failed to satisfy constraint: Member must not be null"
369
+ @error_xml = <<-ERROR
370
+ <ErrorResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
371
+ <Error>
372
+ <Message>#{@error_message}</Message>
373
+ </Error>
374
+ </ErrorResponse>
375
+ ERROR
376
+ end
377
+
378
+ it "should raise an ArgumentError with the error message" do
379
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
380
+ @exception = RestClient::BadRequest.new
381
+ @exception.should_receive(:http_body).and_return(@error_xml)
382
+ aws_request.should_receive(:aws_emr_request).and_raise(@exception)
383
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
384
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
385
+ lambda {
386
+ emr.modify_instance_groups({"ig-1" => 2})
387
+ }.should raise_error(ArgumentError, @error_message)
388
+ end
389
+
390
+ end
391
+
392
+ end
393
+
394
+ end
395
+
396
+ describe "#run_jobflow" do
397
+
398
+ describe "integration happy path" do
399
+
400
+ context "when the job flow is properly specified" do
401
+ use_vcr_cassette "run_jobflow/word_count", :record => :none
402
+ it "should start the specified job flow" do
403
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
404
+ jobflow_id = emr.run_job_flow({
405
+ :name => "Elasticity Test Flow (EMR Pig Script)",
406
+ :instances => {
407
+ :ec2_key_name => "sharethrough-dev",
408
+ :hadoop_version => "0.20",
409
+ :instance_count => 2,
410
+ :master_instance_type => "m1.small",
411
+ :placement => {
412
+ :availability_zone => "us-east-1a"
413
+ },
414
+ :slave_instance_type => "m1.small",
415
+ },
416
+ :steps => [
417
+ {
418
+ :action_on_failure => "TERMINATE_JOB_FLOW",
419
+ :hadoop_jar_step => {
420
+ :args => [
421
+ "s3://elasticmapreduce/libs/pig/pig-script",
422
+ "--base-path",
423
+ "s3://elasticmapreduce/libs/pig/",
424
+ "--install-pig"
425
+ ],
426
+ :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar"
427
+ },
428
+ :name => "Setup Pig"
429
+ },
430
+ {
431
+ :action_on_failure => "TERMINATE_JOB_FLOW",
432
+ :hadoop_jar_step => {
433
+ :args => [
434
+ "s3://elasticmapreduce/libs/pig/pig-script",
435
+ "--run-pig-script",
436
+ "--args",
437
+ "-p",
438
+ "INPUT=s3n://elasticmapreduce/samples/pig-apache/input",
439
+ "-p",
440
+ "OUTPUT=s3n://slif-elasticity/pig-apache/output/2011-04-19",
441
+ "s3n://elasticmapreduce/samples/pig-apache/do-reports.pig"
442
+ ],
443
+ :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar"
444
+ },
445
+ :name => "Run Pig Script"
446
+ }
447
+ ]
448
+ })
449
+ jobflow_id.should == "j-G6N5HA528AD4"
450
+ end
451
+ end
452
+ end
453
+
454
+ describe "unit tests" do
455
+ it "should return the job flow ID of the new job" do
456
+ run_jobflow_response = <<-RESPONSE
457
+ <RunJobFlowResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
458
+ <RunJobFlowResult>
459
+ <JobFlowId>j-N500G8Y8U7ZQ</JobFlowId>
460
+ </RunJobFlowResult>
461
+ <ResponseMetadata>
462
+ <RequestId>a6dddf4c-6a49-11e0-b6c0-e9580d1f7304</RequestId>
463
+ </ResponseMetadata>
464
+ </RunJobFlowResponse>
465
+ RESPONSE
466
+ aws_request = Elasticity::AwsRequest.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
467
+ aws_request.should_receive(:aws_emr_request).and_return(run_jobflow_response)
468
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
469
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
470
+ jobflow_id = emr.run_job_flow({})
471
+ jobflow_id.should == "j-N500G8Y8U7ZQ"
472
+ end
473
+
474
+ it "should run the specified job flow" do
475
+ aws_request = Elasticity::AwsRequest.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
476
+ aws_request.should_receive(:aws_emr_request).with({
477
+ "Operation" => "RunJobFlow",
478
+ "Name" => "Job flow name",
479
+ "Instances.MasterInstanceType" => "m1.small",
480
+ "Instances.Placement.AvailabilityZone" => "us-east-1a",
481
+ "Steps.member.1.Name" => "Streaming Job",
482
+ "Steps.member.1.ActionOnFailure" => "TERMINATE_JOB_FLOW",
483
+ "Steps.member.1.HadoopJarStep.Jar" => "/home/hadoop/contrib/streaming/hadoop-streaming.jar",
484
+ "Steps.member.1.HadoopJarStep.Args.member.1" => "-input",
485
+ "Steps.member.1.HadoopJarStep.Args.member.2" => "s3n://elasticmapreduce/samples/wordcount/input"
486
+ })
487
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
488
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
489
+ emr.run_job_flow({
490
+ :name => "Job flow name",
491
+ :instances => {
492
+ :master_instance_type => "m1.small",
493
+ :placement => {
494
+ :availability_zone => "us-east-1a"
495
+ }
496
+ },
497
+ :steps => [
498
+ {
499
+ :action_on_failure => "TERMINATE_JOB_FLOW",
500
+ :name => "Streaming Job",
501
+ :hadoop_jar_step => {
502
+ :args => ["-input", "s3n://elasticmapreduce/samples/wordcount/input"],
503
+ :jar => "/home/hadoop/contrib/streaming/hadoop-streaming.jar",
504
+ }
505
+ }
506
+ ]
507
+ })
508
+ end
509
+
510
+ context "when there is an error" do
511
+ before do
512
+ @error_message = "1 validation error detected: Value null at 'instanceGroups.1.member.instanceCount' failed to satisfy constraint: Member must not be null"
513
+ @error_xml = <<-ERROR
514
+ <ErrorResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
515
+ <Error>
516
+ <Message>#{@error_message}</Message>
517
+ </Error>
518
+ </ErrorResponse>
519
+ ERROR
520
+ end
521
+
522
+ it "should raise an ArgumentError with the error message" do
523
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
524
+ @exception = RestClient::BadRequest.new
525
+ @exception.should_receive(:http_body).and_return(@error_xml)
526
+ aws_request.should_receive(:aws_emr_request).and_raise(@exception)
527
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
528
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
529
+ lambda {
530
+ emr.run_job_flow({})
531
+ }.should raise_error(ArgumentError, @error_message)
532
+ end
533
+ end
534
+
535
+ context "when a block is given" do
536
+ it "should yield the XML result" do
537
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
538
+ aws_request.should_receive(:aws_emr_request).and_return("jobflow_id!")
539
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
540
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
541
+ xml_result = nil
542
+ emr.run_job_flow({}) do |xml|
543
+ xml_result = xml
544
+ end
545
+ xml_result.should == "jobflow_id!"
546
+ end
547
+ end
548
+ end
549
+
550
+ end
551
+
552
+ describe "#terminate_jobflows" do
553
+
554
+ describe "integration happy path" do
555
+ context "when the job flow exists" do
556
+ use_vcr_cassette "terminate_jobflows/one_jobflow", :record => :none
557
+ it "should terminate the specified jobflow" do
558
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
559
+ emr.terminate_jobflows("j-1MZ5TVWFJRSKN")
560
+ end
561
+ end
562
+ end
563
+
564
+ describe "unit tests" do
565
+
566
+ context "when the jobflow exists" do
567
+ before do
568
+ @terminate_jobflows_xml = <<-RESPONSE
569
+ <TerminateJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
570
+ <ResponseMetadata>
571
+ <RequestId>2690d7eb-ed86-11dd-9877-6fad448a8419</RequestId>
572
+ </ResponseMetadata>
573
+ </TerminateJobFlowsResponse>
574
+ RESPONSE
575
+ end
576
+ it "should terminate the specific jobflow" do
577
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
578
+ aws_request.should_receive(:aws_emr_request).with({
579
+ "Operation" => "TerminateJobFlows",
580
+ "JobFlowIds.member.1" => "j-1"
581
+ }).and_return(@terminate_jobflows_xml)
582
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
583
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
584
+ emr.terminate_jobflows("j-1")
585
+ end
586
+ end
587
+
588
+ context "when the jobflow does not exist" do
589
+ it "should raise an ArgumentError" do
590
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
591
+ aws_request.should_receive(:aws_emr_request).and_raise(RestClient::BadRequest)
592
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
593
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
594
+ lambda {
595
+ emr.terminate_jobflows("invalid_jobflow_id")
596
+ }.should raise_error(ArgumentError)
597
+ end
598
+ end
599
+
600
+ context "when a block is given" do
601
+ it "should yield the XML result" do
602
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
603
+ aws_request.should_receive(:aws_emr_request).and_return("terminated!")
604
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
605
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
606
+ xml_result = nil
607
+ emr.terminate_jobflows("j-1") do |xml|
608
+ xml_result = xml
609
+ end
610
+ xml_result.should == "terminated!"
611
+ end
612
+ end
613
+
614
+ end
615
+ end
616
+
617
+ describe "#set_termination_protection" do
618
+
619
+ describe "integration happy path" do
620
+
621
+ context "when protecting multiple job flows" do
622
+ use_vcr_cassette "set_termination_protection/protect_multiple_job_flows", :record => :none
623
+ it "should protect the specified job flows" do
624
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
625
+ emr.set_termination_protection(["j-1B4D1XP0C0A35", "j-1YG2MYL0HVYS5"], true)
626
+ end
627
+ end
628
+
629
+ context "when specifying a job flow that doesn't exist" do
630
+ use_vcr_cassette "set_termination_protection/nonexistent_job_flows", :record => :none
631
+ it "should have an error" do
632
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
633
+ lambda {
634
+ emr.set_termination_protection(["j-1B4D1XP0C0A35", "j-2"], true)
635
+ }.should raise_error(ArgumentError, "Specified job flow ID not valid")
636
+ end
637
+ end
638
+
639
+ end
640
+
641
+ describe "unit tests" do
642
+ it "should enable protection on the specified job flows" do
643
+ aws_request = Elasticity::AwsRequest.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
644
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
645
+ aws_request.should_receive(:aws_emr_request).with({
646
+ "Operation" => "SetTerminationProtection",
647
+ "JobFlowIds.member.1" => "jobflow1",
648
+ "JobFlowIds.member.2" => "jobflow2",
649
+ "TerminationProtected" => true
650
+ })
651
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
652
+ emr.set_termination_protection(["jobflow1", "jobflow2"], true)
653
+ end
654
+
655
+ it "should disable protection on the specified job flows" do
656
+ aws_request = Elasticity::AwsRequest.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
657
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
658
+ aws_request.should_receive(:aws_emr_request).with({
659
+ "Operation" => "SetTerminationProtection",
660
+ "JobFlowIds.member.1" => "jobflow1",
661
+ "JobFlowIds.member.2" => "jobflow2",
662
+ "TerminationProtected" => false
663
+ })
664
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
665
+ emr.set_termination_protection(["jobflow1", "jobflow2"], false)
666
+ end
667
+
668
+ it "should enable protection when not specified" do
669
+ aws_request = Elasticity::AwsRequest.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
670
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
671
+ aws_request.should_receive(:aws_emr_request).with({
672
+ "Operation" => "SetTerminationProtection",
673
+ "JobFlowIds.member.1" => "jobflow1",
674
+ "JobFlowIds.member.2" => "jobflow2",
675
+ "TerminationProtected" => true
676
+ })
677
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
678
+ emr.set_termination_protection(["jobflow1", "jobflow2"])
679
+ end
680
+
681
+ context "when a block is given" do
682
+ before do
683
+ @xml_response = <<-RESPONSE
684
+ <SetTerminationProtectionResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
685
+ <ResponseMetadata>
686
+ <RequestId>755ebe8a-6923-11e0-a9c2-c126f1bb4493</RequestId>
687
+ </ResponseMetadata>
688
+ </SetTerminationProtectionResponse>
689
+ RESPONSE
690
+ end
691
+ it "should yield the XML result" do
692
+ aws_request = Elasticity::AwsRequest.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
693
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
694
+ aws_request.should_receive(:aws_emr_request).and_return(@xml_response)
695
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
696
+ xml = nil
697
+ emr.set_termination_protection([]) do |aws_response|
698
+ xml = aws_response
699
+ end
700
+ xml.should == @xml_response
701
+ end
702
+ end
703
+ end
704
+
705
+ end
706
+
707
+ describe "#direct" do
708
+
709
+ describe "integration happy path" do
710
+ use_vcr_cassette "direct/terminate_jobflow", :record => :none
711
+ it "should terminate the specified jobflow" do
712
+ emr = Elasticity::EMR.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
713
+ params = {
714
+ "Operation" => "TerminateJobFlows",
715
+ "JobFlowIds.member.1" => "j-1MZ5TVWFJRSKN"
716
+ }
717
+ emr.direct(params)
718
+ end
719
+ end
720
+
721
+ describe "unit tests" do
722
+ before do
723
+ @terminate_jobflows_xml = <<-RESPONSE
724
+ <TerminateJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
725
+ <ResponseMetadata>
726
+ <RequestId>2690d7eb-ed86-11dd-9877-6fad448a8419</RequestId>
727
+ </ResponseMetadata>
728
+ </TerminateJobFlowsResponse>
729
+ RESPONSE
730
+ end
731
+ it "should pass through directly to the request" do
732
+ aws_request = Elasticity::AwsRequest.new("aws_access_key_id", "aws_secret_key")
733
+ aws_request.should_receive(:aws_emr_request).with({
734
+ "Operation" => "TerminateJobFlows",
735
+ "JobFlowIds.member.1" => "j-1"
736
+ }).and_return(@terminate_jobflows_xml)
737
+ Elasticity::AwsRequest.should_receive(:new).and_return(aws_request)
738
+ emr = Elasticity::EMR.new("aws_access_key_id", "aws_secret_key")
739
+ params = {
740
+ "Operation" => "TerminateJobFlows",
741
+ "JobFlowIds.member.1" => "j-1"
742
+ }
743
+ emr.direct(params).should == @terminate_jobflows_xml
744
+ end
745
+ end
746
+ end
747
+
748
+ describe ".convert_ruby_to_aws" do
749
+ it "should convert the params" do
750
+ add_jobflow_steps_params = {
751
+ :job_flow_id => "j-1",
752
+ :steps => [
753
+ {
754
+ :action_on_failure => "CONTINUE",
755
+ :name => "First New Job Step",
756
+ :hadoop_jar_step => {
757
+ :args => ["arg1", "arg2", "arg3",],
758
+ :jar => "first_step.jar",
759
+ :main_class => "first_class.jar"
760
+ }
761
+ },
762
+ {
763
+ :action_on_failure => "CANCEL_AND_WAIT",
764
+ :name => "Second New Job Step",
765
+ :hadoop_jar_step => {
766
+ :args => ["arg4", "arg5", "arg6",],
767
+ :jar => "second_step.jar",
768
+ :main_class => "second_class.jar"
769
+ }
770
+ }
771
+ ]
772
+ }
773
+ expected_result = {
774
+ "JobFlowId" => "j-1",
775
+ "Steps.member.1.Name" => "First New Job Step",
776
+ "Steps.member.1.ActionOnFailure" => "CONTINUE",
777
+ "Steps.member.1.HadoopJarStep.Jar" => "first_step.jar",
778
+ "Steps.member.1.HadoopJarStep.MainClass" => "first_class.jar",
779
+ "Steps.member.1.HadoopJarStep.Args.member.1" => "arg1",
780
+ "Steps.member.1.HadoopJarStep.Args.member.2" => "arg2",
781
+ "Steps.member.1.HadoopJarStep.Args.member.3" => "arg3",
782
+ "Steps.member.2.Name" => "Second New Job Step",
783
+ "Steps.member.2.ActionOnFailure" => "CANCEL_AND_WAIT",
784
+ "Steps.member.2.HadoopJarStep.Jar" => "second_step.jar",
785
+ "Steps.member.2.HadoopJarStep.MainClass" => "second_class.jar",
786
+ "Steps.member.2.HadoopJarStep.Args.member.1" => "arg4",
787
+ "Steps.member.2.HadoopJarStep.Args.member.2" => "arg5",
788
+ "Steps.member.2.HadoopJarStep.Args.member.3" => "arg6"
789
+ }
790
+ Elasticity::EMR.send(:convert_ruby_to_aws, add_jobflow_steps_params).should == expected_result
791
+ end
792
+ end
793
+
794
+ end