elasticity 1.5 → 2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57)
  1. data/.rspec +2 -1
  2. data/.rvmrc +1 -1
  3. data/HISTORY.md +47 -24
  4. data/LICENSE +1 -1
  5. data/README.md +165 -317
  6. data/Rakefile +4 -3
  7. data/elasticity.gemspec +3 -5
  8. data/lib/elasticity.rb +10 -5
  9. data/lib/elasticity/aws_request.rb +81 -20
  10. data/lib/elasticity/custom_jar_step.rb +33 -0
  11. data/lib/elasticity/emr.rb +45 -117
  12. data/lib/elasticity/hadoop_bootstrap_action.rb +27 -0
  13. data/lib/elasticity/hive_step.rb +57 -0
  14. data/lib/elasticity/job_flow.rb +109 -39
  15. data/lib/elasticity/job_flow_status.rb +53 -0
  16. data/lib/elasticity/job_flow_status_step.rb +35 -0
  17. data/lib/elasticity/job_flow_step.rb +17 -25
  18. data/lib/elasticity/pig_step.rb +82 -0
  19. data/lib/elasticity/support/conditional_raise.rb +23 -0
  20. data/lib/elasticity/version.rb +1 -1
  21. data/spec/lib/elasticity/aws_request_spec.rb +159 -51
  22. data/spec/lib/elasticity/custom_jar_step_spec.rb +59 -0
  23. data/spec/lib/elasticity/emr_spec.rb +231 -762
  24. data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +26 -0
  25. data/spec/lib/elasticity/hive_step_spec.rb +74 -0
  26. data/spec/lib/elasticity/job_flow_integration_spec.rb +197 -0
  27. data/spec/lib/elasticity/job_flow_spec.rb +369 -138
  28. data/spec/lib/elasticity/job_flow_status_spec.rb +147 -0
  29. data/spec/lib/elasticity/job_flow_status_step_spec.rb +73 -0
  30. data/spec/lib/elasticity/job_flow_step_spec.rb +26 -64
  31. data/spec/lib/elasticity/pig_step_spec.rb +104 -0
  32. data/spec/lib/elasticity/support/conditional_raise_spec.rb +35 -0
  33. data/spec/spec_helper.rb +1 -50
  34. data/spec/support/be_a_hash_including_matcher.rb +35 -0
  35. metadata +101 -119
  36. data/.autotest +0 -2
  37. data/lib/elasticity/custom_jar_job.rb +0 -38
  38. data/lib/elasticity/hive_job.rb +0 -69
  39. data/lib/elasticity/pig_job.rb +0 -109
  40. data/lib/elasticity/simple_job.rb +0 -51
  41. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +0 -44
  42. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +0 -41
  43. data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +0 -266
  44. data/spec/fixtures/vcr_cassettes/custom_jar_job/cloudburst.yml +0 -41
  45. data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +0 -75
  46. data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +0 -38
  47. data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +0 -41
  48. data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +0 -38
  49. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +0 -41
  50. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +0 -41
  51. data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +0 -41
  52. data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +0 -41
  53. data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +0 -38
  54. data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +0 -38
  55. data/spec/lib/elasticity/custom_jar_job_spec.rb +0 -118
  56. data/spec/lib/elasticity/hive_job_spec.rb +0 -90
  57. data/spec/lib/elasticity/pig_job_spec.rb +0 -226
@@ -1,38 +0,0 @@
1
- ---
2
- http_interactions:
3
- - request:
4
- method: get
5
- uri: !ruby/regexp /^https/
6
- body:
7
- encoding: US-ASCII
8
- string: ""
9
- headers:
10
- Accept:
11
- - "*/*; q=0.5, application/xml"
12
- Accept-Encoding:
13
- - gzip, deflate
14
- response:
15
- status:
16
- code: 200
17
- message: OK
18
- headers:
19
- Content-Length:
20
- - "225"
21
- Date:
22
- - Sun, 10 Apr 2011 23:01:48 GMT
23
- Content-Type:
24
- - text/xml
25
- X-Amzn-Requestid:
26
- - 83e0154d-63c6-11e0-bc41-ababd98a870b
27
- body:
28
- encoding: US-ASCII
29
- string: |
30
- <TerminateJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
31
- <ResponseMetadata>
32
- <RequestId>83e0154d-63c6-11e0-bc41-ababd98a870b</RequestId>
33
- </ResponseMetadata>
34
- </TerminateJobFlowsResponse>
35
-
36
- http_version: "1.1"
37
- recorded_at: Sat, 03 Mar 2012 23:00:44 GMT
38
- recorded_with: VCR 2.0.0
@@ -1,118 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Elasticity::CustomJarJob do
4
-
5
- describe ".new" do
6
-
7
- it "should have good defaults" do
8
- custom_jar = Elasticity::CustomJarJob.new("access", "secret")
9
- custom_jar.aws_access_key_id.should == "access"
10
- custom_jar.aws_secret_access_key.should == "secret"
11
- custom_jar.ec2_key_name.should == "default"
12
- custom_jar.hadoop_version.should == "0.20"
13
- custom_jar.instance_count.should == 2
14
- custom_jar.master_instance_type.should == "m1.small"
15
- custom_jar.name.should == "Elasticity Custom Jar Job"
16
- custom_jar.slave_instance_type.should == "m1.small"
17
- custom_jar.action_on_failure.should == "TERMINATE_JOB_FLOW"
18
- custom_jar.log_uri.should == nil
19
- end
20
-
21
- end
22
-
23
- describe "#run" do
24
-
25
- context "when there are arguments provided" do
26
- it "should run the script with the specified variables and return the jobflow_id" do
27
- aws = Elasticity::EMR.new("", "")
28
- aws.should_receive(:run_job_flow).with(
29
- {
30
- :name => "Elasticity Custom Jar Job",
31
- :log_uri => "s3n://slif-test/output/logs",
32
- :instances => {
33
- :ec2_key_name => "default",
34
- :hadoop_version => "0.20",
35
- :instance_count => 2,
36
- :master_instance_type => "m1.small",
37
- :slave_instance_type => "m1.small",
38
- },
39
- :steps => [
40
- {
41
- :action_on_failure => "TERMINATE_JOB_FLOW",
42
- :hadoop_jar_step => {
43
- :jar => "s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar",
44
- :args => [
45
- "s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
46
- "s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
47
- "s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
48
- ],
49
- },
50
- :name => "Execute Custom Jar"
51
- }
52
- ]
53
- }).and_return("new_jobflow_id")
54
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
55
-
56
- custom_jar = Elasticity::CustomJarJob.new("access", "secret")
57
- custom_jar.log_uri = "s3n://slif-test/output/logs"
58
- custom_jar.action_on_failure = "TERMINATE_JOB_FLOW"
59
- jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar', [
60
- "s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
61
- "s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
62
- "s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
63
- ])
64
- jobflow_id.should == "new_jobflow_id"
65
- end
66
- end
67
-
68
- context "when there are no arguments provided" do
69
- it "should run the script with the specified variables and return the jobflow_id" do
70
- aws = Elasticity::EMR.new("", "")
71
- aws.should_receive(:run_job_flow).with(
72
- {
73
- :name => "Elasticity Custom Jar Job",
74
- :log_uri => "s3n://slif-test/output/logs",
75
- :instances => {
76
- :ec2_key_name => "default",
77
- :hadoop_version => "0.20",
78
- :instance_count => 2,
79
- :master_instance_type => "m1.small",
80
- :slave_instance_type => "m1.small",
81
- },
82
- :steps => [
83
- {
84
- :action_on_failure => "TERMINATE_JOB_FLOW",
85
- :hadoop_jar_step => {
86
- :jar => "s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar"
87
- },
88
- :name => "Execute Custom Jar"
89
- }
90
- ]
91
- }).and_return("new_jobflow_id")
92
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
93
-
94
- custom_jar = Elasticity::CustomJarJob.new("access", "secret")
95
- custom_jar.log_uri = "s3n://slif-test/output/logs"
96
- custom_jar.action_on_failure = "TERMINATE_JOB_FLOW"
97
- jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar')
98
- jobflow_id.should == "new_jobflow_id"
99
- end
100
- end
101
-
102
- end
103
-
104
- describe "integration happy path" do
105
- use_vcr_cassette "custom_jar_job/cloudburst", :record => :none
106
- it "should kick off the sample Amazion EMR Hive application" do
107
- custom_jar = Elasticity::CustomJarJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
108
- custom_jar.ec2_key_name = "sharethrough_dev"
109
- jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar', [
110
- "s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
111
- "s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
112
- "s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
113
- ])
114
- jobflow_id.should == "j-1IU6NM8OUPS9I"
115
- end
116
- end
117
-
118
- end
@@ -1,90 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Elasticity::HiveJob do
4
-
5
- describe ".new" do
6
-
7
- it "should have good defaults" do
8
- hive = Elasticity::HiveJob.new("access", "secret")
9
- hive.aws_access_key_id.should == "access"
10
- hive.aws_secret_access_key.should == "secret"
11
- hive.ec2_key_name.should == "default"
12
- hive.hadoop_version.should == "0.20"
13
- hive.instance_count.should == 2
14
- hive.master_instance_type.should == "m1.small"
15
- hive.name.should == "Elasticity Hive Job"
16
- hive.slave_instance_type.should == "m1.small"
17
- hive.action_on_failure.should == "TERMINATE_JOB_FLOW"
18
- hive.log_uri.should == nil
19
- end
20
-
21
- end
22
-
23
- describe "#run" do
24
-
25
- it "should run the script with the specified variables and return the jobflow_id" do
26
- aws = Elasticity::EMR.new("", "")
27
- aws.should_receive(:run_job_flow).with({
28
- :name => "Elasticity Hive Job",
29
- :log_uri => "s3n://slif-test/output/logs",
30
- :instances => {
31
- :ec2_key_name => "default",
32
- :hadoop_version => "0.20",
33
- :instance_count => 2,
34
- :master_instance_type => "m1.small",
35
- :slave_instance_type => "m1.small",
36
- },
37
- :steps => [
38
- {
39
- :action_on_failure => "TERMINATE_JOB_FLOW",
40
- :hadoop_jar_step => {
41
- :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
42
- :args => [
43
- "s3://elasticmapreduce/libs/hive/hive-script",
44
- "--base-path",
45
- "s3://elasticmapreduce/libs/hive/",
46
- "--install-hive"
47
- ],
48
- },
49
- :name => "Setup Hive"
50
- },
51
- {
52
- :action_on_failure => "CONTINUE",
53
- :hadoop_jar_step => {
54
- :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
55
- :args => [
56
- "s3://elasticmapreduce/libs/hive/hive-script",
57
- "--run-hive-script",
58
- "--args",
59
- "-f", "s3n://slif-hive/test.q",
60
- "-d", "OUTPUT=s3n://slif-test/output"
61
- ],
62
- },
63
- :name => "Run Hive Script"
64
- }
65
- ]
66
- }).and_return("new_jobflow_id")
67
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
68
-
69
- hive = Elasticity::HiveJob.new("access", "secret")
70
- hive.log_uri = "s3n://slif-test/output/logs"
71
- hive.action_on_failure = "CONTINUE"
72
- jobflow_id = hive.run('s3n://slif-hive/test.q', {
73
- 'OUTPUT' => 's3n://slif-test/output'
74
- })
75
- jobflow_id.should == "new_jobflow_id"
76
- end
77
-
78
- end
79
-
80
- describe "integration happy path" do
81
- use_vcr_cassette "hive_job/hive_ads", :record => :none
82
- it "should kick off the sample Amazion EMR Hive application" do
83
- hive = Elasticity::HiveJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
84
- hive.ec2_key_name = "sharethrough_dev"
85
- jobflow_id = hive.run("s3n://elasticmapreduce/samples/hive-ads/libs/model-build.q")
86
- jobflow_id.should == "j-2I4HV6S3SDGD9"
87
- end
88
- end
89
-
90
- end
@@ -1,226 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Elasticity::PigJob do
4
-
5
- describe ".new" do
6
- it "should have good defaults" do
7
- pig = Elasticity::PigJob.new("access", "secret")
8
- pig.aws_access_key_id.should == "access"
9
- pig.aws_secret_access_key.should == "secret"
10
- pig.ec2_key_name.should == "default"
11
- pig.hadoop_version.should == "0.20"
12
- pig.instance_count.should == 2
13
- pig.master_instance_type.should == "m1.small"
14
- pig.name.should == "Elasticity Pig Job"
15
- pig.slave_instance_type.should == "m1.small"
16
- pig.action_on_failure.should == "TERMINATE_JOB_FLOW"
17
- pig.log_uri.should == nil
18
- pig.parallels.should == 1
19
- end
20
- end
21
-
22
- describe "#instance_count=" do
23
- it "should not allow instances to be set less than 2" do
24
- pig = Elasticity::PigJob.new("access", "secret")
25
- lambda {
26
- pig.instance_count = 1
27
- }.should raise_error(ArgumentError, "Instance count cannot be set to less than 2 (requested 1)")
28
- end
29
-
30
- it "should recalculate @parallels" do
31
- pig = Elasticity::PigJob.new("access", "secret")
32
- lambda {
33
- pig.instance_count = 10
34
- }.should change(pig, :parallels)
35
- end
36
- end
37
-
38
- describe "#slave_instance_type=" do
39
- it "should recalculate @parallels" do
40
- pig = Elasticity::PigJob.new("access", "secret")
41
- lambda {
42
- pig.slave_instance_type = "c1.xlarge"
43
- }.should change(pig, :parallels)
44
- end
45
- end
46
-
47
- describe "calculated value of parallels" do
48
-
49
- before do
50
- @pig = Elasticity::PigJob.new("access", "secret")
51
- @pig.instance_count = 8
52
- end
53
-
54
- context "when slave is m1.small" do
55
- it "should be 7" do
56
- @pig.slave_instance_type = "m1.small"
57
- @pig.parallels.should == 7
58
- end
59
- end
60
-
61
- context "when slave is m1.large" do
62
- it "should be 13" do
63
- @pig.slave_instance_type = "m1.large"
64
- @pig.parallels.should == 13
65
- end
66
- end
67
-
68
- context "when slave is c1.medium" do
69
- it "should be 13" do
70
- @pig.slave_instance_type = "c1.medium"
71
- @pig.parallels.should == 13
72
- end
73
- end
74
-
75
- context "when slave is m1.xlarge" do
76
- it "should be 26" do
77
- @pig.slave_instance_type = "m1.xlarge"
78
- @pig.parallels.should == 26
79
- end
80
- end
81
-
82
- context "when slave is c1.xlarge" do
83
- it "should be 26" do
84
- @pig.slave_instance_type = "c1.xlarge"
85
- @pig.parallels.should == 26
86
- end
87
- end
88
-
89
- context "when slave is any other type" do
90
- it "should be 1" do
91
- @pig.slave_instance_type = "foo"
92
- @pig.parallels.should == 7
93
- end
94
- end
95
-
96
- end
97
-
98
- describe "#run" do
99
-
100
- context "when no bootstrap actions are specified" do
101
-
102
- it "should run the script with the specified variables and return the jobflow_id" do
103
- aws = Elasticity::EMR.new("", "")
104
- aws.should_receive(:run_job_flow).with({
105
- :name => "Elasticity Pig Job",
106
- :log_uri => "s3n://slif-test/output/logs",
107
- :instances => {
108
- :ec2_key_name => "default",
109
- :hadoop_version => "0.20",
110
- :instance_count => 8,
111
- :master_instance_type => "m1.small",
112
- :slave_instance_type => "m1.xlarge",
113
- },
114
- :steps => [
115
- {
116
- :action_on_failure => "TERMINATE_JOB_FLOW",
117
- :hadoop_jar_step => {
118
- :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
119
- :args => [
120
- "s3://elasticmapreduce/libs/pig/pig-script",
121
- "--base-path",
122
- "s3://elasticmapreduce/libs/pig/",
123
- "--install-pig"
124
- ],
125
- },
126
- :name => "Setup Pig"
127
- },
128
- {
129
- :action_on_failure => "CONTINUE",
130
- :hadoop_jar_step => {
131
- :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
132
- :args => [
133
- "s3://elasticmapreduce/libs/pig/pig-script",
134
- "--run-pig-script",
135
- "--args",
136
- "-p", "OUTPUT=s3n://slif-pig-test/output",
137
- "-p", "XREFS=s3n://slif-pig-test/xrefs",
138
- "-p", "E_PARALLELS=26",
139
- "s3n://slif-pig-test/test.pig"
140
- ],
141
- },
142
- :name => "Run Pig Script"
143
- }
144
- ]
145
- }).and_return("new_jobflow_id")
146
-
147
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
148
- pig = Elasticity::PigJob.new("access", "secret")
149
-
150
- pig.log_uri = "s3n://slif-test/output/logs"
151
- pig.action_on_failure = "CONTINUE"
152
- pig.instance_count = 8
153
- pig.slave_instance_type = "m1.xlarge"
154
-
155
- jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
156
- 'OUTPUT' => 's3n://slif-pig-test/output',
157
- 'XREFS' => 's3n://slif-pig-test/xrefs'
158
- })
159
- jobflow_id.should == "new_jobflow_id"
160
- end
161
- end
162
-
163
- context "when bootstrap actions are specified" do
164
- it "should run the script wth the proper job configuration" do
165
- aws = Elasticity::EMR.new("", "")
166
- aws.should_receive(:run_job_flow).with(hash_including({
167
- :bootstrap_actions => [
168
- {
169
- :name => "Elasticity Bootstrap Action (Configure Hadoop)",
170
- :script_bootstrap_action => {
171
- :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
172
- :args => ["-m", "foo=111"]
173
- }
174
- },
175
- {
176
- :name => "Elasticity Bootstrap Action (Configure Hadoop)",
177
- :script_bootstrap_action => {
178
- :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
179
- :args => ["-m", "bar=222"]
180
- }
181
- }
182
- ],
183
- }))
184
-
185
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
186
- pig = Elasticity::PigJob.new("access", "secret")
187
- pig.add_hadoop_bootstrap_action("-m", "foo=111")
188
- pig.add_hadoop_bootstrap_action("-m", "bar=222")
189
- pig.run('s3n://slif-pig-test/test.pig')
190
- end
191
- end
192
-
193
- end
194
-
195
- describe "integration happy path" do
196
-
197
- context "with bootstrap actions" do
198
- use_vcr_cassette "pig_job/apache_log_reports_with_bootstrap", :record => :none
199
- it "should kick off the sample Amazion EMR Pig application" do
200
- pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
201
- pig.ec2_key_name = "sharethrough_dev"
202
- pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
203
- jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
204
- "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
205
- "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-10"
206
- })
207
- jobflow_id.should == "j-1UK43AWRT3QHD"
208
- end
209
- end
210
-
211
- context "without bootstrap actions" do
212
- use_vcr_cassette "pig_job/apache_log_reports", :record => :none
213
- it "should kick off the sample Amazion EMR Pig application" do
214
- pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
215
- pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
216
- pig.ec2_key_name = "sharethrough_dev"
217
- jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
218
- "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
219
- "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
220
- })
221
- jobflow_id.should == "j-1HB7A3TBRT3VS"
222
- end
223
- end
224
- end
225
-
226
- end