elasticity 1.5 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. data/.rspec +2 -1
  2. data/.rvmrc +1 -1
  3. data/HISTORY.md +47 -24
  4. data/LICENSE +1 -1
  5. data/README.md +165 -317
  6. data/Rakefile +4 -3
  7. data/elasticity.gemspec +3 -5
  8. data/lib/elasticity.rb +10 -5
  9. data/lib/elasticity/aws_request.rb +81 -20
  10. data/lib/elasticity/custom_jar_step.rb +33 -0
  11. data/lib/elasticity/emr.rb +45 -117
  12. data/lib/elasticity/hadoop_bootstrap_action.rb +27 -0
  13. data/lib/elasticity/hive_step.rb +57 -0
  14. data/lib/elasticity/job_flow.rb +109 -39
  15. data/lib/elasticity/job_flow_status.rb +53 -0
  16. data/lib/elasticity/job_flow_status_step.rb +35 -0
  17. data/lib/elasticity/job_flow_step.rb +17 -25
  18. data/lib/elasticity/pig_step.rb +82 -0
  19. data/lib/elasticity/support/conditional_raise.rb +23 -0
  20. data/lib/elasticity/version.rb +1 -1
  21. data/spec/lib/elasticity/aws_request_spec.rb +159 -51
  22. data/spec/lib/elasticity/custom_jar_step_spec.rb +59 -0
  23. data/spec/lib/elasticity/emr_spec.rb +231 -762
  24. data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +26 -0
  25. data/spec/lib/elasticity/hive_step_spec.rb +74 -0
  26. data/spec/lib/elasticity/job_flow_integration_spec.rb +197 -0
  27. data/spec/lib/elasticity/job_flow_spec.rb +369 -138
  28. data/spec/lib/elasticity/job_flow_status_spec.rb +147 -0
  29. data/spec/lib/elasticity/job_flow_status_step_spec.rb +73 -0
  30. data/spec/lib/elasticity/job_flow_step_spec.rb +26 -64
  31. data/spec/lib/elasticity/pig_step_spec.rb +104 -0
  32. data/spec/lib/elasticity/support/conditional_raise_spec.rb +35 -0
  33. data/spec/spec_helper.rb +1 -50
  34. data/spec/support/be_a_hash_including_matcher.rb +35 -0
  35. metadata +101 -119
  36. data/.autotest +0 -2
  37. data/lib/elasticity/custom_jar_job.rb +0 -38
  38. data/lib/elasticity/hive_job.rb +0 -69
  39. data/lib/elasticity/pig_job.rb +0 -109
  40. data/lib/elasticity/simple_job.rb +0 -51
  41. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +0 -44
  42. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +0 -41
  43. data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +0 -266
  44. data/spec/fixtures/vcr_cassettes/custom_jar_job/cloudburst.yml +0 -41
  45. data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +0 -75
  46. data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +0 -38
  47. data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +0 -41
  48. data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +0 -38
  49. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +0 -41
  50. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +0 -41
  51. data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +0 -41
  52. data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +0 -41
  53. data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +0 -38
  54. data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +0 -38
  55. data/spec/lib/elasticity/custom_jar_job_spec.rb +0 -118
  56. data/spec/lib/elasticity/hive_job_spec.rb +0 -90
  57. data/spec/lib/elasticity/pig_job_spec.rb +0 -226
@@ -1,38 +0,0 @@
1
- ---
2
- http_interactions:
3
- - request:
4
- method: get
5
- uri: !ruby/regexp /^https/
6
- body:
7
- encoding: US-ASCII
8
- string: ""
9
- headers:
10
- Accept:
11
- - "*/*; q=0.5, application/xml"
12
- Accept-Encoding:
13
- - gzip, deflate
14
- response:
15
- status:
16
- code: 200
17
- message: OK
18
- headers:
19
- Content-Length:
20
- - "225"
21
- Date:
22
- - Sun, 10 Apr 2011 23:01:48 GMT
23
- Content-Type:
24
- - text/xml
25
- X-Amzn-Requestid:
26
- - 83e0154d-63c6-11e0-bc41-ababd98a870b
27
- body:
28
- encoding: US-ASCII
29
- string: |
30
- <TerminateJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
31
- <ResponseMetadata>
32
- <RequestId>83e0154d-63c6-11e0-bc41-ababd98a870b</RequestId>
33
- </ResponseMetadata>
34
- </TerminateJobFlowsResponse>
35
-
36
- http_version: "1.1"
37
- recorded_at: Sat, 03 Mar 2012 23:00:44 GMT
38
- recorded_with: VCR 2.0.0
@@ -1,118 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Elasticity::CustomJarJob do
4
-
5
- describe ".new" do
6
-
7
- it "should have good defaults" do
8
- custom_jar = Elasticity::CustomJarJob.new("access", "secret")
9
- custom_jar.aws_access_key_id.should == "access"
10
- custom_jar.aws_secret_access_key.should == "secret"
11
- custom_jar.ec2_key_name.should == "default"
12
- custom_jar.hadoop_version.should == "0.20"
13
- custom_jar.instance_count.should == 2
14
- custom_jar.master_instance_type.should == "m1.small"
15
- custom_jar.name.should == "Elasticity Custom Jar Job"
16
- custom_jar.slave_instance_type.should == "m1.small"
17
- custom_jar.action_on_failure.should == "TERMINATE_JOB_FLOW"
18
- custom_jar.log_uri.should == nil
19
- end
20
-
21
- end
22
-
23
- describe "#run" do
24
-
25
- context "when there are arguments provided" do
26
- it "should run the script with the specified variables and return the jobflow_id" do
27
- aws = Elasticity::EMR.new("", "")
28
- aws.should_receive(:run_job_flow).with(
29
- {
30
- :name => "Elasticity Custom Jar Job",
31
- :log_uri => "s3n://slif-test/output/logs",
32
- :instances => {
33
- :ec2_key_name => "default",
34
- :hadoop_version => "0.20",
35
- :instance_count => 2,
36
- :master_instance_type => "m1.small",
37
- :slave_instance_type => "m1.small",
38
- },
39
- :steps => [
40
- {
41
- :action_on_failure => "TERMINATE_JOB_FLOW",
42
- :hadoop_jar_step => {
43
- :jar => "s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar",
44
- :args => [
45
- "s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
46
- "s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
47
- "s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
48
- ],
49
- },
50
- :name => "Execute Custom Jar"
51
- }
52
- ]
53
- }).and_return("new_jobflow_id")
54
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
55
-
56
- custom_jar = Elasticity::CustomJarJob.new("access", "secret")
57
- custom_jar.log_uri = "s3n://slif-test/output/logs"
58
- custom_jar.action_on_failure = "TERMINATE_JOB_FLOW"
59
- jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar', [
60
- "s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
61
- "s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
62
- "s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
63
- ])
64
- jobflow_id.should == "new_jobflow_id"
65
- end
66
- end
67
-
68
- context "when there are no arguments provided" do
69
- it "should run the script with the specified variables and return the jobflow_id" do
70
- aws = Elasticity::EMR.new("", "")
71
- aws.should_receive(:run_job_flow).with(
72
- {
73
- :name => "Elasticity Custom Jar Job",
74
- :log_uri => "s3n://slif-test/output/logs",
75
- :instances => {
76
- :ec2_key_name => "default",
77
- :hadoop_version => "0.20",
78
- :instance_count => 2,
79
- :master_instance_type => "m1.small",
80
- :slave_instance_type => "m1.small",
81
- },
82
- :steps => [
83
- {
84
- :action_on_failure => "TERMINATE_JOB_FLOW",
85
- :hadoop_jar_step => {
86
- :jar => "s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar"
87
- },
88
- :name => "Execute Custom Jar"
89
- }
90
- ]
91
- }).and_return("new_jobflow_id")
92
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
93
-
94
- custom_jar = Elasticity::CustomJarJob.new("access", "secret")
95
- custom_jar.log_uri = "s3n://slif-test/output/logs"
96
- custom_jar.action_on_failure = "TERMINATE_JOB_FLOW"
97
- jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar')
98
- jobflow_id.should == "new_jobflow_id"
99
- end
100
- end
101
-
102
- end
103
-
104
- describe "integration happy path" do
105
- use_vcr_cassette "custom_jar_job/cloudburst", :record => :none
106
- it "should kick off the sample Amazion EMR Hive application" do
107
- custom_jar = Elasticity::CustomJarJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
108
- custom_jar.ec2_key_name = "sharethrough_dev"
109
- jobflow_id = custom_jar.run('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar', [
110
- "s3n://elasticmapreduce/samples/cloudburst/input/s_suis.br",
111
- "s3n://elasticmapreduce/samples/cloudburst/input/100k.br",
112
- "s3n://slif_hadoop_test/cloudburst/output/2011-12-09",
113
- ])
114
- jobflow_id.should == "j-1IU6NM8OUPS9I"
115
- end
116
- end
117
-
118
- end
@@ -1,90 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Elasticity::HiveJob do
4
-
5
- describe ".new" do
6
-
7
- it "should have good defaults" do
8
- hive = Elasticity::HiveJob.new("access", "secret")
9
- hive.aws_access_key_id.should == "access"
10
- hive.aws_secret_access_key.should == "secret"
11
- hive.ec2_key_name.should == "default"
12
- hive.hadoop_version.should == "0.20"
13
- hive.instance_count.should == 2
14
- hive.master_instance_type.should == "m1.small"
15
- hive.name.should == "Elasticity Hive Job"
16
- hive.slave_instance_type.should == "m1.small"
17
- hive.action_on_failure.should == "TERMINATE_JOB_FLOW"
18
- hive.log_uri.should == nil
19
- end
20
-
21
- end
22
-
23
- describe "#run" do
24
-
25
- it "should run the script with the specified variables and return the jobflow_id" do
26
- aws = Elasticity::EMR.new("", "")
27
- aws.should_receive(:run_job_flow).with({
28
- :name => "Elasticity Hive Job",
29
- :log_uri => "s3n://slif-test/output/logs",
30
- :instances => {
31
- :ec2_key_name => "default",
32
- :hadoop_version => "0.20",
33
- :instance_count => 2,
34
- :master_instance_type => "m1.small",
35
- :slave_instance_type => "m1.small",
36
- },
37
- :steps => [
38
- {
39
- :action_on_failure => "TERMINATE_JOB_FLOW",
40
- :hadoop_jar_step => {
41
- :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
42
- :args => [
43
- "s3://elasticmapreduce/libs/hive/hive-script",
44
- "--base-path",
45
- "s3://elasticmapreduce/libs/hive/",
46
- "--install-hive"
47
- ],
48
- },
49
- :name => "Setup Hive"
50
- },
51
- {
52
- :action_on_failure => "CONTINUE",
53
- :hadoop_jar_step => {
54
- :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
55
- :args => [
56
- "s3://elasticmapreduce/libs/hive/hive-script",
57
- "--run-hive-script",
58
- "--args",
59
- "-f", "s3n://slif-hive/test.q",
60
- "-d", "OUTPUT=s3n://slif-test/output"
61
- ],
62
- },
63
- :name => "Run Hive Script"
64
- }
65
- ]
66
- }).and_return("new_jobflow_id")
67
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
68
-
69
- hive = Elasticity::HiveJob.new("access", "secret")
70
- hive.log_uri = "s3n://slif-test/output/logs"
71
- hive.action_on_failure = "CONTINUE"
72
- jobflow_id = hive.run('s3n://slif-hive/test.q', {
73
- 'OUTPUT' => 's3n://slif-test/output'
74
- })
75
- jobflow_id.should == "new_jobflow_id"
76
- end
77
-
78
- end
79
-
80
- describe "integration happy path" do
81
- use_vcr_cassette "hive_job/hive_ads", :record => :none
82
- it "should kick off the sample Amazion EMR Hive application" do
83
- hive = Elasticity::HiveJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
84
- hive.ec2_key_name = "sharethrough_dev"
85
- jobflow_id = hive.run("s3n://elasticmapreduce/samples/hive-ads/libs/model-build.q")
86
- jobflow_id.should == "j-2I4HV6S3SDGD9"
87
- end
88
- end
89
-
90
- end
@@ -1,226 +0,0 @@
1
- require 'spec_helper'
2
-
3
- describe Elasticity::PigJob do
4
-
5
- describe ".new" do
6
- it "should have good defaults" do
7
- pig = Elasticity::PigJob.new("access", "secret")
8
- pig.aws_access_key_id.should == "access"
9
- pig.aws_secret_access_key.should == "secret"
10
- pig.ec2_key_name.should == "default"
11
- pig.hadoop_version.should == "0.20"
12
- pig.instance_count.should == 2
13
- pig.master_instance_type.should == "m1.small"
14
- pig.name.should == "Elasticity Pig Job"
15
- pig.slave_instance_type.should == "m1.small"
16
- pig.action_on_failure.should == "TERMINATE_JOB_FLOW"
17
- pig.log_uri.should == nil
18
- pig.parallels.should == 1
19
- end
20
- end
21
-
22
- describe "#instance_count=" do
23
- it "should not allow instances to be set less than 2" do
24
- pig = Elasticity::PigJob.new("access", "secret")
25
- lambda {
26
- pig.instance_count = 1
27
- }.should raise_error(ArgumentError, "Instance count cannot be set to less than 2 (requested 1)")
28
- end
29
-
30
- it "should recalculate @parallels" do
31
- pig = Elasticity::PigJob.new("access", "secret")
32
- lambda {
33
- pig.instance_count = 10
34
- }.should change(pig, :parallels)
35
- end
36
- end
37
-
38
- describe "#slave_instance_type=" do
39
- it "should recalculate @parallels" do
40
- pig = Elasticity::PigJob.new("access", "secret")
41
- lambda {
42
- pig.slave_instance_type = "c1.xlarge"
43
- }.should change(pig, :parallels)
44
- end
45
- end
46
-
47
- describe "calculated value of parallels" do
48
-
49
- before do
50
- @pig = Elasticity::PigJob.new("access", "secret")
51
- @pig.instance_count = 8
52
- end
53
-
54
- context "when slave is m1.small" do
55
- it "should be 7" do
56
- @pig.slave_instance_type = "m1.small"
57
- @pig.parallels.should == 7
58
- end
59
- end
60
-
61
- context "when slave is m1.large" do
62
- it "should be 13" do
63
- @pig.slave_instance_type = "m1.large"
64
- @pig.parallels.should == 13
65
- end
66
- end
67
-
68
- context "when slave is c1.medium" do
69
- it "should be 13" do
70
- @pig.slave_instance_type = "c1.medium"
71
- @pig.parallels.should == 13
72
- end
73
- end
74
-
75
- context "when slave is m1.xlarge" do
76
- it "should be 26" do
77
- @pig.slave_instance_type = "m1.xlarge"
78
- @pig.parallels.should == 26
79
- end
80
- end
81
-
82
- context "when slave is c1.xlarge" do
83
- it "should be 26" do
84
- @pig.slave_instance_type = "c1.xlarge"
85
- @pig.parallels.should == 26
86
- end
87
- end
88
-
89
- context "when slave is any other type" do
90
- it "should be 1" do
91
- @pig.slave_instance_type = "foo"
92
- @pig.parallels.should == 7
93
- end
94
- end
95
-
96
- end
97
-
98
- describe "#run" do
99
-
100
- context "when no bootstrap actions are specified" do
101
-
102
- it "should run the script with the specified variables and return the jobflow_id" do
103
- aws = Elasticity::EMR.new("", "")
104
- aws.should_receive(:run_job_flow).with({
105
- :name => "Elasticity Pig Job",
106
- :log_uri => "s3n://slif-test/output/logs",
107
- :instances => {
108
- :ec2_key_name => "default",
109
- :hadoop_version => "0.20",
110
- :instance_count => 8,
111
- :master_instance_type => "m1.small",
112
- :slave_instance_type => "m1.xlarge",
113
- },
114
- :steps => [
115
- {
116
- :action_on_failure => "TERMINATE_JOB_FLOW",
117
- :hadoop_jar_step => {
118
- :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
119
- :args => [
120
- "s3://elasticmapreduce/libs/pig/pig-script",
121
- "--base-path",
122
- "s3://elasticmapreduce/libs/pig/",
123
- "--install-pig"
124
- ],
125
- },
126
- :name => "Setup Pig"
127
- },
128
- {
129
- :action_on_failure => "CONTINUE",
130
- :hadoop_jar_step => {
131
- :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
132
- :args => [
133
- "s3://elasticmapreduce/libs/pig/pig-script",
134
- "--run-pig-script",
135
- "--args",
136
- "-p", "OUTPUT=s3n://slif-pig-test/output",
137
- "-p", "XREFS=s3n://slif-pig-test/xrefs",
138
- "-p", "E_PARALLELS=26",
139
- "s3n://slif-pig-test/test.pig"
140
- ],
141
- },
142
- :name => "Run Pig Script"
143
- }
144
- ]
145
- }).and_return("new_jobflow_id")
146
-
147
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
148
- pig = Elasticity::PigJob.new("access", "secret")
149
-
150
- pig.log_uri = "s3n://slif-test/output/logs"
151
- pig.action_on_failure = "CONTINUE"
152
- pig.instance_count = 8
153
- pig.slave_instance_type = "m1.xlarge"
154
-
155
- jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
156
- 'OUTPUT' => 's3n://slif-pig-test/output',
157
- 'XREFS' => 's3n://slif-pig-test/xrefs'
158
- })
159
- jobflow_id.should == "new_jobflow_id"
160
- end
161
- end
162
-
163
- context "when bootstrap actions are specified" do
164
- it "should run the script wth the proper job configuration" do
165
- aws = Elasticity::EMR.new("", "")
166
- aws.should_receive(:run_job_flow).with(hash_including({
167
- :bootstrap_actions => [
168
- {
169
- :name => "Elasticity Bootstrap Action (Configure Hadoop)",
170
- :script_bootstrap_action => {
171
- :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
172
- :args => ["-m", "foo=111"]
173
- }
174
- },
175
- {
176
- :name => "Elasticity Bootstrap Action (Configure Hadoop)",
177
- :script_bootstrap_action => {
178
- :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
179
- :args => ["-m", "bar=222"]
180
- }
181
- }
182
- ],
183
- }))
184
-
185
- Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
186
- pig = Elasticity::PigJob.new("access", "secret")
187
- pig.add_hadoop_bootstrap_action("-m", "foo=111")
188
- pig.add_hadoop_bootstrap_action("-m", "bar=222")
189
- pig.run('s3n://slif-pig-test/test.pig')
190
- end
191
- end
192
-
193
- end
194
-
195
- describe "integration happy path" do
196
-
197
- context "with bootstrap actions" do
198
- use_vcr_cassette "pig_job/apache_log_reports_with_bootstrap", :record => :none
199
- it "should kick off the sample Amazion EMR Pig application" do
200
- pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
201
- pig.ec2_key_name = "sharethrough_dev"
202
- pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
203
- jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
204
- "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
205
- "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-10"
206
- })
207
- jobflow_id.should == "j-1UK43AWRT3QHD"
208
- end
209
- end
210
-
211
- context "without bootstrap actions" do
212
- use_vcr_cassette "pig_job/apache_log_reports", :record => :none
213
- it "should kick off the sample Amazion EMR Pig application" do
214
- pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
215
- pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
216
- pig.ec2_key_name = "sharethrough_dev"
217
- jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
218
- "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
219
- "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
220
- })
221
- jobflow_id.should == "j-1HB7A3TBRT3VS"
222
- end
223
- end
224
- end
225
-
226
- end