elasticity 1.5 → 2.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
Files changed (57)
  1. data/.rspec +2 -1
  2. data/.rvmrc +1 -1
  3. data/HISTORY.md +47 -24
  4. data/LICENSE +1 -1
  5. data/README.md +165 -317
  6. data/Rakefile +4 -3
  7. data/elasticity.gemspec +3 -5
  8. data/lib/elasticity.rb +10 -5
  9. data/lib/elasticity/aws_request.rb +81 -20
  10. data/lib/elasticity/custom_jar_step.rb +33 -0
  11. data/lib/elasticity/emr.rb +45 -117
  12. data/lib/elasticity/hadoop_bootstrap_action.rb +27 -0
  13. data/lib/elasticity/hive_step.rb +57 -0
  14. data/lib/elasticity/job_flow.rb +109 -39
  15. data/lib/elasticity/job_flow_status.rb +53 -0
  16. data/lib/elasticity/job_flow_status_step.rb +35 -0
  17. data/lib/elasticity/job_flow_step.rb +17 -25
  18. data/lib/elasticity/pig_step.rb +82 -0
  19. data/lib/elasticity/support/conditional_raise.rb +23 -0
  20. data/lib/elasticity/version.rb +1 -1
  21. data/spec/lib/elasticity/aws_request_spec.rb +159 -51
  22. data/spec/lib/elasticity/custom_jar_step_spec.rb +59 -0
  23. data/spec/lib/elasticity/emr_spec.rb +231 -762
  24. data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +26 -0
  25. data/spec/lib/elasticity/hive_step_spec.rb +74 -0
  26. data/spec/lib/elasticity/job_flow_integration_spec.rb +197 -0
  27. data/spec/lib/elasticity/job_flow_spec.rb +369 -138
  28. data/spec/lib/elasticity/job_flow_status_spec.rb +147 -0
  29. data/spec/lib/elasticity/job_flow_status_step_spec.rb +73 -0
  30. data/spec/lib/elasticity/job_flow_step_spec.rb +26 -64
  31. data/spec/lib/elasticity/pig_step_spec.rb +104 -0
  32. data/spec/lib/elasticity/support/conditional_raise_spec.rb +35 -0
  33. data/spec/spec_helper.rb +1 -50
  34. data/spec/support/be_a_hash_including_matcher.rb +35 -0
  35. metadata +101 -119
  36. data/.autotest +0 -2
  37. data/lib/elasticity/custom_jar_job.rb +0 -38
  38. data/lib/elasticity/hive_job.rb +0 -69
  39. data/lib/elasticity/pig_job.rb +0 -109
  40. data/lib/elasticity/simple_job.rb +0 -51
  41. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +0 -44
  42. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +0 -41
  43. data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +0 -266
  44. data/spec/fixtures/vcr_cassettes/custom_jar_job/cloudburst.yml +0 -41
  45. data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +0 -75
  46. data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +0 -38
  47. data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +0 -41
  48. data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +0 -38
  49. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +0 -41
  50. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +0 -41
  51. data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +0 -41
  52. data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +0 -41
  53. data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +0 -38
  54. data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +0 -38
  55. data/spec/lib/elasticity/custom_jar_job_spec.rb +0 -118
  56. data/spec/lib/elasticity/hive_job_spec.rb +0 -90
  57. data/spec/lib/elasticity/pig_job_spec.rb +0 -226
@@ -0,0 +1,26 @@
1
+ describe Elasticity::HadoopBootstrapAction do
2
+
3
+ subject do
4
+ Elasticity::HadoopBootstrapAction.new('option', 'value')
5
+ end
6
+
7
+ its(:name) { should == 'Elasticity Bootstrap Action (Configure Hadoop)' }
8
+ its(:option) { should == 'option' }
9
+ its(:value) { should == 'value' }
10
+
11
+ describe '#to_aws_bootstrap_action' do
12
+
13
+ it 'should create a bootstrap action' do
14
+ subject.to_aws_bootstrap_action.should ==
15
+ {
16
+ :name => 'Elasticity Bootstrap Action (Configure Hadoop)',
17
+ :script_bootstrap_action => {
18
+ :path => 's3n://elasticmapreduce/bootstrap-actions/configure-hadoop',
19
+ :args => ['option', 'value']
20
+ }
21
+ }
22
+ end
23
+
24
+ end
25
+
26
+ end
@@ -0,0 +1,74 @@
1
+ describe Elasticity::HiveStep do
2
+
3
+ subject do
4
+ Elasticity::HiveStep.new('script.hql')
5
+ end
6
+
7
+ it { should be_a Elasticity::JobFlowStep }
8
+
9
+ its(:name) { should == 'Elasticity Hive Step (script.hql)' }
10
+ its(:script) { should == 'script.hql' }
11
+ its(:variables) { should == { } }
12
+ its(:action_on_failure) { should == 'TERMINATE_JOB_FLOW' }
13
+
14
+ describe '#to_aws_step' do
15
+
16
+ it 'should convert to aws step format' do
17
+ step = subject.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
18
+ step[:name].should == 'Elasticity Hive Step (script.hql)'
19
+ step[:action_on_failure].should == 'TERMINATE_JOB_FLOW'
20
+ step[:hadoop_jar_step][:jar].should == 's3://elasticmapreduce/libs/script-runner/script-runner.jar'
21
+ step[:hadoop_jar_step][:args].should start_with([
22
+ 's3://elasticmapreduce/libs/hive/hive-script',
23
+ '--run-hive-script',
24
+ '--args',
25
+ '-f',
26
+ 'script.hql'
27
+ ])
28
+ end
29
+
30
+ context 'when variables are provided' do
31
+ let(:hs_with_variables) do
32
+ Elasticity::HiveStep.new('script.pig').tap do |hs|
33
+ hs.variables = {
34
+ 'VAR1' => 'VALUE1',
35
+ 'VAR2' => 'VALUE2'
36
+ }
37
+ end
38
+ end
39
+
40
+ it 'should convert to aws step format' do
41
+ step = hs_with_variables.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
42
+ step[:hadoop_jar_step][:args][5..9].should == %w(-d VAR1=VALUE1 -d VAR2=VALUE2)
43
+ end
44
+ end
45
+
46
+ end
47
+
48
+ describe '.requires_installation?' do
49
+ it 'should require installation' do
50
+ Elasticity::HiveStep.requires_installation?.should be_true
51
+ end
52
+ end
53
+
54
+ describe '.aws_installation_step' do
55
+
56
+ it 'should provide a means to install Hive' do
57
+ Elasticity::HiveStep.aws_installation_step.should == {
58
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
59
+ :hadoop_jar_step => {
60
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
61
+ :args => [
62
+ 's3://elasticmapreduce/libs/hive/hive-script',
63
+ '--base-path',
64
+ 's3://elasticmapreduce/libs/hive/',
65
+ '--install-hive'
66
+ ],
67
+ },
68
+ :name => 'Elasticity - Install Hive'
69
+ }
70
+ end
71
+
72
+ end
73
+
74
+ end
@@ -0,0 +1,197 @@
1
+ describe 'Elasticity::JobFlow Integration Examples' do
2
+
3
+ let(:emr) { double('Elasticity::EMR') }
4
+
5
+ before do
6
+ Elasticity::EMR.should_receive(:new).with('access', 'secret').and_return(emr)
7
+ end
8
+
9
+ describe 'Hive' do
10
+
11
+ let(:hive_step) do
12
+ Elasticity::HiveStep.new('s3n://slif-hive/test.q').tap do |hs|
13
+ hs.variables = {'OUTPUT' => 's3n://slif-test/output'}
14
+ hs.action_on_failure = 'CONTINUE'
15
+ end
16
+ end
17
+
18
+ let(:hive_jobflow) do
19
+ Elasticity::JobFlow.new('access', 'secret').tap do |jf|
20
+ jf.log_uri = 's3n://slif-test/output/logs'
21
+ jf.add_step(hive_step)
22
+ end
23
+ end
24
+
25
+ it 'should launch the Hive job with the specified EMR credentials' do
26
+ emr.should_receive(:run_job_flow).with({
27
+ :name => 'Elasticity Job Flow',
28
+ :log_uri => 's3n://slif-test/output/logs',
29
+ :ami_version => 'latest',
30
+ :instances => {
31
+ :keep_job_flow_alive_when_no_steps => false,
32
+ :ec2_key_name => 'default',
33
+ :hadoop_version => '0.20.205',
34
+ :instance_count => 2,
35
+ :master_instance_type => 'm1.small',
36
+ :slave_instance_type => 'm1.small',
37
+ },
38
+ :steps => [
39
+ {
40
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
41
+ :hadoop_jar_step => {
42
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
43
+ :args => [
44
+ 's3://elasticmapreduce/libs/hive/hive-script',
45
+ '--base-path',
46
+ 's3://elasticmapreduce/libs/hive/',
47
+ '--install-hive'
48
+ ],
49
+ },
50
+ :name => 'Elasticity - Install Hive'
51
+ },
52
+ {
53
+ :action_on_failure => 'CONTINUE',
54
+ :hadoop_jar_step => {
55
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
56
+ :args => [
57
+ 's3://elasticmapreduce/libs/hive/hive-script',
58
+ '--run-hive-script',
59
+ '--args',
60
+ '-f', 's3n://slif-hive/test.q',
61
+ '-d', 'OUTPUT=s3n://slif-test/output'
62
+ ],
63
+ },
64
+ :name => 'Elasticity Hive Step (s3n://slif-hive/test.q)'
65
+ }
66
+ ]
67
+ }).and_return('HIVE_JOBFLOW_ID')
68
+
69
+ hive_jobflow.run.should == 'HIVE_JOBFLOW_ID'
70
+ end
71
+
72
+ end
73
+
74
+ describe 'Pig' do
75
+
76
+ let(:pig_step) do
77
+ Elasticity::PigStep.new('s3n://slif-pig-test/test.pig').tap do |ps|
78
+ ps.variables = {'OUTPUT' => 's3n://slif-pig-test/output', 'XREFS' => 's3n://slif-pig-test/xrefs'}
79
+ ps.action_on_failure = 'CONTINUE'
80
+ end
81
+ end
82
+
83
+ let(:pig_jobflow) do
84
+ Elasticity::JobFlow.new('access', 'secret').tap do |jf|
85
+ jf.instance_count = 8
86
+ jf.slave_instance_type = 'm1.xlarge'
87
+ jf.log_uri = 's3n://slif-test/output/logs'
88
+ jf.add_step(pig_step)
89
+ end
90
+ end
91
+
92
+ it 'should launch the Pig job with the specified EMR credentials' do
93
+ emr.should_receive(:run_job_flow).with({
94
+ :name => 'Elasticity Job Flow',
95
+ :log_uri => 's3n://slif-test/output/logs',
96
+ :ami_version => 'latest',
97
+ :instances => {
98
+ :keep_job_flow_alive_when_no_steps => false,
99
+ :ec2_key_name => 'default',
100
+ :hadoop_version => '0.20.205',
101
+ :instance_count => 8,
102
+ :master_instance_type => 'm1.small',
103
+ :slave_instance_type => 'm1.xlarge',
104
+ },
105
+ :steps => [
106
+ {
107
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
108
+ :hadoop_jar_step => {
109
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
110
+ :args => [
111
+ 's3://elasticmapreduce/libs/pig/pig-script',
112
+ '--base-path',
113
+ 's3://elasticmapreduce/libs/pig/',
114
+ '--install-pig'
115
+ ],
116
+ },
117
+ :name => 'Elasticity - Install Pig'
118
+ },
119
+ {
120
+ :action_on_failure => 'CONTINUE',
121
+ :hadoop_jar_step => {
122
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
123
+ :args => [
124
+ 's3://elasticmapreduce/libs/pig/pig-script',
125
+ '--run-pig-script',
126
+ '--args',
127
+ '-p', 'OUTPUT=s3n://slif-pig-test/output',
128
+ '-p', 'XREFS=s3n://slif-pig-test/xrefs',
129
+ '-p', 'E_PARALLELS=26',
130
+ 's3n://slif-pig-test/test.pig'
131
+ ],
132
+ },
133
+ :name => 'Elasticity Pig Step (s3n://slif-pig-test/test.pig)'
134
+ }
135
+ ]
136
+ }).and_return('PIG_JOBFLOW_ID')
137
+
138
+ pig_jobflow.run.should == 'PIG_JOBFLOW_ID'
139
+ end
140
+
141
+ end
142
+
143
+ describe 'Custom Jar' do
144
+
145
+ let(:custom_jar_step) do
146
+ Elasticity::CustomJarStep.new('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar').tap do |cj|
147
+ cj.arguments = [
148
+ 's3n://elasticmapreduce/samples/cloudburst/input/s_suis.br',
149
+ 's3n://elasticmapreduce/samples/cloudburst/input/100k.br',
150
+ 's3n://slif_hadoop_test/cloudburst/output/2011-12-09',
151
+ ]
152
+ cj.action_on_failure = 'TERMINATE_JOB_FLOW'
153
+ end
154
+ end
155
+
156
+ let(:custom_jar_jobflow) do
157
+ Elasticity::JobFlow.new('access', 'secret').tap do |jf|
158
+ jf.log_uri = 's3n://slif-test/output/logs'
159
+ jf.add_step(custom_jar_step)
160
+ end
161
+ end
162
+
163
+ it 'should launch the Custom Jar job with the specified EMR credentials' do
164
+ emr.should_receive(:run_job_flow).with({
165
+ :name => 'Elasticity Job Flow',
166
+ :log_uri => 's3n://slif-test/output/logs',
167
+ :ami_version => 'latest',
168
+ :instances => {
169
+ :keep_job_flow_alive_when_no_steps => false,
170
+ :ec2_key_name => 'default',
171
+ :hadoop_version => '0.20.205',
172
+ :instance_count => 2,
173
+ :master_instance_type => 'm1.small',
174
+ :slave_instance_type => 'm1.small',
175
+ },
176
+ :steps => [
177
+ {
178
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
179
+ :hadoop_jar_step => {
180
+ :jar => 's3n://elasticmapreduce/samples/cloudburst/cloudburst.jar',
181
+ :args => [
182
+ 's3n://elasticmapreduce/samples/cloudburst/input/s_suis.br',
183
+ 's3n://elasticmapreduce/samples/cloudburst/input/100k.br',
184
+ 's3n://slif_hadoop_test/cloudburst/output/2011-12-09',
185
+ ],
186
+ },
187
+ :name => 'Elasticity Custom Jar Step (s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar)'
188
+ }
189
+ ]
190
+ }).and_return('CUSTOM_JAR_JOBFLOW_ID')
191
+
192
+ custom_jar_jobflow.run.should == 'CUSTOM_JAR_JOBFLOW_ID'
193
+ end
194
+
195
+ end
196
+
197
+ end
@@ -1,149 +1,380 @@
1
- require 'spec_helper'
2
-
3
1
  describe Elasticity::JobFlow do
4
2
 
5
3
  before do
6
- describe_jobflows_xml = <<-JOBFLOWS
7
- <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
8
- <DescribeJobFlowsResult>
9
- <JobFlows>
10
- <member>
11
- <JobFlowId>j-p</JobFlowId>
12
- <Name>Pig Job</Name>
13
- <ExecutionStatusDetail>
14
- <CreationDateTime>
15
- 2011-10-04T21:49:16Z
16
- </CreationDateTime>
17
- <LastStateChangeReason>
18
- Steps completed with errors
19
- </LastStateChangeReason>
20
- <StartDateTime>
21
- 2011-10-04T21:49:17Z
22
- </StartDateTime>
23
- <ReadyDateTime>
24
- 2011-10-04T21:49:18Z
25
- </ReadyDateTime>
26
- <State>TERMINATED</State>
27
- </ExecutionStatusDetail>
28
- <Steps>
29
- <member>
30
- <StepConfig>
31
- <Name>Setup Hive</Name>
32
- </StepConfig>
33
- <ExecutionStatusDetail>
34
- <State>FAILED</State>
35
- </ExecutionStatusDetail>
36
- </member>
37
- <member>
38
- <StepConfig>
39
- <Name>Run Hive Script</Name>
40
- </StepConfig>
41
- <ExecutionStatusDetail>
42
- <State>PENDING</State>
43
- </ExecutionStatusDetail>
44
- </member>
45
- </Steps>
46
- <Instances>
47
- <Placement>
48
- <AvailabilityZone>
49
- eu-west-1a
50
- </AvailabilityZone>
51
- </Placement>
52
- <SlaveInstanceType>
53
- m1.small
54
- </SlaveInstanceType>
55
- <MasterInstanceType>
56
- m1.small
57
- </MasterInstanceType>
58
- <Ec2KeyName>
59
- myec2keyname
60
- </Ec2KeyName>
61
- <InstanceCount>
62
- 4
63
- </InstanceCount>
64
- </Instances>
65
- </member>
66
- <member>
67
- <JobFlowId>j-h</JobFlowId>
68
- <Name>Hive Job</Name>
69
- <ExecutionStatusDetail>
70
- <CreationDateTime>
71
- 2011-10-04T22:49:16Z
72
- </CreationDateTime>
73
- <StartDateTime>
74
-
75
- </StartDateTime>
76
- <ReadyDateTime>
77
-
78
- </ReadyDateTime>
79
- <State>
80
- TERMINATED
81
- </State>
82
- <LastStateChangeReason>
83
- Steps completed
84
- </LastStateChangeReason>
85
- </ExecutionStatusDetail>
86
- <Instances>
87
- <Placement>
88
- <AvailabilityZone>
89
- eu-west-1b
90
- </AvailabilityZone>
91
- </Placement>
92
- <SlaveInstanceType>
93
- c1.medium
94
- </SlaveInstanceType>
95
- <MasterInstanceType>
96
- c1.medium
97
- </MasterInstanceType>
98
- <Ec2KeyName>
99
- myec2keyname
100
- </Ec2KeyName>
101
- <InstanceCount>
102
- 2
103
- </InstanceCount>
104
- </Instances>
105
- </member>
106
- </JobFlows>
107
- </DescribeJobFlowsResult>
108
- </DescribeJobFlowsResponse>
109
- JOBFLOWS
110
- describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
111
- describe_jobflows_document.remove_namespaces!
112
- @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member')
4
+ # Ensure we don't accidentally submit to EMR for all of these examples
5
+ Elasticity::EMR.stub(:new).and_return(double('Elasticity::EMR', :run_job_flow => '_'))
6
+ end
7
+
8
+ subject do
9
+ Elasticity::JobFlow.new('access', 'secret')
10
+ end
11
+
12
+ its(:action_on_failure) { should == 'TERMINATE_JOB_FLOW' }
13
+ its(:ec2_key_name) { should == 'default' }
14
+ its(:hadoop_version) { should == '0.20.205' }
15
+ its(:instance_count) { should == 2 }
16
+ its(:log_uri) { should == nil }
17
+ its(:master_instance_type) { should == 'm1.small' }
18
+ its(:name) { should == 'Elasticity Job Flow' }
19
+ its(:slave_instance_type) { should == 'm1.small' }
20
+ its(:ami_version) { should == 'latest' }
21
+ its(:keep_job_flow_alive_when_no_steps) { should == false }
22
+
23
+ describe '#instance_count=' do
24
+
25
+ context 'when set to more than 1' do
26
+ it 'should set the number of instances' do
27
+ subject.instance_count = 10
28
+ subject.instance_count.should == 10
29
+ end
30
+ end
31
+
32
+ context 'when set to less than 2' do
33
+ it 'should be an error' do
34
+ expect {
35
+ subject.instance_count = 1
36
+ }.to raise_error(ArgumentError, 'Instance count cannot be set to less than 2 (requested 1)')
37
+ end
38
+ end
39
+
40
+ end
41
+
42
+ describe '#add_bootstrap_action' do
43
+
44
+ context 'when the jobflow is not yet started' do
45
+ it 'should not raise an error' do
46
+ expect {
47
+ subject.add_bootstrap_action(nil)
48
+ }.to_not raise_error
49
+ end
50
+ end
51
+
52
+ context 'when the jobflow is already started' do
53
+ before do
54
+ subject.add_step(Elasticity::CustomJarStep.new('_'))
55
+ subject.run
56
+ end
57
+ it 'should raise an error' do
58
+ expect {
59
+ subject.add_bootstrap_action(nil)
60
+ }.to raise_error(Elasticity::JobFlowRunningError, 'To modify bootstrap actions, please create a new job flow.')
61
+ end
62
+ end
63
+
64
+ end
65
+
66
+ describe '#add_step' do
67
+
68
+ context 'when the jobflow is already running' do
69
+
70
+ let(:emr) { double('Elasticity::EMR', :run_job_flow => 'RUNNING_JOBFLOW_ID') }
71
+
72
+ let(:running_jobflow) do
73
+ Elasticity::JobFlow.new('access', 'secret').tap do |jf|
74
+ jf.add_step(Elasticity::PigStep.new('_'))
75
+ end
76
+ end
77
+
78
+ before do
79
+ Elasticity::EMR.should_receive(:new).with('access', 'secret').and_return(emr)
80
+ running_jobflow.run
81
+ end
82
+
83
+ context 'when the step requires installation' do
84
+
85
+ context 'when the installation has already happened' do
86
+ let(:additional_step) { Elasticity::PigStep.new('_') }
87
+
88
+ it 'should submit the step' do
89
+ emr.should_receive(:add_jobflow_steps).with('RUNNING_JOBFLOW_ID', {
90
+ :steps => [additional_step.to_aws_step(running_jobflow)]
91
+ })
92
+ running_jobflow.add_step(additional_step)
93
+ end
94
+ end
95
+
96
+ context 'when the installation has not yet happened' do
97
+ let(:additional_step) { Elasticity::HiveStep.new('_') }
98
+
99
+ it 'should submit the installation step and the step' do
100
+ emr.should_receive(:add_jobflow_steps).with('RUNNING_JOBFLOW_ID', {
101
+ :steps => [
102
+ Elasticity::HiveStep.aws_installation_step,
103
+ additional_step.to_aws_step(running_jobflow)
104
+ ]
105
+ })
106
+ running_jobflow.add_step(additional_step)
107
+ end
108
+ end
109
+
110
+ end
111
+
112
+ context 'when the step does not require installation' do
113
+
114
+ let(:additional_step) { Elasticity::CustomJarStep.new('jar') }
115
+
116
+ it 'should submit the step' do
117
+ emr.should_receive(:add_jobflow_steps).with('RUNNING_JOBFLOW_ID', {
118
+ :steps => [additional_step.to_aws_step(running_jobflow)]
119
+ })
120
+ running_jobflow.add_step(additional_step)
121
+ end
122
+
123
+ end
124
+
125
+ end
126
+
127
+ context 'when the jobflow is not yet running' do
128
+ # This behaviour is tested in #jobflow_config
129
+ end
130
+ end
131
+
132
+ describe '#jobflow_config' do
133
+
134
+ it 'should incorporate the job flow preamble' do
135
+ subject.stub(:jobflow_preamble).and_return({:preamble => 'PREAMBLE'})
136
+ subject.send(:jobflow_config).should be_a_hash_including({:preamble => 'PREAMBLE'})
137
+ end
138
+
139
+ describe 'steps' do
140
+
141
+ let(:jobflow_steps) { [Elasticity::HiveStep.new('script.hql'), Elasticity::PigStep.new('script.pig'), Elasticity::CustomJarStep.new('script.jar')] }
142
+ let(:jobflow_with_steps) do
143
+ Elasticity::JobFlow.new('_', '_').tap do |jobflow|
144
+ jobflow_steps.each { |s| jobflow.add_step(s) }
145
+ end
146
+ end
147
+ let(:aws_steps) do
148
+ [
149
+ Elasticity::HiveStep.aws_installation_step,
150
+ jobflow_steps[0].to_aws_step(jobflow_with_steps),
151
+ Elasticity::PigStep.aws_installation_step,
152
+ jobflow_steps[1].to_aws_step(jobflow_with_steps),
153
+ jobflow_steps[2].to_aws_step(jobflow_with_steps),
154
+ ]
155
+ end
156
+
157
+ it 'should incorporate the installation and run steps into the jobflow config' do
158
+ jobflow_with_steps.send(:jobflow_config).should be_a_hash_including({:steps => aws_steps})
159
+ end
160
+
161
+ context 'when there are more than one installable step of the same type' do
162
+ before do
163
+ jobflow_steps << Elasticity::HiveStep.new('script.hql')
164
+ aws_steps << jobflow_steps.last.to_aws_step(jobflow_with_steps)
165
+ end
166
+ it 'should not include the installation step more than once' do
167
+ jobflow_with_steps.send(:jobflow_config).should be_a_hash_including({:steps => aws_steps})
168
+ end
169
+ end
170
+
171
+ end
172
+
173
+ describe 'log URI' do
174
+
175
+ context 'when a log URI is specified' do
176
+ let(:jobflow_with_log_uri) do
177
+ Elasticity::JobFlow.new('_', '_').tap do |jf|
178
+ jf.log_uri = 'LOG_URI'
179
+ end
180
+ end
181
+ it 'should incorporate it into the jobflow config' do
182
+ jobflow_with_log_uri.send(:jobflow_config).should be_a_hash_including({:log_uri => 'LOG_URI'})
183
+ end
184
+ end
185
+
186
+ context 'when a log URI is not specified' do
187
+ let(:jobflow_with_no_log_uri) do
188
+ Elasticity::JobFlow.new('_', '_').tap do |jf|
189
+ jf.log_uri = nil
190
+ end
191
+ end
192
+ it 'should not make space for it in the jobflow config' do
193
+ jobflow_with_no_log_uri.send(:jobflow_config).should_not have_key(:log_uri)
194
+ end
195
+ end
196
+
197
+ end
198
+
199
+ describe 'bootstrap actions' do
200
+
201
+ context 'when bootstrap actions are specified' do
202
+ let(:hadoop_bootstrap_actions) do
203
+ [
204
+ Elasticity::HadoopBootstrapAction.new('OPTION1', 'VALUE1'),
205
+ Elasticity::HadoopBootstrapAction.new('OPTION1', 'VALUE2'),
206
+ Elasticity::HadoopBootstrapAction.new('OPTION2', 'VALUE3')
207
+ ]
208
+ end
209
+ let(:jobflow_with_bootstrap_actions) do
210
+ Elasticity::JobFlow.new('_', '_').tap do |jf|
211
+ hadoop_bootstrap_actions.each do |action|
212
+ jf.add_bootstrap_action(action)
213
+ end
214
+ end
215
+ end
216
+ it 'should include them in the jobflow config' do
217
+ bootstrap_actions = hadoop_bootstrap_actions.map { |a| a.to_aws_bootstrap_action }
218
+ jobflow_with_bootstrap_actions.send(:jobflow_config).should be_a_hash_including({
219
+ :bootstrap_actions => bootstrap_actions
220
+ })
221
+ end
222
+ end
223
+
224
+ context 'when bootstrap actions are not specified' do
225
+ it 'should not make space for them in the jobflow config' do
226
+ subject.send(:jobflow_config).should_not have_key(:bootstrap_actions)
227
+ end
228
+ end
229
+
230
+ end
231
+
113
232
  end
114
233
 
115
- describe ".from_xml" do
116
- it "should return a JobFlow with the appropriate fields initialized" do
117
- jobflow = Elasticity::JobFlow.from_member_element(@members_nodeset[0])
118
- jobflow.name.should == "Pig Job"
119
- jobflow.jobflow_id.should == "j-p"
120
- jobflow.state.should == "TERMINATED"
121
- jobflow.steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
122
- jobflow.steps.map(&:state).should == ["FAILED", "PENDING"]
123
- jobflow.created_at.should == Time.parse("2011-10-04T21:49:16Z")
124
- jobflow.started_at.should == Time.parse("2011-10-04T21:49:17Z")
125
- jobflow.ready_at.should == Time.parse("2011-10-04T21:49:18Z")
126
- jobflow.master_instance_type.should == "m1.small"
127
- jobflow.slave_instance_type.should == "m1.small"
128
- jobflow.instance_count.should == "4"
129
- jobflow.last_state_change_reason.should == "Steps completed with errors"
234
+ describe '#jobflow_preamble' do
235
+
236
+ let(:basic_preamble) do
237
+ {
238
+ :name => 'Elasticity Job Flow',
239
+ :ami_version => 'latest',
240
+ :instances => {
241
+ :keep_job_flow_alive_when_no_steps => false,
242
+ :ec2_key_name => 'default',
243
+ :hadoop_version => '0.20.205',
244
+ :instance_count => 2,
245
+ :master_instance_type => 'm1.small',
246
+ :slave_instance_type => 'm1.small',
247
+ }
248
+ }
130
249
  end
250
+
251
+ it 'should create a jobflow configuration section' do
252
+ subject.send(:jobflow_preamble).should == basic_preamble
253
+ end
254
+
255
+ context 'when a VPC subnet ID is specified' do
256
+ it 'should include it in the preamble' do
257
+ subject.ec2_subnet_id = 'subnet-118b9d79'
258
+ subject.send(:jobflow_preamble).should be_a_hash_including({:ec2_subnet_id => 'subnet-118b9d79'})
259
+ end
260
+ end
261
+
131
262
  end
132
263
 
133
- describe ".from_jobflows_nodeset" do
134
- it "should return JobFlows with the appropriate fields initialized" do
135
- jobflow = Elasticity::JobFlow.from_members_nodeset(@members_nodeset)
136
- jobflow.map(&:name).should == ["Pig Job", "Hive Job"]
137
- jobflow.map(&:jobflow_id).should == ["j-p", "j-h"]
138
- jobflow.map(&:state).should == ["TERMINATED", "TERMINATED"]
139
- jobflow.map(&:created_at).should == [Time.parse("2011-10-04T21:49:16Z"), Time.parse("2011-10-04T22:49:16Z")]
140
- jobflow.map(&:started_at).should == [Time.parse("2011-10-04T21:49:17Z"), nil]
141
- jobflow.map(&:ready_at).should == [Time.parse("2011-10-04T21:49:18Z"), nil]
142
- jobflow.map(&:master_instance_type).should == ["m1.small","c1.medium"]
143
- jobflow.map(&:slave_instance_type).should == ["m1.small", "c1.medium"]
144
- jobflow.map(&:instance_count).should == ["4","2"]
145
- jobflow.map(&:last_state_change_reason).should == ["Steps completed with errors", "Steps completed"]
264
+ describe '#run' do
265
+
266
+ context 'when there are steps added' do
267
+ let(:jobflow_with_steps) do
268
+ Elasticity::JobFlow.new('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').tap do |jf|
269
+ jf.add_step(Elasticity::CustomJarStep.new('_'))
270
+ end
271
+ end
272
+
273
+ context 'when the jobflow has not yet been run' do
274
+ let(:emr) { double('Elasticity::EMR', :run_job_flow => 'JOBFLOW_ID') }
275
+
276
+ it 'should run the job with the supplied EMR credentials' do
277
+ Elasticity::EMR.should_receive(:new).with('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').and_return(emr)
278
+ emr.should_receive(:run_job_flow)
279
+ jobflow_with_steps.run
280
+ end
281
+
282
+ it 'should run the job with the jobflow config' do
283
+ Elasticity::EMR.stub(:new).with('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').and_return(emr)
284
+ jobflow_with_steps.stub(:jobflow_config).and_return('JOBFLOW_CONFIG')
285
+ emr.should_receive(:run_job_flow).with('JOBFLOW_CONFIG')
286
+ jobflow_with_steps.run
287
+ end
288
+
289
+ it 'should return the jobflow ID' do
290
+ Elasticity::EMR.stub(:new).with('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').and_return(emr)
291
+ jobflow_with_steps.run.should == 'JOBFLOW_ID'
292
+ end
293
+
294
+ end
295
+
296
+ context 'when the jobflow has already been run' do
297
+ before do
298
+ jobflow_with_steps.run
299
+ end
300
+ it 'should raise an error' do
301
+ expect {
302
+ jobflow_with_steps.run
303
+ }.to raise_error(Elasticity::JobFlowRunningError, 'Cannot run a job flow multiple times. To do more with this job flow, please use #add_step.')
304
+ end
305
+ end
306
+
307
+ end
308
+
309
+ context 'when there are no steps added' do
310
+ let(:jobflow_with_no_steps) { Elasticity::JobFlow.new('_', '_') }
311
+ it 'should raise an error' do
312
+ expect {
313
+ jobflow_with_no_steps.run
314
+ }.to raise_error(Elasticity::JobFlowMissingStepsError, 'Cannot run a job flow without adding steps. Please use #add_step.')
315
+ end
316
+ end
317
+
318
+ end
319
+
320
+ describe '#status' do
321
+
322
+ context 'before the jobflow has been run' do
323
+ it 'should raise an error' do
324
+ expect {
325
+ subject.status
326
+ }.to raise_error(Elasticity::JobFlowNotStartedError, 'Please #run this job flow before attempting to retrieve status.')
327
+ end
328
+ end
329
+
330
+ context 'after the jobflow has been run' do
331
+ let(:emr) { double('Elasticity::EMR', :run_job_flow => 'JOBFLOW_ID') }
332
+ let(:running_jobflow) { Elasticity::JobFlow.new('_', '_') }
333
+ let(:jobflow_status) do
334
+ Elasticity::JobFlowStatus.new.tap do |js|
335
+ js.stub(:state => 'TERMINATED')
336
+ end
337
+ end
338
+ before do
339
+ Elasticity::EMR.stub(:new).and_return(emr)
340
+ running_jobflow.add_step(Elasticity::CustomJarStep.new('_'))
341
+ running_jobflow.run
342
+ end
343
+ it 'should return the AWS status' do
344
+ emr.should_receive(:describe_jobflow).with('JOBFLOW_ID').and_return(jobflow_status)
345
+ status = running_jobflow.status
346
+ status.should be_a(Elasticity::JobFlowStatus)
347
+ status.state.should == 'TERMINATED'
348
+ end
146
349
  end
350
+
351
+ end
352
+
353
+ describe '#shutdown' do
354
+
355
+ context 'when the jobflow has not yet been started' do
356
+ let(:unstarted_job_flow) { Elasticity::JobFlow.new('_', '_')}
357
+ it 'should be an error' do
358
+ expect {
359
+ unstarted_job_flow.shutdown
360
+ }.to raise_error(Elasticity::JobFlowNotStartedError, 'Cannot #shutdown a job flow that has not yet been #run.')
361
+ end
362
+ end
363
+
364
+ context 'when the jobflow has been started' do
365
+ let(:emr) { double('Elasticity::EMR', :run_job_flow => 'JOBFLOW_ID') }
366
+ let(:running_jobflow) { Elasticity::JobFlow.new('_', '_') }
367
+ before do
368
+ Elasticity::EMR.stub(:new).and_return(emr)
369
+ running_jobflow.add_step(Elasticity::CustomJarStep.new('_'))
370
+ running_jobflow.run
371
+ end
372
+ it 'should shutdown the running jobflow' do
373
+ emr.should_receive(:terminate_jobflows).with('JOBFLOW_ID')
374
+ running_jobflow.shutdown
375
+ end
376
+ end
377
+
147
378
  end
148
379
 
149
- end
380
+ end