elasticity 1.5 → 2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57)
  1. data/.rspec +2 -1
  2. data/.rvmrc +1 -1
  3. data/HISTORY.md +47 -24
  4. data/LICENSE +1 -1
  5. data/README.md +165 -317
  6. data/Rakefile +4 -3
  7. data/elasticity.gemspec +3 -5
  8. data/lib/elasticity.rb +10 -5
  9. data/lib/elasticity/aws_request.rb +81 -20
  10. data/lib/elasticity/custom_jar_step.rb +33 -0
  11. data/lib/elasticity/emr.rb +45 -117
  12. data/lib/elasticity/hadoop_bootstrap_action.rb +27 -0
  13. data/lib/elasticity/hive_step.rb +57 -0
  14. data/lib/elasticity/job_flow.rb +109 -39
  15. data/lib/elasticity/job_flow_status.rb +53 -0
  16. data/lib/elasticity/job_flow_status_step.rb +35 -0
  17. data/lib/elasticity/job_flow_step.rb +17 -25
  18. data/lib/elasticity/pig_step.rb +82 -0
  19. data/lib/elasticity/support/conditional_raise.rb +23 -0
  20. data/lib/elasticity/version.rb +1 -1
  21. data/spec/lib/elasticity/aws_request_spec.rb +159 -51
  22. data/spec/lib/elasticity/custom_jar_step_spec.rb +59 -0
  23. data/spec/lib/elasticity/emr_spec.rb +231 -762
  24. data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +26 -0
  25. data/spec/lib/elasticity/hive_step_spec.rb +74 -0
  26. data/spec/lib/elasticity/job_flow_integration_spec.rb +197 -0
  27. data/spec/lib/elasticity/job_flow_spec.rb +369 -138
  28. data/spec/lib/elasticity/job_flow_status_spec.rb +147 -0
  29. data/spec/lib/elasticity/job_flow_status_step_spec.rb +73 -0
  30. data/spec/lib/elasticity/job_flow_step_spec.rb +26 -64
  31. data/spec/lib/elasticity/pig_step_spec.rb +104 -0
  32. data/spec/lib/elasticity/support/conditional_raise_spec.rb +35 -0
  33. data/spec/spec_helper.rb +1 -50
  34. data/spec/support/be_a_hash_including_matcher.rb +35 -0
  35. metadata +101 -119
  36. data/.autotest +0 -2
  37. data/lib/elasticity/custom_jar_job.rb +0 -38
  38. data/lib/elasticity/hive_job.rb +0 -69
  39. data/lib/elasticity/pig_job.rb +0 -109
  40. data/lib/elasticity/simple_job.rb +0 -51
  41. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +0 -44
  42. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +0 -41
  43. data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +0 -266
  44. data/spec/fixtures/vcr_cassettes/custom_jar_job/cloudburst.yml +0 -41
  45. data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +0 -75
  46. data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +0 -38
  47. data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +0 -41
  48. data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +0 -38
  49. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +0 -41
  50. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +0 -41
  51. data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +0 -41
  52. data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +0 -41
  53. data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +0 -38
  54. data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +0 -38
  55. data/spec/lib/elasticity/custom_jar_job_spec.rb +0 -118
  56. data/spec/lib/elasticity/hive_job_spec.rb +0 -90
  57. data/spec/lib/elasticity/pig_job_spec.rb +0 -226
@@ -0,0 +1,26 @@
1
+ describe Elasticity::HadoopBootstrapAction do
2
+
3
+ subject do
4
+ Elasticity::HadoopBootstrapAction.new('option', 'value')
5
+ end
6
+
7
+ its(:name) { should == 'Elasticity Bootstrap Action (Configure Hadoop)' }
8
+ its(:option) { should == 'option' }
9
+ its(:value) { should == 'value' }
10
+
11
+ describe '#to_aws_bootstrap_action' do
12
+
13
+ it 'should create a bootstrap action' do
14
+ subject.to_aws_bootstrap_action.should ==
15
+ {
16
+ :name => 'Elasticity Bootstrap Action (Configure Hadoop)',
17
+ :script_bootstrap_action => {
18
+ :path => 's3n://elasticmapreduce/bootstrap-actions/configure-hadoop',
19
+ :args => ['option', 'value']
20
+ }
21
+ }
22
+ end
23
+
24
+ end
25
+
26
+ end
@@ -0,0 +1,74 @@
1
+ describe Elasticity::HiveStep do
2
+
3
+ subject do
4
+ Elasticity::HiveStep.new('script.hql')
5
+ end
6
+
7
+ it { should be_a Elasticity::JobFlowStep }
8
+
9
+ its(:name) { should == 'Elasticity Hive Step (script.hql)' }
10
+ its(:script) { should == 'script.hql' }
11
+ its(:variables) { should == { } }
12
+ its(:action_on_failure) { should == 'TERMINATE_JOB_FLOW' }
13
+
14
+ describe '#to_aws_step' do
15
+
16
+ it 'should convert to aws step format' do
17
+ step = subject.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
18
+ step[:name].should == 'Elasticity Hive Step (script.hql)'
19
+ step[:action_on_failure].should == 'TERMINATE_JOB_FLOW'
20
+ step[:hadoop_jar_step][:jar].should == 's3://elasticmapreduce/libs/script-runner/script-runner.jar'
21
+ step[:hadoop_jar_step][:args].should start_with([
22
+ 's3://elasticmapreduce/libs/hive/hive-script',
23
+ '--run-hive-script',
24
+ '--args',
25
+ '-f',
26
+ 'script.hql'
27
+ ])
28
+ end
29
+
30
+ context 'when variables are provided' do
31
+ let(:hs_with_variables) do
32
+ Elasticity::HiveStep.new('script.pig').tap do |hs|
33
+ hs.variables = {
34
+ 'VAR1' => 'VALUE1',
35
+ 'VAR2' => 'VALUE2'
36
+ }
37
+ end
38
+ end
39
+
40
+ it 'should convert to aws step format' do
41
+ step = hs_with_variables.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
42
+ step[:hadoop_jar_step][:args][5..9].should == %w(-d VAR1=VALUE1 -d VAR2=VALUE2)
43
+ end
44
+ end
45
+
46
+ end
47
+
48
+ describe '.requires_installation?' do
49
+ it 'should require installation' do
50
+ Elasticity::HiveStep.requires_installation?.should be_true
51
+ end
52
+ end
53
+
54
+ describe '.aws_installation_step' do
55
+
56
+ it 'should provide a means to install Hive' do
57
+ Elasticity::HiveStep.aws_installation_step.should == {
58
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
59
+ :hadoop_jar_step => {
60
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
61
+ :args => [
62
+ 's3://elasticmapreduce/libs/hive/hive-script',
63
+ '--base-path',
64
+ 's3://elasticmapreduce/libs/hive/',
65
+ '--install-hive'
66
+ ],
67
+ },
68
+ :name => 'Elasticity - Install Hive'
69
+ }
70
+ end
71
+
72
+ end
73
+
74
+ end
@@ -0,0 +1,197 @@
1
+ describe 'Elasticity::JobFlow Integration Examples' do
2
+
3
+ let(:emr) { double('Elasticity::EMR') }
4
+
5
+ before do
6
+ Elasticity::EMR.should_receive(:new).with('access', 'secret').and_return(emr)
7
+ end
8
+
9
+ describe 'Hive' do
10
+
11
+ let(:hive_step) do
12
+ Elasticity::HiveStep.new('s3n://slif-hive/test.q').tap do |hs|
13
+ hs.variables = {'OUTPUT' => 's3n://slif-test/output'}
14
+ hs.action_on_failure = 'CONTINUE'
15
+ end
16
+ end
17
+
18
+ let(:hive_jobflow) do
19
+ Elasticity::JobFlow.new('access', 'secret').tap do |jf|
20
+ jf.log_uri = 's3n://slif-test/output/logs'
21
+ jf.add_step(hive_step)
22
+ end
23
+ end
24
+
25
+ it 'should launch the Hive job with the specified EMR credentials' do
26
+ emr.should_receive(:run_job_flow).with({
27
+ :name => 'Elasticity Job Flow',
28
+ :log_uri => 's3n://slif-test/output/logs',
29
+ :ami_version => 'latest',
30
+ :instances => {
31
+ :keep_job_flow_alive_when_no_steps => false,
32
+ :ec2_key_name => 'default',
33
+ :hadoop_version => '0.20.205',
34
+ :instance_count => 2,
35
+ :master_instance_type => 'm1.small',
36
+ :slave_instance_type => 'm1.small',
37
+ },
38
+ :steps => [
39
+ {
40
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
41
+ :hadoop_jar_step => {
42
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
43
+ :args => [
44
+ 's3://elasticmapreduce/libs/hive/hive-script',
45
+ '--base-path',
46
+ 's3://elasticmapreduce/libs/hive/',
47
+ '--install-hive'
48
+ ],
49
+ },
50
+ :name => 'Elasticity - Install Hive'
51
+ },
52
+ {
53
+ :action_on_failure => 'CONTINUE',
54
+ :hadoop_jar_step => {
55
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
56
+ :args => [
57
+ 's3://elasticmapreduce/libs/hive/hive-script',
58
+ '--run-hive-script',
59
+ '--args',
60
+ '-f', 's3n://slif-hive/test.q',
61
+ '-d', 'OUTPUT=s3n://slif-test/output'
62
+ ],
63
+ },
64
+ :name => 'Elasticity Hive Step (s3n://slif-hive/test.q)'
65
+ }
66
+ ]
67
+ }).and_return('HIVE_JOBFLOW_ID')
68
+
69
+ hive_jobflow.run.should == 'HIVE_JOBFLOW_ID'
70
+ end
71
+
72
+ end
73
+
74
+ describe 'Pig' do
75
+
76
+ let(:pig_step) do
77
+ Elasticity::PigStep.new('s3n://slif-pig-test/test.pig').tap do |ps|
78
+ ps.variables = {'OUTPUT' => 's3n://slif-pig-test/output', 'XREFS' => 's3n://slif-pig-test/xrefs'}
79
+ ps.action_on_failure = 'CONTINUE'
80
+ end
81
+ end
82
+
83
+ let(:pig_jobflow) do
84
+ Elasticity::JobFlow.new('access', 'secret').tap do |jf|
85
+ jf.instance_count = 8
86
+ jf.slave_instance_type = 'm1.xlarge'
87
+ jf.log_uri = 's3n://slif-test/output/logs'
88
+ jf.add_step(pig_step)
89
+ end
90
+ end
91
+
92
+ it 'should launch the Pig job with the specified EMR credentials' do
93
+ emr.should_receive(:run_job_flow).with({
94
+ :name => 'Elasticity Job Flow',
95
+ :log_uri => 's3n://slif-test/output/logs',
96
+ :ami_version => 'latest',
97
+ :instances => {
98
+ :keep_job_flow_alive_when_no_steps => false,
99
+ :ec2_key_name => 'default',
100
+ :hadoop_version => '0.20.205',
101
+ :instance_count => 8,
102
+ :master_instance_type => 'm1.small',
103
+ :slave_instance_type => 'm1.xlarge',
104
+ },
105
+ :steps => [
106
+ {
107
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
108
+ :hadoop_jar_step => {
109
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
110
+ :args => [
111
+ 's3://elasticmapreduce/libs/pig/pig-script',
112
+ '--base-path',
113
+ 's3://elasticmapreduce/libs/pig/',
114
+ '--install-pig'
115
+ ],
116
+ },
117
+ :name => 'Elasticity - Install Pig'
118
+ },
119
+ {
120
+ :action_on_failure => 'CONTINUE',
121
+ :hadoop_jar_step => {
122
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
123
+ :args => [
124
+ 's3://elasticmapreduce/libs/pig/pig-script',
125
+ '--run-pig-script',
126
+ '--args',
127
+ '-p', 'OUTPUT=s3n://slif-pig-test/output',
128
+ '-p', 'XREFS=s3n://slif-pig-test/xrefs',
129
+ '-p', 'E_PARALLELS=26',
130
+ 's3n://slif-pig-test/test.pig'
131
+ ],
132
+ },
133
+ :name => 'Elasticity Pig Step (s3n://slif-pig-test/test.pig)'
134
+ }
135
+ ]
136
+ }).and_return('PIG_JOBFLOW_ID')
137
+
138
+ pig_jobflow.run.should == 'PIG_JOBFLOW_ID'
139
+ end
140
+
141
+ end
142
+
143
+ describe 'Custom Jar' do
144
+
145
+ let(:custom_jar_step) do
146
+ Elasticity::CustomJarStep.new('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar').tap do |cj|
147
+ cj.arguments = [
148
+ 's3n://elasticmapreduce/samples/cloudburst/input/s_suis.br',
149
+ 's3n://elasticmapreduce/samples/cloudburst/input/100k.br',
150
+ 's3n://slif_hadoop_test/cloudburst/output/2011-12-09',
151
+ ]
152
+ cj.action_on_failure = 'TERMINATE_JOB_FLOW'
153
+ end
154
+ end
155
+
156
+ let(:custom_jar_jobflow) do
157
+ Elasticity::JobFlow.new('access', 'secret').tap do |jf|
158
+ jf.log_uri = 's3n://slif-test/output/logs'
159
+ jf.add_step(custom_jar_step)
160
+ end
161
+ end
162
+
163
+ it 'should launch the Custom Jar job with the specified EMR credentials' do
164
+ emr.should_receive(:run_job_flow).with({
165
+ :name => 'Elasticity Job Flow',
166
+ :log_uri => 's3n://slif-test/output/logs',
167
+ :ami_version => 'latest',
168
+ :instances => {
169
+ :keep_job_flow_alive_when_no_steps => false,
170
+ :ec2_key_name => 'default',
171
+ :hadoop_version => '0.20.205',
172
+ :instance_count => 2,
173
+ :master_instance_type => 'm1.small',
174
+ :slave_instance_type => 'm1.small',
175
+ },
176
+ :steps => [
177
+ {
178
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
179
+ :hadoop_jar_step => {
180
+ :jar => 's3n://elasticmapreduce/samples/cloudburst/cloudburst.jar',
181
+ :args => [
182
+ 's3n://elasticmapreduce/samples/cloudburst/input/s_suis.br',
183
+ 's3n://elasticmapreduce/samples/cloudburst/input/100k.br',
184
+ 's3n://slif_hadoop_test/cloudburst/output/2011-12-09',
185
+ ],
186
+ },
187
+ :name => 'Elasticity Custom Jar Step (s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar)'
188
+ }
189
+ ]
190
+ }).and_return('CUSTOM_JAR_JOBFLOW_ID')
191
+
192
+ custom_jar_jobflow.run.should == 'CUSTOM_JAR_JOBFLOW_ID'
193
+ end
194
+
195
+ end
196
+
197
+ end
@@ -1,149 +1,380 @@
1
- require 'spec_helper'
2
-
3
1
  describe Elasticity::JobFlow do
4
2
 
5
3
  before do
6
- describe_jobflows_xml = <<-JOBFLOWS
7
- <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
8
- <DescribeJobFlowsResult>
9
- <JobFlows>
10
- <member>
11
- <JobFlowId>j-p</JobFlowId>
12
- <Name>Pig Job</Name>
13
- <ExecutionStatusDetail>
14
- <CreationDateTime>
15
- 2011-10-04T21:49:16Z
16
- </CreationDateTime>
17
- <LastStateChangeReason>
18
- Steps completed with errors
19
- </LastStateChangeReason>
20
- <StartDateTime>
21
- 2011-10-04T21:49:17Z
22
- </StartDateTime>
23
- <ReadyDateTime>
24
- 2011-10-04T21:49:18Z
25
- </ReadyDateTime>
26
- <State>TERMINATED</State>
27
- </ExecutionStatusDetail>
28
- <Steps>
29
- <member>
30
- <StepConfig>
31
- <Name>Setup Hive</Name>
32
- </StepConfig>
33
- <ExecutionStatusDetail>
34
- <State>FAILED</State>
35
- </ExecutionStatusDetail>
36
- </member>
37
- <member>
38
- <StepConfig>
39
- <Name>Run Hive Script</Name>
40
- </StepConfig>
41
- <ExecutionStatusDetail>
42
- <State>PENDING</State>
43
- </ExecutionStatusDetail>
44
- </member>
45
- </Steps>
46
- <Instances>
47
- <Placement>
48
- <AvailabilityZone>
49
- eu-west-1a
50
- </AvailabilityZone>
51
- </Placement>
52
- <SlaveInstanceType>
53
- m1.small
54
- </SlaveInstanceType>
55
- <MasterInstanceType>
56
- m1.small
57
- </MasterInstanceType>
58
- <Ec2KeyName>
59
- myec2keyname
60
- </Ec2KeyName>
61
- <InstanceCount>
62
- 4
63
- </InstanceCount>
64
- </Instances>
65
- </member>
66
- <member>
67
- <JobFlowId>j-h</JobFlowId>
68
- <Name>Hive Job</Name>
69
- <ExecutionStatusDetail>
70
- <CreationDateTime>
71
- 2011-10-04T22:49:16Z
72
- </CreationDateTime>
73
- <StartDateTime>
74
-
75
- </StartDateTime>
76
- <ReadyDateTime>
77
-
78
- </ReadyDateTime>
79
- <State>
80
- TERMINATED
81
- </State>
82
- <LastStateChangeReason>
83
- Steps completed
84
- </LastStateChangeReason>
85
- </ExecutionStatusDetail>
86
- <Instances>
87
- <Placement>
88
- <AvailabilityZone>
89
- eu-west-1b
90
- </AvailabilityZone>
91
- </Placement>
92
- <SlaveInstanceType>
93
- c1.medium
94
- </SlaveInstanceType>
95
- <MasterInstanceType>
96
- c1.medium
97
- </MasterInstanceType>
98
- <Ec2KeyName>
99
- myec2keyname
100
- </Ec2KeyName>
101
- <InstanceCount>
102
- 2
103
- </InstanceCount>
104
- </Instances>
105
- </member>
106
- </JobFlows>
107
- </DescribeJobFlowsResult>
108
- </DescribeJobFlowsResponse>
109
- JOBFLOWS
110
- describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
111
- describe_jobflows_document.remove_namespaces!
112
- @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member')
4
+ # Ensure we don't accidentally submit to EMR for all of these examples
5
+ Elasticity::EMR.stub(:new).and_return(double('Elasticity::EMR', :run_job_flow => '_'))
6
+ end
7
+
8
+ subject do
9
+ Elasticity::JobFlow.new('access', 'secret')
10
+ end
11
+
12
+ its(:action_on_failure) { should == 'TERMINATE_JOB_FLOW' }
13
+ its(:ec2_key_name) { should == 'default' }
14
+ its(:hadoop_version) { should == '0.20.205' }
15
+ its(:instance_count) { should == 2 }
16
+ its(:log_uri) { should == nil }
17
+ its(:master_instance_type) { should == 'm1.small' }
18
+ its(:name) { should == 'Elasticity Job Flow' }
19
+ its(:slave_instance_type) { should == 'm1.small' }
20
+ its(:ami_version) { should == 'latest' }
21
+ its(:keep_job_flow_alive_when_no_steps) { should == false }
22
+
23
+ describe '#instance_count=' do
24
+
25
+ context 'when set to more than 1' do
26
+ it 'should set the number of instances' do
27
+ subject.instance_count = 10
28
+ subject.instance_count.should == 10
29
+ end
30
+ end
31
+
32
+ context 'when set to less than 2' do
33
+ it 'should be an error' do
34
+ expect {
35
+ subject.instance_count = 1
36
+ }.to raise_error(ArgumentError, 'Instance count cannot be set to less than 2 (requested 1)')
37
+ end
38
+ end
39
+
40
+ end
41
+
42
+ describe '#add_bootstrap_action' do
43
+
44
+ context 'when the jobflow is not yet started' do
45
+ it 'should not raise an error' do
46
+ expect {
47
+ subject.add_bootstrap_action(nil)
48
+ }.to_not raise_error
49
+ end
50
+ end
51
+
52
+ context 'when the jobflow is already started' do
53
+ before do
54
+ subject.add_step(Elasticity::CustomJarStep.new('_'))
55
+ subject.run
56
+ end
57
+ it 'should raise an error' do
58
+ expect {
59
+ subject.add_bootstrap_action(nil)
60
+ }.to raise_error(Elasticity::JobFlowRunningError, 'To modify bootstrap actions, please create a new job flow.')
61
+ end
62
+ end
63
+
64
+ end
65
+
66
+ describe '#add_step' do
67
+
68
+ context 'when the jobflow is already running' do
69
+
70
+ let(:emr) { double('Elasticity::EMR', :run_job_flow => 'RUNNING_JOBFLOW_ID') }
71
+
72
+ let(:running_jobflow) do
73
+ Elasticity::JobFlow.new('access', 'secret').tap do |jf|
74
+ jf.add_step(Elasticity::PigStep.new('_'))
75
+ end
76
+ end
77
+
78
+ before do
79
+ Elasticity::EMR.should_receive(:new).with('access', 'secret').and_return(emr)
80
+ running_jobflow.run
81
+ end
82
+
83
+ context 'when the step requires installation' do
84
+
85
+ context 'when the installation has already happened' do
86
+ let(:additional_step) { Elasticity::PigStep.new('_') }
87
+
88
+ it 'should submit the step' do
89
+ emr.should_receive(:add_jobflow_steps).with('RUNNING_JOBFLOW_ID', {
90
+ :steps => [additional_step.to_aws_step(running_jobflow)]
91
+ })
92
+ running_jobflow.add_step(additional_step)
93
+ end
94
+ end
95
+
96
+ context 'when the installation has not yet happened' do
97
+ let(:additional_step) { Elasticity::HiveStep.new('_') }
98
+
99
+ it 'should submit the installation step and the step' do
100
+ emr.should_receive(:add_jobflow_steps).with('RUNNING_JOBFLOW_ID', {
101
+ :steps => [
102
+ Elasticity::HiveStep.aws_installation_step,
103
+ additional_step.to_aws_step(running_jobflow)
104
+ ]
105
+ })
106
+ running_jobflow.add_step(additional_step)
107
+ end
108
+ end
109
+
110
+ end
111
+
112
+ context 'when the step does not require installation' do
113
+
114
+ let(:additional_step) { Elasticity::CustomJarStep.new('jar') }
115
+
116
+ it 'should submit the step' do
117
+ emr.should_receive(:add_jobflow_steps).with('RUNNING_JOBFLOW_ID', {
118
+ :steps => [additional_step.to_aws_step(running_jobflow)]
119
+ })
120
+ running_jobflow.add_step(additional_step)
121
+ end
122
+
123
+ end
124
+
125
+ end
126
+
127
+ context 'when the jobflow is not yet running' do
128
+ # This behaviour is tested in #jobflow_config
129
+ end
130
+ end
131
+
132
+ describe '#jobflow_config' do
133
+
134
+ it 'should incorporate the job flow preamble' do
135
+ subject.stub(:jobflow_preamble).and_return({:preamble => 'PREAMBLE'})
136
+ subject.send(:jobflow_config).should be_a_hash_including({:preamble => 'PREAMBLE'})
137
+ end
138
+
139
+ describe 'steps' do
140
+
141
+ let(:jobflow_steps) { [Elasticity::HiveStep.new('script.hql'), Elasticity::PigStep.new('script.pig'), Elasticity::CustomJarStep.new('script.jar')] }
142
+ let(:jobflow_with_steps) do
143
+ Elasticity::JobFlow.new('_', '_').tap do |jobflow|
144
+ jobflow_steps.each { |s| jobflow.add_step(s) }
145
+ end
146
+ end
147
+ let(:aws_steps) do
148
+ [
149
+ Elasticity::HiveStep.aws_installation_step,
150
+ jobflow_steps[0].to_aws_step(jobflow_with_steps),
151
+ Elasticity::PigStep.aws_installation_step,
152
+ jobflow_steps[1].to_aws_step(jobflow_with_steps),
153
+ jobflow_steps[2].to_aws_step(jobflow_with_steps),
154
+ ]
155
+ end
156
+
157
+ it 'should incorporate the installation and run steps into the jobflow config' do
158
+ jobflow_with_steps.send(:jobflow_config).should be_a_hash_including({:steps => aws_steps})
159
+ end
160
+
161
+ context 'when there are more than one installable step of the same type' do
162
+ before do
163
+ jobflow_steps << Elasticity::HiveStep.new('script.hql')
164
+ aws_steps << jobflow_steps.last.to_aws_step(jobflow_with_steps)
165
+ end
166
+ it 'should not include the installation step more than once' do
167
+ jobflow_with_steps.send(:jobflow_config).should be_a_hash_including({:steps => aws_steps})
168
+ end
169
+ end
170
+
171
+ end
172
+
173
+ describe 'log URI' do
174
+
175
+ context 'when a log URI is specified' do
176
+ let(:jobflow_with_log_uri) do
177
+ Elasticity::JobFlow.new('_', '_').tap do |jf|
178
+ jf.log_uri = 'LOG_URI'
179
+ end
180
+ end
181
+ it 'should incorporate it into the jobflow config' do
182
+ jobflow_with_log_uri.send(:jobflow_config).should be_a_hash_including({:log_uri => 'LOG_URI'})
183
+ end
184
+ end
185
+
186
+ context 'when a log URI is not specified' do
187
+ let(:jobflow_with_no_log_uri) do
188
+ Elasticity::JobFlow.new('_', '_').tap do |jf|
189
+ jf.log_uri = nil
190
+ end
191
+ end
192
+ it 'should not make space for it in the jobflow config' do
193
+ jobflow_with_no_log_uri.send(:jobflow_config).should_not have_key(:log_uri)
194
+ end
195
+ end
196
+
197
+ end
198
+
199
+ describe 'bootstrap actions' do
200
+
201
+ context 'when bootstrap actions are specified' do
202
+ let(:hadoop_bootstrap_actions) do
203
+ [
204
+ Elasticity::HadoopBootstrapAction.new('OPTION1', 'VALUE1'),
205
+ Elasticity::HadoopBootstrapAction.new('OPTION1', 'VALUE2'),
206
+ Elasticity::HadoopBootstrapAction.new('OPTION2', 'VALUE3')
207
+ ]
208
+ end
209
+ let(:jobflow_with_bootstrap_actions) do
210
+ Elasticity::JobFlow.new('_', '_').tap do |jf|
211
+ hadoop_bootstrap_actions.each do |action|
212
+ jf.add_bootstrap_action(action)
213
+ end
214
+ end
215
+ end
216
+ it 'should include them in the jobflow config' do
217
+ bootstrap_actions = hadoop_bootstrap_actions.map { |a| a.to_aws_bootstrap_action }
218
+ jobflow_with_bootstrap_actions.send(:jobflow_config).should be_a_hash_including({
219
+ :bootstrap_actions => bootstrap_actions
220
+ })
221
+ end
222
+ end
223
+
224
+ context 'when bootstrap actions are not specified' do
225
+ it 'should not make space for them in the jobflow config' do
226
+ subject.send(:jobflow_config).should_not have_key(:bootstrap_actions)
227
+ end
228
+ end
229
+
230
+ end
231
+
113
232
  end
114
233
 
115
- describe ".from_xml" do
116
- it "should return a JobFlow with the appropriate fields initialized" do
117
- jobflow = Elasticity::JobFlow.from_member_element(@members_nodeset[0])
118
- jobflow.name.should == "Pig Job"
119
- jobflow.jobflow_id.should == "j-p"
120
- jobflow.state.should == "TERMINATED"
121
- jobflow.steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
122
- jobflow.steps.map(&:state).should == ["FAILED", "PENDING"]
123
- jobflow.created_at.should == Time.parse("2011-10-04T21:49:16Z")
124
- jobflow.started_at.should == Time.parse("2011-10-04T21:49:17Z")
125
- jobflow.ready_at.should == Time.parse("2011-10-04T21:49:18Z")
126
- jobflow.master_instance_type.should == "m1.small"
127
- jobflow.slave_instance_type.should == "m1.small"
128
- jobflow.instance_count.should == "4"
129
- jobflow.last_state_change_reason.should == "Steps completed with errors"
234
+ describe '#jobflow_preamble' do
235
+
236
+ let(:basic_preamble) do
237
+ {
238
+ :name => 'Elasticity Job Flow',
239
+ :ami_version => 'latest',
240
+ :instances => {
241
+ :keep_job_flow_alive_when_no_steps => false,
242
+ :ec2_key_name => 'default',
243
+ :hadoop_version => '0.20.205',
244
+ :instance_count => 2,
245
+ :master_instance_type => 'm1.small',
246
+ :slave_instance_type => 'm1.small',
247
+ }
248
+ }
130
249
  end
250
+
251
+ it 'should create a jobflow configuration section' do
252
+ subject.send(:jobflow_preamble).should == basic_preamble
253
+ end
254
+
255
+ context 'when a VPC subnet ID is specified' do
256
+ it 'should include it in the preamble' do
257
+ subject.ec2_subnet_id = 'subnet-118b9d79'
258
+ subject.send(:jobflow_preamble).should be_a_hash_including({:ec2_subnet_id => 'subnet-118b9d79'})
259
+ end
260
+ end
261
+
131
262
  end
132
263
 
133
- describe ".from_jobflows_nodeset" do
134
- it "should return JobFlows with the appropriate fields initialized" do
135
- jobflow = Elasticity::JobFlow.from_members_nodeset(@members_nodeset)
136
- jobflow.map(&:name).should == ["Pig Job", "Hive Job"]
137
- jobflow.map(&:jobflow_id).should == ["j-p", "j-h"]
138
- jobflow.map(&:state).should == ["TERMINATED", "TERMINATED"]
139
- jobflow.map(&:created_at).should == [Time.parse("2011-10-04T21:49:16Z"), Time.parse("2011-10-04T22:49:16Z")]
140
- jobflow.map(&:started_at).should == [Time.parse("2011-10-04T21:49:17Z"), nil]
141
- jobflow.map(&:ready_at).should == [Time.parse("2011-10-04T21:49:18Z"), nil]
142
- jobflow.map(&:master_instance_type).should == ["m1.small","c1.medium"]
143
- jobflow.map(&:slave_instance_type).should == ["m1.small", "c1.medium"]
144
- jobflow.map(&:instance_count).should == ["4","2"]
145
- jobflow.map(&:last_state_change_reason).should == ["Steps completed with errors", "Steps completed"]
264
+ describe '#run' do
265
+
266
+ context 'when there are steps added' do
267
+ let(:jobflow_with_steps) do
268
+ Elasticity::JobFlow.new('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').tap do |jf|
269
+ jf.add_step(Elasticity::CustomJarStep.new('_'))
270
+ end
271
+ end
272
+
273
+ context 'when the jobflow has not yet been run' do
274
+ let(:emr) { double('Elasticity::EMR', :run_job_flow => 'JOBFLOW_ID') }
275
+
276
+ it 'should run the job with the supplied EMR credentials' do
277
+ Elasticity::EMR.should_receive(:new).with('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').and_return(emr)
278
+ emr.should_receive(:run_job_flow)
279
+ jobflow_with_steps.run
280
+ end
281
+
282
+ it 'should run the job with the jobflow config' do
283
+ Elasticity::EMR.stub(:new).with('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').and_return(emr)
284
+ jobflow_with_steps.stub(:jobflow_config).and_return('JOBFLOW_CONFIG')
285
+ emr.should_receive(:run_job_flow).with('JOBFLOW_CONFIG')
286
+ jobflow_with_steps.run
287
+ end
288
+
289
+ it 'should return the jobflow ID' do
290
+ Elasticity::EMR.stub(:new).with('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').and_return(emr)
291
+ jobflow_with_steps.run.should == 'JOBFLOW_ID'
292
+ end
293
+
294
+ end
295
+
296
+ context 'when the jobflow has already been run' do
297
+ before do
298
+ jobflow_with_steps.run
299
+ end
300
+ it 'should raise an error' do
301
+ expect {
302
+ jobflow_with_steps.run
303
+ }.to raise_error(Elasticity::JobFlowRunningError, 'Cannot run a job flow multiple times. To do more with this job flow, please use #add_step.')
304
+ end
305
+ end
306
+
307
+ end
308
+
309
+ context 'when there are no steps added' do
310
+ let(:jobflow_with_no_steps) { Elasticity::JobFlow.new('_', '_') }
311
+ it 'should raise an error' do
312
+ expect {
313
+ jobflow_with_no_steps.run
314
+ }.to raise_error(Elasticity::JobFlowMissingStepsError, 'Cannot run a job flow without adding steps. Please use #add_step.')
315
+ end
316
+ end
317
+
318
+ end
319
+
320
+ describe '#status' do
321
+
322
+ context 'before the jobflow has been run' do
323
+ it 'should raise an error' do
324
+ expect {
325
+ subject.status
326
+ }.to raise_error(Elasticity::JobFlowNotStartedError, 'Please #run this job flow before attempting to retrieve status.')
327
+ end
328
+ end
329
+
330
+ context 'after the jobflow has been run' do
331
+ let(:emr) { double('Elasticity::EMR', :run_job_flow => 'JOBFLOW_ID') }
332
+ let(:running_jobflow) { Elasticity::JobFlow.new('_', '_') }
333
+ let(:jobflow_status) do
334
+ Elasticity::JobFlowStatus.new.tap do |js|
335
+ js.stub(:state => 'TERMINATED')
336
+ end
337
+ end
338
+ before do
339
+ Elasticity::EMR.stub(:new).and_return(emr)
340
+ running_jobflow.add_step(Elasticity::CustomJarStep.new('_'))
341
+ running_jobflow.run
342
+ end
343
+ it 'should return the AWS status' do
344
+ emr.should_receive(:describe_jobflow).with('JOBFLOW_ID').and_return(jobflow_status)
345
+ status = running_jobflow.status
346
+ status.should be_a(Elasticity::JobFlowStatus)
347
+ status.state.should == 'TERMINATED'
348
+ end
146
349
  end
350
+
351
+ end
352
+
353
+ describe '#shutdown' do
354
+
355
+ context 'when the jobflow has not yet been started' do
356
+ let(:unstarted_job_flow) { Elasticity::JobFlow.new('_', '_')}
357
+ it 'should be an error' do
358
+ expect {
359
+ unstarted_job_flow.shutdown
360
+ }.to raise_error(Elasticity::JobFlowNotStartedError, 'Cannot #shutdown a job flow that has not yet been #run.')
361
+ end
362
+ end
363
+
364
+ context 'when the jobflow has been started' do
365
+ let(:emr) { double('Elasticity::EMR', :run_job_flow => 'JOBFLOW_ID') }
366
+ let(:running_jobflow) { Elasticity::JobFlow.new('_', '_') }
367
+ before do
368
+ Elasticity::EMR.stub(:new).and_return(emr)
369
+ running_jobflow.add_step(Elasticity::CustomJarStep.new('_'))
370
+ running_jobflow.run
371
+ end
372
+ it 'should shutdown the running jobflow' do
373
+ emr.should_receive(:terminate_jobflows).with('JOBFLOW_ID')
374
+ running_jobflow.shutdown
375
+ end
376
+ end
377
+
147
378
  end
148
379
 
149
- end
380
+ end