elasticity 1.5 → 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rspec +2 -1
- data/.rvmrc +1 -1
- data/HISTORY.md +47 -24
- data/LICENSE +1 -1
- data/README.md +165 -317
- data/Rakefile +4 -3
- data/elasticity.gemspec +3 -5
- data/lib/elasticity.rb +10 -5
- data/lib/elasticity/aws_request.rb +81 -20
- data/lib/elasticity/custom_jar_step.rb +33 -0
- data/lib/elasticity/emr.rb +45 -117
- data/lib/elasticity/hadoop_bootstrap_action.rb +27 -0
- data/lib/elasticity/hive_step.rb +57 -0
- data/lib/elasticity/job_flow.rb +109 -39
- data/lib/elasticity/job_flow_status.rb +53 -0
- data/lib/elasticity/job_flow_status_step.rb +35 -0
- data/lib/elasticity/job_flow_step.rb +17 -25
- data/lib/elasticity/pig_step.rb +82 -0
- data/lib/elasticity/support/conditional_raise.rb +23 -0
- data/lib/elasticity/version.rb +1 -1
- data/spec/lib/elasticity/aws_request_spec.rb +159 -51
- data/spec/lib/elasticity/custom_jar_step_spec.rb +59 -0
- data/spec/lib/elasticity/emr_spec.rb +231 -762
- data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +26 -0
- data/spec/lib/elasticity/hive_step_spec.rb +74 -0
- data/spec/lib/elasticity/job_flow_integration_spec.rb +197 -0
- data/spec/lib/elasticity/job_flow_spec.rb +369 -138
- data/spec/lib/elasticity/job_flow_status_spec.rb +147 -0
- data/spec/lib/elasticity/job_flow_status_step_spec.rb +73 -0
- data/spec/lib/elasticity/job_flow_step_spec.rb +26 -64
- data/spec/lib/elasticity/pig_step_spec.rb +104 -0
- data/spec/lib/elasticity/support/conditional_raise_spec.rb +35 -0
- data/spec/spec_helper.rb +1 -50
- data/spec/support/be_a_hash_including_matcher.rb +35 -0
- metadata +101 -119
- data/.autotest +0 -2
- data/lib/elasticity/custom_jar_job.rb +0 -38
- data/lib/elasticity/hive_job.rb +0 -69
- data/lib/elasticity/pig_job.rb +0 -109
- data/lib/elasticity/simple_job.rb +0 -51
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +0 -44
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +0 -41
- data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +0 -266
- data/spec/fixtures/vcr_cassettes/custom_jar_job/cloudburst.yml +0 -41
- data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +0 -75
- data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +0 -38
- data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +0 -41
- data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +0 -38
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +0 -41
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +0 -41
- data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +0 -41
- data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +0 -41
- data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +0 -38
- data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +0 -38
- data/spec/lib/elasticity/custom_jar_job_spec.rb +0 -118
- data/spec/lib/elasticity/hive_job_spec.rb +0 -90
- data/spec/lib/elasticity/pig_job_spec.rb +0 -226
@@ -0,0 +1,26 @@
|
|
1
|
+
describe Elasticity::HadoopBootstrapAction do
|
2
|
+
|
3
|
+
subject do
|
4
|
+
Elasticity::HadoopBootstrapAction.new('option', 'value')
|
5
|
+
end
|
6
|
+
|
7
|
+
its(:name) { should == 'Elasticity Bootstrap Action (Configure Hadoop)' }
|
8
|
+
its(:option) { should == 'option' }
|
9
|
+
its(:value) { should == 'value' }
|
10
|
+
|
11
|
+
describe '#to_aws_bootstrap_action' do
|
12
|
+
|
13
|
+
it 'should create a bootstrap action' do
|
14
|
+
subject.to_aws_bootstrap_action.should ==
|
15
|
+
{
|
16
|
+
:name => 'Elasticity Bootstrap Action (Configure Hadoop)',
|
17
|
+
:script_bootstrap_action => {
|
18
|
+
:path => 's3n://elasticmapreduce/bootstrap-actions/configure-hadoop',
|
19
|
+
:args => ['option', 'value']
|
20
|
+
}
|
21
|
+
}
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
describe Elasticity::HiveStep do
|
2
|
+
|
3
|
+
subject do
|
4
|
+
Elasticity::HiveStep.new('script.hql')
|
5
|
+
end
|
6
|
+
|
7
|
+
it { should be_a Elasticity::JobFlowStep }
|
8
|
+
|
9
|
+
its(:name) { should == 'Elasticity Hive Step (script.hql)' }
|
10
|
+
its(:script) { should == 'script.hql' }
|
11
|
+
its(:variables) { should == { } }
|
12
|
+
its(:action_on_failure) { should == 'TERMINATE_JOB_FLOW' }
|
13
|
+
|
14
|
+
describe '#to_aws_step' do
|
15
|
+
|
16
|
+
it 'should convert to aws step format' do
|
17
|
+
step = subject.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
|
18
|
+
step[:name].should == 'Elasticity Hive Step (script.hql)'
|
19
|
+
step[:action_on_failure].should == 'TERMINATE_JOB_FLOW'
|
20
|
+
step[:hadoop_jar_step][:jar].should == 's3://elasticmapreduce/libs/script-runner/script-runner.jar'
|
21
|
+
step[:hadoop_jar_step][:args].should start_with([
|
22
|
+
's3://elasticmapreduce/libs/hive/hive-script',
|
23
|
+
'--run-hive-script',
|
24
|
+
'--args',
|
25
|
+
'-f',
|
26
|
+
'script.hql'
|
27
|
+
])
|
28
|
+
end
|
29
|
+
|
30
|
+
context 'when variables are provided' do
|
31
|
+
let(:hs_with_variables) do
|
32
|
+
Elasticity::HiveStep.new('script.pig').tap do |hs|
|
33
|
+
hs.variables = {
|
34
|
+
'VAR1' => 'VALUE1',
|
35
|
+
'VAR2' => 'VALUE2'
|
36
|
+
}
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'should convert to aws step format' do
|
41
|
+
step = hs_with_variables.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
|
42
|
+
step[:hadoop_jar_step][:args][5..9].should == %w(-d VAR1=VALUE1 -d VAR2=VALUE2)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
describe '.requires_installation?' do
|
49
|
+
it 'should require installation' do
|
50
|
+
Elasticity::HiveStep.requires_installation?.should be_true
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
describe '.aws_installation_step' do
|
55
|
+
|
56
|
+
it 'should provide a means to install Hive' do
|
57
|
+
Elasticity::HiveStep.aws_installation_step.should == {
|
58
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
59
|
+
:hadoop_jar_step => {
|
60
|
+
:jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
61
|
+
:args => [
|
62
|
+
's3://elasticmapreduce/libs/hive/hive-script',
|
63
|
+
'--base-path',
|
64
|
+
's3://elasticmapreduce/libs/hive/',
|
65
|
+
'--install-hive'
|
66
|
+
],
|
67
|
+
},
|
68
|
+
:name => 'Elasticity - Install Hive'
|
69
|
+
}
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
end
|
@@ -0,0 +1,197 @@
|
|
1
|
+
describe 'Elasticity::JobFlow Integration Examples' do
|
2
|
+
|
3
|
+
let(:emr) { double('Elasticity::EMR') }
|
4
|
+
|
5
|
+
before do
|
6
|
+
Elasticity::EMR.should_receive(:new).with('access', 'secret').and_return(emr)
|
7
|
+
end
|
8
|
+
|
9
|
+
describe 'Hive' do
|
10
|
+
|
11
|
+
let(:hive_step) do
|
12
|
+
Elasticity::HiveStep.new('s3n://slif-hive/test.q').tap do |hs|
|
13
|
+
hs.variables = {'OUTPUT' => 's3n://slif-test/output'}
|
14
|
+
hs.action_on_failure = 'CONTINUE'
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
let(:hive_jobflow) do
|
19
|
+
Elasticity::JobFlow.new('access', 'secret').tap do |jf|
|
20
|
+
jf.log_uri = 's3n://slif-test/output/logs'
|
21
|
+
jf.add_step(hive_step)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'should launch the Hive job with the specified EMR credentials' do
|
26
|
+
emr.should_receive(:run_job_flow).with({
|
27
|
+
:name => 'Elasticity Job Flow',
|
28
|
+
:log_uri => 's3n://slif-test/output/logs',
|
29
|
+
:ami_version => 'latest',
|
30
|
+
:instances => {
|
31
|
+
:keep_job_flow_alive_when_no_steps => false,
|
32
|
+
:ec2_key_name => 'default',
|
33
|
+
:hadoop_version => '0.20.205',
|
34
|
+
:instance_count => 2,
|
35
|
+
:master_instance_type => 'm1.small',
|
36
|
+
:slave_instance_type => 'm1.small',
|
37
|
+
},
|
38
|
+
:steps => [
|
39
|
+
{
|
40
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
41
|
+
:hadoop_jar_step => {
|
42
|
+
:jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
43
|
+
:args => [
|
44
|
+
's3://elasticmapreduce/libs/hive/hive-script',
|
45
|
+
'--base-path',
|
46
|
+
's3://elasticmapreduce/libs/hive/',
|
47
|
+
'--install-hive'
|
48
|
+
],
|
49
|
+
},
|
50
|
+
:name => 'Elasticity - Install Hive'
|
51
|
+
},
|
52
|
+
{
|
53
|
+
:action_on_failure => 'CONTINUE',
|
54
|
+
:hadoop_jar_step => {
|
55
|
+
:jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
56
|
+
:args => [
|
57
|
+
's3://elasticmapreduce/libs/hive/hive-script',
|
58
|
+
'--run-hive-script',
|
59
|
+
'--args',
|
60
|
+
'-f', 's3n://slif-hive/test.q',
|
61
|
+
'-d', 'OUTPUT=s3n://slif-test/output'
|
62
|
+
],
|
63
|
+
},
|
64
|
+
:name => 'Elasticity Hive Step (s3n://slif-hive/test.q)'
|
65
|
+
}
|
66
|
+
]
|
67
|
+
}).and_return('HIVE_JOBFLOW_ID')
|
68
|
+
|
69
|
+
hive_jobflow.run.should == 'HIVE_JOBFLOW_ID'
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
describe 'Pig' do
|
75
|
+
|
76
|
+
let(:pig_step) do
|
77
|
+
Elasticity::PigStep.new('s3n://slif-pig-test/test.pig').tap do |ps|
|
78
|
+
ps.variables = {'OUTPUT' => 's3n://slif-pig-test/output', 'XREFS' => 's3n://slif-pig-test/xrefs'}
|
79
|
+
ps.action_on_failure = 'CONTINUE'
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
let(:pig_jobflow) do
|
84
|
+
Elasticity::JobFlow.new('access', 'secret').tap do |jf|
|
85
|
+
jf.instance_count = 8
|
86
|
+
jf.slave_instance_type = 'm1.xlarge'
|
87
|
+
jf.log_uri = 's3n://slif-test/output/logs'
|
88
|
+
jf.add_step(pig_step)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'should launch the Pig job with the specified EMR credentials' do
|
93
|
+
emr.should_receive(:run_job_flow).with({
|
94
|
+
:name => 'Elasticity Job Flow',
|
95
|
+
:log_uri => 's3n://slif-test/output/logs',
|
96
|
+
:ami_version => 'latest',
|
97
|
+
:instances => {
|
98
|
+
:keep_job_flow_alive_when_no_steps => false,
|
99
|
+
:ec2_key_name => 'default',
|
100
|
+
:hadoop_version => '0.20.205',
|
101
|
+
:instance_count => 8,
|
102
|
+
:master_instance_type => 'm1.small',
|
103
|
+
:slave_instance_type => 'm1.xlarge',
|
104
|
+
},
|
105
|
+
:steps => [
|
106
|
+
{
|
107
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
108
|
+
:hadoop_jar_step => {
|
109
|
+
:jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
110
|
+
:args => [
|
111
|
+
's3://elasticmapreduce/libs/pig/pig-script',
|
112
|
+
'--base-path',
|
113
|
+
's3://elasticmapreduce/libs/pig/',
|
114
|
+
'--install-pig'
|
115
|
+
],
|
116
|
+
},
|
117
|
+
:name => 'Elasticity - Install Pig'
|
118
|
+
},
|
119
|
+
{
|
120
|
+
:action_on_failure => 'CONTINUE',
|
121
|
+
:hadoop_jar_step => {
|
122
|
+
:jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
123
|
+
:args => [
|
124
|
+
's3://elasticmapreduce/libs/pig/pig-script',
|
125
|
+
'--run-pig-script',
|
126
|
+
'--args',
|
127
|
+
'-p', 'OUTPUT=s3n://slif-pig-test/output',
|
128
|
+
'-p', 'XREFS=s3n://slif-pig-test/xrefs',
|
129
|
+
'-p', 'E_PARALLELS=26',
|
130
|
+
's3n://slif-pig-test/test.pig'
|
131
|
+
],
|
132
|
+
},
|
133
|
+
:name => 'Elasticity Pig Step (s3n://slif-pig-test/test.pig)'
|
134
|
+
}
|
135
|
+
]
|
136
|
+
}).and_return('PIG_JOBFLOW_ID')
|
137
|
+
|
138
|
+
pig_jobflow.run.should == 'PIG_JOBFLOW_ID'
|
139
|
+
end
|
140
|
+
|
141
|
+
end
|
142
|
+
|
143
|
+
describe 'Custom Jar' do
|
144
|
+
|
145
|
+
let(:custom_jar_step) do
|
146
|
+
Elasticity::CustomJarStep.new('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar').tap do |cj|
|
147
|
+
cj.arguments = [
|
148
|
+
's3n://elasticmapreduce/samples/cloudburst/input/s_suis.br',
|
149
|
+
's3n://elasticmapreduce/samples/cloudburst/input/100k.br',
|
150
|
+
's3n://slif_hadoop_test/cloudburst/output/2011-12-09',
|
151
|
+
]
|
152
|
+
cj.action_on_failure = 'TERMINATE_JOB_FLOW'
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
let(:custom_jar_jobflow) do
|
157
|
+
Elasticity::JobFlow.new('access', 'secret').tap do |jf|
|
158
|
+
jf.log_uri = 's3n://slif-test/output/logs'
|
159
|
+
jf.add_step(custom_jar_step)
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
it 'should launch the Custom Jar job with the specified EMR credentials' do
|
164
|
+
emr.should_receive(:run_job_flow).with({
|
165
|
+
:name => 'Elasticity Job Flow',
|
166
|
+
:log_uri => 's3n://slif-test/output/logs',
|
167
|
+
:ami_version => 'latest',
|
168
|
+
:instances => {
|
169
|
+
:keep_job_flow_alive_when_no_steps => false,
|
170
|
+
:ec2_key_name => 'default',
|
171
|
+
:hadoop_version => '0.20.205',
|
172
|
+
:instance_count => 2,
|
173
|
+
:master_instance_type => 'm1.small',
|
174
|
+
:slave_instance_type => 'm1.small',
|
175
|
+
},
|
176
|
+
:steps => [
|
177
|
+
{
|
178
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
179
|
+
:hadoop_jar_step => {
|
180
|
+
:jar => 's3n://elasticmapreduce/samples/cloudburst/cloudburst.jar',
|
181
|
+
:args => [
|
182
|
+
's3n://elasticmapreduce/samples/cloudburst/input/s_suis.br',
|
183
|
+
's3n://elasticmapreduce/samples/cloudburst/input/100k.br',
|
184
|
+
's3n://slif_hadoop_test/cloudburst/output/2011-12-09',
|
185
|
+
],
|
186
|
+
},
|
187
|
+
:name => 'Elasticity Custom Jar Step (s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar)'
|
188
|
+
}
|
189
|
+
]
|
190
|
+
}).and_return('CUSTOM_JAR_JOBFLOW_ID')
|
191
|
+
|
192
|
+
custom_jar_jobflow.run.should == 'CUSTOM_JAR_JOBFLOW_ID'
|
193
|
+
end
|
194
|
+
|
195
|
+
end
|
196
|
+
|
197
|
+
end
|
@@ -1,149 +1,380 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
1
|
describe Elasticity::JobFlow do
|
4
2
|
|
5
3
|
before do
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
4
|
+
# Ensure we don't accidentally submit to EMR for all of these examples
|
5
|
+
Elasticity::EMR.stub(:new).and_return(double('Elasticity::EMR', :run_job_flow => '_'))
|
6
|
+
end
|
7
|
+
|
8
|
+
subject do
|
9
|
+
Elasticity::JobFlow.new('access', 'secret')
|
10
|
+
end
|
11
|
+
|
12
|
+
its(:action_on_failure) { should == 'TERMINATE_JOB_FLOW' }
|
13
|
+
its(:ec2_key_name) { should == 'default' }
|
14
|
+
its(:hadoop_version) { should == '0.20.205' }
|
15
|
+
its(:instance_count) { should == 2 }
|
16
|
+
its(:log_uri) { should == nil }
|
17
|
+
its(:master_instance_type) { should == 'm1.small' }
|
18
|
+
its(:name) { should == 'Elasticity Job Flow' }
|
19
|
+
its(:slave_instance_type) { should == 'm1.small' }
|
20
|
+
its(:ami_version) { should == 'latest' }
|
21
|
+
its(:keep_job_flow_alive_when_no_steps) { should == false }
|
22
|
+
|
23
|
+
describe '#instance_count=' do
|
24
|
+
|
25
|
+
context 'when set to more than 1' do
|
26
|
+
it 'should set the number of instances' do
|
27
|
+
subject.instance_count = 10
|
28
|
+
subject.instance_count.should == 10
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
context 'when set to less than 2' do
|
33
|
+
it 'should be an error' do
|
34
|
+
expect {
|
35
|
+
subject.instance_count = 1
|
36
|
+
}.to raise_error(ArgumentError, 'Instance count cannot be set to less than 2 (requested 1)')
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
describe '#add_bootstrap_action' do
|
43
|
+
|
44
|
+
context 'when the jobflow is not yet started' do
|
45
|
+
it 'should not raise an error' do
|
46
|
+
expect {
|
47
|
+
subject.add_bootstrap_action(nil)
|
48
|
+
}.to_not raise_error
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
context 'when the jobflow is already started' do
|
53
|
+
before do
|
54
|
+
subject.add_step(Elasticity::CustomJarStep.new('_'))
|
55
|
+
subject.run
|
56
|
+
end
|
57
|
+
it 'should raise an error' do
|
58
|
+
expect {
|
59
|
+
subject.add_bootstrap_action(nil)
|
60
|
+
}.to raise_error(Elasticity::JobFlowRunningError, 'To modify bootstrap actions, please create a new job flow.')
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
describe '#add_step' do
|
67
|
+
|
68
|
+
context 'when the jobflow is already running' do
|
69
|
+
|
70
|
+
let(:emr) { double('Elasticity::EMR', :run_job_flow => 'RUNNING_JOBFLOW_ID') }
|
71
|
+
|
72
|
+
let(:running_jobflow) do
|
73
|
+
Elasticity::JobFlow.new('access', 'secret').tap do |jf|
|
74
|
+
jf.add_step(Elasticity::PigStep.new('_'))
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
before do
|
79
|
+
Elasticity::EMR.should_receive(:new).with('access', 'secret').and_return(emr)
|
80
|
+
running_jobflow.run
|
81
|
+
end
|
82
|
+
|
83
|
+
context 'when the step requires installation' do
|
84
|
+
|
85
|
+
context 'when the installation has already happened' do
|
86
|
+
let(:additional_step) { Elasticity::PigStep.new('_') }
|
87
|
+
|
88
|
+
it 'should submit the step' do
|
89
|
+
emr.should_receive(:add_jobflow_steps).with('RUNNING_JOBFLOW_ID', {
|
90
|
+
:steps => [additional_step.to_aws_step(running_jobflow)]
|
91
|
+
})
|
92
|
+
running_jobflow.add_step(additional_step)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
context 'when the installation has not yet happened' do
|
97
|
+
let(:additional_step) { Elasticity::HiveStep.new('_') }
|
98
|
+
|
99
|
+
it 'should submit the installation step and the step' do
|
100
|
+
emr.should_receive(:add_jobflow_steps).with('RUNNING_JOBFLOW_ID', {
|
101
|
+
:steps => [
|
102
|
+
Elasticity::HiveStep.aws_installation_step,
|
103
|
+
additional_step.to_aws_step(running_jobflow)
|
104
|
+
]
|
105
|
+
})
|
106
|
+
running_jobflow.add_step(additional_step)
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
111
|
+
|
112
|
+
context 'when the step does not require installation' do
|
113
|
+
|
114
|
+
let(:additional_step) { Elasticity::CustomJarStep.new('jar') }
|
115
|
+
|
116
|
+
it 'should submit the step' do
|
117
|
+
emr.should_receive(:add_jobflow_steps).with('RUNNING_JOBFLOW_ID', {
|
118
|
+
:steps => [additional_step.to_aws_step(running_jobflow)]
|
119
|
+
})
|
120
|
+
running_jobflow.add_step(additional_step)
|
121
|
+
end
|
122
|
+
|
123
|
+
end
|
124
|
+
|
125
|
+
end
|
126
|
+
|
127
|
+
context 'when the jobflow is not yet running' do
|
128
|
+
# This behaviour is tested in #jobflow_config
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
describe '#jobflow_config' do
|
133
|
+
|
134
|
+
it 'should incorporate the job flow preamble' do
|
135
|
+
subject.stub(:jobflow_preamble).and_return({:preamble => 'PREAMBLE'})
|
136
|
+
subject.send(:jobflow_config).should be_a_hash_including({:preamble => 'PREAMBLE'})
|
137
|
+
end
|
138
|
+
|
139
|
+
describe 'steps' do
|
140
|
+
|
141
|
+
let(:jobflow_steps) { [Elasticity::HiveStep.new('script.hql'), Elasticity::PigStep.new('script.pig'), Elasticity::CustomJarStep.new('script.jar')] }
|
142
|
+
let(:jobflow_with_steps) do
|
143
|
+
Elasticity::JobFlow.new('_', '_').tap do |jobflow|
|
144
|
+
jobflow_steps.each { |s| jobflow.add_step(s) }
|
145
|
+
end
|
146
|
+
end
|
147
|
+
let(:aws_steps) do
|
148
|
+
[
|
149
|
+
Elasticity::HiveStep.aws_installation_step,
|
150
|
+
jobflow_steps[0].to_aws_step(jobflow_with_steps),
|
151
|
+
Elasticity::PigStep.aws_installation_step,
|
152
|
+
jobflow_steps[1].to_aws_step(jobflow_with_steps),
|
153
|
+
jobflow_steps[2].to_aws_step(jobflow_with_steps),
|
154
|
+
]
|
155
|
+
end
|
156
|
+
|
157
|
+
it 'should incorporate the installation and run steps into the jobflow config' do
|
158
|
+
jobflow_with_steps.send(:jobflow_config).should be_a_hash_including({:steps => aws_steps})
|
159
|
+
end
|
160
|
+
|
161
|
+
context 'when there are more than one installable step of the same type' do
|
162
|
+
before do
|
163
|
+
jobflow_steps << Elasticity::HiveStep.new('script.hql')
|
164
|
+
aws_steps << jobflow_steps.last.to_aws_step(jobflow_with_steps)
|
165
|
+
end
|
166
|
+
it 'should not include the installation step more than once' do
|
167
|
+
jobflow_with_steps.send(:jobflow_config).should be_a_hash_including({:steps => aws_steps})
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
end
|
172
|
+
|
173
|
+
describe 'log URI' do
|
174
|
+
|
175
|
+
context 'when a log URI is specified' do
|
176
|
+
let(:jobflow_with_log_uri) do
|
177
|
+
Elasticity::JobFlow.new('_', '_').tap do |jf|
|
178
|
+
jf.log_uri = 'LOG_URI'
|
179
|
+
end
|
180
|
+
end
|
181
|
+
it 'should incorporate it into the jobflow config' do
|
182
|
+
jobflow_with_log_uri.send(:jobflow_config).should be_a_hash_including({:log_uri => 'LOG_URI'})
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
context 'when a log URI is not specified' do
|
187
|
+
let(:jobflow_with_no_log_uri) do
|
188
|
+
Elasticity::JobFlow.new('_', '_').tap do |jf|
|
189
|
+
jf.log_uri = nil
|
190
|
+
end
|
191
|
+
end
|
192
|
+
it 'should not make space for it in the jobflow config' do
|
193
|
+
jobflow_with_no_log_uri.send(:jobflow_config).should_not have_key(:log_uri)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
|
197
|
+
end
|
198
|
+
|
199
|
+
describe 'bootstrap actions' do
|
200
|
+
|
201
|
+
context 'when bootstrap actions are specified' do
|
202
|
+
let(:hadoop_bootstrap_actions) do
|
203
|
+
[
|
204
|
+
Elasticity::HadoopBootstrapAction.new('OPTION1', 'VALUE1'),
|
205
|
+
Elasticity::HadoopBootstrapAction.new('OPTION1', 'VALUE2'),
|
206
|
+
Elasticity::HadoopBootstrapAction.new('OPTION2', 'VALUE3')
|
207
|
+
]
|
208
|
+
end
|
209
|
+
let(:jobflow_with_bootstrap_actions) do
|
210
|
+
Elasticity::JobFlow.new('_', '_').tap do |jf|
|
211
|
+
hadoop_bootstrap_actions.each do |action|
|
212
|
+
jf.add_bootstrap_action(action)
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
216
|
+
it 'should include them in the jobflow config' do
|
217
|
+
bootstrap_actions = hadoop_bootstrap_actions.map { |a| a.to_aws_bootstrap_action }
|
218
|
+
jobflow_with_bootstrap_actions.send(:jobflow_config).should be_a_hash_including({
|
219
|
+
:bootstrap_actions => bootstrap_actions
|
220
|
+
})
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
context 'when bootstrap actions are not specified' do
|
225
|
+
it 'should not make space for them in the jobflow config' do
|
226
|
+
subject.send(:jobflow_config).should_not have_key(:bootstrap_actions)
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
end
|
231
|
+
|
113
232
|
end
|
114
233
|
|
115
|
-
describe
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
234
|
+
describe '#jobflow_preamble' do
|
235
|
+
|
236
|
+
let(:basic_preamble) do
|
237
|
+
{
|
238
|
+
:name => 'Elasticity Job Flow',
|
239
|
+
:ami_version => 'latest',
|
240
|
+
:instances => {
|
241
|
+
:keep_job_flow_alive_when_no_steps => false,
|
242
|
+
:ec2_key_name => 'default',
|
243
|
+
:hadoop_version => '0.20.205',
|
244
|
+
:instance_count => 2,
|
245
|
+
:master_instance_type => 'm1.small',
|
246
|
+
:slave_instance_type => 'm1.small',
|
247
|
+
}
|
248
|
+
}
|
130
249
|
end
|
250
|
+
|
251
|
+
it 'should create a jobflow configuration section' do
|
252
|
+
subject.send(:jobflow_preamble).should == basic_preamble
|
253
|
+
end
|
254
|
+
|
255
|
+
context 'when a VPC subnet ID is specified' do
|
256
|
+
it 'should include it in the preamble' do
|
257
|
+
subject.ec2_subnet_id = 'subnet-118b9d79'
|
258
|
+
subject.send(:jobflow_preamble).should be_a_hash_including({:ec2_subnet_id => 'subnet-118b9d79'})
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
131
262
|
end
|
132
263
|
|
133
|
-
describe
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
jobflow
|
143
|
-
|
144
|
-
|
145
|
-
|
264
|
+
describe '#run' do
|
265
|
+
|
266
|
+
context 'when there are steps added' do
|
267
|
+
let(:jobflow_with_steps) do
|
268
|
+
Elasticity::JobFlow.new('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').tap do |jf|
|
269
|
+
jf.add_step(Elasticity::CustomJarStep.new('_'))
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
context 'when the jobflow has not yet been run' do
|
274
|
+
let(:emr) { double('Elasticity::EMR', :run_job_flow => 'JOBFLOW_ID') }
|
275
|
+
|
276
|
+
it 'should run the job with the supplied EMR credentials' do
|
277
|
+
Elasticity::EMR.should_receive(:new).with('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').and_return(emr)
|
278
|
+
emr.should_receive(:run_job_flow)
|
279
|
+
jobflow_with_steps.run
|
280
|
+
end
|
281
|
+
|
282
|
+
it 'should run the job with the jobflow config' do
|
283
|
+
Elasticity::EMR.stub(:new).with('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').and_return(emr)
|
284
|
+
jobflow_with_steps.stub(:jobflow_config).and_return('JOBFLOW_CONFIG')
|
285
|
+
emr.should_receive(:run_job_flow).with('JOBFLOW_CONFIG')
|
286
|
+
jobflow_with_steps.run
|
287
|
+
end
|
288
|
+
|
289
|
+
it 'should return the jobflow ID' do
|
290
|
+
Elasticity::EMR.stub(:new).with('STEP_TEST_ACCESS', 'STEP_TEST_SECRET').and_return(emr)
|
291
|
+
jobflow_with_steps.run.should == 'JOBFLOW_ID'
|
292
|
+
end
|
293
|
+
|
294
|
+
end
|
295
|
+
|
296
|
+
context 'when the jobflow has already been run' do
|
297
|
+
before do
|
298
|
+
jobflow_with_steps.run
|
299
|
+
end
|
300
|
+
it 'should raise an error' do
|
301
|
+
expect {
|
302
|
+
jobflow_with_steps.run
|
303
|
+
}.to raise_error(Elasticity::JobFlowRunningError, 'Cannot run a job flow multiple times. To do more with this job flow, please use #add_step.')
|
304
|
+
end
|
305
|
+
end
|
306
|
+
|
307
|
+
end
|
308
|
+
|
309
|
+
context 'when there are no steps added' do
|
310
|
+
let(:jobflow_with_no_steps) { Elasticity::JobFlow.new('_', '_') }
|
311
|
+
it 'should raise an error' do
|
312
|
+
expect {
|
313
|
+
jobflow_with_no_steps.run
|
314
|
+
}.to raise_error(Elasticity::JobFlowMissingStepsError, 'Cannot run a job flow without adding steps. Please use #add_step.')
|
315
|
+
end
|
316
|
+
end
|
317
|
+
|
318
|
+
end
|
319
|
+
|
320
|
+
describe '#status' do
|
321
|
+
|
322
|
+
context 'before the jobflow has been run' do
|
323
|
+
it 'should raise an error' do
|
324
|
+
expect {
|
325
|
+
subject.status
|
326
|
+
}.to raise_error(Elasticity::JobFlowNotStartedError, 'Please #run this job flow before attempting to retrieve status.')
|
327
|
+
end
|
328
|
+
end
|
329
|
+
|
330
|
+
context 'after the jobflow has been run' do
|
331
|
+
let(:emr) { double('Elasticity::EMR', :run_job_flow => 'JOBFLOW_ID') }
|
332
|
+
let(:running_jobflow) { Elasticity::JobFlow.new('_', '_') }
|
333
|
+
let(:jobflow_status) do
|
334
|
+
Elasticity::JobFlowStatus.new.tap do |js|
|
335
|
+
js.stub(:state => 'TERMINATED')
|
336
|
+
end
|
337
|
+
end
|
338
|
+
before do
|
339
|
+
Elasticity::EMR.stub(:new).and_return(emr)
|
340
|
+
running_jobflow.add_step(Elasticity::CustomJarStep.new('_'))
|
341
|
+
running_jobflow.run
|
342
|
+
end
|
343
|
+
it 'should return the AWS status' do
|
344
|
+
emr.should_receive(:describe_jobflow).with('JOBFLOW_ID').and_return(jobflow_status)
|
345
|
+
status = running_jobflow.status
|
346
|
+
status.should be_a(Elasticity::JobFlowStatus)
|
347
|
+
status.state.should == 'TERMINATED'
|
348
|
+
end
|
146
349
|
end
|
350
|
+
|
351
|
+
end
|
352
|
+
|
353
|
+
describe '#shutdown' do
|
354
|
+
|
355
|
+
context 'when the jobflow has not yet been started' do
|
356
|
+
let(:unstarted_job_flow) { Elasticity::JobFlow.new('_', '_')}
|
357
|
+
it 'should be an error' do
|
358
|
+
expect {
|
359
|
+
unstarted_job_flow.shutdown
|
360
|
+
}.to raise_error(Elasticity::JobFlowNotStartedError, 'Cannot #shutdown a job flow that has not yet been #run.')
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
context 'when the jobflow has been started' do
|
365
|
+
let(:emr) { double('Elasticity::EMR', :run_job_flow => 'JOBFLOW_ID') }
|
366
|
+
let(:running_jobflow) { Elasticity::JobFlow.new('_', '_') }
|
367
|
+
before do
|
368
|
+
Elasticity::EMR.stub(:new).and_return(emr)
|
369
|
+
running_jobflow.add_step(Elasticity::CustomJarStep.new('_'))
|
370
|
+
running_jobflow.run
|
371
|
+
end
|
372
|
+
it 'should shutdown the running jobflow' do
|
373
|
+
emr.should_receive(:terminate_jobflows).with('JOBFLOW_ID')
|
374
|
+
running_jobflow.shutdown
|
375
|
+
end
|
376
|
+
end
|
377
|
+
|
147
378
|
end
|
148
379
|
|
149
|
-
end
|
380
|
+
end
|