elasticity 2.5.1 → 2.5.2
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.md +5 -1
- data/README.md +13 -2
- data/lib/elasticity.rb +21 -0
- data/lib/elasticity/hive_step.rb +36 -11
- data/lib/elasticity/job_flow.rb +2 -2
- data/lib/elasticity/job_flow_status.rb +1 -1
- data/lib/elasticity/job_flow_step.rb +11 -3
- data/lib/elasticity/pig_step.rb +18 -12
- data/lib/elasticity/version.rb +1 -1
- data/spec/lib/elasticity/hive_step_spec.rb +39 -3
- data/spec/lib/elasticity/job_flow_spec.rb +12 -15
- data/spec/lib/elasticity/job_flow_step_spec.rb +21 -6
- data/spec/lib/elasticity/pig_step_spec.rb +22 -18
- data/spec/spec_helper.rb +10 -5
- metadata +1 -1
data/HISTORY.md
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
+
## 2.5.2 - November 29, 2012
|
2
|
+
|
3
|
+
+ Configuration of Hive installations via ```hive_site``` is now supported. See the README.md for details.
|
4
|
+
|
1
5
|
## 2.5.1 - November 28, 2012
|
2
6
|
|
3
7
|
+ When ```JobFlow#placement``` is specified, instances are created in that availability zone. Previously, this setting was only used to derive the EMR API endpoint to connect to (i.e. the region).
|
4
|
-
+ Updated development
|
8
|
+
+ Updated development dependencies.
|
5
9
|
|
6
10
|
## 2.5 - September 29, 2012
|
7
11
|
|
data/README.md
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
(2012-11-23) Hey all, I'm still out here. Elasticity remains part of our daily workflow at [Sharethrough](http://www.sharethrough.com/) and after a year of work, our usage patterns have stabilized. Submit an issue if you have suggestions!
|
2
|
-
|
3
1
|
Elasticity provides programmatic access to Amazon's Elastic Map Reduce service. The aim is to conveniently abstract away the complex EMR REST API and make working with job flows more productive and more enjoyable.
|
4
2
|
|
5
3
|
[![Build Status](https://secure.travis-ci.org/rslifka/elasticity.png)](http://travis-ci.org/rslifka/elasticity) REE, 1.8.7, 1.9.2, 1.9.3
|
@@ -323,6 +321,19 @@ If that's the case, or if you'd just like to terminate a running jobflow before
|
|
323
321
|
jobflow.shutdown
|
324
322
|
```
|
325
323
|
|
324
|
+
# Elasticity Configuration
|
325
|
+
|
326
|
+
Elasticity supports a wide range of configuration options :) all of which are shown below.
|
327
|
+
|
328
|
+
```ruby
|
329
|
+
Elasticity.configure do |config|
|
330
|
+
|
331
|
+
# If using Hive, it will be configured via the directives here
|
332
|
+
config.hive_site = 's3://bucket/hive-site.xml'
|
333
|
+
|
334
|
+
end
|
335
|
+
```
|
336
|
+
|
326
337
|
# Amazon EMR Documentation
|
327
338
|
|
328
339
|
Elasticity wraps all of the EMR API calls. Please see the Amazon guide for details on these operations because the default values aren't obvious (e.g. the meaning of <code>DescribeJobFlows</code> without parameters).
|
data/lib/elasticity.rb
CHANGED
@@ -27,4 +27,25 @@ require 'elasticity/pig_step'
|
|
27
27
|
require 'elasticity/streaming_step'
|
28
28
|
|
29
29
|
module Elasticity
|
30
|
+
|
31
|
+
class << self
|
32
|
+
attr_reader :configuration
|
33
|
+
|
34
|
+
def configuration
|
35
|
+
@configuration ||= Configuration.new
|
36
|
+
end
|
37
|
+
|
38
|
+
def default_configuration
|
39
|
+
@configuration = Configuration.new
|
40
|
+
end
|
41
|
+
|
42
|
+
def configure
|
43
|
+
yield(configuration)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
class Configuration
|
48
|
+
attr_accessor :hive_site
|
49
|
+
end
|
50
|
+
|
30
51
|
end
|
data/lib/elasticity/hive_step.rb
CHANGED
@@ -12,11 +12,11 @@ module Elasticity
|
|
12
12
|
def initialize(script)
|
13
13
|
@name = "Elasticity Hive Step (#{script})"
|
14
14
|
@script = script
|
15
|
-
@variables = {
|
15
|
+
@variables = {}
|
16
16
|
@action_on_failure = 'TERMINATE_JOB_FLOW'
|
17
17
|
end
|
18
18
|
|
19
|
-
def to_aws_step(
|
19
|
+
def to_aws_step(_)
|
20
20
|
args = %w(s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --hive-versions latest --run-hive-script --args)
|
21
21
|
args.concat(['-f', @script])
|
22
22
|
@variables.keys.sort.each do |name|
|
@@ -36,15 +36,40 @@ module Elasticity
|
|
36
36
|
true
|
37
37
|
end
|
38
38
|
|
39
|
-
def self.
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
39
|
+
def self.aws_installation_step_name
|
40
|
+
'Elasticity - Install Hive'
|
41
|
+
end
|
42
|
+
|
43
|
+
def self.aws_installation_steps
|
44
|
+
steps = [
|
45
|
+
{
|
46
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
47
|
+
:hadoop_jar_step => {
|
48
|
+
:jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
49
|
+
:args => %w(s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --install-hive --hive-versions latest)
|
50
|
+
},
|
51
|
+
:name => aws_installation_step_name
|
52
|
+
}
|
53
|
+
]
|
54
|
+
if Elasticity.configuration.hive_site
|
55
|
+
steps << {
|
56
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
57
|
+
:hadoop_jar_step => {
|
58
|
+
:jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
59
|
+
:args => [
|
60
|
+
's3://elasticmapreduce/libs/hive/hive-script',
|
61
|
+
'--base-path',
|
62
|
+
's3://elasticmapreduce/libs/hive/',
|
63
|
+
'--install-hive-site',
|
64
|
+
"--hive-site=#{Elasticity.configuration.hive_site}",
|
65
|
+
'--hive-versions',
|
66
|
+
'latest'
|
67
|
+
],
|
68
|
+
},
|
69
|
+
:name => 'Elasticity - Configure Hive via Hive Site'
|
70
|
+
}
|
71
|
+
end
|
72
|
+
steps
|
48
73
|
end
|
49
74
|
|
50
75
|
end
|
data/lib/elasticity/job_flow.rb
CHANGED
@@ -98,7 +98,7 @@ module Elasticity
|
|
98
98
|
if is_jobflow_running?
|
99
99
|
jobflow_steps = []
|
100
100
|
if jobflow_step.requires_installation? && !@installed_steps.include?(jobflow_step.class)
|
101
|
-
jobflow_steps
|
101
|
+
jobflow_steps.concat(jobflow_step.aws_installation_steps)
|
102
102
|
end
|
103
103
|
jobflow_steps << jobflow_step.to_aws_step(self)
|
104
104
|
emr.add_jobflow_steps(@jobflow_id, {:steps => jobflow_steps})
|
@@ -172,7 +172,7 @@ module Elasticity
|
|
172
172
|
steps = []
|
173
173
|
@jobflow_steps.each do |step|
|
174
174
|
if step.class.send(:requires_installation?) && !@installed_steps.include?(step.class)
|
175
|
-
steps
|
175
|
+
steps.concat(step.class.send(:aws_installation_steps))
|
176
176
|
@installed_steps << step.class
|
177
177
|
end
|
178
178
|
steps << step.to_aws_step(self)
|
data/lib/elasticity/job_flow_status.rb
CHANGED
@@ -35,7 +35,7 @@ module Elasticity
|
|
35
35
|
|
36
36
|
step_names = jobflow.steps.map(&:name)
|
37
37
|
Elasticity::JobFlowStep.steps_requiring_installation.each do |step|
|
38
|
-
jobflow.installed_steps << step if step_names.include?(step.
|
38
|
+
jobflow.installed_steps << step if step_names.include?(step.aws_installation_step_name)
|
39
39
|
end
|
40
40
|
|
41
41
|
jobflow.created_at = Time.parse(xml_element.xpath('./ExecutionStatusDetail/CreationDateTime').text.strip)
|
data/lib/elasticity/job_flow_step.rb
CHANGED
@@ -12,8 +12,12 @@ module Elasticity
|
|
12
12
|
self.class.requires_installation?
|
13
13
|
end
|
14
14
|
|
15
|
-
def
|
16
|
-
self.class.
|
15
|
+
def aws_installation_steps
|
16
|
+
self.class.aws_installation_steps
|
17
|
+
end
|
18
|
+
|
19
|
+
def aws_installation_step_name
|
20
|
+
self.class.aws_installation_step_name
|
17
21
|
end
|
18
22
|
|
19
23
|
module ClassMethods
|
@@ -22,10 +26,14 @@ module Elasticity
|
|
22
26
|
false
|
23
27
|
end
|
24
28
|
|
25
|
-
def
|
29
|
+
def aws_installation_steps
|
26
30
|
raise RuntimeError, '.aws_installation_step is required to be defined when a step requires installation (e.g. Pig, Hive).'
|
27
31
|
end
|
28
32
|
|
33
|
+
def aws_installation_step_name
|
34
|
+
raise RuntimeError, '.aws_installation_step_name is required to be defined when a step requires installation (e.g. Pig, Hive).'
|
35
|
+
end
|
36
|
+
|
29
37
|
end
|
30
38
|
|
31
39
|
def self.included(base)
|
data/lib/elasticity/pig_step.rb
CHANGED
@@ -12,7 +12,7 @@ module Elasticity
|
|
12
12
|
def initialize(script)
|
13
13
|
@name = "Elasticity Pig Step (#{script})"
|
14
14
|
@script = script
|
15
|
-
@variables = {
|
15
|
+
@variables = {}
|
16
16
|
@action_on_failure = 'TERMINATE_JOB_FLOW'
|
17
17
|
end
|
18
18
|
|
@@ -37,20 +37,26 @@ module Elasticity
|
|
37
37
|
true
|
38
38
|
end
|
39
39
|
|
40
|
-
def self.
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
40
|
+
def self.aws_installation_step_name
|
41
|
+
'Elasticity - Install Pig'
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.aws_installation_steps
|
45
|
+
[
|
46
|
+
{
|
47
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
48
|
+
:hadoop_jar_step => {
|
49
|
+
:jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
50
|
+
:args => [
|
51
|
+
's3://elasticmapreduce/libs/pig/pig-script',
|
47
52
|
'--base-path',
|
48
53
|
's3://elasticmapreduce/libs/pig/',
|
49
54
|
'--install-pig'
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
55
|
+
],
|
56
|
+
},
|
57
|
+
:name => aws_installation_step_name
|
58
|
+
}
|
59
|
+
]
|
54
60
|
end
|
55
61
|
|
56
62
|
private
|
data/lib/elasticity/version.rb
CHANGED
data/spec/lib/elasticity/hive_step_spec.rb
CHANGED
@@ -55,10 +55,10 @@ describe Elasticity::HiveStep do
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
58
|
-
describe '.
|
58
|
+
describe '.aws_installation_steps' do
|
59
59
|
|
60
|
-
|
61
|
-
|
60
|
+
let(:install_hive_step) do
|
61
|
+
{
|
62
62
|
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
63
63
|
:hadoop_jar_step => {
|
64
64
|
:jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
@@ -75,6 +75,42 @@ describe Elasticity::HiveStep do
|
|
75
75
|
}
|
76
76
|
end
|
77
77
|
|
78
|
+
let(:configure_hive_step) do
|
79
|
+
{
|
80
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
81
|
+
:hadoop_jar_step => {
|
82
|
+
:jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
83
|
+
:args => [
|
84
|
+
's3://elasticmapreduce/libs/hive/hive-script',
|
85
|
+
'--base-path',
|
86
|
+
's3://elasticmapreduce/libs/hive/',
|
87
|
+
'--install-hive-site',
|
88
|
+
'--hive-site=s3://TEST/hive-site.xml',
|
89
|
+
'--hive-versions',
|
90
|
+
'latest'
|
91
|
+
],
|
92
|
+
},
|
93
|
+
:name => 'Elasticity - Configure Hive via Hive Site'
|
94
|
+
}
|
95
|
+
end
|
96
|
+
|
97
|
+
context 'when a hive site configuration file is not specified' do
|
98
|
+
it 'should specify how to install Hive' do
|
99
|
+
Elasticity::HiveStep.aws_installation_steps.should == [install_hive_step]
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
context 'when a hive site configuration file is specified' do
|
104
|
+
before do
|
105
|
+
Elasticity.configure do |config|
|
106
|
+
config.hive_site = 's3://TEST/hive-site.xml'
|
107
|
+
end
|
108
|
+
end
|
109
|
+
it 'should specify how to install Hive' do
|
110
|
+
Elasticity::HiveStep.aws_installation_steps.should == [install_hive_step, configure_hive_step]
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
78
114
|
end
|
79
115
|
|
80
116
|
end
|
data/spec/lib/elasticity/job_flow_spec.rb
CHANGED
@@ -161,10 +161,7 @@ describe Elasticity::JobFlow do
|
|
161
161
|
|
162
162
|
it 'should submit the installation step and the step' do
|
163
163
|
emr.should_receive(:add_jobflow_steps).with('RUNNING_JOBFLOW_ID', {
|
164
|
-
:steps =>
|
165
|
-
Elasticity::HiveStep.aws_installation_step,
|
166
|
-
additional_step.to_aws_step(running_jobflow)
|
167
|
-
]
|
164
|
+
:steps => Elasticity::HiveStep.aws_installation_steps << additional_step.to_aws_step(running_jobflow)
|
168
165
|
})
|
169
166
|
running_jobflow.add_step(additional_step)
|
170
167
|
end
|
@@ -209,12 +206,12 @@ describe Elasticity::JobFlow do
|
|
209
206
|
end
|
210
207
|
let(:aws_steps) do
|
211
208
|
[
|
212
|
-
Elasticity::HiveStep.
|
209
|
+
Elasticity::HiveStep.aws_installation_steps,
|
213
210
|
jobflow_steps[0].to_aws_step(jobflow_with_steps),
|
214
|
-
Elasticity::PigStep.
|
211
|
+
Elasticity::PigStep.aws_installation_steps,
|
215
212
|
jobflow_steps[1].to_aws_step(jobflow_with_steps),
|
216
213
|
jobflow_steps[2].to_aws_step(jobflow_with_steps),
|
217
|
-
]
|
214
|
+
].flatten
|
218
215
|
end
|
219
216
|
|
220
217
|
it 'should incorporate the installation and run steps into the jobflow config' do
|
@@ -265,8 +262,8 @@ describe Elasticity::JobFlow do
|
|
265
262
|
let(:hadoop_bootstrap_actions) do
|
266
263
|
[
|
267
264
|
Elasticity::HadoopBootstrapAction.new('OPTION1', 'VALUE1'),
|
268
|
-
|
269
|
-
|
265
|
+
Elasticity::HadoopBootstrapAction.new('OPTION1', 'VALUE2'),
|
266
|
+
Elasticity::HadoopBootstrapAction.new('OPTION2', 'VALUE3')
|
270
267
|
]
|
271
268
|
end
|
272
269
|
let(:jobflow_with_bootstrap_actions) do
|
@@ -306,12 +303,12 @@ describe Elasticity::JobFlow do
|
|
306
303
|
:instance_type => 'm1.small',
|
307
304
|
:market => 'ON_DEMAND',
|
308
305
|
},
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
306
|
+
{
|
307
|
+
:instance_count => 1,
|
308
|
+
:instance_role => 'CORE',
|
309
|
+
:instance_type => 'm1.small',
|
310
|
+
:market => 'ON_DEMAND'
|
311
|
+
},
|
315
312
|
]
|
316
313
|
end
|
317
314
|
|
data/spec/lib/elasticity/job_flow_step_spec.rb
CHANGED
@@ -33,18 +33,33 @@ describe Elasticity::JobFlowStep do
|
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
|
-
describe '
|
36
|
+
describe '#aws_installation_step_name' do
|
37
|
+
it 'should delegate to the class method' do
|
38
|
+
FakeStep.should_receive(:aws_installation_step_name).and_return('AWS_INSTALLATION_STEP_NAME')
|
39
|
+
subject.aws_installation_step_name.should == 'AWS_INSTALLATION_STEP_NAME'
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe '.aws_installation_step_name' do
|
37
44
|
it 'should raise an error by default' do
|
38
45
|
expect {
|
39
|
-
FakeStep.
|
40
|
-
}.to raise_error(RuntimeError, '.
|
46
|
+
FakeStep.aws_installation_step_name
|
47
|
+
}.to raise_error(RuntimeError, '.aws_installation_step_name is required to be defined when a step requires installation (e.g. Pig, Hive).')
|
41
48
|
end
|
42
49
|
end
|
43
50
|
|
44
|
-
describe '#
|
51
|
+
describe '#aws_installation_steps' do
|
45
52
|
it 'should delegate to the class method' do
|
46
|
-
FakeStep.should_receive(:
|
47
|
-
subject.
|
53
|
+
FakeStep.should_receive(:aws_installation_steps).and_return('AWS_INSTALLATION_STEPS')
|
54
|
+
subject.aws_installation_steps.should == 'AWS_INSTALLATION_STEPS'
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
describe '.aws_installation_steps' do
|
59
|
+
it 'should raise an error by default' do
|
60
|
+
expect {
|
61
|
+
FakeStep.aws_installation_steps
|
62
|
+
}.to raise_error(RuntimeError, '.aws_installation_step is required to be defined when a step requires installation (e.g. Pig, Hive).')
|
48
63
|
end
|
49
64
|
end
|
50
65
|
|
data/spec/lib/elasticity/pig_step_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe Elasticity::PigStep do
|
|
8
8
|
|
9
9
|
its(:name) { should == 'Elasticity Pig Step (script.pig)' }
|
10
10
|
its(:script) { should == 'script.pig' }
|
11
|
-
its(:variables) { should == {
|
11
|
+
its(:variables) { should == {} }
|
12
12
|
its(:action_on_failure) { should == 'TERMINATE_JOB_FLOW' }
|
13
13
|
|
14
14
|
describe '#to_aws_step' do
|
@@ -20,9 +20,9 @@ describe Elasticity::PigStep do
|
|
20
20
|
step[:hadoop_jar_step][:jar].should == 's3://elasticmapreduce/libs/script-runner/script-runner.jar'
|
21
21
|
step[:hadoop_jar_step][:args].should start_with([
|
22
22
|
's3://elasticmapreduce/libs/pig/pig-script',
|
23
|
-
|
24
|
-
|
25
|
-
|
23
|
+
'--run-pig-script',
|
24
|
+
'--args',
|
25
|
+
'-p'
|
26
26
|
])
|
27
27
|
step[:hadoop_jar_step][:args][4] =~ /^E_PARALLELS=\d+$/
|
28
28
|
end
|
@@ -68,9 +68,9 @@ describe Elasticity::PigStep do
|
|
68
68
|
step = ps_with_variables.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
|
69
69
|
step[:hadoop_jar_step][:args][3..9].should == [
|
70
70
|
'-p', 'VAR1=VALUE1',
|
71
|
-
|
72
|
-
|
73
|
-
|
71
|
+
'-p', 'VAR2=VALUE2',
|
72
|
+
'-p', 'E_PARALLELS=1',
|
73
|
+
'script.pig'
|
74
74
|
]
|
75
75
|
end
|
76
76
|
end
|
@@ -83,22 +83,26 @@ describe Elasticity::PigStep do
|
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
86
|
-
describe '.
|
86
|
+
describe '.aws_installation_steps' do
|
87
|
+
|
87
88
|
it 'should provide a means to install Pig' do
|
88
|
-
Elasticity::PigStep.
|
89
|
-
|
90
|
-
|
91
|
-
:
|
92
|
-
|
93
|
-
|
89
|
+
Elasticity::PigStep.aws_installation_steps.should == [
|
90
|
+
{
|
91
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
92
|
+
:hadoop_jar_step => {
|
93
|
+
:jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
|
94
|
+
:args => [
|
95
|
+
's3://elasticmapreduce/libs/pig/pig-script',
|
94
96
|
'--base-path',
|
95
97
|
's3://elasticmapreduce/libs/pig/',
|
96
98
|
'--install-pig'
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
99
|
+
],
|
100
|
+
},
|
101
|
+
:name => 'Elasticity - Install Pig'
|
102
|
+
}
|
103
|
+
]
|
101
104
|
end
|
105
|
+
|
102
106
|
end
|
103
107
|
|
104
108
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
|
-
|
2
|
-
require 'bundler/setup'
|
3
|
-
require 'elasticity'
|
1
|
+
ENV['RAILS_ENV'] ||= 'test'
|
4
2
|
|
3
|
+
require 'elasticity'
|
5
4
|
require 'timecop'
|
6
5
|
require 'fakefs/spec_helpers'
|
7
6
|
|
8
|
-
ENV['RAILS_ENV'] ||= 'test'
|
9
|
-
|
10
7
|
Dir[File.join(File.dirname(__FILE__), 'support', '**', '*.rb')].each { |f| require f }
|
8
|
+
|
9
|
+
RSpec.configure do |config|
|
10
|
+
|
11
|
+
config.before(:each) do
|
12
|
+
Elasticity.default_configuration
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|