elasticity 2.5.1 → 2.5.2

Sign up to get free protection for your applications and to get access to all the features.
data/HISTORY.md CHANGED
@@ -1,7 +1,11 @@
1
+ ## 2.5.2 - November 29, 2012
2
+
3
+ + Configuration of Hive installations via ```hive_site``` is now supported. See the README.md for details.
4
+
1
5
  ## 2.5.1 - November 28, 2012
2
6
 
3
7
  + When ```JobFlow#placement``` is specified, instances are created in that availability zone. Previously, this setting was only used to derive the EMR API endpoint to connect to (i.e. the region).
4
- + Updated development depdencies.
8
+ + Updated development dependencies.
5
9
 
6
10
  ## 2.5 - September 29, 2012
7
11
 
data/README.md CHANGED
@@ -1,5 +1,3 @@
1
- (2012-11-23) Hey all, I'm still out here. Elasticity remains part of our daily workflow at [Sharethrough](http://www.sharethrough.com/) and after a year of work, our usage patterns have stabilized. Submit an issue if you have suggestions!
2
-
3
1
  Elasticity provides programmatic access to Amazon's Elastic Map Reduce service. The aim is to conveniently abstract away the complex EMR REST API and make working with job flows more productive and more enjoyable.
4
2
 
5
3
  [![Build Status](https://secure.travis-ci.org/rslifka/elasticity.png)](http://travis-ci.org/rslifka/elasticity) REE, 1.8.7, 1.9.2, 1.9.3
@@ -323,6 +321,19 @@ If that's the case, or if you'd just like to terminate a running jobflow before
323
321
  jobflow.shutdown
324
322
  ```
325
323
 
324
+ # Elasticity Configuration
325
+
326
+ Elasticity supports a wide range of configuration options :) All of them are shown below.
327
+
328
+ ```ruby
329
+ Elasticity.configure do |config|
330
+
331
+ # If using Hive, it will be configured via the directives in this hive-site.xml file
332
+ config.hive_site = 's3://bucket/hive-site.xml'
333
+
334
+ end
335
+ ```
336
+
326
337
  # Amazon EMR Documentation
327
338
 
328
339
  Elasticity wraps all of the EMR API calls. Please see the Amazon guide for details on these operations because the default values aren't obvious (e.g. the meaning of <code>DescribeJobFlows</code> without parameters).
@@ -27,4 +27,25 @@ require 'elasticity/pig_step'
27
27
  require 'elasticity/streaming_step'
28
28
 
29
29
  module Elasticity
30
+
31
+ class << self
32
+ attr_reader :configuration
33
+
34
+ def configuration
35
+ @configuration ||= Configuration.new
36
+ end
37
+
38
+ def default_configuration
39
+ @configuration = Configuration.new
40
+ end
41
+
42
+ def configure
43
+ yield(configuration)
44
+ end
45
+ end
46
+
47
+ class Configuration
48
+ attr_accessor :hive_site
49
+ end
50
+
30
51
  end
@@ -12,11 +12,11 @@ module Elasticity
12
12
  def initialize(script)
13
13
  @name = "Elasticity Hive Step (#{script})"
14
14
  @script = script
15
- @variables = { }
15
+ @variables = {}
16
16
  @action_on_failure = 'TERMINATE_JOB_FLOW'
17
17
  end
18
18
 
19
- def to_aws_step(job_flow)
19
+ def to_aws_step(_)
20
20
  args = %w(s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --hive-versions latest --run-hive-script --args)
21
21
  args.concat(['-f', @script])
22
22
  @variables.keys.sort.each do |name|
@@ -36,15 +36,40 @@ module Elasticity
36
36
  true
37
37
  end
38
38
 
39
- def self.aws_installation_step
40
- {
41
- :action_on_failure => 'TERMINATE_JOB_FLOW',
42
- :hadoop_jar_step => {
43
- :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
44
- :args => %w(s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --install-hive --hive-versions latest)
45
- },
46
- :name => 'Elasticity - Install Hive'
47
- }
39
+ def self.aws_installation_step_name
40
+ 'Elasticity - Install Hive'
41
+ end
42
+
43
+ def self.aws_installation_steps
44
+ steps = [
45
+ {
46
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
47
+ :hadoop_jar_step => {
48
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
49
+ :args => %w(s3://elasticmapreduce/libs/hive/hive-script --base-path s3://elasticmapreduce/libs/hive/ --install-hive --hive-versions latest)
50
+ },
51
+ :name => aws_installation_step_name
52
+ }
53
+ ]
54
+ if Elasticity.configuration.hive_site
55
+ steps << {
56
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
57
+ :hadoop_jar_step => {
58
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
59
+ :args => [
60
+ 's3://elasticmapreduce/libs/hive/hive-script',
61
+ '--base-path',
62
+ 's3://elasticmapreduce/libs/hive/',
63
+ '--install-hive-site',
64
+ "--hive-site=#{Elasticity.configuration.hive_site}",
65
+ '--hive-versions',
66
+ 'latest'
67
+ ],
68
+ },
69
+ :name => 'Elasticity - Configure Hive via Hive Site'
70
+ }
71
+ end
72
+ steps
48
73
  end
49
74
 
50
75
  end
@@ -98,7 +98,7 @@ module Elasticity
98
98
  if is_jobflow_running?
99
99
  jobflow_steps = []
100
100
  if jobflow_step.requires_installation? && !@installed_steps.include?(jobflow_step.class)
101
- jobflow_steps << jobflow_step.aws_installation_step
101
+ jobflow_steps.concat(jobflow_step.aws_installation_steps)
102
102
  end
103
103
  jobflow_steps << jobflow_step.to_aws_step(self)
104
104
  emr.add_jobflow_steps(@jobflow_id, {:steps => jobflow_steps})
@@ -172,7 +172,7 @@ module Elasticity
172
172
  steps = []
173
173
  @jobflow_steps.each do |step|
174
174
  if step.class.send(:requires_installation?) && !@installed_steps.include?(step.class)
175
- steps << step.class.send(:aws_installation_step)
175
+ steps.concat(step.class.send(:aws_installation_steps))
176
176
  @installed_steps << step.class
177
177
  end
178
178
  steps << step.to_aws_step(self)
@@ -35,7 +35,7 @@ module Elasticity
35
35
 
36
36
  step_names = jobflow.steps.map(&:name)
37
37
  Elasticity::JobFlowStep.steps_requiring_installation.each do |step|
38
- jobflow.installed_steps << step if step_names.include?(step.aws_installation_step[:name])
38
+ jobflow.installed_steps << step if step_names.include?(step.aws_installation_step_name)
39
39
  end
40
40
 
41
41
  jobflow.created_at = Time.parse(xml_element.xpath('./ExecutionStatusDetail/CreationDateTime').text.strip)
@@ -12,8 +12,12 @@ module Elasticity
12
12
  self.class.requires_installation?
13
13
  end
14
14
 
15
- def aws_installation_step
16
- self.class.aws_installation_step
15
+ def aws_installation_steps
16
+ self.class.aws_installation_steps
17
+ end
18
+
19
+ def aws_installation_step_name
20
+ self.class.aws_installation_step_name
17
21
  end
18
22
 
19
23
  module ClassMethods
@@ -22,10 +26,14 @@ module Elasticity
22
26
  false
23
27
  end
24
28
 
25
- def aws_installation_step
29
+ def aws_installation_steps
26
30
  raise RuntimeError, '.aws_installation_step is required to be defined when a step requires installation (e.g. Pig, Hive).'
27
31
  end
28
32
 
33
+ def aws_installation_step_name
34
+ raise RuntimeError, '.aws_installation_step_name is required to be defined when a step requires installation (e.g. Pig, Hive).'
35
+ end
36
+
29
37
  end
30
38
 
31
39
  def self.included(base)
@@ -12,7 +12,7 @@ module Elasticity
12
12
  def initialize(script)
13
13
  @name = "Elasticity Pig Step (#{script})"
14
14
  @script = script
15
- @variables = { }
15
+ @variables = {}
16
16
  @action_on_failure = 'TERMINATE_JOB_FLOW'
17
17
  end
18
18
 
@@ -37,20 +37,26 @@ module Elasticity
37
37
  true
38
38
  end
39
39
 
40
- def self.aws_installation_step
41
- {
42
- :action_on_failure => 'TERMINATE_JOB_FLOW',
43
- :hadoop_jar_step => {
44
- :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
45
- :args => [
46
- 's3://elasticmapreduce/libs/pig/pig-script',
40
+ def self.aws_installation_step_name
41
+ 'Elasticity - Install Pig'
42
+ end
43
+
44
+ def self.aws_installation_steps
45
+ [
46
+ {
47
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
48
+ :hadoop_jar_step => {
49
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
50
+ :args => [
51
+ 's3://elasticmapreduce/libs/pig/pig-script',
47
52
  '--base-path',
48
53
  's3://elasticmapreduce/libs/pig/',
49
54
  '--install-pig'
50
- ],
51
- },
52
- :name => 'Elasticity - Install Pig'
53
- }
55
+ ],
56
+ },
57
+ :name => aws_installation_step_name
58
+ }
59
+ ]
54
60
  end
55
61
 
56
62
  private
@@ -1,3 +1,3 @@
1
1
  module Elasticity
2
- VERSION = '2.5.1'
2
+ VERSION = '2.5.2'
3
3
  end
@@ -55,10 +55,10 @@ describe Elasticity::HiveStep do
55
55
  end
56
56
  end
57
57
 
58
- describe '.aws_installation_step' do
58
+ describe '.aws_installation_steps' do
59
59
 
60
- it 'should provide a means to install Hive' do
61
- Elasticity::HiveStep.aws_installation_step.should == {
60
+ let(:install_hive_step) do
61
+ {
62
62
  :action_on_failure => 'TERMINATE_JOB_FLOW',
63
63
  :hadoop_jar_step => {
64
64
  :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
@@ -75,6 +75,42 @@ describe Elasticity::HiveStep do
75
75
  }
76
76
  end
77
77
 
78
+ let(:configure_hive_step) do
79
+ {
80
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
81
+ :hadoop_jar_step => {
82
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
83
+ :args => [
84
+ 's3://elasticmapreduce/libs/hive/hive-script',
85
+ '--base-path',
86
+ 's3://elasticmapreduce/libs/hive/',
87
+ '--install-hive-site',
88
+ '--hive-site=s3://TEST/hive-site.xml',
89
+ '--hive-versions',
90
+ 'latest'
91
+ ],
92
+ },
93
+ :name => 'Elasticity - Configure Hive via Hive Site'
94
+ }
95
+ end
96
+
97
+ context 'when a hive site configuration file is not specified' do
98
+ it 'should specify how to install Hive' do
99
+ Elasticity::HiveStep.aws_installation_steps.should == [install_hive_step]
100
+ end
101
+ end
102
+
103
+ context 'when a hive site configuration file is specified' do
104
+ before do
105
+ Elasticity.configure do |config|
106
+ config.hive_site = 's3://TEST/hive-site.xml'
107
+ end
108
+ end
109
+ it 'should specify how to install Hive' do
110
+ Elasticity::HiveStep.aws_installation_steps.should == [install_hive_step, configure_hive_step]
111
+ end
112
+ end
113
+
78
114
  end
79
115
 
80
116
  end
@@ -161,10 +161,7 @@ describe Elasticity::JobFlow do
161
161
 
162
162
  it 'should submit the installation step and the step' do
163
163
  emr.should_receive(:add_jobflow_steps).with('RUNNING_JOBFLOW_ID', {
164
- :steps => [
165
- Elasticity::HiveStep.aws_installation_step,
166
- additional_step.to_aws_step(running_jobflow)
167
- ]
164
+ :steps => Elasticity::HiveStep.aws_installation_steps << additional_step.to_aws_step(running_jobflow)
168
165
  })
169
166
  running_jobflow.add_step(additional_step)
170
167
  end
@@ -209,12 +206,12 @@ describe Elasticity::JobFlow do
209
206
  end
210
207
  let(:aws_steps) do
211
208
  [
212
- Elasticity::HiveStep.aws_installation_step,
209
+ Elasticity::HiveStep.aws_installation_steps,
213
210
  jobflow_steps[0].to_aws_step(jobflow_with_steps),
214
- Elasticity::PigStep.aws_installation_step,
211
+ Elasticity::PigStep.aws_installation_steps,
215
212
  jobflow_steps[1].to_aws_step(jobflow_with_steps),
216
213
  jobflow_steps[2].to_aws_step(jobflow_with_steps),
217
- ]
214
+ ].flatten
218
215
  end
219
216
 
220
217
  it 'should incorporate the installation and run steps into the jobflow config' do
@@ -265,8 +262,8 @@ describe Elasticity::JobFlow do
265
262
  let(:hadoop_bootstrap_actions) do
266
263
  [
267
264
  Elasticity::HadoopBootstrapAction.new('OPTION1', 'VALUE1'),
268
- Elasticity::HadoopBootstrapAction.new('OPTION1', 'VALUE2'),
269
- Elasticity::HadoopBootstrapAction.new('OPTION2', 'VALUE3')
265
+ Elasticity::HadoopBootstrapAction.new('OPTION1', 'VALUE2'),
266
+ Elasticity::HadoopBootstrapAction.new('OPTION2', 'VALUE3')
270
267
  ]
271
268
  end
272
269
  let(:jobflow_with_bootstrap_actions) do
@@ -306,12 +303,12 @@ describe Elasticity::JobFlow do
306
303
  :instance_type => 'm1.small',
307
304
  :market => 'ON_DEMAND',
308
305
  },
309
- {
310
- :instance_count => 1,
311
- :instance_role => 'CORE',
312
- :instance_type => 'm1.small',
313
- :market => 'ON_DEMAND'
314
- },
306
+ {
307
+ :instance_count => 1,
308
+ :instance_role => 'CORE',
309
+ :instance_type => 'm1.small',
310
+ :market => 'ON_DEMAND'
311
+ },
315
312
  ]
316
313
  end
317
314
 
@@ -33,18 +33,33 @@ describe Elasticity::JobFlowStep do
33
33
  end
34
34
  end
35
35
 
36
- describe '.aws_installation_step' do
36
+ describe '#aws_installation_step_name' do
37
+ it 'should delegate to the class method' do
38
+ FakeStep.should_receive(:aws_installation_step_name).and_return('AWS_INSTALLATION_STEP_NAME')
39
+ subject.aws_installation_step_name.should == 'AWS_INSTALLATION_STEP_NAME'
40
+ end
41
+ end
42
+
43
+ describe '.aws_installation_step_name' do
37
44
  it 'should raise an error by default' do
38
45
  expect {
39
- FakeStep.aws_installation_step
40
- }.to raise_error(RuntimeError, '.aws_installation_step is required to be defined when a step requires installation (e.g. Pig, Hive).')
46
+ FakeStep.aws_installation_step_name
47
+ }.to raise_error(RuntimeError, '.aws_installation_step_name is required to be defined when a step requires installation (e.g. Pig, Hive).')
41
48
  end
42
49
  end
43
50
 
44
- describe '#aws_installation_step' do
51
+ describe '#aws_installation_steps' do
45
52
  it 'should delegate to the class method' do
46
- FakeStep.should_receive(:aws_installation_step).and_return('AWS_INSTALLATION_STEP')
47
- subject.aws_installation_step.should == 'AWS_INSTALLATION_STEP'
53
+ FakeStep.should_receive(:aws_installation_steps).and_return('AWS_INSTALLATION_STEPS')
54
+ subject.aws_installation_steps.should == 'AWS_INSTALLATION_STEPS'
55
+ end
56
+ end
57
+
58
+ describe '.aws_installation_steps' do
59
+ it 'should raise an error by default' do
60
+ expect {
61
+ FakeStep.aws_installation_steps
62
+ }.to raise_error(RuntimeError, '.aws_installation_step is required to be defined when a step requires installation (e.g. Pig, Hive).')
48
63
  end
49
64
  end
50
65
 
@@ -8,7 +8,7 @@ describe Elasticity::PigStep do
8
8
 
9
9
  its(:name) { should == 'Elasticity Pig Step (script.pig)' }
10
10
  its(:script) { should == 'script.pig' }
11
- its(:variables) { should == { } }
11
+ its(:variables) { should == {} }
12
12
  its(:action_on_failure) { should == 'TERMINATE_JOB_FLOW' }
13
13
 
14
14
  describe '#to_aws_step' do
@@ -20,9 +20,9 @@ describe Elasticity::PigStep do
20
20
  step[:hadoop_jar_step][:jar].should == 's3://elasticmapreduce/libs/script-runner/script-runner.jar'
21
21
  step[:hadoop_jar_step][:args].should start_with([
22
22
  's3://elasticmapreduce/libs/pig/pig-script',
23
- '--run-pig-script',
24
- '--args',
25
- '-p'
23
+ '--run-pig-script',
24
+ '--args',
25
+ '-p'
26
26
  ])
27
27
  step[:hadoop_jar_step][:args][4] =~ /^E_PARALLELS=\d+$/
28
28
  end
@@ -68,9 +68,9 @@ describe Elasticity::PigStep do
68
68
  step = ps_with_variables.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
69
69
  step[:hadoop_jar_step][:args][3..9].should == [
70
70
  '-p', 'VAR1=VALUE1',
71
- '-p', 'VAR2=VALUE2',
72
- '-p', 'E_PARALLELS=1',
73
- 'script.pig'
71
+ '-p', 'VAR2=VALUE2',
72
+ '-p', 'E_PARALLELS=1',
73
+ 'script.pig'
74
74
  ]
75
75
  end
76
76
  end
@@ -83,22 +83,26 @@ describe Elasticity::PigStep do
83
83
  end
84
84
  end
85
85
 
86
- describe '.aws_installation_step' do
86
+ describe '.aws_installation_steps' do
87
+
87
88
  it 'should provide a means to install Pig' do
88
- Elasticity::PigStep.aws_installation_step.should == {
89
- :action_on_failure => 'TERMINATE_JOB_FLOW',
90
- :hadoop_jar_step => {
91
- :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
92
- :args => [
93
- 's3://elasticmapreduce/libs/pig/pig-script',
89
+ Elasticity::PigStep.aws_installation_steps.should == [
90
+ {
91
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
92
+ :hadoop_jar_step => {
93
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
94
+ :args => [
95
+ 's3://elasticmapreduce/libs/pig/pig-script',
94
96
  '--base-path',
95
97
  's3://elasticmapreduce/libs/pig/',
96
98
  '--install-pig'
97
- ],
98
- },
99
- :name => 'Elasticity - Install Pig'
100
- }
99
+ ],
100
+ },
101
+ :name => 'Elasticity - Install Pig'
102
+ }
103
+ ]
101
104
  end
105
+
102
106
  end
103
107
 
104
108
  end
@@ -1,10 +1,15 @@
1
- require 'rubygems'
2
- require 'bundler/setup'
3
- require 'elasticity'
1
+ ENV['RAILS_ENV'] ||= 'test'
4
2
 
3
+ require 'elasticity'
5
4
  require 'timecop'
6
5
  require 'fakefs/spec_helpers'
7
6
 
8
- ENV['RAILS_ENV'] ||= 'test'
9
-
10
7
  Dir[File.join(File.dirname(__FILE__), 'support', '**', '*.rb')].each { |f| require f }
8
+
9
+ RSpec.configure do |config|
10
+
11
+ config.before(:each) do
12
+ Elasticity.default_configuration
13
+ end
14
+
15
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticity
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.1
4
+ version: 2.5.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: