elasticity 2.5.3 → 2.5.5
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.md +8 -0
- data/README.md +11 -5
- data/elasticity.gemspec +1 -1
- data/lib/elasticity/job_flow_status.rb +26 -23
- data/lib/elasticity/streaming_step.rb +5 -3
- data/lib/elasticity/version.rb +1 -1
- data/spec/lib/elasticity/job_flow_status_spec.rb +47 -39
- data/spec/lib/elasticity/streaming_step_spec.rb +4 -3
- metadata +10 -4
data/HISTORY.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 2.5.5 - February 3, 2013
|
2
|
+
|
3
|
+
+ Pull request from [Aaron Olson](https://github.com/airolson), adding ```StreamingStep#arguments```.
|
4
|
+
|
5
|
+
## 2.5.4 - February 1, 2013
|
6
|
+
|
7
|
+
+ Pull request from [Aaron Olson](https://github.com/airolson), adding ```JobFlowStatus#normalized_instance_hours```.
|
8
|
+
|
1
9
|
## 2.5.3 - January 16, 2013
|
2
10
|
|
3
11
|
+ Added ```#visible_to_all_users``` to ```JobFlow```. Thanks to [dstumm](https://github.com/dstumm) for the contribution!
|
data/README.md
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
-
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/elasticity.png)](http://badge.fury.io/rb/elasticity)
|
2
|
+
|
3
|
+
**(February 3, 2013)** Taking requests! I have a few ideas for what might be cool features though I'd rather work on what the community wants. Go ahead and file an issue!
|
2
4
|
|
3
5
|
Elasticity provides programmatic access to Amazon's Elastic Map Reduce service. The aim is to conveniently abstract away the complex EMR REST API and make working with job flows more productive and more enjoyable.
|
4
6
|
|
7
|
+
**Travis has been flaky, failing builds before they start. "Trust me", it's green :)**
|
8
|
+
|
5
9
|
[![Build Status](https://secure.travis-ci.org/rslifka/elasticity.png)](http://travis-ci.org/rslifka/elasticity) REE, 1.8.7, 1.9.2, 1.9.3
|
6
10
|
|
7
11
|
Elasticity provides two ways to access EMR:
|
@@ -256,9 +260,12 @@ jobflow.add_step(hive_step)
|
|
256
260
|
### Adding a Streaming Step
|
257
261
|
|
258
262
|
```ruby
|
259
|
-
# Input bucket, output bucket, mapper
|
263
|
+
# Input bucket, output bucket, mapper script, reducer script
|
260
264
|
streaming_step = Elasticity::StreamingStep.new('s3n://elasticmapreduce/samples/wordcount/input', 's3n://elasticityoutput/wordcount/output/2012-07-23', 's3n://elasticmapreduce/samples/wordcount/wordSplitter.py', 'aggregate')
|
261
265
|
|
266
|
+
# Optionally, include additional *arguments
|
267
|
+
# streaming_step = Elasticity::StreamingStep.new('s3n://elasticmapreduce/samples/wordcount/input', 's3n://elasticityoutput/wordcount/output/2012-07-23', 's3n://elasticmapreduce/samples/wordcount/wordSplitter.py', 'aggregate', '-arg1', 'value1')
|
268
|
+
|
262
269
|
jobflow.add_step(streaming_step)
|
263
270
|
```
|
264
271
|
|
@@ -333,7 +340,7 @@ Elasticity.configure do |config|
|
|
333
340
|
|
334
341
|
# If using Hive, it will be configured via the directives here
|
335
342
|
config.hive_site = 's3://bucket/hive-site.xml'
|
336
|
-
|
343
|
+
|
337
344
|
end
|
338
345
|
```
|
339
346
|
|
@@ -355,8 +362,7 @@ Unfortunately, the documentation is sometimes incorrect and sometimes missing.
|
|
355
362
|
|
356
363
|
* AWS signing was used from [RightScale's](http://www.rightscale.com/) amazing [right_aws gem](https://github.com/rightscale/right_aws) which works extraordinarily well! If you need access to any AWS service (EC2, S3, etc.), have a look.
|
357
364
|
* <code>camelize</code> was used from ActiveSupport to assist in converting parameters to AWS request format.
|
358
|
-
* Thanks to
|
359
|
-
|
365
|
+
* Thanks to [Ryan Weald](https://github.com/rweald) and [Alexander Dean](https://github.com/alexanderdean) for their constant barrage of excellent suggestions :)
|
360
366
|
|
361
367
|
# License
|
362
368
|
|
data/elasticity.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.authors = ['Robert Slifka']
|
10
10
|
s.homepage = 'http://www.github.com/rslifka/elasticity'
|
11
11
|
s.summary = %q{Streamlined, programmatic access to Amazon's Elastic Map Reduce service.}
|
12
|
-
s.description = %q{Streamlined,
|
12
|
+
s.description = %q{Streamlined, programmatic access to Amazon's Elastic Map Reduce service, driven by the Sharethrough team's requirements for belting out EMR jobs.}
|
13
13
|
|
14
14
|
s.add_dependency('rest-client')
|
15
15
|
s.add_dependency('nokogiri')
|
@@ -17,6 +17,7 @@ module Elasticity
|
|
17
17
|
attr_accessor :last_state_change_reason
|
18
18
|
attr_accessor :installed_steps
|
19
19
|
attr_accessor :master_public_dns_name
|
20
|
+
attr_accessor :normalized_instance_hours
|
20
21
|
|
21
22
|
def initialize
|
22
23
|
@steps = []
|
@@ -26,55 +27,57 @@ module Elasticity
|
|
26
27
|
# Create a jobflow from an AWS <member> (Nokogiri::XML::Element):
|
27
28
|
# /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member
|
28
29
|
def self.from_member_element(xml_element)
|
29
|
-
|
30
|
+
jobflow_status = JobFlowStatus.new
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
32
|
+
jobflow_status.name = xml_element.xpath('./Name').text.strip
|
33
|
+
jobflow_status.jobflow_id = xml_element.xpath('./JobFlowId').text.strip
|
34
|
+
jobflow_status.state = xml_element.xpath('./ExecutionStatusDetail/State').text.strip
|
35
|
+
jobflow_status.last_state_change_reason = xml_element.xpath('./ExecutionStatusDetail/LastStateChangeReason').text.strip
|
35
36
|
|
36
|
-
|
37
|
+
jobflow_status.steps = JobFlowStatusStep.from_members_nodeset(xml_element.xpath('./Steps/member'))
|
37
38
|
|
38
|
-
step_names =
|
39
|
+
step_names = jobflow_status.steps.map(&:name)
|
39
40
|
Elasticity::JobFlowStep.steps_requiring_installation.each do |step|
|
40
|
-
|
41
|
+
jobflow_status.installed_steps << step if step_names.include?(step.aws_installation_step_name)
|
41
42
|
end
|
42
43
|
|
43
|
-
|
44
|
+
jobflow_status.created_at = Time.parse(xml_element.xpath('./ExecutionStatusDetail/CreationDateTime').text.strip)
|
44
45
|
|
45
46
|
ready_at = xml_element.xpath('./ExecutionStatusDetail/ReadyDateTime').text.strip
|
46
|
-
|
47
|
+
jobflow_status.ready_at = (ready_at == '') ? (nil) : (Time.parse(ready_at))
|
47
48
|
|
48
49
|
started_at = xml_element.xpath('./ExecutionStatusDetail/StartDateTime').text.strip
|
49
|
-
|
50
|
+
jobflow_status.started_at = (started_at == '') ? (nil) : (Time.parse(started_at))
|
50
51
|
|
51
52
|
ended_at = xml_element.xpath('./ExecutionStatusDetail/EndDateTime').text.strip
|
52
|
-
|
53
|
+
jobflow_status.ended_at = (ended_at == '') ? (nil) : (Time.parse(ended_at))
|
53
54
|
|
54
|
-
if
|
55
|
-
|
55
|
+
if jobflow_status.ended_at && jobflow_status.started_at
|
56
|
+
jobflow_status.duration = ((jobflow_status.ended_at - jobflow_status.started_at) / 60).to_i
|
56
57
|
end
|
57
58
|
|
58
|
-
|
59
|
-
|
60
|
-
|
59
|
+
jobflow_status.instance_count = xml_element.xpath('./Instances/InstanceCount').text.strip
|
60
|
+
jobflow_status.master_instance_type = xml_element.xpath('./Instances/MasterInstanceType').text.strip
|
61
|
+
jobflow_status.slave_instance_type = xml_element.xpath('./Instances/SlaveInstanceType').text.strip
|
61
62
|
|
62
63
|
master_public_dns_name = xml_element.xpath('./Instances/MasterPublicDnsName').text.strip
|
63
|
-
|
64
|
+
jobflow_status.master_public_dns_name = (master_public_dns_name == '') ? (nil) : (master_public_dns_name)
|
64
65
|
|
65
|
-
|
66
|
+
jobflow_status.normalized_instance_hours = xml_element.xpath('./Instances/NormalizedInstanceHours').text.strip
|
67
|
+
|
68
|
+
jobflow_status
|
66
69
|
end
|
67
70
|
|
68
71
|
# Create JobFlows from a collection of AWS <member> nodes (Nokogiri::XML::NodeSet):
|
69
72
|
# /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows
|
70
73
|
def self.from_members_nodeset(members_nodeset)
|
71
|
-
|
74
|
+
jobflow_statuses = []
|
72
75
|
members_nodeset.each do |member|
|
73
|
-
|
76
|
+
jobflow_statuses << from_member_element(member)
|
74
77
|
end
|
75
|
-
|
78
|
+
jobflow_statuses
|
76
79
|
end
|
77
80
|
|
78
81
|
end
|
79
82
|
|
80
|
-
end
|
83
|
+
end
|
@@ -10,24 +10,26 @@ module Elasticity
|
|
10
10
|
attr_accessor :output_bucket
|
11
11
|
attr_accessor :mapper
|
12
12
|
attr_accessor :reducer
|
13
|
+
attr_accessor :arguments
|
13
14
|
|
14
|
-
def initialize(input_bucket, output_bucket, mapper, reducer)
|
15
|
+
def initialize(input_bucket, output_bucket, mapper, reducer, *arguments)
|
15
16
|
@name = 'Elasticity Streaming Step'
|
16
17
|
@action_on_failure = 'TERMINATE_JOB_FLOW'
|
17
18
|
@input_bucket = input_bucket
|
18
19
|
@output_bucket = output_bucket
|
19
20
|
@mapper = mapper
|
20
21
|
@reducer = reducer
|
22
|
+
@arguments = arguments || []
|
21
23
|
end
|
22
24
|
|
23
25
|
def to_aws_step(job_flow)
|
24
26
|
step = Elasticity::CustomJarStep.new('/home/hadoop/contrib/streaming/hadoop-streaming.jar')
|
25
27
|
step.name = @name
|
26
28
|
step.action_on_failure = @action_on_failure
|
27
|
-
step.arguments = ['-input', @input_bucket, '-output', @output_bucket, '-mapper', @mapper, '-reducer', @reducer]
|
29
|
+
step.arguments = ['-input', @input_bucket, '-output', @output_bucket, '-mapper', @mapper, '-reducer', @reducer] + @arguments
|
28
30
|
step.to_aws_step(job_flow)
|
29
31
|
end
|
30
32
|
|
31
33
|
end
|
32
34
|
|
33
|
-
end
|
35
|
+
end
|
data/lib/elasticity/version.rb
CHANGED
@@ -77,6 +77,9 @@ describe Elasticity::JobFlowStatus do
|
|
77
77
|
<MasterPublicDnsName>
|
78
78
|
ec2-107-22-77-99.compute-1.amazonaws.com
|
79
79
|
</MasterPublicDnsName>
|
80
|
+
<NormalizedInstanceHours>
|
81
|
+
0
|
82
|
+
</NormalizedInstanceHours>
|
80
83
|
<Placement>
|
81
84
|
<AvailabilityZone>
|
82
85
|
eu-west-1a
|
@@ -111,6 +114,9 @@ describe Elasticity::JobFlowStatus do
|
|
111
114
|
</LastStateChangeReason>
|
112
115
|
</ExecutionStatusDetail>
|
113
116
|
<Instances>
|
117
|
+
<NormalizedInstanceHours>
|
118
|
+
4
|
119
|
+
</NormalizedInstanceHours>
|
114
120
|
<Placement>
|
115
121
|
<AvailabilityZone>
|
116
122
|
eu-west-1b
|
@@ -142,53 +148,55 @@ describe Elasticity::JobFlowStatus do
|
|
142
148
|
describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member')
|
143
149
|
end
|
144
150
|
|
145
|
-
let(:
|
151
|
+
let(:single_jobflow_status) { Elasticity::JobFlowStatus.from_member_element(members_nodeset[0]) }
|
146
152
|
|
147
|
-
let(:
|
153
|
+
let(:multiple_jobflow_statuses) { Elasticity::JobFlowStatus.from_members_nodeset(members_nodeset) }
|
148
154
|
|
149
155
|
describe '.from_xml' do
|
150
|
-
it 'should return a
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
156
|
+
it 'should return a JobFlowStatus with the appropriate fields initialized' do
|
157
|
+
single_jobflow_status.name.should == 'Hive Job 1'
|
158
|
+
single_jobflow_status.jobflow_id.should == 'j-p'
|
159
|
+
single_jobflow_status.state.should == 'TERMINATED'
|
160
|
+
single_jobflow_status.steps.map(&:name).should == ['Elasticity - Install Hive', 'Run Hive Script']
|
161
|
+
single_jobflow_status.steps.map(&:state).should == %w(FAILED PENDING)
|
162
|
+
single_jobflow_status.created_at.should == Time.parse('2011-10-04T21:49:16Z')
|
163
|
+
single_jobflow_status.started_at.should == Time.parse('2011-10-04T21:49:17Z')
|
164
|
+
single_jobflow_status.ready_at.should == Time.parse('2011-10-04T21:49:18Z')
|
165
|
+
single_jobflow_status.ended_at.should == Time.parse('2011-10-05T21:49:18Z')
|
166
|
+
single_jobflow_status.duration.should == 1440
|
167
|
+
single_jobflow_status.master_instance_type.should == 'm1.small'
|
168
|
+
single_jobflow_status.slave_instance_type.should == 'm1.small'
|
169
|
+
single_jobflow_status.instance_count.should == '4'
|
170
|
+
single_jobflow_status.last_state_change_reason.should == 'Steps completed with errors'
|
171
|
+
single_jobflow_status.master_public_dns_name.should == 'ec2-107-22-77-99.compute-1.amazonaws.com'
|
172
|
+
single_jobflow_status.normalized_instance_hours.should == '0'
|
166
173
|
end
|
167
174
|
|
168
175
|
context 'when the jobflow never started' do
|
169
176
|
let(:started_at) {}
|
170
177
|
it 'should have a nil duration' do
|
171
|
-
|
172
|
-
|
178
|
+
single_jobflow_status.started_at.should == nil
|
179
|
+
single_jobflow_status.duration.should == nil
|
173
180
|
end
|
174
181
|
end
|
175
182
|
end
|
176
183
|
|
177
|
-
describe '.
|
178
|
-
it 'should return
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
184
|
+
describe '.from_jobflow_statuses_nodeset' do
|
185
|
+
it 'should return JobFlowStatuses with the appropriate fields initialized' do
|
186
|
+
multiple_jobflow_statuses.map(&:name).should == ['Hive Job 1', 'Hive Job 2']
|
187
|
+
multiple_jobflow_statuses.map(&:jobflow_id).should == %w(j-p j-h)
|
188
|
+
multiple_jobflow_statuses.map(&:state).should == %w(TERMINATED TERMINATED)
|
189
|
+
multiple_jobflow_statuses.map(&:created_at).should == [Time.parse('2011-10-04T21:49:16Z'), Time.parse('2011-10-04T22:49:16Z')]
|
190
|
+
multiple_jobflow_statuses.map(&:started_at).should == [Time.parse('2011-10-04T21:49:17Z'), nil]
|
191
|
+
multiple_jobflow_statuses.map(&:ready_at).should == [Time.parse('2011-10-04T21:49:18Z'), nil]
|
192
|
+
multiple_jobflow_statuses.map(&:ended_at).should == [Time.parse('2011-10-05T21:49:18Z'), nil]
|
193
|
+
multiple_jobflow_statuses.map(&:duration).should == [1440, nil]
|
194
|
+
multiple_jobflow_statuses.map(&:master_instance_type).should == %w(m1.small c1.medium)
|
195
|
+
multiple_jobflow_statuses.map(&:slave_instance_type).should == %w(m1.small c1.medium)
|
196
|
+
multiple_jobflow_statuses.map(&:instance_count).should == %w(4 2)
|
197
|
+
multiple_jobflow_statuses.map(&:last_state_change_reason).should == ['Steps completed with errors', 'Steps completed']
|
198
|
+
multiple_jobflow_statuses.map(&:master_public_dns_name).should == ['ec2-107-22-77-99.compute-1.amazonaws.com', nil]
|
199
|
+
multiple_jobflow_statuses.map(&:normalized_instance_hours).should == %w(0 4)
|
192
200
|
end
|
193
201
|
end
|
194
202
|
|
@@ -197,28 +205,28 @@ describe Elasticity::JobFlowStatus do
|
|
197
205
|
context 'when nothing has been installed' do
|
198
206
|
let(:setup_config) { }
|
199
207
|
it 'should be empty' do
|
200
|
-
|
208
|
+
single_jobflow_status.installed_steps.should == []
|
201
209
|
end
|
202
210
|
end
|
203
211
|
|
204
212
|
context 'when Hive has been installed by Elasticity' do
|
205
213
|
let(:setup_config) { hive_setup_config }
|
206
214
|
it 'should include HiveStep' do
|
207
|
-
|
215
|
+
single_jobflow_status.installed_steps.should == [Elasticity::HiveStep]
|
208
216
|
end
|
209
217
|
end
|
210
218
|
|
211
219
|
context 'when Pig has been installed by Elasticity' do
|
212
220
|
let(:setup_config) { pig_setup_config }
|
213
221
|
it 'should include PigStep' do
|
214
|
-
|
222
|
+
single_jobflow_status.installed_steps.should == [Elasticity::PigStep]
|
215
223
|
end
|
216
224
|
end
|
217
225
|
|
218
226
|
context 'when more than one step has been installed by Elasticity' do
|
219
227
|
let(:setup_config) { hive_setup_config + pig_setup_config }
|
220
228
|
it 'should include all of them' do
|
221
|
-
|
229
|
+
single_jobflow_status.installed_steps.should =~ [Elasticity::HiveStep, Elasticity::PigStep]
|
222
230
|
end
|
223
231
|
end
|
224
232
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
describe Elasticity::StreamingStep do
|
2
2
|
|
3
3
|
subject do
|
4
|
-
Elasticity::StreamingStep.new('INPUT_BUCKET', 'OUTPUT_BUCKET', 'MAPPER', 'REDUCER')
|
4
|
+
Elasticity::StreamingStep.new('INPUT_BUCKET', 'OUTPUT_BUCKET', 'MAPPER', 'REDUCER', '-ARG1', 'VALUE1')
|
5
5
|
end
|
6
6
|
|
7
7
|
it { should be_a Elasticity::JobFlowStep }
|
@@ -12,6 +12,7 @@ describe Elasticity::StreamingStep do
|
|
12
12
|
its(:output_bucket) { should == 'OUTPUT_BUCKET' }
|
13
13
|
its(:mapper) { should == 'MAPPER' }
|
14
14
|
its(:reducer) { should == 'REDUCER' }
|
15
|
+
its(:arguments) { should == %w(-ARG1 VALUE1) }
|
15
16
|
|
16
17
|
describe '#to_aws_step' do
|
17
18
|
|
@@ -21,7 +22,7 @@ describe Elasticity::StreamingStep do
|
|
21
22
|
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
22
23
|
:hadoop_jar_step => {
|
23
24
|
:jar => '/home/hadoop/contrib/streaming/hadoop-streaming.jar',
|
24
|
-
:args => %w(-input INPUT_BUCKET -output OUTPUT_BUCKET -mapper MAPPER -reducer REDUCER),
|
25
|
+
:args => %w(-input INPUT_BUCKET -output OUTPUT_BUCKET -mapper MAPPER -reducer REDUCER -ARG1 VALUE1),
|
25
26
|
},
|
26
27
|
}
|
27
28
|
end
|
@@ -34,4 +35,4 @@ describe Elasticity::StreamingStep do
|
|
34
35
|
end
|
35
36
|
end
|
36
37
|
|
37
|
-
end
|
38
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.5.
|
4
|
+
version: 2.5.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-02-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rest-client
|
@@ -123,7 +123,7 @@ dependencies:
|
|
123
123
|
- - ~>
|
124
124
|
- !ruby/object:Gem::Version
|
125
125
|
version: '0.4'
|
126
|
-
description: Streamlined,
|
126
|
+
description: Streamlined, programmatic access to Amazon's Elastic Map Reduce service,
|
127
127
|
driven by the Sharethrough team's requirements for belting out EMR jobs.
|
128
128
|
email:
|
129
129
|
executables: []
|
@@ -187,15 +187,21 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
187
187
|
- - ! '>='
|
188
188
|
- !ruby/object:Gem::Version
|
189
189
|
version: '0'
|
190
|
+
segments:
|
191
|
+
- 0
|
192
|
+
hash: 4428846755123210746
|
190
193
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
191
194
|
none: false
|
192
195
|
requirements:
|
193
196
|
- - ! '>='
|
194
197
|
- !ruby/object:Gem::Version
|
195
198
|
version: '0'
|
199
|
+
segments:
|
200
|
+
- 0
|
201
|
+
hash: 4428846755123210746
|
196
202
|
requirements: []
|
197
203
|
rubyforge_project:
|
198
|
-
rubygems_version: 1.8.
|
204
|
+
rubygems_version: 1.8.25
|
199
205
|
signing_key:
|
200
206
|
specification_version: 3
|
201
207
|
summary: Streamlined, programmatic access to Amazon's Elastic Map Reduce service.
|