elasticity 6.0.10 → 6.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +5 -0
- data/README.md +36 -2
- data/lib/elasticity.rb +2 -0
- data/lib/elasticity/scalding_step.rb +32 -0
- data/lib/elasticity/spark_step.rb +44 -0
- data/lib/elasticity/version.rb +1 -1
- data/spec/lib/elasticity/aws_request_v4_spec.rb +1 -1
- data/spec/lib/elasticity/scalding_step_spec.rb +40 -0
- data/spec/lib/elasticity/spark_step_spec.rb +67 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2f61a0e2148b8a7f75fe8c28516fbc777a57c566
|
4
|
+
data.tar.gz: b2913d92acd6c595eb9ef2637be9649f2a42c996
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f524779e497a2a325d7e48ee5a08333e67f9e2c28d897bcab9beaa2f21d715b1e85b2830f6abddab3c3320db15e308fbf87bff2ba99d9af803ba16603b3dc7f9
|
7
|
+
data.tar.gz: 3d81e2b0e7f23b10ee8cd937e843e5ffd16a739a75c9300a13b505746d535e15e9e38f2d699fd0381d95d044df97d840c74c74e78d4c660729cdd08530b64f0c
|
data/HISTORY.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
## 6.0.11 - February 7, 2017
|
2
|
+
|
3
|
+
- Including PR [#134](https://github.com/rslifka/elasticity/pull/134) - "Spark step". Thank you [@BenFradet](https://github.com/BenFradet)!
|
4
|
+
- Including PR [#133](https://github.com/rslifka/elasticity/pull/133) - "Scalding step". Thank you [@BenFradet](https://github.com/BenFradet)!
|
5
|
+
|
1
6
|
## 6.0.10 - January 16, 2017
|
2
7
|
|
3
8
|
- Including PR [#132](https://github.com/rslifka/elasticity/pull/132) - "Fix size_in_gb not being parsed correctly for EBS volumes". Thank you [@jbeemster](https://github.com/jbeemster)!
|
data/README.md
CHANGED
@@ -141,7 +141,7 @@ jobflow.additional_slave_security_groups = ['sg-1111', 'sg-2222']
|
|
141
141
|
With the release of EMR 4.0.0 you can now supply applications which EMR will install for you on boot (rather than via a manual bootstrap action, which you can still use if required). You must set the `release_label` for the jobflow (>= 4.0.0).
|
142
142
|
|
143
143
|
```ruby
|
144
|
-
jobflow.release_label = '4.3.0'
|
144
|
+
jobflow.release_label = '4.3.0'
|
145
145
|
# the simple way
|
146
146
|
jobflow.add_application("Spark") # Pig, Hive, Mahout
|
147
147
|
# more verbose
|
@@ -341,7 +341,41 @@ jobflow.add_step(copy_step)
|
|
341
341
|
|
342
342
|
# For AMI < 4.x you need to specify the legacy argument
|
343
343
|
copy_step = Elasticity::S3DistCpStep.new(true)
|
344
|
+
```
|
345
|
+
|
346
|
+
### Adding a Scalding Step
|
347
|
+
|
348
|
+
```ruby
|
349
|
+
scalding_step = Elasticity::ScaldingStep.new('jar_location', 'main_class_fqcn', { 'arg1' => 'value1' })
|
350
|
+
|
351
|
+
jobflow.add_step(scalding_step)
|
352
|
+
```
|
353
|
+
|
354
|
+
This will result in the following command line arguments:
|
355
|
+
|
356
|
+
```bash
|
357
|
+
main_class_fqcn --hdfs --arg1 value1
|
358
|
+
```
|
359
|
+
|
360
|
+
### Adding a Spark Step
|
361
|
+
|
362
|
+
```ruby
|
363
|
+
spark_step = Elasticity::SparkStep.new('jar_location', 'main_class_fqcn')
|
344
364
|
|
365
|
+
# Specifying arguments relative to Spark
|
366
|
+
spark_step.spark_arguments = { 'driver-memory' => '2G' }
|
367
|
+
# Specifying arguments relative to your application
|
368
|
+
spark_step.app_arguments = { 'arg1' => 'value1' }
|
369
|
+
```
|
370
|
+
|
371
|
+
This will be equivalent to the following script:
|
372
|
+
|
373
|
+
```bash
|
374
|
+
spark-submit \
|
375
|
+
--driver-memory 2G \
|
376
|
+
--class main_class_fqcn \
|
377
|
+
jar_location \
|
378
|
+
--arg1 value1
|
345
379
|
```
|
346
380
|
|
347
381
|
## 7 - Upload Assets (optional)
|
@@ -422,7 +456,7 @@ Elasticity.configure do |config|
|
|
422
456
|
# AWS credentials
|
423
457
|
config.access_key = ENV['AWS_ACCESS_KEY_ID']
|
424
458
|
config.secret_key = ENV['AWS_SECRET_ACCESS_KEY']
|
425
|
-
|
459
|
+
|
426
460
|
# if you use federated Identity Management
|
427
461
|
#config.security_token = ENV['AWS_SECURITY_TOKEN']
|
428
462
|
|
data/lib/elasticity.rb
CHANGED
data/lib/elasticity/scalding_step.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module Elasticity
|
2
|
+
|
3
|
+
class ScaldingStep
|
4
|
+
|
5
|
+
include Elasticity::JobFlowStep
|
6
|
+
|
7
|
+
attr_accessor :name
|
8
|
+
attr_accessor :action_on_failure
|
9
|
+
attr_accessor :jar
|
10
|
+
attr_accessor :arguments
|
11
|
+
|
12
|
+
def initialize(jar, main_class, args)
|
13
|
+
@name = 'Elasticity Scalding Step'
|
14
|
+
@action_on_failure = 'TERMINATE_JOB_FLOW'
|
15
|
+
@jar = jar
|
16
|
+
@arguments = [ main_class, '--hdfs' ]
|
17
|
+
args.each do |arg, value|
|
18
|
+
@arguments << "--#{arg}" << value
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def to_aws_step(job_flow)
|
23
|
+
step = Elasticity::CustomJarStep.new(@jar)
|
24
|
+
step.name = @name
|
25
|
+
step.action_on_failure = @action_on_failure
|
26
|
+
step.arguments = @arguments
|
27
|
+
step.to_aws_step(job_flow)
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
data/lib/elasticity/spark_step.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
module Elasticity
|
2
|
+
|
3
|
+
class SparkStep
|
4
|
+
|
5
|
+
include Elasticity::JobFlowStep
|
6
|
+
|
7
|
+
attr_accessor :name
|
8
|
+
attr_accessor :main_class
|
9
|
+
attr_accessor :jar
|
10
|
+
attr_accessor :spark_arguments
|
11
|
+
attr_accessor :app_arguments
|
12
|
+
attr_accessor :action_on_failure
|
13
|
+
|
14
|
+
def initialize(jar, main_class)
|
15
|
+
@name = 'Elasticity Spark Step'
|
16
|
+
@main_class = main_class
|
17
|
+
@jar = jar
|
18
|
+
@spark_arguments = {}
|
19
|
+
@app_arguments = {}
|
20
|
+
@action_on_failure = 'TERMINATE_JOB_FLOW'
|
21
|
+
end
|
22
|
+
|
23
|
+
def to_aws_step(_)
|
24
|
+
args = %W(spark-submit --class #{@main_class})
|
25
|
+
spark_arguments.each do |arg, value|
|
26
|
+
args << "--#{arg}" << value
|
27
|
+
end
|
28
|
+
args.push(@jar)
|
29
|
+
app_arguments.each do |arg, value|
|
30
|
+
args << "--#{arg}" << value
|
31
|
+
end
|
32
|
+
{
|
33
|
+
:name => @name,
|
34
|
+
:action_on_failure => @action_on_failure,
|
35
|
+
:hadoop_jar_step => {
|
36
|
+
:jar => 'command-runner.jar',
|
37
|
+
:args => args
|
38
|
+
}
|
39
|
+
}
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
data/lib/elasticity/version.rb
CHANGED
data/spec/lib/elasticity/aws_request_v4_spec.rb
CHANGED
@@ -123,7 +123,7 @@ describe Elasticity::AwsRequestV4 do
|
|
123
123
|
|
124
124
|
describe '.aws_v4_signature' do
|
125
125
|
it 'should create the proper signature' do
|
126
|
-
subject.send(:aws_v4_signature).should == '
|
126
|
+
subject.send(:aws_v4_signature).should == '9fcb4107e8346b2b92a4a2c56de98ed80fdd87d3f9514e728adce76390c5b267'
|
127
127
|
end
|
128
128
|
end
|
129
129
|
|
data/spec/lib/elasticity/scalding_step_spec.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
describe Elasticity::ScaldingStep do
|
2
|
+
|
3
|
+
subject do
|
4
|
+
Elasticity::ScaldingStep.new('jar', 'class', { 'key' => 'value' })
|
5
|
+
end
|
6
|
+
|
7
|
+
it { should be_a Elasticity::JobFlowStep }
|
8
|
+
|
9
|
+
describe '.initialize' do
|
10
|
+
it 'should set the fields appropriately' do
|
11
|
+
expect(subject.name).to eql('Elasticity Scalding Step')
|
12
|
+
expect(subject.action_on_failure).to eql('TERMINATE_JOB_FLOW')
|
13
|
+
expect(subject.jar).to eql('jar')
|
14
|
+
expect(subject.arguments).to eql(['class', '--hdfs', '--key', 'value'])
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe '#to_aws_step' do
|
19
|
+
|
20
|
+
it { should respond_to(:to_aws_step).with(1).argument }
|
21
|
+
|
22
|
+
it 'should convert to aws step format' do
|
23
|
+
subject.to_aws_step(Elasticity::JobFlow.new).should == {
|
24
|
+
:name => 'Elasticity Scalding Step',
|
25
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
26
|
+
:hadoop_jar_step => {
|
27
|
+
:jar => 'jar',
|
28
|
+
:args => %w(class --hdfs --key value)
|
29
|
+
}
|
30
|
+
}
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe '.requires_installation?' do
|
35
|
+
it 'should not require installation' do
|
36
|
+
expect(Elasticity::ScaldingStep.requires_installation?).to be false
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
data/spec/lib/elasticity/spark_step_spec.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
describe Elasticity::SparkStep do
|
2
|
+
|
3
|
+
subject do
|
4
|
+
Elasticity::SparkStep.new('jar', 'class')
|
5
|
+
end
|
6
|
+
|
7
|
+
it { should be_a Elasticity::JobFlowStep }
|
8
|
+
|
9
|
+
describe '.initialize' do
|
10
|
+
it 'should set the fields appropriately' do
|
11
|
+
expect(subject.name).to eql('Elasticity Spark Step')
|
12
|
+
expect(subject.jar).to eql('jar')
|
13
|
+
expect(subject.main_class).to eql('class')
|
14
|
+
expect(subject.spark_arguments).to eql({})
|
15
|
+
expect(subject.app_arguments).to eql({})
|
16
|
+
expect(subject.action_on_failure).to eql('TERMINATE_JOB_FLOW')
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe '#to_aws_step' do
|
21
|
+
|
22
|
+
it { should respond_to(:to_aws_step).with(1).argument }
|
23
|
+
|
24
|
+
context 'when there are no arguments provided' do
|
25
|
+
let(:ss_with_no_args) { Elasticity::SparkStep.new('jar', 'class') }
|
26
|
+
|
27
|
+
it 'should convert to aws step format' do
|
28
|
+
ss_with_no_args.to_aws_step(Elasticity::JobFlow.new).should == {
|
29
|
+
:name => 'Elasticity Spark Step',
|
30
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
31
|
+
:hadoop_jar_step => {
|
32
|
+
:jar => 'command-runner.jar',
|
33
|
+
:args => %w(spark-submit --class class jar)
|
34
|
+
}
|
35
|
+
}
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
context 'when there are arguments provided' do
|
40
|
+
let(:ss_with_args) do
|
41
|
+
Elasticity::SparkStep.new('jar', 'class').tap do |ss|
|
42
|
+
ss.spark_arguments = { 'key1' => 'value1' }
|
43
|
+
ss.app_arguments = { 'key2' => 'value2' }
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'should convert to aws step format' do
|
48
|
+
ss_with_args.to_aws_step(Elasticity::JobFlow.new).should == {
|
49
|
+
:name => 'Elasticity Spark Step',
|
50
|
+
:action_on_failure => 'TERMINATE_JOB_FLOW',
|
51
|
+
:hadoop_jar_step => {
|
52
|
+
:jar => 'command-runner.jar',
|
53
|
+
:args => %w(spark-submit --class class --key1 value1 jar --key2 value2)
|
54
|
+
}
|
55
|
+
}
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
|
61
|
+
describe '.requires_installation?' do
|
62
|
+
it 'should not require installation' do
|
63
|
+
expect(Elasticity::SparkStep.requires_installation?).to be false
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: elasticity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 6.0.10
|
4
|
+
version: 6.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robert Slifka
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -161,8 +161,10 @@ files:
|
|
161
161
|
- lib/elasticity/looper.rb
|
162
162
|
- lib/elasticity/pig_step.rb
|
163
163
|
- lib/elasticity/s3distcp_step.rb
|
164
|
+
- lib/elasticity/scalding_step.rb
|
164
165
|
- lib/elasticity/script_step.rb
|
165
166
|
- lib/elasticity/setup_hadoop_debugging_step.rb
|
167
|
+
- lib/elasticity/spark_step.rb
|
166
168
|
- lib/elasticity/streaming_step.rb
|
167
169
|
- lib/elasticity/sync_to_s3.rb
|
168
170
|
- lib/elasticity/version.rb
|
@@ -188,8 +190,10 @@ files:
|
|
188
190
|
- spec/lib/elasticity/looper_spec.rb
|
189
191
|
- spec/lib/elasticity/pig_step_spec.rb
|
190
192
|
- spec/lib/elasticity/s3distcp_step_spec.rb
|
193
|
+
- spec/lib/elasticity/scalding_step_spec.rb
|
191
194
|
- spec/lib/elasticity/script_step_spec.rb
|
192
195
|
- spec/lib/elasticity/setup_hadoop_debugging_step_spec.rb
|
196
|
+
- spec/lib/elasticity/spark_step_spec.rb
|
193
197
|
- spec/lib/elasticity/streaming_step_spec.rb
|
194
198
|
- spec/lib/elasticity/sync_to_s3_spec.rb
|
195
199
|
- spec/spec_helper.rb
|
@@ -241,8 +245,10 @@ test_files:
|
|
241
245
|
- spec/lib/elasticity/looper_spec.rb
|
242
246
|
- spec/lib/elasticity/pig_step_spec.rb
|
243
247
|
- spec/lib/elasticity/s3distcp_step_spec.rb
|
248
|
+
- spec/lib/elasticity/scalding_step_spec.rb
|
244
249
|
- spec/lib/elasticity/script_step_spec.rb
|
245
250
|
- spec/lib/elasticity/setup_hadoop_debugging_step_spec.rb
|
251
|
+
- spec/lib/elasticity/spark_step_spec.rb
|
246
252
|
- spec/lib/elasticity/streaming_step_spec.rb
|
247
253
|
- spec/lib/elasticity/sync_to_s3_spec.rb
|
248
254
|
- spec/spec_helper.rb
|