elasticity 6.0.10 → 6.0.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dbcf92882a9214132462533d34770d92deffa247
4
- data.tar.gz: 93d0188ecd69661ead048656600d3ee602ab9ce7
3
+ metadata.gz: 2f61a0e2148b8a7f75fe8c28516fbc777a57c566
4
+ data.tar.gz: b2913d92acd6c595eb9ef2637be9649f2a42c996
5
5
  SHA512:
6
- metadata.gz: 5742f6c8ac1fbbc2df514b9711374d9e4ec3723e6513ddfe8a61395aababcf3361e6ba6cbac3f5d7346ed5a0a8b7f5cacdbe97a2a57638c08122cdf16be0d23c
7
- data.tar.gz: 51bde4a0262b62b7ab7fdf19c0a33255700fef3eb3c50f3bb27f182a78ae73fecc2ed87518de4929ed984645240f5e83b5a73fdc6e2670f5ba58194d0b9609f5
6
+ metadata.gz: f524779e497a2a325d7e48ee5a08333e67f9e2c28d897bcab9beaa2f21d715b1e85b2830f6abddab3c3320db15e308fbf87bff2ba99d9af803ba16603b3dc7f9
7
+ data.tar.gz: 3d81e2b0e7f23b10ee8cd937e843e5ffd16a739a75c9300a13b505746d535e15e9e38f2d699fd0381d95d044df97d840c74c74e78d4c660729cdd08530b64f0c
data/HISTORY.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 6.0.11 - February 7, 2017
2
+
3
+ - Including PR [#134](https://github.com/rslifka/elasticity/pull/134) - "Spark step". Thank you [@BenFradet](https://github.com/BenFradet)!
4
+ - Including PR [#133](https://github.com/rslifka/elasticity/pull/133) - "Scalding step". Thank you [@BenFradet](https://github.com/BenFradet)!
5
+
1
6
  ## 6.0.10 - January 16, 2017
2
7
 
3
8
  - Including PR [#132](https://github.com/rslifka/elasticity/pull/132) - "Fix size_in_gb not being parsed correctly for EBS volumes". Thank you [@jbeemster](https://github.com/jbeemster)!
data/README.md CHANGED
@@ -141,7 +141,7 @@ jobflow.additional_slave_security_groups = ['sg-1111', 'sg-2222']
141
141
  With the release of EMR 4.0.0 you can now supply applications which EMR will install for you on boot (rather than via a manual bootstrap action, which you can still use if required). You must set the `release_label` for the jobflow (>= 4.0.0).
142
142
 
143
143
  ```ruby
144
- jobflow.release_label = '4.3.0'
144
+ jobflow.release_label = '4.3.0'
145
145
  # the simple way
146
146
  jobflow.add_application("Spark") # Pig, Hive, Mahout
147
147
  # more verbose
@@ -341,7 +341,41 @@ jobflow.add_step(copy_step)
341
341
 
342
342
  # For AMI < 4.x you need to specify the legacy argument
343
343
  copy_step = Elasticity::S3DistCpStep.new(true)
344
+ ```
345
+
346
+ ### Adding a Scalding Step
347
+
348
+ ```ruby
349
+ scalding_step = Elasticity::ScaldingStep.new('jar_location', 'main_class_fqcn', { 'arg1' => 'value1' })
350
+
351
+ jobflow.add_step(scalding_step)
352
+ ```
353
+
354
+ This will result in the following command line arguments:
355
+
356
+ ```bash
357
+ main_class_fqcn --hdfs --arg1 value1
358
+ ```
359
+
360
+ ### Adding a Spark Step
361
+
362
+ ```ruby
363
+ spark_step = Elasticity::SparkStep.new('jar_location', 'main_class_fqcn')
344
364
 
365
+ # Specifying arguments related to Spark
366
+ spark_step.spark_arguments = { 'driver-memory' => '2G' }
367
+ # Specifying arguments related to your application
368
+ spark_step.app_arguments = { 'arg1' => 'value1' }
369
+ ```
370
+
371
+ This will be equivalent to the following script:
372
+
373
+ ```bash
374
+ spark-submit \
375
+ --driver-memory 2G \
376
+ --class main_class_fqcn \
377
+ jar_location \
378
+ --arg1 value1
345
379
  ```
346
380
 
347
381
  ## 7 - Upload Assets (optional)
@@ -422,7 +456,7 @@ Elasticity.configure do |config|
422
456
  # AWS credentials
423
457
  config.access_key = ENV['AWS_ACCESS_KEY_ID']
424
458
  config.secret_key = ENV['AWS_SECRET_ACCESS_KEY']
425
-
459
+
426
460
  # if you use federated Identity Management
427
461
  #config.security_token = ENV['AWS_SECURITY_TOKEN']
428
462
 
@@ -35,6 +35,8 @@ require 'elasticity/pig_step'
35
35
  require 'elasticity/streaming_step'
36
36
  require 'elasticity/script_step'
37
37
  require 'elasticity/s3distcp_step'
38
+ require 'elasticity/scalding_step'
39
+ require 'elasticity/spark_step'
38
40
 
39
41
  module Elasticity
40
42
 
@@ -0,0 +1,32 @@
1
+ module Elasticity
2
+
3
+ class ScaldingStep
4
+
5
+ include Elasticity::JobFlowStep
6
+
7
+ attr_accessor :name
8
+ attr_accessor :action_on_failure
9
+ attr_accessor :jar
10
+ attr_accessor :arguments
11
+
12
+ def initialize(jar, main_class, args)
13
+ @name = 'Elasticity Scalding Step'
14
+ @action_on_failure = 'TERMINATE_JOB_FLOW'
15
+ @jar = jar
16
+ @arguments = [ main_class, '--hdfs' ]
17
+ args.each do |arg, value|
18
+ @arguments << "--#{arg}" << value
19
+ end
20
+ end
21
+
22
+ def to_aws_step(job_flow)
23
+ step = Elasticity::CustomJarStep.new(@jar)
24
+ step.name = @name
25
+ step.action_on_failure = @action_on_failure
26
+ step.arguments = @arguments
27
+ step.to_aws_step(job_flow)
28
+ end
29
+
30
+ end
31
+
32
+ end
@@ -0,0 +1,44 @@
1
+ module Elasticity
2
+
3
+ class SparkStep
4
+
5
+ include Elasticity::JobFlowStep
6
+
7
+ attr_accessor :name
8
+ attr_accessor :main_class
9
+ attr_accessor :jar
10
+ attr_accessor :spark_arguments
11
+ attr_accessor :app_arguments
12
+ attr_accessor :action_on_failure
13
+
14
+ def initialize(jar, main_class)
15
+ @name = 'Elasticity Spark Step'
16
+ @main_class = main_class
17
+ @jar = jar
18
+ @spark_arguments = {}
19
+ @app_arguments = {}
20
+ @action_on_failure = 'TERMINATE_JOB_FLOW'
21
+ end
22
+
23
+ def to_aws_step(_)
24
+ args = %W(spark-submit --class #{@main_class})
25
+ spark_arguments.each do |arg, value|
26
+ args << "--#{arg}" << value
27
+ end
28
+ args.push(@jar)
29
+ app_arguments.each do |arg, value|
30
+ args << "--#{arg}" << value
31
+ end
32
+ {
33
+ :name => @name,
34
+ :action_on_failure => @action_on_failure,
35
+ :hadoop_jar_step => {
36
+ :jar => 'command-runner.jar',
37
+ :args => args
38
+ }
39
+ }
40
+ end
41
+
42
+ end
43
+
44
+ end
@@ -1,3 +1,3 @@
1
1
  module Elasticity
2
- VERSION = '6.0.10'
2
+ VERSION = '6.0.11'
3
3
  end
@@ -123,7 +123,7 @@ describe Elasticity::AwsRequestV4 do
123
123
 
124
124
  describe '.aws_v4_signature' do
125
125
  it 'should create the proper signature' do
126
- subject.send(:aws_v4_signature).should == 'aee8cb25939b4262213612d3410b3033107f97134beadc91563b532c54cf3ab3'
126
+ subject.send(:aws_v4_signature).should == '9fcb4107e8346b2b92a4a2c56de98ed80fdd87d3f9514e728adce76390c5b267'
127
127
  end
128
128
  end
129
129
 
@@ -0,0 +1,40 @@
1
+ describe Elasticity::ScaldingStep do
2
+
3
+ subject do
4
+ Elasticity::ScaldingStep.new('jar', 'class', { 'key' => 'value' })
5
+ end
6
+
7
+ it { should be_a Elasticity::JobFlowStep }
8
+
9
+ describe '.initialize' do
10
+ it 'should set the fields appropriately' do
11
+ expect(subject.name).to eql('Elasticity Scalding Step')
12
+ expect(subject.action_on_failure).to eql('TERMINATE_JOB_FLOW')
13
+ expect(subject.jar).to eql('jar')
14
+ expect(subject.arguments).to eql(['class', '--hdfs', '--key', 'value'])
15
+ end
16
+ end
17
+
18
+ describe '#to_aws_step' do
19
+
20
+ it { should respond_to(:to_aws_step).with(1).argument }
21
+
22
+ it 'should convert to aws step format' do
23
+ subject.to_aws_step(Elasticity::JobFlow.new).should == {
24
+ :name => 'Elasticity Scalding Step',
25
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
26
+ :hadoop_jar_step => {
27
+ :jar => 'jar',
28
+ :args => %w(class --hdfs --key value)
29
+ }
30
+ }
31
+ end
32
+ end
33
+
34
+ describe '.requires_installation?' do
35
+ it 'should not require installation' do
36
+ expect(Elasticity::ScaldingStep.requires_installation?).to be false
37
+ end
38
+ end
39
+
40
+ end
@@ -0,0 +1,67 @@
1
+ describe Elasticity::SparkStep do
2
+
3
+ subject do
4
+ Elasticity::SparkStep.new('jar', 'class')
5
+ end
6
+
7
+ it { should be_a Elasticity::JobFlowStep }
8
+
9
+ describe '.initialize' do
10
+ it 'should set the fields appropriately' do
11
+ expect(subject.name).to eql('Elasticity Spark Step')
12
+ expect(subject.jar).to eql('jar')
13
+ expect(subject.main_class).to eql('class')
14
+ expect(subject.spark_arguments).to eql({})
15
+ expect(subject.app_arguments).to eql({})
16
+ expect(subject.action_on_failure).to eql('TERMINATE_JOB_FLOW')
17
+ end
18
+ end
19
+
20
+ describe '#to_aws_step' do
21
+
22
+ it { should respond_to(:to_aws_step).with(1).argument }
23
+
24
+ context 'when there are no arguments provided' do
25
+ let(:ss_with_no_args) { Elasticity::SparkStep.new('jar', 'class') }
26
+
27
+ it 'should convert to aws step format' do
28
+ ss_with_no_args.to_aws_step(Elasticity::JobFlow.new).should == {
29
+ :name => 'Elasticity Spark Step',
30
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
31
+ :hadoop_jar_step => {
32
+ :jar => 'command-runner.jar',
33
+ :args => %w(spark-submit --class class jar)
34
+ }
35
+ }
36
+ end
37
+ end
38
+
39
+ context 'when there are arguments provided' do
40
+ let(:ss_with_args) do
41
+ Elasticity::SparkStep.new('jar', 'class').tap do |ss|
42
+ ss.spark_arguments = { 'key1' => 'value1' }
43
+ ss.app_arguments = { 'key2' => 'value2' }
44
+ end
45
+ end
46
+
47
+ it 'should convert to aws step format' do
48
+ ss_with_args.to_aws_step(Elasticity::JobFlow.new).should == {
49
+ :name => 'Elasticity Spark Step',
50
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
51
+ :hadoop_jar_step => {
52
+ :jar => 'command-runner.jar',
53
+ :args => %w(spark-submit --class class --key1 value1 jar --key2 value2)
54
+ }
55
+ }
56
+ end
57
+ end
58
+
59
+ end
60
+
61
+ describe '.requires_installation?' do
62
+ it 'should not require installation' do
63
+ expect(Elasticity::SparkStep.requires_installation?).to be false
64
+ end
65
+ end
66
+
67
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticity
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.0.10
4
+ version: 6.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Slifka
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-15 00:00:00.000000000 Z
11
+ date: 2017-02-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
@@ -161,8 +161,10 @@ files:
161
161
  - lib/elasticity/looper.rb
162
162
  - lib/elasticity/pig_step.rb
163
163
  - lib/elasticity/s3distcp_step.rb
164
+ - lib/elasticity/scalding_step.rb
164
165
  - lib/elasticity/script_step.rb
165
166
  - lib/elasticity/setup_hadoop_debugging_step.rb
167
+ - lib/elasticity/spark_step.rb
166
168
  - lib/elasticity/streaming_step.rb
167
169
  - lib/elasticity/sync_to_s3.rb
168
170
  - lib/elasticity/version.rb
@@ -188,8 +190,10 @@ files:
188
190
  - spec/lib/elasticity/looper_spec.rb
189
191
  - spec/lib/elasticity/pig_step_spec.rb
190
192
  - spec/lib/elasticity/s3distcp_step_spec.rb
193
+ - spec/lib/elasticity/scalding_step_spec.rb
191
194
  - spec/lib/elasticity/script_step_spec.rb
192
195
  - spec/lib/elasticity/setup_hadoop_debugging_step_spec.rb
196
+ - spec/lib/elasticity/spark_step_spec.rb
193
197
  - spec/lib/elasticity/streaming_step_spec.rb
194
198
  - spec/lib/elasticity/sync_to_s3_spec.rb
195
199
  - spec/spec_helper.rb
@@ -241,8 +245,10 @@ test_files:
241
245
  - spec/lib/elasticity/looper_spec.rb
242
246
  - spec/lib/elasticity/pig_step_spec.rb
243
247
  - spec/lib/elasticity/s3distcp_step_spec.rb
248
+ - spec/lib/elasticity/scalding_step_spec.rb
244
249
  - spec/lib/elasticity/script_step_spec.rb
245
250
  - spec/lib/elasticity/setup_hadoop_debugging_step_spec.rb
251
+ - spec/lib/elasticity/spark_step_spec.rb
246
252
  - spec/lib/elasticity/streaming_step_spec.rb
247
253
  - spec/lib/elasticity/sync_to_s3_spec.rb
248
254
  - spec/spec_helper.rb