elasticity 6.0.10 → 6.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: dbcf92882a9214132462533d34770d92deffa247
4
- data.tar.gz: 93d0188ecd69661ead048656600d3ee602ab9ce7
3
+ metadata.gz: 2f61a0e2148b8a7f75fe8c28516fbc777a57c566
4
+ data.tar.gz: b2913d92acd6c595eb9ef2637be9649f2a42c996
5
5
  SHA512:
6
- metadata.gz: 5742f6c8ac1fbbc2df514b9711374d9e4ec3723e6513ddfe8a61395aababcf3361e6ba6cbac3f5d7346ed5a0a8b7f5cacdbe97a2a57638c08122cdf16be0d23c
7
- data.tar.gz: 51bde4a0262b62b7ab7fdf19c0a33255700fef3eb3c50f3bb27f182a78ae73fecc2ed87518de4929ed984645240f5e83b5a73fdc6e2670f5ba58194d0b9609f5
6
+ metadata.gz: f524779e497a2a325d7e48ee5a08333e67f9e2c28d897bcab9beaa2f21d715b1e85b2830f6abddab3c3320db15e308fbf87bff2ba99d9af803ba16603b3dc7f9
7
+ data.tar.gz: 3d81e2b0e7f23b10ee8cd937e843e5ffd16a739a75c9300a13b505746d535e15e9e38f2d699fd0381d95d044df97d840c74c74e78d4c660729cdd08530b64f0c
data/HISTORY.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 6.0.11 - February 7, 2017
2
+
3
+ - Including PR [#134](https://github.com/rslifka/elasticity/pull/134) - "Spark step". Thank you [@BenFradet](https://github.com/BenFradet)!
4
+ - Including PR [#133](https://github.com/rslifka/elasticity/pull/133) - "Scalding step". Thank you [@BenFradet](https://github.com/BenFradet)!
5
+
1
6
  ## 6.0.10 - January 16, 2017
2
7
 
3
8
  - Including PR [#132](https://github.com/rslifka/elasticity/pull/132) - "Fix size_in_gb not being parsed correctly for EBS volumes". Thank you [@jbeemster](https://github.com/jbeemster)!
data/README.md CHANGED
@@ -141,7 +141,7 @@ jobflow.additional_slave_security_groups = ['sg-1111', 'sg-2222']
141
141
  With the release of EMR 4.0.0 you can now supply applications which EMR will install for you on boot (rather than via a manual bootstrap action, which you can still use if required). You must set the `release_label` for the jobflow (>= 4.0.0).
142
142
 
143
143
  ```ruby
144
- jobflow.release_label = '4.3.0'
144
+ jobflow.release_label = '4.3.0'
145
145
  # the simple way
146
146
  jobflow.add_application("Spark") # Pig, Hive, Mahout
147
147
  # more verbose
@@ -341,7 +341,41 @@ jobflow.add_step(copy_step)
341
341
 
342
342
  # For AMI < 4.x you need to specify the legacy argument
343
343
  copy_step = Elasticity::S3DistCpStep.new(true)
344
+ ```
345
+
346
+ ### Adding a Scalding Step
347
+
348
+ ```ruby
349
+ scalding_step = Elasticity::ScaldingStep.new('jar_location', 'main_class_fqcn', { 'arg1' => 'value1' })
350
+
351
+ jobflow.add_step(scalding_step)
352
+ ```
353
+
354
+ This will result in the following command line arguments:
355
+
356
+ ```bash
357
+ main_class_fqcn --hdfs --arg1 value1
358
+ ```
359
+
360
+ ### Adding a Spark Step
361
+
362
+ ```ruby
363
+ spark_step = Elasticity::SparkStep.new('jar_location', 'main_class_fqcn')
344
364
 
365
+ # Specifying arguments for Spark itself (options passed to spark-submit)
366
+ spark_step.spark_arguments = { 'driver-memory' => '2G' }
367
+ # Specifying arguments passed to your application
368
+ spark_step.app_arguments = { 'arg1' => 'value1' }
369
+ ```
370
+
371
+ This will be equivalent to the following script:
372
+
373
+ ```bash
374
+ spark-submit \
375
+ --driver-memory 2G \
376
+ --class main_class_fqcn \
377
+ jar_location \
378
+ --arg1 value1
345
379
  ```
346
380
 
347
381
  ## 7 - Upload Assets (optional)
@@ -422,7 +456,7 @@ Elasticity.configure do |config|
422
456
  # AWS credentials
423
457
  config.access_key = ENV['AWS_ACCESS_KEY_ID']
424
458
  config.secret_key = ENV['AWS_SECRET_ACCESS_KEY']
425
-
459
+
426
460
  # if you use federated Identity Management
427
461
  #config.security_token = ENV['AWS_SECURITY_TOKEN']
428
462
 
@@ -35,6 +35,8 @@ require 'elasticity/pig_step'
35
35
  require 'elasticity/streaming_step'
36
36
  require 'elasticity/script_step'
37
37
  require 'elasticity/s3distcp_step'
38
+ require 'elasticity/scalding_step'
39
+ require 'elasticity/spark_step'
38
40
 
39
41
  module Elasticity
40
42
 
@@ -0,0 +1,32 @@
1
+ module Elasticity
2
+
3
+ class ScaldingStep
4
+
5
+ include Elasticity::JobFlowStep
6
+
7
+ attr_accessor :name
8
+ attr_accessor :action_on_failure
9
+ attr_accessor :jar
10
+ attr_accessor :arguments
11
+
12
+ def initialize(jar, main_class, args)
13
+ @name = 'Elasticity Scalding Step'
14
+ @action_on_failure = 'TERMINATE_JOB_FLOW'
15
+ @jar = jar
16
+ @arguments = [ main_class, '--hdfs' ]
17
+ args.each do |arg, value|
18
+ @arguments << "--#{arg}" << value
19
+ end
20
+ end
21
+
22
+ def to_aws_step(job_flow)
23
+ step = Elasticity::CustomJarStep.new(@jar)
24
+ step.name = @name
25
+ step.action_on_failure = @action_on_failure
26
+ step.arguments = @arguments
27
+ step.to_aws_step(job_flow)
28
+ end
29
+
30
+ end
31
+
32
+ end
@@ -0,0 +1,44 @@
1
+ module Elasticity
2
+
3
+ class SparkStep
4
+
5
+ include Elasticity::JobFlowStep
6
+
7
+ attr_accessor :name
8
+ attr_accessor :main_class
9
+ attr_accessor :jar
10
+ attr_accessor :spark_arguments
11
+ attr_accessor :app_arguments
12
+ attr_accessor :action_on_failure
13
+
14
+ def initialize(jar, main_class)
15
+ @name = 'Elasticity Spark Step'
16
+ @main_class = main_class
17
+ @jar = jar
18
+ @spark_arguments = {}
19
+ @app_arguments = {}
20
+ @action_on_failure = 'TERMINATE_JOB_FLOW'
21
+ end
22
+
23
+ def to_aws_step(_)
24
+ args = %W(spark-submit --class #{@main_class})
25
+ spark_arguments.each do |arg, value|
26
+ args << "--#{arg}" << value
27
+ end
28
+ args.push(@jar)
29
+ app_arguments.each do |arg, value|
30
+ args << "--#{arg}" << value
31
+ end
32
+ {
33
+ :name => @name,
34
+ :action_on_failure => @action_on_failure,
35
+ :hadoop_jar_step => {
36
+ :jar => 'command-runner.jar',
37
+ :args => args
38
+ }
39
+ }
40
+ end
41
+
42
+ end
43
+
44
+ end
@@ -1,3 +1,3 @@
1
1
  module Elasticity
2
- VERSION = '6.0.10'
2
+ VERSION = '6.0.11'
3
3
  end
@@ -123,7 +123,7 @@ describe Elasticity::AwsRequestV4 do
123
123
 
124
124
  describe '.aws_v4_signature' do
125
125
  it 'should create the proper signature' do
126
- subject.send(:aws_v4_signature).should == 'aee8cb25939b4262213612d3410b3033107f97134beadc91563b532c54cf3ab3'
126
+ subject.send(:aws_v4_signature).should == '9fcb4107e8346b2b92a4a2c56de98ed80fdd87d3f9514e728adce76390c5b267'
127
127
  end
128
128
  end
129
129
 
@@ -0,0 +1,40 @@
1
+ describe Elasticity::ScaldingStep do
2
+
3
+ subject do
4
+ Elasticity::ScaldingStep.new('jar', 'class', { 'key' => 'value' })
5
+ end
6
+
7
+ it { should be_a Elasticity::JobFlowStep }
8
+
9
+ describe '.initialize' do
10
+ it 'should set the fields appropriately' do
11
+ expect(subject.name).to eql('Elasticity Scalding Step')
12
+ expect(subject.action_on_failure).to eql('TERMINATE_JOB_FLOW')
13
+ expect(subject.jar).to eql('jar')
14
+ expect(subject.arguments).to eql(['class', '--hdfs', '--key', 'value'])
15
+ end
16
+ end
17
+
18
+ describe '#to_aws_step' do
19
+
20
+ it { should respond_to(:to_aws_step).with(1).argument }
21
+
22
+ it 'should convert to aws step format' do
23
+ subject.to_aws_step(Elasticity::JobFlow.new).should == {
24
+ :name => 'Elasticity Scalding Step',
25
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
26
+ :hadoop_jar_step => {
27
+ :jar => 'jar',
28
+ :args => %w(class --hdfs --key value)
29
+ }
30
+ }
31
+ end
32
+ end
33
+
34
+ describe '.requires_installation?' do
35
+ it 'should not require installation' do
36
+ expect(Elasticity::ScaldingStep.requires_installation?).to be false
37
+ end
38
+ end
39
+
40
+ end
@@ -0,0 +1,67 @@
1
+ describe Elasticity::SparkStep do
2
+
3
+ subject do
4
+ Elasticity::SparkStep.new('jar', 'class')
5
+ end
6
+
7
+ it { should be_a Elasticity::JobFlowStep }
8
+
9
+ describe '.initialize' do
10
+ it 'should set the fields appropriately' do
11
+ expect(subject.name).to eql('Elasticity Spark Step')
12
+ expect(subject.jar).to eql('jar')
13
+ expect(subject.main_class).to eql('class')
14
+ expect(subject.spark_arguments).to eql({})
15
+ expect(subject.app_arguments).to eql({})
16
+ expect(subject.action_on_failure).to eql('TERMINATE_JOB_FLOW')
17
+ end
18
+ end
19
+
20
+ describe '#to_aws_step' do
21
+
22
+ it { should respond_to(:to_aws_step).with(1).argument }
23
+
24
+ context 'when there are no arguments provided' do
25
+ let(:ss_with_no_args) { Elasticity::SparkStep.new('jar', 'class') }
26
+
27
+ it 'should convert to aws step format' do
28
+ ss_with_no_args.to_aws_step(Elasticity::JobFlow.new).should == {
29
+ :name => 'Elasticity Spark Step',
30
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
31
+ :hadoop_jar_step => {
32
+ :jar => 'command-runner.jar',
33
+ :args => %w(spark-submit --class class jar)
34
+ }
35
+ }
36
+ end
37
+ end
38
+
39
+ context 'when there are arguments provided' do
40
+ let(:ss_with_args) do
41
+ Elasticity::SparkStep.new('jar', 'class').tap do |ss|
42
+ ss.spark_arguments = { 'key1' => 'value1' }
43
+ ss.app_arguments = { 'key2' => 'value2' }
44
+ end
45
+ end
46
+
47
+ it 'should convert to aws step format' do
48
+ ss_with_args.to_aws_step(Elasticity::JobFlow.new).should == {
49
+ :name => 'Elasticity Spark Step',
50
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
51
+ :hadoop_jar_step => {
52
+ :jar => 'command-runner.jar',
53
+ :args => %w(spark-submit --class class --key1 value1 jar --key2 value2)
54
+ }
55
+ }
56
+ end
57
+ end
58
+
59
+ end
60
+
61
+ describe '.requires_installation?' do
62
+ it 'should not require installation' do
63
+ expect(Elasticity::SparkStep.requires_installation?).to be false
64
+ end
65
+ end
66
+
67
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elasticity
3
3
  version: !ruby/object:Gem::Version
4
- version: 6.0.10
4
+ version: 6.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Slifka
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-15 00:00:00.000000000 Z
11
+ date: 2017-02-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
@@ -161,8 +161,10 @@ files:
161
161
  - lib/elasticity/looper.rb
162
162
  - lib/elasticity/pig_step.rb
163
163
  - lib/elasticity/s3distcp_step.rb
164
+ - lib/elasticity/scalding_step.rb
164
165
  - lib/elasticity/script_step.rb
165
166
  - lib/elasticity/setup_hadoop_debugging_step.rb
167
+ - lib/elasticity/spark_step.rb
166
168
  - lib/elasticity/streaming_step.rb
167
169
  - lib/elasticity/sync_to_s3.rb
168
170
  - lib/elasticity/version.rb
@@ -188,8 +190,10 @@ files:
188
190
  - spec/lib/elasticity/looper_spec.rb
189
191
  - spec/lib/elasticity/pig_step_spec.rb
190
192
  - spec/lib/elasticity/s3distcp_step_spec.rb
193
+ - spec/lib/elasticity/scalding_step_spec.rb
191
194
  - spec/lib/elasticity/script_step_spec.rb
192
195
  - spec/lib/elasticity/setup_hadoop_debugging_step_spec.rb
196
+ - spec/lib/elasticity/spark_step_spec.rb
193
197
  - spec/lib/elasticity/streaming_step_spec.rb
194
198
  - spec/lib/elasticity/sync_to_s3_spec.rb
195
199
  - spec/spec_helper.rb
@@ -241,8 +245,10 @@ test_files:
241
245
  - spec/lib/elasticity/looper_spec.rb
242
246
  - spec/lib/elasticity/pig_step_spec.rb
243
247
  - spec/lib/elasticity/s3distcp_step_spec.rb
248
+ - spec/lib/elasticity/scalding_step_spec.rb
244
249
  - spec/lib/elasticity/script_step_spec.rb
245
250
  - spec/lib/elasticity/setup_hadoop_debugging_step_spec.rb
251
+ - spec/lib/elasticity/spark_step_spec.rb
246
252
  - spec/lib/elasticity/streaming_step_spec.rb
247
253
  - spec/lib/elasticity/sync_to_s3_spec.rb
248
254
  - spec/spec_helper.rb