elasticity 5.0.3 → 6.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/HISTORY.md +26 -0
  3. data/README.md +35 -28
  4. data/elasticity.gemspec +2 -2
  5. data/lib/elasticity.rb +5 -3
  6. data/lib/elasticity/aws_request_v4.rb +15 -3
  7. data/lib/elasticity/aws_session.rb +4 -23
  8. data/lib/elasticity/aws_utils.rb +0 -29
  9. data/lib/elasticity/cluster_status.rb +38 -0
  10. data/lib/elasticity/cluster_step_status.rb +51 -0
  11. data/lib/elasticity/emr.rb +208 -78
  12. data/lib/elasticity/job_flow.rb +16 -17
  13. data/lib/elasticity/version.rb +1 -1
  14. data/spec/factories/cluster_status_factory.rb +12 -0
  15. data/spec/factories/cluster_step_status_factory.rb +17 -0
  16. data/spec/lib/elasticity/aws_request_v4_spec.rb +54 -4
  17. data/spec/lib/elasticity/aws_session_spec.rb +22 -88
  18. data/spec/lib/elasticity/aws_utils_spec.rb +0 -46
  19. data/spec/lib/elasticity/bootstrap_action_spec.rb +7 -3
  20. data/spec/lib/elasticity/cluster_status_spec.rb +98 -0
  21. data/spec/lib/elasticity/cluster_step_status_spec.rb +80 -0
  22. data/spec/lib/elasticity/custom_jar_step_spec.rb +10 -7
  23. data/spec/lib/elasticity/emr_spec.rb +422 -132
  24. data/spec/lib/elasticity/ganglia_bootstrap_action_spec.rb +8 -3
  25. data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +8 -3
  26. data/spec/lib/elasticity/hadoop_file_bootstrap_action_spec.rb +7 -3
  27. data/spec/lib/elasticity/hive_step_spec.rb +21 -17
  28. data/spec/lib/elasticity/instance_group_spec.rb +9 -5
  29. data/spec/lib/elasticity/job_flow_integration_spec.rb +4 -4
  30. data/spec/lib/elasticity/job_flow_spec.rb +102 -76
  31. data/spec/lib/elasticity/job_flow_step_spec.rb +1 -1
  32. data/spec/lib/elasticity/looper_spec.rb +1 -1
  33. data/spec/lib/elasticity/pig_step_spec.rb +13 -9
  34. data/spec/lib/elasticity/s3distcp_step_spec.rb +7 -5
  35. data/spec/lib/elasticity/script_step_spec.rb +11 -6
  36. data/spec/lib/elasticity/setup_hadoop_debugging_step_spec.rb +9 -5
  37. data/spec/lib/elasticity/streaming_step_spec.rb +13 -9
  38. data/spec/spec_helper.rb +8 -0
  39. data/spec/support/factory_girl.rb +8 -0
  40. metadata +24 -21
  41. data/lib/elasticity/aws_request_v2.rb +0 -42
  42. data/lib/elasticity/job_flow_status.rb +0 -91
  43. data/lib/elasticity/job_flow_status_step.rb +0 -38
  44. data/spec/lib/elasticity/aws_request_v2_spec.rb +0 -38
  45. data/spec/lib/elasticity/job_flow_status_spec.rb +0 -265
  46. data/spec/lib/elasticity/job_flow_status_step_spec.rb +0 -80
@@ -2,22 +2,21 @@ describe Elasticity::AwsSession do
2
2
 
3
3
  before do
4
4
  Timecop.freeze(Time.at(1302461096))
5
+
6
+ Elasticity.configure do |c|
7
+ c.access_key = 'access'
8
+ c.secret_key = 'secret'
9
+ end
5
10
  end
6
11
 
7
12
  after do
8
13
  Timecop.return
9
14
  end
10
15
 
11
- subject do
12
- Elasticity::AwsSession.new('access', 'secret')
13
- end
14
-
15
16
  describe '#initialize' do
16
17
 
17
18
  context 'when access and/or secret keys are provided' do
18
19
  it 'should set them to the provided values' do
19
- subject.access_key.should == 'access'
20
- subject.secret_key.should == 'secret'
21
20
  subject.region.should == 'us-east-1'
22
21
  end
23
22
  end
@@ -25,71 +24,15 @@ describe Elasticity::AwsSession do
25
24
  context 'when :region is nil' do
26
25
  it 'should be an error' do
27
26
  expect {
28
- Elasticity::AwsSession.new('_', '_', :region => nil)
27
+ Elasticity::AwsSession.new(:region => nil)
29
28
  }.to raise_error Elasticity::MissingRegionError, 'A valid :region is required to connect to EMR'
30
29
  end
31
30
  end
32
31
 
33
32
  context 'when :region is specified' do
34
- Elasticity::AwsSession.new('_', '_', :region => 'TEST_REGION').region.should == 'TEST_REGION'
35
- end
36
-
37
- context 'when either access or secret key is not provided or nil' do
38
-
39
- context 'when the proper environment variables are set' do
40
-
41
- context 'when access and secret key are not provided' do
42
- let(:default_values) { Elasticity::AwsSession.new }
43
- before do
44
- ENV.stub(:[]).with('AWS_ACCESS_KEY_ID').and_return('ENV_ACCESS')
45
- ENV.stub(:[]).with('AWS_SECRET_ACCESS_KEY').and_return('ENV_SECRET')
46
- end
47
- it 'should set access and secret keys' do
48
- default_values.access_key.should == 'ENV_ACCESS'
49
- default_values.secret_key.should == 'ENV_SECRET'
50
- end
51
- end
52
-
53
- context 'when access and secret key are nil' do
54
- let(:nil_values) { Elasticity::AwsSession.new(nil, nil) }
55
- before do
56
- ENV.stub(:[]).with('AWS_ACCESS_KEY_ID').and_return('ENV_ACCESS')
57
- ENV.stub(:[]).with('AWS_SECRET_ACCESS_KEY').and_return('ENV_SECRET')
58
- end
59
- it 'should set access and secret keys' do
60
- nil_values.access_key.should == 'ENV_ACCESS'
61
- nil_values.secret_key.should == 'ENV_SECRET'
62
- end
63
- end
64
-
65
- end
66
-
67
- context 'when the environment variables are not set' do
68
- let(:missing_something) { Elasticity::AwsSession.new }
69
- context 'when the access key is not set' do
70
- before do
71
- ENV.stub(:[]).with('AWS_ACCESS_KEY_ID').and_return(nil)
72
- ENV.stub(:[]).with('AWS_SECRET_ACCESS_KEY').and_return('_')
73
- end
74
- it 'should raise an error' do
75
- expect {
76
- missing_something.access_key
77
- }.to raise_error(Elasticity::MissingKeyError, 'Please provide an access key or set AWS_ACCESS_KEY_ID.')
78
- end
79
- end
80
- context 'when the secret key is not set' do
81
- before do
82
- ENV.stub(:[]).with('AWS_ACCESS_KEY_ID').and_return('_')
83
- ENV.stub(:[]).with('AWS_SECRET_ACCESS_KEY').and_return(nil)
84
- end
85
- it 'should raise an error' do
86
- expect {
87
- missing_something.access_key
88
- }.to raise_error(Elasticity::MissingKeyError, 'Please provide a secret key or set AWS_SECRET_ACCESS_KEY.')
89
- end
90
- end
33
+ it 'should be assigned' do
34
+ Elasticity::AwsSession.new(:region => 'TEST_REGION').region.should == 'TEST_REGION'
91
35
  end
92
-
93
36
  end
94
37
 
95
38
  end
@@ -97,12 +40,14 @@ describe Elasticity::AwsSession do
97
40
  describe '#host' do
98
41
 
99
42
  context 'when the region is not specified' do
100
- its(:host) { should == 'elasticmapreduce.us-east-1.amazonaws.com' }
43
+ it 'should use the default value' do
44
+ expect(subject.host).to eq('elasticmapreduce.us-east-1.amazonaws.com')
45
+ end
101
46
  end
102
47
 
103
48
  context 'when the region is specified' do
104
49
  let(:request_with_region) do
105
- Elasticity::AwsSession.new('_', '_', {:region => 'us-west-1'})
50
+ Elasticity::AwsSession.new(:region => 'us-west-1')
106
51
  end
107
52
  it 'should incorporate the region into the hostname' do
108
53
  request_with_region.host.should == 'elasticmapreduce.us-west-1.amazonaws.com'
@@ -131,18 +76,15 @@ describe Elasticity::AwsSession do
131
76
 
132
77
  context 'when there is an EMR error with the request' do
133
78
  let(:error_message) { 'ERROR_MESSAGE' }
134
- let(:error_xml) do
135
- <<-XML
136
- <ErrorResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
137
- <Error>
138
- <Message>#{error_message}</Message>
139
- </Error>
140
- </ErrorResponse>
141
- XML
79
+ let(:error_type) { 'ERROR_TYPE' }
80
+ let(:error_json) do
81
+ <<-JSON
82
+ { "__type" : "#{error_type}", "message" : "#{error_message}" }
83
+ JSON
142
84
  end
143
85
  let(:error) do
144
86
  RestClient::BadRequest.new.tap do |error|
145
- error.stub(:http_body => error_xml)
87
+ error.stub(:http_body => error_json)
146
88
  end
147
89
  end
148
90
 
@@ -150,7 +92,7 @@ describe Elasticity::AwsSession do
150
92
  RestClient.should_receive(:post).and_raise(error)
151
93
  expect {
152
94
  subject.submit({})
153
- }.to raise_error(ArgumentError, error_message)
95
+ }.to raise_error(ArgumentError, "AWS EMR API Error (#{error_type}): #{error_message}")
154
96
  end
155
97
  end
156
98
 
@@ -160,7 +102,7 @@ describe Elasticity::AwsSession do
160
102
 
161
103
  describe 'basic equality checks with subject' do
162
104
  let(:same_object) { subject }
163
- let(:same_values) { Elasticity::AwsSession.new('access', 'secret', {}) }
105
+ let(:same_values) { Elasticity::AwsSession.new }
164
106
  let(:diff_type) { Object.new }
165
107
 
166
108
  it { should == same_object }
@@ -170,17 +112,9 @@ describe Elasticity::AwsSession do
170
112
 
171
113
  describe 'deep comparisons' do
172
114
 
173
- it 'should fail on access key check' do
174
- Elasticity::AwsSession.new('access', '_').should_not == Elasticity::AwsSession.new('_', '_')
175
- end
176
-
177
- it 'should fail on secret key check' do
178
- Elasticity::AwsSession.new('_', 'secret').should_not == Elasticity::AwsSession.new('_', '_')
179
- end
180
-
181
115
  it 'should fail on host check' do
182
- aws1 = Elasticity::AwsSession.new('_', '_', :region => 'us-east-1')
183
- aws2 = Elasticity::AwsSession.new('_', '_', :region => 'us-west-1')
116
+ aws1 = Elasticity::AwsSession.new(:region => 'us-east-1')
117
+ aws2 = Elasticity::AwsSession.new(:region => 'us-west-1')
184
118
  aws1.should_not == aws2
185
119
  end
186
120
 
@@ -56,50 +56,4 @@ describe Elasticity::AwsUtils do
56
56
  end
57
57
  end
58
58
 
59
- describe '.convert_ruby_to_aws' do
60
- it 'should convert the params' do
61
- add_jobflow_steps_params = {
62
- :job_flow_id => 'j-1',
63
- :steps => [
64
- {
65
- :action_on_failure => 'CONTINUE',
66
- :name => 'First New Job Step',
67
- :hadoop_jar_step => {
68
- :args => %w(arg1 arg2 arg3),
69
- :jar => 'first_step.jar',
70
- :main_class => 'first_class.jar'
71
- }
72
- },
73
- {
74
- :action_on_failure => 'CANCEL_AND_WAIT',
75
- :name => 'Second New Job Step',
76
- :hadoop_jar_step => {
77
- :args => %w(arg4 arg5 arg6),
78
- :jar => 'second_step.jar',
79
- :main_class => 'second_class.jar'
80
- }
81
- }
82
- ]
83
- }
84
- expected_result = {
85
- 'JobFlowId' => 'j-1',
86
- 'Steps.member.1.Name' => 'First New Job Step',
87
- 'Steps.member.1.ActionOnFailure' => 'CONTINUE',
88
- 'Steps.member.1.HadoopJarStep.Jar' => 'first_step.jar',
89
- 'Steps.member.1.HadoopJarStep.MainClass' => 'first_class.jar',
90
- 'Steps.member.1.HadoopJarStep.Args.member.1' => 'arg1',
91
- 'Steps.member.1.HadoopJarStep.Args.member.2' => 'arg2',
92
- 'Steps.member.1.HadoopJarStep.Args.member.3' => 'arg3',
93
- 'Steps.member.2.Name' => 'Second New Job Step',
94
- 'Steps.member.2.ActionOnFailure' => 'CANCEL_AND_WAIT',
95
- 'Steps.member.2.HadoopJarStep.Jar' => 'second_step.jar',
96
- 'Steps.member.2.HadoopJarStep.MainClass' => 'second_class.jar',
97
- 'Steps.member.2.HadoopJarStep.Args.member.1' => 'arg4',
98
- 'Steps.member.2.HadoopJarStep.Args.member.2' => 'arg5',
99
- 'Steps.member.2.HadoopJarStep.Args.member.3' => 'arg6'
100
- }
101
- Elasticity::AwsUtils.send(:convert_ruby_to_aws, add_jobflow_steps_params).should == expected_result
102
- end
103
- end
104
-
105
59
  end
@@ -4,9 +4,13 @@ describe Elasticity::BootstrapAction do
4
4
  Elasticity::BootstrapAction.new('script', 'arg1', 'arg2')
5
5
  end
6
6
 
7
- its(:name) { should == 'Elasticity Bootstrap Action' }
8
- its(:script) { should == 'script' }
9
- its(:arguments) { should == %w(arg1 arg2) }
7
+ describe '.initialize' do
8
+ it 'should set the fields appropriately' do
9
+ expect(subject.name).to eql('Elasticity Bootstrap Action')
10
+ expect(subject.script).to eql('script')
11
+ expect(subject.arguments).to eql(%w(arg1 arg2))
12
+ end
13
+ end
10
14
 
11
15
  describe '#to_aws_bootstrap_action' do
12
16
 
@@ -0,0 +1,98 @@
1
+ describe Elasticity::ClusterStatus do
2
+
3
+ let(:cluster_state) { 'TERMINATED' }
4
+ let(:aws_cluster_status) do
5
+ <<-JSON
6
+ {
7
+ "Cluster": {
8
+ "Applications": [
9
+ {
10
+ "Name": "hadoop",
11
+ "Version": "1.0.3"
12
+ }
13
+ ],
14
+ "AutoTerminate": true,
15
+ "Configurations": [
16
+
17
+ ],
18
+ "Ec2InstanceAttributes": {
19
+ "Ec2AvailabilityZone": "us-east-1a",
20
+ "EmrManagedMasterSecurityGroup": "sg-b7de0adf",
21
+ "EmrManagedSlaveSecurityGroup": "sg-89de0ae1"
22
+ },
23
+ "Id": "j-3T0PHNUXCY7SX",
24
+ "MasterPublicDnsName": "ec2-54-81-173-103.compute-1.amazonaws.com",
25
+ "Name": "Elasticity Job Flow",
26
+ "NormalizedInstanceHours": 2,
27
+ "RequestedAmiVersion": "latest",
28
+ "RunningAmiVersion": "2.4.2",
29
+ "Status": {
30
+ "State": "#{cluster_state}",
31
+ "StateChangeReason": {
32
+ "Code": "ALL_STEPS_COMPLETED",
33
+ "Message": "Steps completed"
34
+ },
35
+ "Timeline": {
36
+ "CreationDateTime": 1436788464.415,
37
+ "EndDateTime": 1436791032.097,
38
+ "ReadyDateTime": 1436788842.195
39
+ }
40
+ },
41
+ "Tags": [
42
+ {
43
+ "Key": "key",
44
+ "Value": "value"
45
+ }
46
+ ],
47
+ "TerminationProtected": false,
48
+ "VisibleToAllUsers": false
49
+ }
50
+ }
51
+ JSON
52
+ end
53
+
54
+ subject do
55
+ Elasticity::ClusterStatus.from_aws_data(JSON.parse(aws_cluster_status))
56
+ end
57
+
58
+ describe '.from_aws_data' do
59
+ it 'should hydate properly' do
60
+ expect(subject.name).to eql('Elasticity Job Flow')
61
+ expect(subject.cluster_id).to eql('j-3T0PHNUXCY7SX')
62
+ expect(subject.state).to eql('TERMINATED')
63
+ expect(subject.created_at).to eql(Time.at(1436788464.415))
64
+ expect(subject.ready_at).to eql(Time.at(1436788842.195))
65
+ expect(subject.ended_at).to eql(Time.at(1436791032.097))
66
+ expect(subject.last_state_change_reason).to eql('ALL_STEPS_COMPLETED')
67
+ expect(subject.master_public_dns_name).to eql('ec2-54-81-173-103.compute-1.amazonaws.com')
68
+ expect(subject.normalized_instance_hours).to eql(2)
69
+ end
70
+ end
71
+
72
+ describe '#active?' do
73
+
74
+ context 'when the jobflow status is terminal' do
75
+ %w{COMPLETED TERMINATED FAILED _}.each do |status|
76
+ context "when the jobflow is #{status}" do
77
+ let(:cluster_state) {status}
78
+ it 'is not active' do
79
+ expect(subject.active?).to be false
80
+ end
81
+ end
82
+ end
83
+ end
84
+
85
+ context 'when the jobflow status is not terminal' do
86
+ %w{RUNNING STARTING BOOTSTRAPPING WAITING SHUTTING_DOWN}.each do |status|
87
+ context "when the jobflow is #{status}" do
88
+ let(:cluster_state) {status}
89
+ it 'is active' do
90
+ expect(subject.active?).to be true
91
+ end
92
+ end
93
+ end
94
+ end
95
+
96
+ end
97
+
98
+ end
@@ -0,0 +1,80 @@
1
+ describe Elasticity::ClusterStepStatus do
2
+
3
+ let(:aws_cluster_steps) do
4
+ <<-JSON
5
+ {
6
+ "Steps": [
7
+ {
8
+ "ActionOnFailure": "TERMINATE_CLUSTER",
9
+ "Config": {
10
+ "Args": [
11
+ "36",
12
+ "3",
13
+ "0"
14
+ ],
15
+ "Jar": "s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar",
16
+ "MainClass" : "MAIN_CLASS",
17
+ "Properties": {
18
+ "Key1" : "Value1",
19
+ "Key2" : "Value2"
20
+ }
21
+ },
22
+ "Id": "s-OYPPAC4XPPUC",
23
+ "Name": "Elasticity Custom Jar Step",
24
+ "Status": {
25
+ "State": "COMPLETED",
26
+ "StateChangeReason": {
27
+ "Code": "ALL_STEPS_COMPLETED",
28
+ "Message": "Steps completed"
29
+ },
30
+ "Timeline": {
31
+ "CreationDateTime": 1436788464.416,
32
+ "EndDateTime": 1436790944.162,
33
+ "StartDateTime": 1436788841.237
34
+ }
35
+ }
36
+ }
37
+ ]
38
+ }
39
+ JSON
40
+ end
41
+
42
+ describe '.from_aws_list_data' do
43
+ let(:cluster_step_statuses) { Elasticity::ClusterStepStatus.from_aws_list_data(JSON.parse(aws_cluster_steps)) }
44
+
45
+ it 'should extract the proper number of steps' do
46
+ expect(cluster_step_statuses.length).to eql(1)
47
+ end
48
+
49
+ it 'should hydate properly' do
50
+ status = cluster_step_statuses[0]
51
+ expect(status.action_on_failure).to eql('TERMINATE_CLUSTER')
52
+ expect(status.args).to eql(['36', '3', '0',])
53
+ expect(status.jar).to eql('s3n://elasticmapreduce/samples/cloudburst/cloudburst.jar')
54
+ expect(status.main_class).to eql('MAIN_CLASS')
55
+ expect(status.step_id).to eql('s-OYPPAC4XPPUC')
56
+ expect(status.properties).to eql({'Key1' => 'Value1', 'Key2' => 'Value2'})
57
+ expect(status.name).to eql('Elasticity Custom Jar Step')
58
+ expect(status.state).to eql('COMPLETED')
59
+ expect(status.state_change_reason).to eql('ALL_STEPS_COMPLETED')
60
+ expect(status.state_change_reason_message).to eql('Steps completed')
61
+ expect(status.created_at).to eql(Time.at(1436788464.416))
62
+ expect(status.started_at).to eql(Time.at(1436788841.237))
63
+ expect(status.ended_at).to eql(Time.at(1436790944.162))
64
+ end
65
+ end
66
+
67
+ describe '.installed_steps' do
68
+ let(:installed_cluster_step_statuses) do
69
+ step_names = Elasticity::JobFlowStep.steps_requiring_installation.map { |s| s.aws_installation_step_name }
70
+ step_names.map { |name| build(:cluster_step_status, :name => name) }
71
+ end
72
+
73
+ it 'should return a list of steps that are installed' do
74
+ expect(Elasticity::ClusterStepStatus.installed_steps(installed_cluster_step_statuses)).to match_array([
75
+ Elasticity::PigStep, Elasticity::HiveStep
76
+ ])
77
+ end
78
+ end
79
+
80
+ end
@@ -6,10 +6,13 @@ describe Elasticity::CustomJarStep do
6
6
 
7
7
  it { should be_a Elasticity::JobFlowStep }
8
8
 
9
- its(:name) { should == 'Elasticity Custom Jar Step' }
10
- its(:jar) { should == 'jar' }
11
- its(:arguments) { should == [] }
12
- its(:action_on_failure) { should == 'TERMINATE_JOB_FLOW' }
9
+ describe '.initialize' do
10
+ it 'should set the fields appropriately' do
11
+ expect(subject.name).to eql('Elasticity Custom Jar Step')
12
+ expect(subject.jar).to eql('jar')
13
+ expect(subject.arguments).to eql([])
14
+ end
15
+ end
13
16
 
14
17
  describe '#to_aws_step' do
15
18
 
@@ -19,7 +22,7 @@ describe Elasticity::CustomJarStep do
19
22
  let(:cjs_with_no_args) { Elasticity::CustomJarStep.new('jar') }
20
23
 
21
24
  it 'should convert to aws step format' do
22
- cjs_with_no_args.to_aws_step(Elasticity::JobFlow.new('access', 'secret')).should == {
25
+ cjs_with_no_args.to_aws_step(Elasticity::JobFlow.new).should == {
23
26
  :action_on_failure => 'TERMINATE_JOB_FLOW',
24
27
  :hadoop_jar_step => {
25
28
  :jar => 'jar'
@@ -37,7 +40,7 @@ describe Elasticity::CustomJarStep do
37
40
  end
38
41
 
39
42
  it 'should convert to aws step format' do
40
- cjs_with_args.to_aws_step(Elasticity::JobFlow.new('access', 'secret')).should == {
43
+ cjs_with_args.to_aws_step(Elasticity::JobFlow.new).should == {
41
44
  :action_on_failure => 'TERMINATE_JOB_FLOW',
42
45
  :hadoop_jar_step => {
43
46
  :jar => 'jar',
@@ -52,7 +55,7 @@ describe Elasticity::CustomJarStep do
52
55
 
53
56
  describe '.requires_installation?' do
54
57
  it 'should not require installation' do
55
- Elasticity::CustomJarStep.requires_installation?.should be_false
58
+ expect(Elasticity::CustomJarStep.requires_installation?).to be false
56
59
  end
57
60
  end
58
61