elasticity 5.0.3 → 6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46):
  1. checksums.yaml +4 -4
  2. data/HISTORY.md +26 -0
  3. data/README.md +35 -28
  4. data/elasticity.gemspec +2 -2
  5. data/lib/elasticity.rb +5 -3
  6. data/lib/elasticity/aws_request_v4.rb +15 -3
  7. data/lib/elasticity/aws_session.rb +4 -23
  8. data/lib/elasticity/aws_utils.rb +0 -29
  9. data/lib/elasticity/cluster_status.rb +38 -0
  10. data/lib/elasticity/cluster_step_status.rb +51 -0
  11. data/lib/elasticity/emr.rb +208 -78
  12. data/lib/elasticity/job_flow.rb +16 -17
  13. data/lib/elasticity/version.rb +1 -1
  14. data/spec/factories/cluster_status_factory.rb +12 -0
  15. data/spec/factories/cluster_step_status_factory.rb +17 -0
  16. data/spec/lib/elasticity/aws_request_v4_spec.rb +54 -4
  17. data/spec/lib/elasticity/aws_session_spec.rb +22 -88
  18. data/spec/lib/elasticity/aws_utils_spec.rb +0 -46
  19. data/spec/lib/elasticity/bootstrap_action_spec.rb +7 -3
  20. data/spec/lib/elasticity/cluster_status_spec.rb +98 -0
  21. data/spec/lib/elasticity/cluster_step_status_spec.rb +80 -0
  22. data/spec/lib/elasticity/custom_jar_step_spec.rb +10 -7
  23. data/spec/lib/elasticity/emr_spec.rb +422 -132
  24. data/spec/lib/elasticity/ganglia_bootstrap_action_spec.rb +8 -3
  25. data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +8 -3
  26. data/spec/lib/elasticity/hadoop_file_bootstrap_action_spec.rb +7 -3
  27. data/spec/lib/elasticity/hive_step_spec.rb +21 -17
  28. data/spec/lib/elasticity/instance_group_spec.rb +9 -5
  29. data/spec/lib/elasticity/job_flow_integration_spec.rb +4 -4
  30. data/spec/lib/elasticity/job_flow_spec.rb +102 -76
  31. data/spec/lib/elasticity/job_flow_step_spec.rb +1 -1
  32. data/spec/lib/elasticity/looper_spec.rb +1 -1
  33. data/spec/lib/elasticity/pig_step_spec.rb +13 -9
  34. data/spec/lib/elasticity/s3distcp_step_spec.rb +7 -5
  35. data/spec/lib/elasticity/script_step_spec.rb +11 -6
  36. data/spec/lib/elasticity/setup_hadoop_debugging_step_spec.rb +9 -5
  37. data/spec/lib/elasticity/streaming_step_spec.rb +13 -9
  38. data/spec/spec_helper.rb +8 -0
  39. data/spec/support/factory_girl.rb +8 -0
  40. metadata +24 -21
  41. data/lib/elasticity/aws_request_v2.rb +0 -42
  42. data/lib/elasticity/job_flow_status.rb +0 -91
  43. data/lib/elasticity/job_flow_status_step.rb +0 -38
  44. data/spec/lib/elasticity/aws_request_v2_spec.rb +0 -38
  45. data/spec/lib/elasticity/job_flow_status_spec.rb +0 -265
  46. data/spec/lib/elasticity/job_flow_status_step_spec.rb +0 -80
@@ -1,38 +0,0 @@
1
- module Elasticity
2
-
3
- class JobFlowStatusStep
4
-
5
- attr_accessor :name
6
- attr_accessor :state
7
- attr_accessor :created_at
8
- attr_accessor :started_at
9
- attr_accessor :ended_at
10
-
11
- # Create a job flow from an AWS <member> (Nokogiri::XML::Element):
12
- # /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member
13
- def self.from_member_element(xml_element)
14
- job_flow_step = JobFlowStatusStep.new
15
- job_flow_step.name = xml_element.xpath('./StepConfig/Name').text.strip
16
- job_flow_step.state = xml_element.xpath('./ExecutionStatusDetail/State').text.strip
17
- created_at = xml_element.xpath('./ExecutionStatusDetail/CreationDateTime').text.strip
18
- job_flow_step.created_at = (created_at == '') ? (nil) : (Time.parse(created_at))
19
- started_at = xml_element.xpath('./ExecutionStatusDetail/StartDateTime').text.strip
20
- job_flow_step.started_at = (started_at == '') ? (nil) : (Time.parse(started_at))
21
- ended_at = xml_element.xpath('./ExecutionStatusDetail/EndDateTime').text.strip
22
- job_flow_step.ended_at = (ended_at == '') ? (nil) : (Time.parse(ended_at))
23
- job_flow_step
24
- end
25
-
26
- # Create JobFlowSteps from a collection of AWS <member> nodes (Nokogiri::XML::NodeSet):
27
- # /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member
28
- def self.from_members_nodeset(members_nodeset)
29
- jobflow_steps = []
30
- members_nodeset.each do |member|
31
- jobflow_steps << from_member_element(member)
32
- end
33
- jobflow_steps
34
- end
35
-
36
- end
37
-
38
- end
@@ -1,38 +0,0 @@
1
- describe Elasticity::AwsRequestV2 do
2
-
3
- before do
4
- Timecop.freeze(Time.at(1302461096))
5
- end
6
-
7
- after do
8
- Timecop.return
9
- end
10
-
11
- subject do
12
- Elasticity::AwsRequestV2.new(
13
- Elasticity::AwsSession.new('access', 'secret'),
14
- {:operation => 'RunJobFlow', :name => 'Elasticity Job Flow'}
15
- )
16
- end
17
-
18
- describe '#url' do
19
- it 'should construct a proper endpoint' do
20
- subject.url.should == 'https://elasticmapreduce.us-east-1.amazonaws.com'
21
- end
22
- end
23
-
24
- describe '#headers' do
25
- it 'should create the proper headers' do
26
- subject.headers.should == {
27
- :content_type => 'application/x-www-form-urlencoded; charset=utf-8'
28
- }
29
- end
30
- end
31
-
32
- describe '#payload' do
33
- it 'should payload up the place' do
34
- subject.payload.should == 'AWSAccessKeyId=access&Name=Elasticity%20Job%20Flow&Operation=RunJobFlow&SignatureMethod=HmacSHA256&SignatureVersion=2&Timestamp=2011-04-10T18%3A44%3A56.000Z&Signature=5x6YilYHOjgM%2F6nalIOf62txOKoLFGBYyIivoHb%2F27k%3D'
35
- end
36
- end
37
-
38
- end
@@ -1,265 +0,0 @@
1
- describe Elasticity::JobFlowStatus do
2
-
3
- let(:hive_setup_config) do
4
- <<-XML
5
- <member>
6
- <StepConfig>
7
- <Name>Elasticity - Install Hive</Name>
8
- </StepConfig>
9
- <ExecutionStatusDetail>
10
- <State>FAILED</State>
11
- </ExecutionStatusDetail>
12
- </member>
13
- XML
14
- end
15
-
16
- let(:pig_setup_config) do
17
- <<-XML
18
- <member>
19
- <StepConfig>
20
- <Name>Elasticity - Install Pig</Name>
21
- </StepConfig>
22
- <ExecutionStatusDetail>
23
- <State>FAILED</State>
24
- </ExecutionStatusDetail>
25
- </member>
26
- XML
27
- end
28
-
29
- let(:started_at) do
30
- <<-XML
31
- <StartDateTime>
32
- 2011-10-04T21:49:17Z
33
- </StartDateTime>
34
- XML
35
- end
36
-
37
- let(:setup_config) do
38
- hive_setup_config
39
- end
40
-
41
- let(:describe_jobflows_xml) do
42
- <<-XML
43
- <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
44
- <DescribeJobFlowsResult>
45
- <JobFlows>
46
- <member>
47
- <JobFlowId>j-p</JobFlowId>
48
- <Name>Hive Job 1</Name>
49
- <ExecutionStatusDetail>
50
- <CreationDateTime>
51
- 2011-10-04T21:49:16Z
52
- </CreationDateTime>
53
- <LastStateChangeReason>
54
- Steps completed with errors
55
- </LastStateChangeReason>
56
- #{started_at}
57
- <ReadyDateTime>
58
- 2011-10-04T21:49:18Z
59
- </ReadyDateTime>
60
- <EndDateTime>
61
- 2011-10-05T21:49:18Z
62
- </EndDateTime>
63
- <State>TERMINATED</State>
64
- </ExecutionStatusDetail>
65
- <Steps>
66
- #{setup_config}
67
- <member>
68
- <StepConfig>
69
- <Name>Run Hive Script</Name>
70
- </StepConfig>
71
- <ExecutionStatusDetail>
72
- <State>PENDING</State>
73
- </ExecutionStatusDetail>
74
- </member>
75
- </Steps>
76
- <Instances>
77
- <MasterPublicDnsName>
78
- ec2-107-22-77-99.compute-1.amazonaws.com
79
- </MasterPublicDnsName>
80
- <NormalizedInstanceHours>
81
- 0
82
- </NormalizedInstanceHours>
83
- <Placement>
84
- <AvailabilityZone>
85
- eu-west-1a
86
- </AvailabilityZone>
87
- </Placement>
88
- <SlaveInstanceType>
89
- m1.small
90
- </SlaveInstanceType>
91
- <MasterInstanceId>
92
- i-15a4417c
93
- </MasterInstanceId>
94
- <MasterInstanceType>
95
- m1.small
96
- </MasterInstanceType>
97
- <Ec2KeyName>
98
- myec2keyname
99
- </Ec2KeyName>
100
- <InstanceCount>
101
- 4
102
- </InstanceCount>
103
- </Instances>
104
- </member>
105
- <member>
106
- <JobFlowId>j-h</JobFlowId>
107
- <Name>Hive Job 2</Name>
108
- <ExecutionStatusDetail>
109
- <CreationDateTime>
110
- 2011-10-04T22:49:16Z
111
- </CreationDateTime>
112
- <State>
113
- TERMINATED
114
- </State>
115
- <LastStateChangeReason>
116
- Steps completed
117
- </LastStateChangeReason>
118
- </ExecutionStatusDetail>
119
- <Instances>
120
- <NormalizedInstanceHours>
121
- 4
122
- </NormalizedInstanceHours>
123
- <Placement>
124
- <AvailabilityZone>
125
- eu-west-1b
126
- </AvailabilityZone>
127
- </Placement>
128
- <SlaveInstanceType>
129
- c1.medium
130
- </SlaveInstanceType>
131
- <MasterInstanceType>
132
- c1.medium
133
- </MasterInstanceType>
134
- <Ec2KeyName>
135
- myec2keyname
136
- </Ec2KeyName>
137
- <InstanceCount>
138
- 2
139
- </InstanceCount>
140
- </Instances>
141
- </member>
142
- </JobFlows>
143
- </DescribeJobFlowsResult>
144
- </DescribeJobFlowsResponse>
145
- XML
146
- end
147
-
148
- let(:members_nodeset) do
149
- describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
150
- describe_jobflows_document.remove_namespaces!
151
- describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member')
152
- end
153
-
154
- let(:single_jobflow_status) { Elasticity::JobFlowStatus.from_member_element(members_nodeset[0]) }
155
-
156
- let(:multiple_jobflow_statuses) { Elasticity::JobFlowStatus.from_members_nodeset(members_nodeset) }
157
-
158
- describe '.from_xml' do
159
- it 'should return a JobFlowStatus with the appropriate fields initialized' do
160
- single_jobflow_status.name.should == 'Hive Job 1'
161
- single_jobflow_status.jobflow_id.should == 'j-p'
162
- single_jobflow_status.state.should == 'TERMINATED'
163
- single_jobflow_status.steps.map(&:name).should == ['Elasticity - Install Hive', 'Run Hive Script']
164
- single_jobflow_status.steps.map(&:state).should == %w(FAILED PENDING)
165
- single_jobflow_status.created_at.should == Time.parse('2011-10-04T21:49:16Z')
166
- single_jobflow_status.started_at.should == Time.parse('2011-10-04T21:49:17Z')
167
- single_jobflow_status.ready_at.should == Time.parse('2011-10-04T21:49:18Z')
168
- single_jobflow_status.ended_at.should == Time.parse('2011-10-05T21:49:18Z')
169
- single_jobflow_status.duration.should == 1440
170
- single_jobflow_status.master_instance_id.should == 'i-15a4417c'
171
- single_jobflow_status.master_instance_type.should == 'm1.small'
172
- single_jobflow_status.slave_instance_type.should == 'm1.small'
173
- single_jobflow_status.instance_count.should == '4'
174
- single_jobflow_status.last_state_change_reason.should == 'Steps completed with errors'
175
- single_jobflow_status.master_public_dns_name.should == 'ec2-107-22-77-99.compute-1.amazonaws.com'
176
- single_jobflow_status.normalized_instance_hours.should == '0'
177
- end
178
-
179
- context 'when the jobflow never started' do
180
- let(:started_at) {}
181
- it 'should have a nil duration' do
182
- single_jobflow_status.started_at.should == nil
183
- single_jobflow_status.duration.should == nil
184
- end
185
- end
186
- end
187
-
188
- describe '.from_jobflow_statuses_nodeset' do
189
- it 'should return JobFlowStatuses with the appropriate fields initialized' do
190
- multiple_jobflow_statuses.map(&:name).should == ['Hive Job 1', 'Hive Job 2']
191
- multiple_jobflow_statuses.map(&:jobflow_id).should == %w(j-p j-h)
192
- multiple_jobflow_statuses.map(&:state).should == %w(TERMINATED TERMINATED)
193
- multiple_jobflow_statuses.map(&:created_at).should == [Time.parse('2011-10-04T21:49:16Z'), Time.parse('2011-10-04T22:49:16Z')]
194
- multiple_jobflow_statuses.map(&:started_at).should == [Time.parse('2011-10-04T21:49:17Z'), nil]
195
- multiple_jobflow_statuses.map(&:ready_at).should == [Time.parse('2011-10-04T21:49:18Z'), nil]
196
- multiple_jobflow_statuses.map(&:ended_at).should == [Time.parse('2011-10-05T21:49:18Z'), nil]
197
- multiple_jobflow_statuses.map(&:duration).should == [1440, nil]
198
- multiple_jobflow_statuses.map(&:master_instance_id).should == ['i-15a4417c', nil]
199
- multiple_jobflow_statuses.map(&:master_instance_type).should == %w(m1.small c1.medium)
200
- multiple_jobflow_statuses.map(&:slave_instance_type).should == %w(m1.small c1.medium)
201
- multiple_jobflow_statuses.map(&:instance_count).should == %w(4 2)
202
- multiple_jobflow_statuses.map(&:last_state_change_reason).should == ['Steps completed with errors', 'Steps completed']
203
- multiple_jobflow_statuses.map(&:master_public_dns_name).should == ['ec2-107-22-77-99.compute-1.amazonaws.com', nil]
204
- multiple_jobflow_statuses.map(&:normalized_instance_hours).should == %w(0 4)
205
- end
206
- end
207
-
208
- describe '#installed_steps' do
209
-
210
- context 'when nothing has been installed' do
211
- let(:setup_config) { }
212
- it 'should be empty' do
213
- single_jobflow_status.installed_steps.should == []
214
- end
215
- end
216
-
217
- context 'when Hive has been installed by Elasticity' do
218
- let(:setup_config) { hive_setup_config }
219
- it 'should include HiveStep' do
220
- single_jobflow_status.installed_steps.should == [Elasticity::HiveStep]
221
- end
222
- end
223
-
224
- context 'when Pig has been installed by Elasticity' do
225
- let(:setup_config) { pig_setup_config }
226
- it 'should include PigStep' do
227
- single_jobflow_status.installed_steps.should == [Elasticity::PigStep]
228
- end
229
- end
230
-
231
- context 'when more than one step has been installed by Elasticity' do
232
- let(:setup_config) { hive_setup_config + pig_setup_config }
233
- it 'should include all of them' do
234
- single_jobflow_status.installed_steps.should =~ [Elasticity::HiveStep, Elasticity::PigStep]
235
- end
236
- end
237
- end
238
-
239
- describe '#active?' do
240
-
241
- context 'when the jobflow status is terminal' do
242
- %w{COMPLETED TERMINATED FAILED _}.each do |status|
243
- context "when the jobflow is #{status}" do
244
- it 'is not active' do
245
- single_jobflow_status.state = status
246
- single_jobflow_status.active?.should be_false
247
- end
248
- end
249
- end
250
- end
251
-
252
- context 'when the jobflow status is not terminal' do
253
- %w{RUNNING STARTING BOOTSTRAPPING WAITING SHUTTING_DOWN}.each do |status|
254
- context "when the jobflow is #{status}" do
255
- it 'is active' do
256
- single_jobflow_status.state = status
257
- single_jobflow_status.active?.should be_true
258
- end
259
- end
260
- end
261
- end
262
-
263
- end
264
-
265
- end
@@ -1,80 +0,0 @@
1
- describe Elasticity::JobFlowStatusStep do
2
-
3
- before do
4
- describe_jobflows_xml = <<-JOBFLOWS
5
- <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
6
- <DescribeJobFlowsResult>
7
- <JobFlows>
8
- <member>
9
- <JobFlowId>j-p</JobFlowId>
10
- <Name>Pig Job</Name>
11
- <ExecutionStatusDetail>
12
- <State>TERMINATED</State>
13
- </ExecutionStatusDetail>
14
- <Steps>
15
- <member>
16
- <StepConfig>
17
- <Name>Setup Hive</Name>
18
- </StepConfig>
19
- <ExecutionStatusDetail>
20
- <State>FAILED</State>
21
- <CreationDateTime>
22
- 2011-10-04T21:46:16Z
23
- </CreationDateTime>
24
- <StartDateTime>
25
- 2011-10-04T21:49:16Z
26
- </StartDateTime>
27
- <EndDateTime>
28
- 2011-10-04T21:51:16Z
29
- </EndDateTime>
30
- </ExecutionStatusDetail>
31
- </member>
32
- <member>
33
- <StepConfig>
34
- <Name>Run Hive Script</Name>
35
- </StepConfig>
36
- <ExecutionStatusDetail>
37
- <State>PENDING</State>
38
- <CreationDateTime>
39
- </CreationDateTime>
40
- <StartDateTime>
41
- </StartDateTime>
42
- <EndDateTime>
43
- </EndDateTime>
44
- </ExecutionStatusDetail>
45
- </member>
46
- </Steps>
47
- </member>
48
- </JobFlows>
49
- </DescribeJobFlowsResult>
50
- </DescribeJobFlowsResponse>
51
- JOBFLOWS
52
- describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
53
- describe_jobflows_document.remove_namespaces!
54
- @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member')
55
- end
56
-
57
- describe ".from_xml" do
58
- it "should return a JobFlowStep with the appropriate fields initialized" do
59
- jobflow_step = Elasticity::JobFlowStatusStep.from_member_element(@members_nodeset[0])
60
- jobflow_step.name.should == "Setup Hive"
61
- jobflow_step.state.should == "FAILED"
62
- jobflow_step.created_at.should == Time.parse("2011-10-04T21:46:16Z")
63
- jobflow_step.started_at.should == Time.parse("2011-10-04T21:49:16Z")
64
- jobflow_step.ended_at.should == Time.parse("2011-10-04T21:51:16Z")
65
- end
66
- end
67
-
68
- describe ".from_steps_nodeset" do
69
- it "should return JobFlowSteps with the appropriate fields initialized" do
70
- jobflow_steps = Elasticity::JobFlowStatusStep.from_members_nodeset(@members_nodeset)
71
- jobflow_steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
72
- jobflow_steps.map(&:state).should == ["FAILED", "PENDING"]
73
- jobflow_steps.map(&:created_at).should == [Time.parse("2011-10-04T21:46:16Z"), nil]
74
- jobflow_steps.map(&:started_at).should == [Time.parse("2011-10-04T21:49:16Z"), nil]
75
- jobflow_steps.map(&:ended_at).should == [Time.parse("2011-10-04T21:51:16Z"), nil]
76
- end
77
- end
78
-
79
-
80
- end