elasticity 5.0.3 → 6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.md +26 -0
  3. data/README.md +35 -28
  4. data/elasticity.gemspec +2 -2
  5. data/lib/elasticity.rb +5 -3
  6. data/lib/elasticity/aws_request_v4.rb +15 -3
  7. data/lib/elasticity/aws_session.rb +4 -23
  8. data/lib/elasticity/aws_utils.rb +0 -29
  9. data/lib/elasticity/cluster_status.rb +38 -0
  10. data/lib/elasticity/cluster_step_status.rb +51 -0
  11. data/lib/elasticity/emr.rb +208 -78
  12. data/lib/elasticity/job_flow.rb +16 -17
  13. data/lib/elasticity/version.rb +1 -1
  14. data/spec/factories/cluster_status_factory.rb +12 -0
  15. data/spec/factories/cluster_step_status_factory.rb +17 -0
  16. data/spec/lib/elasticity/aws_request_v4_spec.rb +54 -4
  17. data/spec/lib/elasticity/aws_session_spec.rb +22 -88
  18. data/spec/lib/elasticity/aws_utils_spec.rb +0 -46
  19. data/spec/lib/elasticity/bootstrap_action_spec.rb +7 -3
  20. data/spec/lib/elasticity/cluster_status_spec.rb +98 -0
  21. data/spec/lib/elasticity/cluster_step_status_spec.rb +80 -0
  22. data/spec/lib/elasticity/custom_jar_step_spec.rb +10 -7
  23. data/spec/lib/elasticity/emr_spec.rb +422 -132
  24. data/spec/lib/elasticity/ganglia_bootstrap_action_spec.rb +8 -3
  25. data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +8 -3
  26. data/spec/lib/elasticity/hadoop_file_bootstrap_action_spec.rb +7 -3
  27. data/spec/lib/elasticity/hive_step_spec.rb +21 -17
  28. data/spec/lib/elasticity/instance_group_spec.rb +9 -5
  29. data/spec/lib/elasticity/job_flow_integration_spec.rb +4 -4
  30. data/spec/lib/elasticity/job_flow_spec.rb +102 -76
  31. data/spec/lib/elasticity/job_flow_step_spec.rb +1 -1
  32. data/spec/lib/elasticity/looper_spec.rb +1 -1
  33. data/spec/lib/elasticity/pig_step_spec.rb +13 -9
  34. data/spec/lib/elasticity/s3distcp_step_spec.rb +7 -5
  35. data/spec/lib/elasticity/script_step_spec.rb +11 -6
  36. data/spec/lib/elasticity/setup_hadoop_debugging_step_spec.rb +9 -5
  37. data/spec/lib/elasticity/streaming_step_spec.rb +13 -9
  38. data/spec/spec_helper.rb +8 -0
  39. data/spec/support/factory_girl.rb +8 -0
  40. metadata +24 -21
  41. data/lib/elasticity/aws_request_v2.rb +0 -42
  42. data/lib/elasticity/job_flow_status.rb +0 -91
  43. data/lib/elasticity/job_flow_status_step.rb +0 -38
  44. data/spec/lib/elasticity/aws_request_v2_spec.rb +0 -38
  45. data/spec/lib/elasticity/job_flow_status_spec.rb +0 -265
  46. data/spec/lib/elasticity/job_flow_status_step_spec.rb +0 -80
@@ -1,38 +0,0 @@
1
- module Elasticity
2
-
3
- class JobFlowStatusStep
4
-
5
- attr_accessor :name
6
- attr_accessor :state
7
- attr_accessor :created_at
8
- attr_accessor :started_at
9
- attr_accessor :ended_at
10
-
11
- # Create a job flow from an AWS <member> (Nokogiri::XML::Element):
12
- # /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member
13
- def self.from_member_element(xml_element)
14
- job_flow_step = JobFlowStatusStep.new
15
- job_flow_step.name = xml_element.xpath('./StepConfig/Name').text.strip
16
- job_flow_step.state = xml_element.xpath('./ExecutionStatusDetail/State').text.strip
17
- created_at = xml_element.xpath('./ExecutionStatusDetail/CreationDateTime').text.strip
18
- job_flow_step.created_at = (created_at == '') ? (nil) : (Time.parse(created_at))
19
- started_at = xml_element.xpath('./ExecutionStatusDetail/StartDateTime').text.strip
20
- job_flow_step.started_at = (started_at == '') ? (nil) : (Time.parse(started_at))
21
- ended_at = xml_element.xpath('./ExecutionStatusDetail/EndDateTime').text.strip
22
- job_flow_step.ended_at = (ended_at == '') ? (nil) : (Time.parse(ended_at))
23
- job_flow_step
24
- end
25
-
26
- # Create JobFlowSteps from a collection of AWS <member> nodes (Nokogiri::XML::NodeSet):
27
- # /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member
28
- def self.from_members_nodeset(members_nodeset)
29
- jobflow_steps = []
30
- members_nodeset.each do |member|
31
- jobflow_steps << from_member_element(member)
32
- end
33
- jobflow_steps
34
- end
35
-
36
- end
37
-
38
- end
@@ -1,38 +0,0 @@
1
- describe Elasticity::AwsRequestV2 do
2
-
3
- before do
4
- Timecop.freeze(Time.at(1302461096))
5
- end
6
-
7
- after do
8
- Timecop.return
9
- end
10
-
11
- subject do
12
- Elasticity::AwsRequestV2.new(
13
- Elasticity::AwsSession.new('access', 'secret'),
14
- {:operation => 'RunJobFlow', :name => 'Elasticity Job Flow'}
15
- )
16
- end
17
-
18
- describe '#url' do
19
- it 'should construct a proper endpoint' do
20
- subject.url.should == 'https://elasticmapreduce.us-east-1.amazonaws.com'
21
- end
22
- end
23
-
24
- describe '#headers' do
25
- it 'should create the proper headers' do
26
- subject.headers.should == {
27
- :content_type => 'application/x-www-form-urlencoded; charset=utf-8'
28
- }
29
- end
30
- end
31
-
32
- describe '#payload' do
33
- it 'should payload up the place' do
34
- subject.payload.should == 'AWSAccessKeyId=access&Name=Elasticity%20Job%20Flow&Operation=RunJobFlow&SignatureMethod=HmacSHA256&SignatureVersion=2&Timestamp=2011-04-10T18%3A44%3A56.000Z&Signature=5x6YilYHOjgM%2F6nalIOf62txOKoLFGBYyIivoHb%2F27k%3D'
35
- end
36
- end
37
-
38
- end
@@ -1,265 +0,0 @@
1
- describe Elasticity::JobFlowStatus do
2
-
3
- let(:hive_setup_config) do
4
- <<-XML
5
- <member>
6
- <StepConfig>
7
- <Name>Elasticity - Install Hive</Name>
8
- </StepConfig>
9
- <ExecutionStatusDetail>
10
- <State>FAILED</State>
11
- </ExecutionStatusDetail>
12
- </member>
13
- XML
14
- end
15
-
16
- let(:pig_setup_config) do
17
- <<-XML
18
- <member>
19
- <StepConfig>
20
- <Name>Elasticity - Install Pig</Name>
21
- </StepConfig>
22
- <ExecutionStatusDetail>
23
- <State>FAILED</State>
24
- </ExecutionStatusDetail>
25
- </member>
26
- XML
27
- end
28
-
29
- let(:started_at) do
30
- <<-XML
31
- <StartDateTime>
32
- 2011-10-04T21:49:17Z
33
- </StartDateTime>
34
- XML
35
- end
36
-
37
- let(:setup_config) do
38
- hive_setup_config
39
- end
40
-
41
- let(:describe_jobflows_xml) do
42
- <<-XML
43
- <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
44
- <DescribeJobFlowsResult>
45
- <JobFlows>
46
- <member>
47
- <JobFlowId>j-p</JobFlowId>
48
- <Name>Hive Job 1</Name>
49
- <ExecutionStatusDetail>
50
- <CreationDateTime>
51
- 2011-10-04T21:49:16Z
52
- </CreationDateTime>
53
- <LastStateChangeReason>
54
- Steps completed with errors
55
- </LastStateChangeReason>
56
- #{started_at}
57
- <ReadyDateTime>
58
- 2011-10-04T21:49:18Z
59
- </ReadyDateTime>
60
- <EndDateTime>
61
- 2011-10-05T21:49:18Z
62
- </EndDateTime>
63
- <State>TERMINATED</State>
64
- </ExecutionStatusDetail>
65
- <Steps>
66
- #{setup_config}
67
- <member>
68
- <StepConfig>
69
- <Name>Run Hive Script</Name>
70
- </StepConfig>
71
- <ExecutionStatusDetail>
72
- <State>PENDING</State>
73
- </ExecutionStatusDetail>
74
- </member>
75
- </Steps>
76
- <Instances>
77
- <MasterPublicDnsName>
78
- ec2-107-22-77-99.compute-1.amazonaws.com
79
- </MasterPublicDnsName>
80
- <NormalizedInstanceHours>
81
- 0
82
- </NormalizedInstanceHours>
83
- <Placement>
84
- <AvailabilityZone>
85
- eu-west-1a
86
- </AvailabilityZone>
87
- </Placement>
88
- <SlaveInstanceType>
89
- m1.small
90
- </SlaveInstanceType>
91
- <MasterInstanceId>
92
- i-15a4417c
93
- </MasterInstanceId>
94
- <MasterInstanceType>
95
- m1.small
96
- </MasterInstanceType>
97
- <Ec2KeyName>
98
- myec2keyname
99
- </Ec2KeyName>
100
- <InstanceCount>
101
- 4
102
- </InstanceCount>
103
- </Instances>
104
- </member>
105
- <member>
106
- <JobFlowId>j-h</JobFlowId>
107
- <Name>Hive Job 2</Name>
108
- <ExecutionStatusDetail>
109
- <CreationDateTime>
110
- 2011-10-04T22:49:16Z
111
- </CreationDateTime>
112
- <State>
113
- TERMINATED
114
- </State>
115
- <LastStateChangeReason>
116
- Steps completed
117
- </LastStateChangeReason>
118
- </ExecutionStatusDetail>
119
- <Instances>
120
- <NormalizedInstanceHours>
121
- 4
122
- </NormalizedInstanceHours>
123
- <Placement>
124
- <AvailabilityZone>
125
- eu-west-1b
126
- </AvailabilityZone>
127
- </Placement>
128
- <SlaveInstanceType>
129
- c1.medium
130
- </SlaveInstanceType>
131
- <MasterInstanceType>
132
- c1.medium
133
- </MasterInstanceType>
134
- <Ec2KeyName>
135
- myec2keyname
136
- </Ec2KeyName>
137
- <InstanceCount>
138
- 2
139
- </InstanceCount>
140
- </Instances>
141
- </member>
142
- </JobFlows>
143
- </DescribeJobFlowsResult>
144
- </DescribeJobFlowsResponse>
145
- XML
146
- end
147
-
148
- let(:members_nodeset) do
149
- describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
150
- describe_jobflows_document.remove_namespaces!
151
- describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member')
152
- end
153
-
154
- let(:single_jobflow_status) { Elasticity::JobFlowStatus.from_member_element(members_nodeset[0]) }
155
-
156
- let(:multiple_jobflow_statuses) { Elasticity::JobFlowStatus.from_members_nodeset(members_nodeset) }
157
-
158
- describe '.from_xml' do
159
- it 'should return a JobFlowStatus with the appropriate fields initialized' do
160
- single_jobflow_status.name.should == 'Hive Job 1'
161
- single_jobflow_status.jobflow_id.should == 'j-p'
162
- single_jobflow_status.state.should == 'TERMINATED'
163
- single_jobflow_status.steps.map(&:name).should == ['Elasticity - Install Hive', 'Run Hive Script']
164
- single_jobflow_status.steps.map(&:state).should == %w(FAILED PENDING)
165
- single_jobflow_status.created_at.should == Time.parse('2011-10-04T21:49:16Z')
166
- single_jobflow_status.started_at.should == Time.parse('2011-10-04T21:49:17Z')
167
- single_jobflow_status.ready_at.should == Time.parse('2011-10-04T21:49:18Z')
168
- single_jobflow_status.ended_at.should == Time.parse('2011-10-05T21:49:18Z')
169
- single_jobflow_status.duration.should == 1440
170
- single_jobflow_status.master_instance_id.should == 'i-15a4417c'
171
- single_jobflow_status.master_instance_type.should == 'm1.small'
172
- single_jobflow_status.slave_instance_type.should == 'm1.small'
173
- single_jobflow_status.instance_count.should == '4'
174
- single_jobflow_status.last_state_change_reason.should == 'Steps completed with errors'
175
- single_jobflow_status.master_public_dns_name.should == 'ec2-107-22-77-99.compute-1.amazonaws.com'
176
- single_jobflow_status.normalized_instance_hours.should == '0'
177
- end
178
-
179
- context 'when the jobflow never started' do
180
- let(:started_at) {}
181
- it 'should have a nil duration' do
182
- single_jobflow_status.started_at.should == nil
183
- single_jobflow_status.duration.should == nil
184
- end
185
- end
186
- end
187
-
188
- describe '.from_jobflow_statuses_nodeset' do
189
- it 'should return JobFlowStatuses with the appropriate fields initialized' do
190
- multiple_jobflow_statuses.map(&:name).should == ['Hive Job 1', 'Hive Job 2']
191
- multiple_jobflow_statuses.map(&:jobflow_id).should == %w(j-p j-h)
192
- multiple_jobflow_statuses.map(&:state).should == %w(TERMINATED TERMINATED)
193
- multiple_jobflow_statuses.map(&:created_at).should == [Time.parse('2011-10-04T21:49:16Z'), Time.parse('2011-10-04T22:49:16Z')]
194
- multiple_jobflow_statuses.map(&:started_at).should == [Time.parse('2011-10-04T21:49:17Z'), nil]
195
- multiple_jobflow_statuses.map(&:ready_at).should == [Time.parse('2011-10-04T21:49:18Z'), nil]
196
- multiple_jobflow_statuses.map(&:ended_at).should == [Time.parse('2011-10-05T21:49:18Z'), nil]
197
- multiple_jobflow_statuses.map(&:duration).should == [1440, nil]
198
- multiple_jobflow_statuses.map(&:master_instance_id).should == ['i-15a4417c', nil]
199
- multiple_jobflow_statuses.map(&:master_instance_type).should == %w(m1.small c1.medium)
200
- multiple_jobflow_statuses.map(&:slave_instance_type).should == %w(m1.small c1.medium)
201
- multiple_jobflow_statuses.map(&:instance_count).should == %w(4 2)
202
- multiple_jobflow_statuses.map(&:last_state_change_reason).should == ['Steps completed with errors', 'Steps completed']
203
- multiple_jobflow_statuses.map(&:master_public_dns_name).should == ['ec2-107-22-77-99.compute-1.amazonaws.com', nil]
204
- multiple_jobflow_statuses.map(&:normalized_instance_hours).should == %w(0 4)
205
- end
206
- end
207
-
208
- describe '#installed_steps' do
209
-
210
- context 'when nothing has been installed' do
211
- let(:setup_config) { }
212
- it 'should be empty' do
213
- single_jobflow_status.installed_steps.should == []
214
- end
215
- end
216
-
217
- context 'when Hive has been installed by Elasticity' do
218
- let(:setup_config) { hive_setup_config }
219
- it 'should include HiveStep' do
220
- single_jobflow_status.installed_steps.should == [Elasticity::HiveStep]
221
- end
222
- end
223
-
224
- context 'when Pig has been installed by Elasticity' do
225
- let(:setup_config) { pig_setup_config }
226
- it 'should include PigStep' do
227
- single_jobflow_status.installed_steps.should == [Elasticity::PigStep]
228
- end
229
- end
230
-
231
- context 'when more than one step has been installed by Elasticity' do
232
- let(:setup_config) { hive_setup_config + pig_setup_config }
233
- it 'should include all of them' do
234
- single_jobflow_status.installed_steps.should =~ [Elasticity::HiveStep, Elasticity::PigStep]
235
- end
236
- end
237
- end
238
-
239
- describe '#active?' do
240
-
241
- context 'when the jobflow status is terminal' do
242
- %w{COMPLETED TERMINATED FAILED _}.each do |status|
243
- context "when the jobflow is #{status}" do
244
- it 'is not active' do
245
- single_jobflow_status.state = status
246
- single_jobflow_status.active?.should be_false
247
- end
248
- end
249
- end
250
- end
251
-
252
- context 'when the jobflow status is not terminal' do
253
- %w{RUNNING STARTING BOOTSTRAPPING WAITING SHUTTING_DOWN}.each do |status|
254
- context "when the jobflow is #{status}" do
255
- it 'is active' do
256
- single_jobflow_status.state = status
257
- single_jobflow_status.active?.should be_true
258
- end
259
- end
260
- end
261
- end
262
-
263
- end
264
-
265
- end
@@ -1,80 +0,0 @@
1
- describe Elasticity::JobFlowStatusStep do
2
-
3
- before do
4
- describe_jobflows_xml = <<-JOBFLOWS
5
- <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
6
- <DescribeJobFlowsResult>
7
- <JobFlows>
8
- <member>
9
- <JobFlowId>j-p</JobFlowId>
10
- <Name>Pig Job</Name>
11
- <ExecutionStatusDetail>
12
- <State>TERMINATED</State>
13
- </ExecutionStatusDetail>
14
- <Steps>
15
- <member>
16
- <StepConfig>
17
- <Name>Setup Hive</Name>
18
- </StepConfig>
19
- <ExecutionStatusDetail>
20
- <State>FAILED</State>
21
- <CreationDateTime>
22
- 2011-10-04T21:46:16Z
23
- </CreationDateTime>
24
- <StartDateTime>
25
- 2011-10-04T21:49:16Z
26
- </StartDateTime>
27
- <EndDateTime>
28
- 2011-10-04T21:51:16Z
29
- </EndDateTime>
30
- </ExecutionStatusDetail>
31
- </member>
32
- <member>
33
- <StepConfig>
34
- <Name>Run Hive Script</Name>
35
- </StepConfig>
36
- <ExecutionStatusDetail>
37
- <State>PENDING</State>
38
- <CreationDateTime>
39
- </CreationDateTime>
40
- <StartDateTime>
41
- </StartDateTime>
42
- <EndDateTime>
43
- </EndDateTime>
44
- </ExecutionStatusDetail>
45
- </member>
46
- </Steps>
47
- </member>
48
- </JobFlows>
49
- </DescribeJobFlowsResult>
50
- </DescribeJobFlowsResponse>
51
- JOBFLOWS
52
- describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
53
- describe_jobflows_document.remove_namespaces!
54
- @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member')
55
- end
56
-
57
- describe ".from_xml" do
58
- it "should return a JobFlowStep with the appropriate fields initialized" do
59
- jobflow_step = Elasticity::JobFlowStatusStep.from_member_element(@members_nodeset[0])
60
- jobflow_step.name.should == "Setup Hive"
61
- jobflow_step.state.should == "FAILED"
62
- jobflow_step.created_at.should == Time.parse("2011-10-04T21:46:16Z")
63
- jobflow_step.started_at.should == Time.parse("2011-10-04T21:49:16Z")
64
- jobflow_step.ended_at.should == Time.parse("2011-10-04T21:51:16Z")
65
- end
66
- end
67
-
68
- describe ".from_steps_nodeset" do
69
- it "should return JobFlowSteps with the appropriate fields initialized" do
70
- jobflow_steps = Elasticity::JobFlowStatusStep.from_members_nodeset(@members_nodeset)
71
- jobflow_steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
72
- jobflow_steps.map(&:state).should == ["FAILED", "PENDING"]
73
- jobflow_steps.map(&:created_at).should == [Time.parse("2011-10-04T21:46:16Z"), nil]
74
- jobflow_steps.map(&:started_at).should == [Time.parse("2011-10-04T21:49:16Z"), nil]
75
- jobflow_steps.map(&:ended_at).should == [Time.parse("2011-10-04T21:51:16Z"), nil]
76
- end
77
- end
78
-
79
-
80
- end