elasticity 1.5 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. data/.rspec +2 -1
  2. data/.rvmrc +1 -1
  3. data/HISTORY.md +47 -24
  4. data/LICENSE +1 -1
  5. data/README.md +165 -317
  6. data/Rakefile +4 -3
  7. data/elasticity.gemspec +3 -5
  8. data/lib/elasticity.rb +10 -5
  9. data/lib/elasticity/aws_request.rb +81 -20
  10. data/lib/elasticity/custom_jar_step.rb +33 -0
  11. data/lib/elasticity/emr.rb +45 -117
  12. data/lib/elasticity/hadoop_bootstrap_action.rb +27 -0
  13. data/lib/elasticity/hive_step.rb +57 -0
  14. data/lib/elasticity/job_flow.rb +109 -39
  15. data/lib/elasticity/job_flow_status.rb +53 -0
  16. data/lib/elasticity/job_flow_status_step.rb +35 -0
  17. data/lib/elasticity/job_flow_step.rb +17 -25
  18. data/lib/elasticity/pig_step.rb +82 -0
  19. data/lib/elasticity/support/conditional_raise.rb +23 -0
  20. data/lib/elasticity/version.rb +1 -1
  21. data/spec/lib/elasticity/aws_request_spec.rb +159 -51
  22. data/spec/lib/elasticity/custom_jar_step_spec.rb +59 -0
  23. data/spec/lib/elasticity/emr_spec.rb +231 -762
  24. data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +26 -0
  25. data/spec/lib/elasticity/hive_step_spec.rb +74 -0
  26. data/spec/lib/elasticity/job_flow_integration_spec.rb +197 -0
  27. data/spec/lib/elasticity/job_flow_spec.rb +369 -138
  28. data/spec/lib/elasticity/job_flow_status_spec.rb +147 -0
  29. data/spec/lib/elasticity/job_flow_status_step_spec.rb +73 -0
  30. data/spec/lib/elasticity/job_flow_step_spec.rb +26 -64
  31. data/spec/lib/elasticity/pig_step_spec.rb +104 -0
  32. data/spec/lib/elasticity/support/conditional_raise_spec.rb +35 -0
  33. data/spec/spec_helper.rb +1 -50
  34. data/spec/support/be_a_hash_including_matcher.rb +35 -0
  35. metadata +101 -119
  36. data/.autotest +0 -2
  37. data/lib/elasticity/custom_jar_job.rb +0 -38
  38. data/lib/elasticity/hive_job.rb +0 -69
  39. data/lib/elasticity/pig_job.rb +0 -109
  40. data/lib/elasticity/simple_job.rb +0 -51
  41. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +0 -44
  42. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +0 -41
  43. data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +0 -266
  44. data/spec/fixtures/vcr_cassettes/custom_jar_job/cloudburst.yml +0 -41
  45. data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +0 -75
  46. data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +0 -38
  47. data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +0 -41
  48. data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +0 -38
  49. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +0 -41
  50. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +0 -41
  51. data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +0 -41
  52. data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +0 -41
  53. data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +0 -38
  54. data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +0 -38
  55. data/spec/lib/elasticity/custom_jar_job_spec.rb +0 -118
  56. data/spec/lib/elasticity/hive_job_spec.rb +0 -90
  57. data/spec/lib/elasticity/pig_job_spec.rb +0 -226
@@ -0,0 +1,147 @@
1
+ describe Elasticity::JobFlowStatus do
2
+
3
+ before do
4
+ describe_jobflows_xml = <<-JOBFLOWS
5
+ <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
6
+ <DescribeJobFlowsResult>
7
+ <JobFlows>
8
+ <member>
9
+ <JobFlowId>j-p</JobFlowId>
10
+ <Name>Pig Job</Name>
11
+ <ExecutionStatusDetail>
12
+ <CreationDateTime>
13
+ 2011-10-04T21:49:16Z
14
+ </CreationDateTime>
15
+ <LastStateChangeReason>
16
+ Steps completed with errors
17
+ </LastStateChangeReason>
18
+ <StartDateTime>
19
+ 2011-10-04T21:49:17Z
20
+ </StartDateTime>
21
+ <ReadyDateTime>
22
+ 2011-10-04T21:49:18Z
23
+ </ReadyDateTime>
24
+ <State>TERMINATED</State>
25
+ </ExecutionStatusDetail>
26
+ <Steps>
27
+ <member>
28
+ <StepConfig>
29
+ <Name>Setup Hive</Name>
30
+ </StepConfig>
31
+ <ExecutionStatusDetail>
32
+ <State>FAILED</State>
33
+ </ExecutionStatusDetail>
34
+ </member>
35
+ <member>
36
+ <StepConfig>
37
+ <Name>Run Hive Script</Name>
38
+ </StepConfig>
39
+ <ExecutionStatusDetail>
40
+ <State>PENDING</State>
41
+ </ExecutionStatusDetail>
42
+ </member>
43
+ </Steps>
44
+ <Instances>
45
+ <Placement>
46
+ <AvailabilityZone>
47
+ eu-west-1a
48
+ </AvailabilityZone>
49
+ </Placement>
50
+ <SlaveInstanceType>
51
+ m1.small
52
+ </SlaveInstanceType>
53
+ <MasterInstanceType>
54
+ m1.small
55
+ </MasterInstanceType>
56
+ <Ec2KeyName>
57
+ myec2keyname
58
+ </Ec2KeyName>
59
+ <InstanceCount>
60
+ 4
61
+ </InstanceCount>
62
+ </Instances>
63
+ </member>
64
+ <member>
65
+ <JobFlowId>j-h</JobFlowId>
66
+ <Name>Hive Job</Name>
67
+ <ExecutionStatusDetail>
68
+ <CreationDateTime>
69
+ 2011-10-04T22:49:16Z
70
+ </CreationDateTime>
71
+ <StartDateTime>
72
+
73
+ </StartDateTime>
74
+ <ReadyDateTime>
75
+
76
+ </ReadyDateTime>
77
+ <State>
78
+ TERMINATED
79
+ </State>
80
+ <LastStateChangeReason>
81
+ Steps completed
82
+ </LastStateChangeReason>
83
+ </ExecutionStatusDetail>
84
+ <Instances>
85
+ <Placement>
86
+ <AvailabilityZone>
87
+ eu-west-1b
88
+ </AvailabilityZone>
89
+ </Placement>
90
+ <SlaveInstanceType>
91
+ c1.medium
92
+ </SlaveInstanceType>
93
+ <MasterInstanceType>
94
+ c1.medium
95
+ </MasterInstanceType>
96
+ <Ec2KeyName>
97
+ myec2keyname
98
+ </Ec2KeyName>
99
+ <InstanceCount>
100
+ 2
101
+ </InstanceCount>
102
+ </Instances>
103
+ </member>
104
+ </JobFlows>
105
+ </DescribeJobFlowsResult>
106
+ </DescribeJobFlowsResponse>
107
+ JOBFLOWS
108
+ describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
109
+ describe_jobflows_document.remove_namespaces!
110
+ @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member')
111
+ end
112
+
113
+ describe ".from_xml" do
114
+ it "should return a JobFlow with the appropriate fields initialized" do
115
+ jobflow = Elasticity::JobFlowStatus.from_member_element(@members_nodeset[0])
116
+ jobflow.name.should == "Pig Job"
117
+ jobflow.jobflow_id.should == "j-p"
118
+ jobflow.state.should == "TERMINATED"
119
+ jobflow.steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
120
+ jobflow.steps.map(&:state).should == ["FAILED", "PENDING"]
121
+ jobflow.created_at.should == Time.parse("2011-10-04T21:49:16Z")
122
+ jobflow.started_at.should == Time.parse("2011-10-04T21:49:17Z")
123
+ jobflow.ready_at.should == Time.parse("2011-10-04T21:49:18Z")
124
+ jobflow.master_instance_type.should == "m1.small"
125
+ jobflow.slave_instance_type.should == "m1.small"
126
+ jobflow.instance_count.should == "4"
127
+ jobflow.last_state_change_reason.should == "Steps completed with errors"
128
+ end
129
+ end
130
+
131
+ describe ".from_jobflows_nodeset" do
132
+ it "should return JobFlows with the appropriate fields initialized" do
133
+ jobflow = Elasticity::JobFlowStatus.from_members_nodeset(@members_nodeset)
134
+ jobflow.map(&:name).should == ["Pig Job", "Hive Job"]
135
+ jobflow.map(&:jobflow_id).should == ["j-p", "j-h"]
136
+ jobflow.map(&:state).should == ["TERMINATED", "TERMINATED"]
137
+ jobflow.map(&:created_at).should == [Time.parse("2011-10-04T21:49:16Z"), Time.parse("2011-10-04T22:49:16Z")]
138
+ jobflow.map(&:started_at).should == [Time.parse("2011-10-04T21:49:17Z"), nil]
139
+ jobflow.map(&:ready_at).should == [Time.parse("2011-10-04T21:49:18Z"), nil]
140
+ jobflow.map(&:master_instance_type).should == ["m1.small","c1.medium"]
141
+ jobflow.map(&:slave_instance_type).should == ["m1.small", "c1.medium"]
142
+ jobflow.map(&:instance_count).should == ["4","2"]
143
+ jobflow.map(&:last_state_change_reason).should == ["Steps completed with errors", "Steps completed"]
144
+ end
145
+ end
146
+
147
+ end
@@ -0,0 +1,73 @@
1
+ describe Elasticity::JobFlowStatusStep do
2
+
3
+ before do
4
+ describe_jobflows_xml = <<-JOBFLOWS
5
+ <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
6
+ <DescribeJobFlowsResult>
7
+ <JobFlows>
8
+ <member>
9
+ <JobFlowId>j-p</JobFlowId>
10
+ <Name>Pig Job</Name>
11
+ <ExecutionStatusDetail>
12
+ <State>TERMINATED</State>
13
+ </ExecutionStatusDetail>
14
+ <Steps>
15
+ <member>
16
+ <StepConfig>
17
+ <Name>Setup Hive</Name>
18
+ </StepConfig>
19
+ <ExecutionStatusDetail>
20
+ <State>FAILED</State>
21
+ <StartDateTime>
22
+ 2011-10-04T21:49:16Z
23
+ </StartDateTime>
24
+ <EndDateTime>
25
+ 2011-10-04T21:51:16Z
26
+ </EndDateTime>
27
+ </ExecutionStatusDetail>
28
+ </member>
29
+ <member>
30
+ <StepConfig>
31
+ <Name>Run Hive Script</Name>
32
+ </StepConfig>
33
+ <ExecutionStatusDetail>
34
+ <State>PENDING</State>
35
+ <StartDateTime>
36
+ </StartDateTime>
37
+ <EndDateTime>
38
+ </EndDateTime>
39
+ </ExecutionStatusDetail>
40
+ </member>
41
+ </Steps>
42
+ </member>
43
+ </JobFlows>
44
+ </DescribeJobFlowsResult>
45
+ </DescribeJobFlowsResponse>
46
+ JOBFLOWS
47
+ describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
48
+ describe_jobflows_document.remove_namespaces!
49
+ @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member')
50
+ end
51
+
52
+ describe ".from_xml" do
53
+ it "should return a JobFlowStep with the appropriate fields initialized" do
54
+ jobflow_step = Elasticity::JobFlowStatusStep.from_member_element(@members_nodeset[0])
55
+ jobflow_step.name.should == "Setup Hive"
56
+ jobflow_step.state.should == "FAILED"
57
+ jobflow_step.started_at.should == Time.parse("2011-10-04T21:49:16Z")
58
+ jobflow_step.ended_at.should == Time.parse("2011-10-04T21:51:16Z")
59
+ end
60
+ end
61
+
62
+ describe ".from_steps_nodeset" do
63
+ it "should return JobFlowSteps with the appropriate fields initialized" do
64
+ jobflow_steps = Elasticity::JobFlowStatusStep.from_members_nodeset(@members_nodeset)
65
+ jobflow_steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
66
+ jobflow_steps.map(&:state).should == ["FAILED", "PENDING"]
67
+ jobflow_steps.map(&:started_at).should == [Time.parse("2011-10-04T21:49:16Z"), nil]
68
+ jobflow_steps.map(&:ended_at).should == [Time.parse("2011-10-04T21:51:16Z"), nil]
69
+ end
70
+ end
71
+
72
+
73
+ end
@@ -1,75 +1,37 @@
1
- require 'spec_helper'
2
-
3
1
  describe Elasticity::JobFlowStep do
4
2
 
5
- before do
6
- describe_jobflows_xml = <<-JOBFLOWS
7
- <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
8
- <DescribeJobFlowsResult>
9
- <JobFlows>
10
- <member>
11
- <JobFlowId>j-p</JobFlowId>
12
- <Name>Pig Job</Name>
13
- <ExecutionStatusDetail>
14
- <State>TERMINATED</State>
15
- </ExecutionStatusDetail>
16
- <Steps>
17
- <member>
18
- <StepConfig>
19
- <Name>Setup Hive</Name>
20
- </StepConfig>
21
- <ExecutionStatusDetail>
22
- <State>FAILED</State>
23
- <StartDateTime>
24
- 2011-10-04T21:49:16Z
25
- </StartDateTime>
26
- <EndDateTime>
27
- 2011-10-04T21:51:16Z
28
- </EndDateTime>
29
- </ExecutionStatusDetail>
30
- </member>
31
- <member>
32
- <StepConfig>
33
- <Name>Run Hive Script</Name>
34
- </StepConfig>
35
- <ExecutionStatusDetail>
36
- <State>PENDING</State>
37
- <StartDateTime>
38
- </StartDateTime>
39
- <EndDateTime>
40
- </EndDateTime>
41
- </ExecutionStatusDetail>
42
- </member>
43
- </Steps>
44
- </member>
45
- </JobFlows>
46
- </DescribeJobFlowsResult>
47
- </DescribeJobFlowsResponse>
48
- JOBFLOWS
49
- describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
50
- describe_jobflows_document.remove_namespaces!
51
- @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member')
3
+ class FakeStep
4
+ include Elasticity::JobFlowStep
52
5
  end
53
6
 
54
- describe ".from_xml" do
55
- it "should return a JobFlowStep with the appropriate fields initialized" do
56
- jobflow_step = Elasticity::JobFlowStep.from_member_element(@members_nodeset[0])
57
- jobflow_step.name.should == "Setup Hive"
58
- jobflow_step.state.should == "FAILED"
59
- jobflow_step.started_at.should == Time.parse("2011-10-04T21:49:16Z")
60
- jobflow_step.ended_at.should == Time.parse("2011-10-04T21:51:16Z")
7
+ subject { FakeStep.new }
8
+
9
+ describe '#to_aws_step' do
10
+
11
+ it 'should take a job flow as context' do
12
+ subject.should respond_to(:to_aws_step).with(1).argument
13
+ end
14
+
15
+ it 'should raise an error by default' do
16
+ expect {
17
+ subject.to_aws_step(nil)
18
+ }.to raise_error(RuntimeError, '#to_aws_step is required to be defined on all job flow steps.')
61
19
  end
20
+
62
21
  end
63
22
 
64
- describe ".from_steps_nodeset" do
65
- it "should return JobFlowSteps with the appropriate fields initialized" do
66
- jobflow_steps = Elasticity::JobFlowStep.from_members_nodeset(@members_nodeset)
67
- jobflow_steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
68
- jobflow_steps.map(&:state).should == ["FAILED", "PENDING"]
69
- jobflow_steps.map(&:started_at).should == [Time.parse("2011-10-04T21:49:16Z"), nil]
70
- jobflow_steps.map(&:ended_at).should == [Time.parse("2011-10-04T21:51:16Z"), nil]
23
+ describe '.requires_installation?' do
24
+ it 'should be false by default' do
25
+ FakeStep.requires_installation?.should be_false
71
26
  end
72
27
  end
73
28
 
29
+ describe '.aws_installation_step' do
30
+ it 'should raise an error by default' do
31
+ expect {
32
+ FakeStep.aws_installation_step
33
+ }.to raise_error(RuntimeError, '.aws_installation_step is required to be defined when a step requires installation (e.g. Pig, Hive).')
34
+ end
35
+ end
74
36
 
75
- end
37
+ end
@@ -0,0 +1,104 @@
1
+ describe Elasticity::PigStep do
2
+
3
+ subject do
4
+ Elasticity::PigStep.new('script.pig')
5
+ end
6
+
7
+ it { should be_a Elasticity::JobFlowStep }
8
+
9
+ its(:name) { should == 'Elasticity Pig Step (script.pig)' }
10
+ its(:script) { should == 'script.pig' }
11
+ its(:variables) { should == { } }
12
+ its(:action_on_failure) { should == 'TERMINATE_JOB_FLOW' }
13
+
14
+ describe '#to_aws_step' do
15
+
16
+ it 'should convert to aws step format' do
17
+ step = subject.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
18
+ step[:name].should == 'Elasticity Pig Step (script.pig)'
19
+ step[:action_on_failure].should == 'TERMINATE_JOB_FLOW'
20
+ step[:hadoop_jar_step][:jar].should == 's3://elasticmapreduce/libs/script-runner/script-runner.jar'
21
+ step[:hadoop_jar_step][:args].should start_with([
22
+ 's3://elasticmapreduce/libs/pig/pig-script',
23
+ '--run-pig-script',
24
+ '--args',
25
+ '-p'
26
+ ])
27
+ step[:hadoop_jar_step][:args][4] =~ /^E_PARALLELS=\d+$/
28
+ end
29
+
30
+ describe 'E_PARALLELS' do
31
+ it 'should include the correct value of E_PARALLELS' do
32
+ job_flow = Elasticity::JobFlow.new('access', 'secret')
33
+ job_flow.instance_count = 8
34
+ {
35
+ '_' => 7,
36
+ 'm1.small' => 7,
37
+ 'm1.large' => 13,
38
+ 'c1.medium' => 13,
39
+ 'c1.xlarge' => 26
40
+ }.each do |instance_type, value|
41
+ job_flow.slave_instance_type = instance_type
42
+ step = subject.to_aws_step(job_flow)
43
+ step[:hadoop_jar_step][:args][4].should == "E_PARALLELS=#{value}"
44
+ end
45
+ end
46
+ end
47
+
48
+ context 'when variables are not provided' do
49
+ let(:ps_with_no_variables) { Elasticity::PigStep.new('script.pig') }
50
+
51
+ it 'should convert to aws step format' do
52
+ step = ps_with_no_variables.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
53
+ step[:hadoop_jar_step][:args][5].should == 'script.pig'
54
+ end
55
+ end
56
+
57
+ context 'when variables are provided' do
58
+ let(:ps_with_variables) do
59
+ Elasticity::PigStep.new('script.pig').tap do |ps|
60
+ ps.variables = {
61
+ 'VAR1' => 'VALUE1',
62
+ 'VAR2' => 'VALUE2'
63
+ }
64
+ end
65
+ end
66
+
67
+ it 'should convert to aws step format' do
68
+ step = ps_with_variables.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
69
+ step[:hadoop_jar_step][:args][3..9].should == [
70
+ '-p', 'VAR1=VALUE1',
71
+ '-p', 'VAR2=VALUE2',
72
+ '-p', 'E_PARALLELS=1',
73
+ 'script.pig'
74
+ ]
75
+ end
76
+ end
77
+
78
+ end
79
+
80
+ describe '.requires_installation?' do
81
+ it 'should require installation' do
82
+ Elasticity::PigStep.requires_installation?.should be_true
83
+ end
84
+ end
85
+
86
+ describe '.aws_installation_step' do
87
+ it 'should provide a means to install Pig' do
88
+ Elasticity::PigStep.aws_installation_step.should == {
89
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
90
+ :hadoop_jar_step => {
91
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
92
+ :args => [
93
+ 's3://elasticmapreduce/libs/pig/pig-script',
94
+ '--base-path',
95
+ 's3://elasticmapreduce/libs/pig/',
96
+ '--install-pig'
97
+ ],
98
+ },
99
+ :name => 'Elasticity - Install Pig'
100
+ }
101
+ end
102
+ end
103
+
104
+ end
@@ -0,0 +1,35 @@
1
+ describe 'Conditional Raising' do
2
+
3
+ describe '#raise_if' do
4
+
5
+ it 'should raise the specified error if the condition is true' do
6
+ expect {
7
+ raise_if true, RuntimeError, 'MESSAGE'
8
+ }.to raise_error(RuntimeError, 'MESSAGE')
9
+ end
10
+
11
+ it 'should not raise the specified error if the condition is false' do
12
+ expect {
13
+ raise_if false, RuntimeError, 'MESSAGE'
14
+ }.to_not raise_error(RuntimeError, 'MESSAGE')
15
+ end
16
+
17
+ end
18
+
19
+ describe '#raise_unless' do
20
+
21
+ it 'should not raise the specified error unless the condition is true' do
22
+ expect {
23
+ raise_unless true, RuntimeError, 'MESSAGE'
24
+ }.to_not raise_error(RuntimeError, 'MESSAGE')
25
+ end
26
+
27
+ it 'should raise the specified error unless the condition is false' do
28
+ expect {
29
+ raise_unless false, RuntimeError, 'MESSAGE'
30
+ }.to raise_error(RuntimeError, 'MESSAGE')
31
+ end
32
+
33
+ end
34
+
35
+ end