elasticity 1.5 → 2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. data/.rspec +2 -1
  2. data/.rvmrc +1 -1
  3. data/HISTORY.md +47 -24
  4. data/LICENSE +1 -1
  5. data/README.md +165 -317
  6. data/Rakefile +4 -3
  7. data/elasticity.gemspec +3 -5
  8. data/lib/elasticity.rb +10 -5
  9. data/lib/elasticity/aws_request.rb +81 -20
  10. data/lib/elasticity/custom_jar_step.rb +33 -0
  11. data/lib/elasticity/emr.rb +45 -117
  12. data/lib/elasticity/hadoop_bootstrap_action.rb +27 -0
  13. data/lib/elasticity/hive_step.rb +57 -0
  14. data/lib/elasticity/job_flow.rb +109 -39
  15. data/lib/elasticity/job_flow_status.rb +53 -0
  16. data/lib/elasticity/job_flow_status_step.rb +35 -0
  17. data/lib/elasticity/job_flow_step.rb +17 -25
  18. data/lib/elasticity/pig_step.rb +82 -0
  19. data/lib/elasticity/support/conditional_raise.rb +23 -0
  20. data/lib/elasticity/version.rb +1 -1
  21. data/spec/lib/elasticity/aws_request_spec.rb +159 -51
  22. data/spec/lib/elasticity/custom_jar_step_spec.rb +59 -0
  23. data/spec/lib/elasticity/emr_spec.rb +231 -762
  24. data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +26 -0
  25. data/spec/lib/elasticity/hive_step_spec.rb +74 -0
  26. data/spec/lib/elasticity/job_flow_integration_spec.rb +197 -0
  27. data/spec/lib/elasticity/job_flow_spec.rb +369 -138
  28. data/spec/lib/elasticity/job_flow_status_spec.rb +147 -0
  29. data/spec/lib/elasticity/job_flow_status_step_spec.rb +73 -0
  30. data/spec/lib/elasticity/job_flow_step_spec.rb +26 -64
  31. data/spec/lib/elasticity/pig_step_spec.rb +104 -0
  32. data/spec/lib/elasticity/support/conditional_raise_spec.rb +35 -0
  33. data/spec/spec_helper.rb +1 -50
  34. data/spec/support/be_a_hash_including_matcher.rb +35 -0
  35. metadata +101 -119
  36. data/.autotest +0 -2
  37. data/lib/elasticity/custom_jar_job.rb +0 -38
  38. data/lib/elasticity/hive_job.rb +0 -69
  39. data/lib/elasticity/pig_job.rb +0 -109
  40. data/lib/elasticity/simple_job.rb +0 -51
  41. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +0 -44
  42. data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +0 -41
  43. data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +0 -266
  44. data/spec/fixtures/vcr_cassettes/custom_jar_job/cloudburst.yml +0 -41
  45. data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +0 -75
  46. data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +0 -38
  47. data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +0 -41
  48. data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +0 -38
  49. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +0 -41
  50. data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +0 -41
  51. data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +0 -41
  52. data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +0 -41
  53. data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +0 -38
  54. data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +0 -38
  55. data/spec/lib/elasticity/custom_jar_job_spec.rb +0 -118
  56. data/spec/lib/elasticity/hive_job_spec.rb +0 -90
  57. data/spec/lib/elasticity/pig_job_spec.rb +0 -226
@@ -0,0 +1,147 @@
1
+ describe Elasticity::JobFlowStatus do
2
+
3
+ before do
4
+ describe_jobflows_xml = <<-JOBFLOWS
5
+ <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
6
+ <DescribeJobFlowsResult>
7
+ <JobFlows>
8
+ <member>
9
+ <JobFlowId>j-p</JobFlowId>
10
+ <Name>Pig Job</Name>
11
+ <ExecutionStatusDetail>
12
+ <CreationDateTime>
13
+ 2011-10-04T21:49:16Z
14
+ </CreationDateTime>
15
+ <LastStateChangeReason>
16
+ Steps completed with errors
17
+ </LastStateChangeReason>
18
+ <StartDateTime>
19
+ 2011-10-04T21:49:17Z
20
+ </StartDateTime>
21
+ <ReadyDateTime>
22
+ 2011-10-04T21:49:18Z
23
+ </ReadyDateTime>
24
+ <State>TERMINATED</State>
25
+ </ExecutionStatusDetail>
26
+ <Steps>
27
+ <member>
28
+ <StepConfig>
29
+ <Name>Setup Hive</Name>
30
+ </StepConfig>
31
+ <ExecutionStatusDetail>
32
+ <State>FAILED</State>
33
+ </ExecutionStatusDetail>
34
+ </member>
35
+ <member>
36
+ <StepConfig>
37
+ <Name>Run Hive Script</Name>
38
+ </StepConfig>
39
+ <ExecutionStatusDetail>
40
+ <State>PENDING</State>
41
+ </ExecutionStatusDetail>
42
+ </member>
43
+ </Steps>
44
+ <Instances>
45
+ <Placement>
46
+ <AvailabilityZone>
47
+ eu-west-1a
48
+ </AvailabilityZone>
49
+ </Placement>
50
+ <SlaveInstanceType>
51
+ m1.small
52
+ </SlaveInstanceType>
53
+ <MasterInstanceType>
54
+ m1.small
55
+ </MasterInstanceType>
56
+ <Ec2KeyName>
57
+ myec2keyname
58
+ </Ec2KeyName>
59
+ <InstanceCount>
60
+ 4
61
+ </InstanceCount>
62
+ </Instances>
63
+ </member>
64
+ <member>
65
+ <JobFlowId>j-h</JobFlowId>
66
+ <Name>Hive Job</Name>
67
+ <ExecutionStatusDetail>
68
+ <CreationDateTime>
69
+ 2011-10-04T22:49:16Z
70
+ </CreationDateTime>
71
+ <StartDateTime>
72
+
73
+ </StartDateTime>
74
+ <ReadyDateTime>
75
+
76
+ </ReadyDateTime>
77
+ <State>
78
+ TERMINATED
79
+ </State>
80
+ <LastStateChangeReason>
81
+ Steps completed
82
+ </LastStateChangeReason>
83
+ </ExecutionStatusDetail>
84
+ <Instances>
85
+ <Placement>
86
+ <AvailabilityZone>
87
+ eu-west-1b
88
+ </AvailabilityZone>
89
+ </Placement>
90
+ <SlaveInstanceType>
91
+ c1.medium
92
+ </SlaveInstanceType>
93
+ <MasterInstanceType>
94
+ c1.medium
95
+ </MasterInstanceType>
96
+ <Ec2KeyName>
97
+ myec2keyname
98
+ </Ec2KeyName>
99
+ <InstanceCount>
100
+ 2
101
+ </InstanceCount>
102
+ </Instances>
103
+ </member>
104
+ </JobFlows>
105
+ </DescribeJobFlowsResult>
106
+ </DescribeJobFlowsResponse>
107
+ JOBFLOWS
108
+ describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
109
+ describe_jobflows_document.remove_namespaces!
110
+ @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member')
111
+ end
112
+
113
+ describe ".from_xml" do
114
+ it "should return a JobFlow with the appropriate fields initialized" do
115
+ jobflow = Elasticity::JobFlowStatus.from_member_element(@members_nodeset[0])
116
+ jobflow.name.should == "Pig Job"
117
+ jobflow.jobflow_id.should == "j-p"
118
+ jobflow.state.should == "TERMINATED"
119
+ jobflow.steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
120
+ jobflow.steps.map(&:state).should == ["FAILED", "PENDING"]
121
+ jobflow.created_at.should == Time.parse("2011-10-04T21:49:16Z")
122
+ jobflow.started_at.should == Time.parse("2011-10-04T21:49:17Z")
123
+ jobflow.ready_at.should == Time.parse("2011-10-04T21:49:18Z")
124
+ jobflow.master_instance_type.should == "m1.small"
125
+ jobflow.slave_instance_type.should == "m1.small"
126
+ jobflow.instance_count.should == "4"
127
+ jobflow.last_state_change_reason.should == "Steps completed with errors"
128
+ end
129
+ end
130
+
131
+ describe ".from_jobflows_nodeset" do
132
+ it "should return JobFlows with the appropriate fields initialized" do
133
+ jobflow = Elasticity::JobFlowStatus.from_members_nodeset(@members_nodeset)
134
+ jobflow.map(&:name).should == ["Pig Job", "Hive Job"]
135
+ jobflow.map(&:jobflow_id).should == ["j-p", "j-h"]
136
+ jobflow.map(&:state).should == ["TERMINATED", "TERMINATED"]
137
+ jobflow.map(&:created_at).should == [Time.parse("2011-10-04T21:49:16Z"), Time.parse("2011-10-04T22:49:16Z")]
138
+ jobflow.map(&:started_at).should == [Time.parse("2011-10-04T21:49:17Z"), nil]
139
+ jobflow.map(&:ready_at).should == [Time.parse("2011-10-04T21:49:18Z"), nil]
140
+ jobflow.map(&:master_instance_type).should == ["m1.small","c1.medium"]
141
+ jobflow.map(&:slave_instance_type).should == ["m1.small", "c1.medium"]
142
+ jobflow.map(&:instance_count).should == ["4","2"]
143
+ jobflow.map(&:last_state_change_reason).should == ["Steps completed with errors", "Steps completed"]
144
+ end
145
+ end
146
+
147
+ end
@@ -0,0 +1,73 @@
1
+ describe Elasticity::JobFlowStatusStep do
2
+
3
+ before do
4
+ describe_jobflows_xml = <<-JOBFLOWS
5
+ <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
6
+ <DescribeJobFlowsResult>
7
+ <JobFlows>
8
+ <member>
9
+ <JobFlowId>j-p</JobFlowId>
10
+ <Name>Pig Job</Name>
11
+ <ExecutionStatusDetail>
12
+ <State>TERMINATED</State>
13
+ </ExecutionStatusDetail>
14
+ <Steps>
15
+ <member>
16
+ <StepConfig>
17
+ <Name>Setup Hive</Name>
18
+ </StepConfig>
19
+ <ExecutionStatusDetail>
20
+ <State>FAILED</State>
21
+ <StartDateTime>
22
+ 2011-10-04T21:49:16Z
23
+ </StartDateTime>
24
+ <EndDateTime>
25
+ 2011-10-04T21:51:16Z
26
+ </EndDateTime>
27
+ </ExecutionStatusDetail>
28
+ </member>
29
+ <member>
30
+ <StepConfig>
31
+ <Name>Run Hive Script</Name>
32
+ </StepConfig>
33
+ <ExecutionStatusDetail>
34
+ <State>PENDING</State>
35
+ <StartDateTime>
36
+ </StartDateTime>
37
+ <EndDateTime>
38
+ </EndDateTime>
39
+ </ExecutionStatusDetail>
40
+ </member>
41
+ </Steps>
42
+ </member>
43
+ </JobFlows>
44
+ </DescribeJobFlowsResult>
45
+ </DescribeJobFlowsResponse>
46
+ JOBFLOWS
47
+ describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
48
+ describe_jobflows_document.remove_namespaces!
49
+ @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member')
50
+ end
51
+
52
+ describe ".from_xml" do
53
+ it "should return a JobFlowStep with the appropriate fields initialized" do
54
+ jobflow_step = Elasticity::JobFlowStatusStep.from_member_element(@members_nodeset[0])
55
+ jobflow_step.name.should == "Setup Hive"
56
+ jobflow_step.state.should == "FAILED"
57
+ jobflow_step.started_at.should == Time.parse("2011-10-04T21:49:16Z")
58
+ jobflow_step.ended_at.should == Time.parse("2011-10-04T21:51:16Z")
59
+ end
60
+ end
61
+
62
+ describe ".from_steps_nodeset" do
63
+ it "should return JobFlowSteps with the appropriate fields initialized" do
64
+ jobflow_steps = Elasticity::JobFlowStatusStep.from_members_nodeset(@members_nodeset)
65
+ jobflow_steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
66
+ jobflow_steps.map(&:state).should == ["FAILED", "PENDING"]
67
+ jobflow_steps.map(&:started_at).should == [Time.parse("2011-10-04T21:49:16Z"), nil]
68
+ jobflow_steps.map(&:ended_at).should == [Time.parse("2011-10-04T21:51:16Z"), nil]
69
+ end
70
+ end
71
+
72
+
73
+ end
@@ -1,75 +1,37 @@
1
- require 'spec_helper'
2
-
3
1
  describe Elasticity::JobFlowStep do
4
2
 
5
- before do
6
- describe_jobflows_xml = <<-JOBFLOWS
7
- <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
8
- <DescribeJobFlowsResult>
9
- <JobFlows>
10
- <member>
11
- <JobFlowId>j-p</JobFlowId>
12
- <Name>Pig Job</Name>
13
- <ExecutionStatusDetail>
14
- <State>TERMINATED</State>
15
- </ExecutionStatusDetail>
16
- <Steps>
17
- <member>
18
- <StepConfig>
19
- <Name>Setup Hive</Name>
20
- </StepConfig>
21
- <ExecutionStatusDetail>
22
- <State>FAILED</State>
23
- <StartDateTime>
24
- 2011-10-04T21:49:16Z
25
- </StartDateTime>
26
- <EndDateTime>
27
- 2011-10-04T21:51:16Z
28
- </EndDateTime>
29
- </ExecutionStatusDetail>
30
- </member>
31
- <member>
32
- <StepConfig>
33
- <Name>Run Hive Script</Name>
34
- </StepConfig>
35
- <ExecutionStatusDetail>
36
- <State>PENDING</State>
37
- <StartDateTime>
38
- </StartDateTime>
39
- <EndDateTime>
40
- </EndDateTime>
41
- </ExecutionStatusDetail>
42
- </member>
43
- </Steps>
44
- </member>
45
- </JobFlows>
46
- </DescribeJobFlowsResult>
47
- </DescribeJobFlowsResponse>
48
- JOBFLOWS
49
- describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
50
- describe_jobflows_document.remove_namespaces!
51
- @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member')
3
+ class FakeStep
4
+ include Elasticity::JobFlowStep
52
5
  end
53
6
 
54
- describe ".from_xml" do
55
- it "should return a JobFlowStep with the appropriate fields initialized" do
56
- jobflow_step = Elasticity::JobFlowStep.from_member_element(@members_nodeset[0])
57
- jobflow_step.name.should == "Setup Hive"
58
- jobflow_step.state.should == "FAILED"
59
- jobflow_step.started_at.should == Time.parse("2011-10-04T21:49:16Z")
60
- jobflow_step.ended_at.should == Time.parse("2011-10-04T21:51:16Z")
7
+ subject { FakeStep.new }
8
+
9
+ describe '#to_aws_step' do
10
+
11
+ it 'should take a job flow as context' do
12
+ subject.should respond_to(:to_aws_step).with(1).argument
13
+ end
14
+
15
+ it 'should raise an error by default' do
16
+ expect {
17
+ subject.to_aws_step(nil)
18
+ }.to raise_error(RuntimeError, '#to_aws_step is required to be defined on all job flow steps.')
61
19
  end
20
+
62
21
  end
63
22
 
64
- describe ".from_steps_nodeset" do
65
- it "should return JobFlowSteps with the appropriate fields initialized" do
66
- jobflow_steps = Elasticity::JobFlowStep.from_members_nodeset(@members_nodeset)
67
- jobflow_steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
68
- jobflow_steps.map(&:state).should == ["FAILED", "PENDING"]
69
- jobflow_steps.map(&:started_at).should == [Time.parse("2011-10-04T21:49:16Z"), nil]
70
- jobflow_steps.map(&:ended_at).should == [Time.parse("2011-10-04T21:51:16Z"), nil]
23
+ describe '.requires_installation?' do
24
+ it 'should be false by default' do
25
+ FakeStep.requires_installation?.should be_false
71
26
  end
72
27
  end
73
28
 
29
+ describe '.aws_installation_step' do
30
+ it 'should raise an error by default' do
31
+ expect {
32
+ FakeStep.aws_installation_step
33
+ }.to raise_error(RuntimeError, '.aws_installation_step is required to be defined when a step requires installation (e.g. Pig, Hive).')
34
+ end
35
+ end
74
36
 
75
- end
37
+ end
@@ -0,0 +1,104 @@
1
+ describe Elasticity::PigStep do
2
+
3
+ subject do
4
+ Elasticity::PigStep.new('script.pig')
5
+ end
6
+
7
+ it { should be_a Elasticity::JobFlowStep }
8
+
9
+ its(:name) { should == 'Elasticity Pig Step (script.pig)' }
10
+ its(:script) { should == 'script.pig' }
11
+ its(:variables) { should == { } }
12
+ its(:action_on_failure) { should == 'TERMINATE_JOB_FLOW' }
13
+
14
+ describe '#to_aws_step' do
15
+
16
+ it 'should convert to aws step format' do
17
+ step = subject.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
18
+ step[:name].should == 'Elasticity Pig Step (script.pig)'
19
+ step[:action_on_failure].should == 'TERMINATE_JOB_FLOW'
20
+ step[:hadoop_jar_step][:jar].should == 's3://elasticmapreduce/libs/script-runner/script-runner.jar'
21
+ step[:hadoop_jar_step][:args].should start_with([
22
+ 's3://elasticmapreduce/libs/pig/pig-script',
23
+ '--run-pig-script',
24
+ '--args',
25
+ '-p'
26
+ ])
27
+ step[:hadoop_jar_step][:args][4] =~ /^E_PARALLELS=\d+$/
28
+ end
29
+
30
+ describe 'E_PARALLELS' do
31
+ it 'should include the correct value of E_PARALLELS' do
32
+ job_flow = Elasticity::JobFlow.new('access', 'secret')
33
+ job_flow.instance_count = 8
34
+ {
35
+ '_' => 7,
36
+ 'm1.small' => 7,
37
+ 'm1.large' => 13,
38
+ 'c1.medium' => 13,
39
+ 'c1.xlarge' => 26
40
+ }.each do |instance_type, value|
41
+ job_flow.slave_instance_type = instance_type
42
+ step = subject.to_aws_step(job_flow)
43
+ step[:hadoop_jar_step][:args][4].should == "E_PARALLELS=#{value}"
44
+ end
45
+ end
46
+ end
47
+
48
+ context 'when variables are not provided' do
49
+ let(:ps_with_no_variables) { Elasticity::PigStep.new('script.pig') }
50
+
51
+ it 'should convert to aws step format' do
52
+ step = ps_with_no_variables.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
53
+ step[:hadoop_jar_step][:args][5].should == 'script.pig'
54
+ end
55
+ end
56
+
57
+ context 'when variables are provided' do
58
+ let(:ps_with_variables) do
59
+ Elasticity::PigStep.new('script.pig').tap do |ps|
60
+ ps.variables = {
61
+ 'VAR1' => 'VALUE1',
62
+ 'VAR2' => 'VALUE2'
63
+ }
64
+ end
65
+ end
66
+
67
+ it 'should convert to aws step format' do
68
+ step = ps_with_variables.to_aws_step(Elasticity::JobFlow.new('access', 'secret'))
69
+ step[:hadoop_jar_step][:args][3..9].should == [
70
+ '-p', 'VAR1=VALUE1',
71
+ '-p', 'VAR2=VALUE2',
72
+ '-p', 'E_PARALLELS=1',
73
+ 'script.pig'
74
+ ]
75
+ end
76
+ end
77
+
78
+ end
79
+
80
+ describe '.requires_installation?' do
81
+ it 'should require installation' do
82
+ Elasticity::PigStep.requires_installation?.should be_true
83
+ end
84
+ end
85
+
86
+ describe '.aws_installation_step' do
87
+ it 'should provide a means to install Pig' do
88
+ Elasticity::PigStep.aws_installation_step.should == {
89
+ :action_on_failure => 'TERMINATE_JOB_FLOW',
90
+ :hadoop_jar_step => {
91
+ :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
92
+ :args => [
93
+ 's3://elasticmapreduce/libs/pig/pig-script',
94
+ '--base-path',
95
+ 's3://elasticmapreduce/libs/pig/',
96
+ '--install-pig'
97
+ ],
98
+ },
99
+ :name => 'Elasticity - Install Pig'
100
+ }
101
+ end
102
+ end
103
+
104
+ end
@@ -0,0 +1,35 @@
1
+ describe 'Conditional Raising' do
2
+
3
+ describe '#raise_if' do
4
+
5
+ it 'should raise the specified error if the condition is true' do
6
+ expect {
7
+ raise_if true, RuntimeError, 'MESSAGE'
8
+ }.to raise_error(RuntimeError, 'MESSAGE')
9
+ end
10
+
11
+ it 'should not raise the specified error if the condition is false' do
12
+ expect {
13
+ raise_if false, RuntimeError, 'MESSAGE'
14
+ }.to_not raise_error # bare form: a negated raise_error(Class, msg) still passes when some other error is raised
15
+ end
16
+
17
+ end
18
+
19
+ describe '#raise_unless' do
20
+
21
+ it 'should not raise the specified error unless the condition is true' do
22
+ expect {
23
+ raise_unless true, RuntimeError, 'MESSAGE'
24
+ }.to_not raise_error(RuntimeError, 'MESSAGE')
25
+ end
26
+
27
+ it 'should raise the specified error unless the condition is false' do
28
+ expect {
29
+ raise_unless false, RuntimeError, 'MESSAGE'
30
+ }.to raise_error(RuntimeError, 'MESSAGE')
31
+ end
32
+
33
+ end
34
+
35
+ end