wakoopa-elasticity 1.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +2 -0
- data/.gitignore +5 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/Gemfile +4 -0
- data/HISTORY.mediawiki +30 -0
- data/LICENSE +202 -0
- data/README.mediawiki +332 -0
- data/Rakefile +11 -0
- data/elasticity.gemspec +29 -0
- data/lib/elasticity.rb +16 -0
- data/lib/elasticity/aws_request.rb +52 -0
- data/lib/elasticity/emr.rb +282 -0
- data/lib/elasticity/hive_job.rb +71 -0
- data/lib/elasticity/job_flow.rb +53 -0
- data/lib/elasticity/job_flow_step.rb +36 -0
- data/lib/elasticity/pig_job.rb +112 -0
- data/lib/elasticity/simple_job.rb +50 -0
- data/lib/elasticity/version.rb +3 -0
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +38 -0
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +35 -0
- data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +252 -0
- data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +69 -0
- data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +32 -0
- data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +35 -0
- data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +32 -0
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +35 -0
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +35 -0
- data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +35 -0
- data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +35 -0
- data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +32 -0
- data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +32 -0
- data/spec/lib/elasticity/aws_request_spec.rb +62 -0
- data/spec/lib/elasticity/emr_spec.rb +794 -0
- data/spec/lib/elasticity/hive_job_spec.rb +96 -0
- data/spec/lib/elasticity/job_flow_spec.rb +139 -0
- data/spec/lib/elasticity/job_flow_step_spec.rb +76 -0
- data/spec/lib/elasticity/pig_job_spec.rb +211 -0
- data/spec/spec_helper.rb +43 -0
- metadata +253 -0
require 'spec_helper'

# Specs for Elasticity::HiveJob — a convenience wrapper that launches an EMR
# job flow with a Hive-install step followed by a run-hive-script step.
describe Elasticity::HiveJob do

  describe ".new" do

    it "should have good defaults" do
      hive = Elasticity::HiveJob.new("access", "secret")
      hive.aws_access_key_id.should == "access"
      hive.aws_secret_access_key.should == "secret"
      hive.ec2_key_name.should == "default"
      hive.hadoop_version.should == "0.20"
      hive.instance_count.should == 2
      hive.master_instance_type.should == "m1.small"
      hive.name.should == "Elasticity Hive Job"
      hive.slave_instance_type.should == "m1.small"
      hive.action_on_failure.should == "TERMINATE_JOB_FLOW"
      hive.log_uri.should == nil
    end

  end

  describe "#run" do

    it "should run the script with the specified variables and return the jobflow_id" do
      # Stub out EMR so we can assert on the exact run_job_flow payload
      # without touching AWS.
      aws = Elasticity::EMR.new("", "")
      aws.should_receive(:run_job_flow).with({
        :name => "Elasticity Hive Job",
        :log_uri => "s3n://slif-test/output/logs",
        :instances => {
          :ec2_key_name => "default",
          :hadoop_version => "0.20",
          :instance_count => 2,
          :master_instance_type => "m1.small",
          :slave_instance_type => "m1.small",
        },
        :steps => [
          # Step 1: install Hive onto the cluster via Amazon's script-runner.
          {
            :action_on_failure => "TERMINATE_JOB_FLOW",
            :hadoop_jar_step => {
              :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
              :args => [
                "s3://elasticmapreduce/libs/hive/hive-script",
                "--base-path",
                "s3://elasticmapreduce/libs/hive/",
                "--install-hive"
              ],
            },
            :name => "Setup Hive"
          },
          # Step 2: run the user's script; -d defines the Hive variables
          # passed into HiveJob#run below.
          {
            :action_on_failure => "CONTINUE",
            :hadoop_jar_step => {
              :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
              :args => [
                "s3://elasticmapreduce/libs/hive/hive-script",
                "--run-hive-script",
                "--args",
                "-f", "s3n://slif-hive/test.q",
                "-d", "XREFS=s3n://slif-test/xrefs",
                "-d", "OUTPUT=s3n://slif-test/output"
              ],
            },
            :name => "Run Hive Script"
          }
        ]
      }).and_return("new_jobflow_id")
      Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)

      hive = Elasticity::HiveJob.new("access", "secret")
      hive.log_uri = "s3n://slif-test/output/logs"
      hive.action_on_failure = "CONTINUE"
      jobflow_id = hive.run('s3n://slif-hive/test.q', {
        'OUTPUT' => 's3n://slif-test/output',
        'XREFS' => 's3n://slif-test/xrefs'
      })
      jobflow_id.should == "new_jobflow_id"
    end

  end

  # Replays a recorded AWS conversation (VCR cassette) — no live calls are made.
  describe "integration happy path" do
    use_vcr_cassette "hive_job/hive_ads", :record => :none
    it "should kick off the sample Amazon EMR Hive application" do
      hive = Elasticity::HiveJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
      hive.ec2_key_name = "sharethrough_dev"
      jobflow_id = hive.run("s3n://elasticmapreduce/samples/hive-ads/libs/model-build.q", {
        "LIBS" => "s3n://elasticmapreduce/samples/hive-ads/libs",
        "INPUT" => "s3n://elasticmapreduce/samples/hive-ads/tables",
        "OUTPUT" => "s3n://slif-elasticity/hive-ads/output/2011-04-19"
      })
      jobflow_id.should == "j-1UUVYMHBLKEGN"
    end
  end

end
require 'spec_helper'

# Specs for Elasticity::JobFlow — parses EMR DescribeJobFlows XML into
# JobFlow value objects.
describe Elasticity::JobFlow do

  before do
    # Canned DescribeJobFlows response with two job flows: one ("j-p") that
    # has steps, one ("j-h") that does not.
    describe_jobflows_xml = <<-JOBFLOWS
      <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
        <DescribeJobFlowsResult>
          <JobFlows>
            <member>
              <JobFlowId>j-p</JobFlowId>
              <Name>Pig Job</Name>
              <ExecutionStatusDetail>
                <CreationDateTime>
                  2011-10-04T21:49:16Z
                </CreationDateTime>
                <StartDateTime>
                  2011-10-04T21:49:17Z
                </StartDateTime>
                <ReadyDateTime>
                  2011-10-04T21:49:18Z
                </ReadyDateTime>
                <State>TERMINATED</State>
              </ExecutionStatusDetail>
              <Steps>
                <member>
                  <StepConfig>
                    <Name>Setup Hive</Name>
                  </StepConfig>
                  <ExecutionStatusDetail>
                    <State>FAILED</State>
                  </ExecutionStatusDetail>
                </member>
                <member>
                  <StepConfig>
                    <Name>Run Hive Script</Name>
                  </StepConfig>
                  <ExecutionStatusDetail>
                    <State>PENDING</State>
                  </ExecutionStatusDetail>
                </member>
              </Steps>
              <Instances>
                <Placement>
                  <AvailabilityZone>
                    eu-west-1a
                  </AvailabilityZone>
                </Placement>
                <SlaveInstanceType>
                  m1.small
                </SlaveInstanceType>
                <MasterInstanceType>
                  m1.small
                </MasterInstanceType>
                <Ec2KeyName>
                  myec2keyname
                </Ec2KeyName>
                <InstanceCount>
                  4
                </InstanceCount>
              </Instances>
            </member>
            <member>
              <JobFlowId>j-h</JobFlowId>
              <Name>Hive Job</Name>
              <ExecutionStatusDetail>
                <CreationDateTime>
                  2011-10-04T22:49:16Z
                </CreationDateTime>
                <StartDateTime>
                  2011-10-04T22:49:17Z
                </StartDateTime>
                <ReadyDateTime>
                  2011-10-04T22:49:18Z
                </ReadyDateTime>
                <State>TERMINATED</State>
              </ExecutionStatusDetail>
              <Instances>
                <Placement>
                  <AvailabilityZone>
                    eu-west-1b
                  </AvailabilityZone>
                </Placement>
                <SlaveInstanceType>
                  c1.medium
                </SlaveInstanceType>
                <MasterInstanceType>
                  c1.medium
                </MasterInstanceType>
                <Ec2KeyName>
                  myec2keyname
                </Ec2KeyName>
                <InstanceCount>
                  2
                </InstanceCount>
              </Instances>
            </member>
          </JobFlows>
        </DescribeJobFlowsResult>
      </DescribeJobFlowsResponse>
    JOBFLOWS
    describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
    # Namespaces are stripped so the xpath below can address elements directly.
    describe_jobflows_document.remove_namespaces!
    @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member')
  end

  describe ".from_member_element" do
    it "should return a JobFlow with the appropriate fields initialized" do
      jobflow = Elasticity::JobFlow.from_member_element(@members_nodeset[0])
      jobflow.name.should == "Pig Job"
      jobflow.jobflow_id.should == "j-p"
      jobflow.state.should == "TERMINATED"
      jobflow.steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
      jobflow.steps.map(&:state).should == ["FAILED", "PENDING"]
      jobflow.created_at.should == "2011-10-04T21:49:16Z"
      jobflow.started_at.should == "2011-10-04T21:49:17Z"
      jobflow.ready_at.should == "2011-10-04T21:49:18Z"
      jobflow.master_instance_type.should == "m1.small"
      jobflow.slave_instance_type.should == "m1.small"
      # InstanceCount is parsed straight from the XML, hence a String.
      jobflow.instance_count.should == "4"
    end
  end

  describe ".from_members_nodeset" do
    it "should return JobFlows with the appropriate fields initialized" do
      jobflow = Elasticity::JobFlow.from_members_nodeset(@members_nodeset)
      jobflow.map(&:name).should == ["Pig Job", "Hive Job"]
      jobflow.map(&:jobflow_id).should == ["j-p", "j-h"]
      jobflow.map(&:state).should == ["TERMINATED", "TERMINATED"]
      jobflow.map(&:created_at).should == ["2011-10-04T21:49:16Z", "2011-10-04T22:49:16Z"]
      jobflow.map(&:started_at).should == ["2011-10-04T21:49:17Z", "2011-10-04T22:49:17Z"]
      jobflow.map(&:ready_at).should == ["2011-10-04T21:49:18Z", "2011-10-04T22:49:18Z"]
      jobflow.map(&:master_instance_type).should == ["m1.small", "c1.medium"]
      jobflow.map(&:slave_instance_type).should == ["m1.small", "c1.medium"]
      jobflow.map(&:instance_count).should == ["4", "2"]
    end
  end

end
require 'spec_helper'

# Specs for Elasticity::JobFlowStep — parses the Steps/member portion of an
# EMR DescribeJobFlows response into JobFlowStep value objects.
describe Elasticity::JobFlowStep do

  before do
    # Canned response with one job flow containing two steps; the second step
    # is still PENDING and has an empty EndDateTime.
    describe_jobflows_xml = <<-JOBFLOWS
      <DescribeJobFlowsResponse xmlns="http://elasticmapreduce.amazonaws.com/doc/2009-03-31">
        <DescribeJobFlowsResult>
          <JobFlows>
            <member>
              <JobFlowId>j-p</JobFlowId>
              <Name>Pig Job</Name>
              <ExecutionStatusDetail>
                <State>TERMINATED</State>
              </ExecutionStatusDetail>
              <Steps>
                <member>
                  <StepConfig>
                    <Name>Setup Hive</Name>
                  </StepConfig>
                  <ExecutionStatusDetail>
                    <State>FAILED</State>
                    <StartDateTime>
                      2011-10-04T21:49:16Z
                    </StartDateTime>
                    <EndDateTime>
                      2011-10-04T21:51:16Z
                    </EndDateTime>
                  </ExecutionStatusDetail>
                </member>
                <member>
                  <StepConfig>
                    <Name>Run Hive Script</Name>
                  </StepConfig>
                  <ExecutionStatusDetail>
                    <State>PENDING</State>
                    <StartDateTime>
                      2011-10-04T21:51:18Z
                    </StartDateTime>
                    <EndDateTime>
                    </EndDateTime>
                  </ExecutionStatusDetail>
                </member>
              </Steps>
            </member>
          </JobFlows>
        </DescribeJobFlowsResult>
      </DescribeJobFlowsResponse>
    JOBFLOWS
    describe_jobflows_document = Nokogiri::XML(describe_jobflows_xml)
    # Namespaces are stripped so the xpath below can address elements directly.
    describe_jobflows_document.remove_namespaces!
    @members_nodeset = describe_jobflows_document.xpath('/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member')
  end

  describe ".from_member_element" do
    it "should return a JobFlowStep with the appropriate fields initialized" do
      jobflow_step = Elasticity::JobFlowStep.from_member_element(@members_nodeset[0])
      jobflow_step.name.should == "Setup Hive"
      jobflow_step.state.should == "FAILED"
      jobflow_step.started_at.should == "2011-10-04T21:49:16Z"
      jobflow_step.ended_at.should == "2011-10-04T21:51:16Z"
    end
  end

  describe ".from_members_nodeset" do
    it "should return JobFlowSteps with the appropriate fields initialized" do
      jobflow_steps = Elasticity::JobFlowStep.from_members_nodeset(@members_nodeset)
      jobflow_steps.map(&:name).should == ["Setup Hive", "Run Hive Script"]
      jobflow_steps.map(&:state).should == ["FAILED", "PENDING"]
      jobflow_steps.map(&:started_at).should == ["2011-10-04T21:49:16Z", "2011-10-04T21:51:18Z"]
      # A step that has not finished yet yields an empty ended_at string.
      jobflow_steps.map(&:ended_at).should == ["2011-10-04T21:51:16Z", ""]
    end
  end

end
require 'spec_helper'

# Specs for Elasticity::PigJob — a convenience wrapper that launches an EMR
# job flow with a Pig-install step followed by a run-pig-script step, and
# exposes a calculated E_PARALLELS value based on the slave instance type.
describe Elasticity::PigJob do

  describe ".new" do
    it "should have good defaults" do
      pig = Elasticity::PigJob.new("access", "secret")
      pig.aws_access_key_id.should == "access"
      pig.aws_secret_access_key.should == "secret"
      pig.ec2_key_name.should == "default"
      pig.hadoop_version.should == "0.20"
      pig.instance_count.should == 2
      pig.master_instance_type.should == "m1.small"
      pig.name.should == "Elasticity Pig Job"
      pig.slave_instance_type.should == "m1.small"
      pig.action_on_failure.should == "TERMINATE_JOB_FLOW"
      pig.log_uri.should == nil
      pig.parallels.should == 1
    end
  end

  describe "#instance_count=" do
    it "should not allow instances to be set less than 2" do
      pig = Elasticity::PigJob.new("access", "secret")
      lambda {
        pig.instance_count = 1
      }.should raise_error(ArgumentError, "Instance count cannot be set to less than 2 (requested 1)")
    end
  end

  # parallels is derived from instance_count and the slave instance type
  # (8 instances => 7 slaves, scaled by a per-type factor).
  describe "calculated value of parallels" do

    before do
      @pig = Elasticity::PigJob.new("access", "secret")
      @pig.instance_count = 8
    end

    context "when slave is m1.small" do
      it "should be 7" do
        @pig.slave_instance_type = "m1.small"
        @pig.parallels.should == 7
      end
    end

    context "when slave is m1.large" do
      it "should be 13" do
        @pig.slave_instance_type = "m1.large"
        @pig.parallels.should == 13
      end
    end

    context "when slave is c1.medium" do
      it "should be 13" do
        @pig.slave_instance_type = "c1.medium"
        @pig.parallels.should == 13
      end
    end

    context "when slave is m1.xlarge" do
      it "should be 26" do
        @pig.slave_instance_type = "m1.xlarge"
        @pig.parallels.should == 26
      end
    end

    context "when slave is c1.xlarge" do
      it "should be 26" do
        @pig.slave_instance_type = "c1.xlarge"
        @pig.parallels.should == 26
      end
    end

    # Unknown instance types fall back to a per-slave factor of 1, so with
    # 7 slaves the calculated value is 7 (description previously said "1",
    # contradicting the assertion below).
    context "when slave is any other type" do
      it "should be 7" do
        @pig.slave_instance_type = "foo"
        @pig.parallels.should == 7
      end
    end

  end

  describe "#run" do

    context "when no bootstrap actions are specified" do

      it "should run the script with the specified variables and return the jobflow_id" do
        # Stub out EMR so we can assert on the exact run_job_flow payload
        # without touching AWS.
        aws = Elasticity::EMR.new("", "")
        aws.should_receive(:run_job_flow).with({
          :name => "Elasticity Pig Job",
          :log_uri => "s3n://slif-test/output/logs",
          :instances => {
            :ec2_key_name => "default",
            :hadoop_version => "0.20",
            :instance_count => 8,
            :master_instance_type => "m1.small",
            :slave_instance_type => "m1.xlarge",
          },
          :steps => [
            # Step 1: install Pig onto the cluster via Amazon's script-runner.
            {
              :action_on_failure => "TERMINATE_JOB_FLOW",
              :hadoop_jar_step => {
                :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
                :args => [
                  "s3://elasticmapreduce/libs/pig/pig-script",
                  "--base-path",
                  "s3://elasticmapreduce/libs/pig/",
                  "--install-pig"
                ],
              },
              :name => "Setup Pig"
            },
            # Step 2: run the user's script; -p defines the Pig parameters,
            # including the calculated E_PARALLELS (m1.xlarge, 7 slaves => 26).
            {
              :action_on_failure => "CONTINUE",
              :hadoop_jar_step => {
                :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar",
                :args => [
                  "s3://elasticmapreduce/libs/pig/pig-script",
                  "--run-pig-script",
                  "--args",
                  "-p", "OUTPUT=s3n://slif-pig-test/output",
                  "-p", "XREFS=s3n://slif-pig-test/xrefs",
                  "-p", "E_PARALLELS=26",
                  "s3n://slif-pig-test/test.pig"
                ],
              },
              :name => "Run Pig Script"
            }
          ]
        }).and_return("new_jobflow_id")

        Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
        pig = Elasticity::PigJob.new("access", "secret")

        pig.log_uri = "s3n://slif-test/output/logs"
        pig.action_on_failure = "CONTINUE"
        pig.instance_count = 8
        pig.slave_instance_type = "m1.xlarge"

        jobflow_id = pig.run('s3n://slif-pig-test/test.pig', {
          'OUTPUT' => 's3n://slif-pig-test/output',
          'XREFS' => 's3n://slif-pig-test/xrefs'
        })
        jobflow_id.should == "new_jobflow_id"
      end
    end

    context "when bootstrap actions are specified" do
      it "should run the script with the proper job configuration" do
        aws = Elasticity::EMR.new("", "")
        # Only the bootstrap_actions portion of the payload is asserted here.
        aws.should_receive(:run_job_flow).with(hash_including({
          :bootstrap_actions => [
            {
              :name => "Elasticity Bootstrap Action (Configure Hadoop)",
              :script_bootstrap_action => {
                :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
                :args => ["-m", "foo=111"]
              }
            },
            {
              :name => "Elasticity Bootstrap Action (Configure Hadoop)",
              :script_bootstrap_action => {
                :path => "s3n://elasticmapreduce/bootstrap-actions/configure-hadoop",
                :args => ["-m", "bar=222"]
              }
            }
          ],
        }))

        Elasticity::EMR.should_receive(:new).with("access", "secret").and_return(aws)
        pig = Elasticity::PigJob.new("access", "secret")
        pig.add_hadoop_bootstrap_action("-m", "foo=111")
        pig.add_hadoop_bootstrap_action("-m", "bar=222")
        pig.run('s3n://slif-pig-test/test.pig')
      end
    end

  end

  # Replays recorded AWS conversations (VCR cassettes) — no live calls are made.
  describe "integration happy path" do

    context "with bootstrap actions" do
      use_vcr_cassette "pig_job/apache_log_reports_with_bootstrap", :record => :none
      it "should kick off the sample Amazon EMR Pig application" do
        pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
        pig.ec2_key_name = "sharethrough_dev"
        pig.add_hadoop_bootstrap_action("-m", "mapred.job.reuse.jvm.num.tasks=120")
        jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
          "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
          "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-10"
        })
        jobflow_id.should == "j-1UK43AWRT3QHD"
      end
    end

    context "without bootstrap actions" do
      use_vcr_cassette "pig_job/apache_log_reports", :record => :none
      it "should kick off the sample Amazon EMR Pig application" do
        pig = Elasticity::PigJob.new(AWS_ACCESS_KEY_ID, AWS_SECRET_KEY)
        pig.log_uri = "s3n://slif-elasticity/pig-apache/logs"
        pig.ec2_key_name = "sharethrough_dev"
        jobflow_id = pig.run("s3n://elasticmapreduce/samples/pig-apache/do-reports.pig", {
          "INPUT" => "s3n://elasticmapreduce/samples/pig-apache/input",
          "OUTPUT" => "s3n://slif-elasticity/pig-apache/output/2011-05-04"
        })
        jobflow_id.should == "j-1HB7A3TBRT3VS"
      end
    end
  end

end