elasticity 1.5 → 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rspec +2 -1
- data/.rvmrc +1 -1
- data/HISTORY.md +47 -24
- data/LICENSE +1 -1
- data/README.md +165 -317
- data/Rakefile +4 -3
- data/elasticity.gemspec +3 -5
- data/lib/elasticity.rb +10 -5
- data/lib/elasticity/aws_request.rb +81 -20
- data/lib/elasticity/custom_jar_step.rb +33 -0
- data/lib/elasticity/emr.rb +45 -117
- data/lib/elasticity/hadoop_bootstrap_action.rb +27 -0
- data/lib/elasticity/hive_step.rb +57 -0
- data/lib/elasticity/job_flow.rb +109 -39
- data/lib/elasticity/job_flow_status.rb +53 -0
- data/lib/elasticity/job_flow_status_step.rb +35 -0
- data/lib/elasticity/job_flow_step.rb +17 -25
- data/lib/elasticity/pig_step.rb +82 -0
- data/lib/elasticity/support/conditional_raise.rb +23 -0
- data/lib/elasticity/version.rb +1 -1
- data/spec/lib/elasticity/aws_request_spec.rb +159 -51
- data/spec/lib/elasticity/custom_jar_step_spec.rb +59 -0
- data/spec/lib/elasticity/emr_spec.rb +231 -762
- data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +26 -0
- data/spec/lib/elasticity/hive_step_spec.rb +74 -0
- data/spec/lib/elasticity/job_flow_integration_spec.rb +197 -0
- data/spec/lib/elasticity/job_flow_spec.rb +369 -138
- data/spec/lib/elasticity/job_flow_status_spec.rb +147 -0
- data/spec/lib/elasticity/job_flow_status_step_spec.rb +73 -0
- data/spec/lib/elasticity/job_flow_step_spec.rb +26 -64
- data/spec/lib/elasticity/pig_step_spec.rb +104 -0
- data/spec/lib/elasticity/support/conditional_raise_spec.rb +35 -0
- data/spec/spec_helper.rb +1 -50
- data/spec/support/be_a_hash_including_matcher.rb +35 -0
- metadata +101 -119
- data/.autotest +0 -2
- data/lib/elasticity/custom_jar_job.rb +0 -38
- data/lib/elasticity/hive_job.rb +0 -69
- data/lib/elasticity/pig_job.rb +0 -109
- data/lib/elasticity/simple_job.rb +0 -51
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +0 -44
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +0 -41
- data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +0 -266
- data/spec/fixtures/vcr_cassettes/custom_jar_job/cloudburst.yml +0 -41
- data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +0 -75
- data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +0 -38
- data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +0 -41
- data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +0 -38
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +0 -41
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +0 -41
- data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +0 -41
- data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +0 -41
- data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +0 -38
- data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +0 -38
- data/spec/lib/elasticity/custom_jar_job_spec.rb +0 -118
- data/spec/lib/elasticity/hive_job_spec.rb +0 -90
- data/spec/lib/elasticity/pig_job_spec.rb +0 -226
@@ -0,0 +1,27 @@
|
|
1
|
+
module Elasticity

  # A bootstrap action that invokes Amazon's configure-hadoop script with a
  # single option/value pair.
  class HadoopBootstrapAction

    attr_accessor :name, :option, :value

    # option and value are stored as given and later emitted, in that order,
    # as the script arguments.
    def initialize(option, value)
      @option = option
      @value = value
      @name = 'Elasticity Bootstrap Action (Configure Hadoop)'
    end

    # Returns this action as a nested hash: the action name plus the script
    # path and its two arguments.
    def to_aws_bootstrap_action
      script = {
        :path => 's3n://elasticmapreduce/bootstrap-actions/configure-hadoop',
        :args => [@option, @value]
      }
      { :name => @name, :script_bootstrap_action => script }
    end

  end

end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Elasticity

  # A job flow step that runs a Hive script via Amazon's script-runner jar.
  class HiveStep

    include JobFlowStep

    attr_accessor :name, :script, :variables, :action_on_failure

    # script is the location of the Hive script; it is also embedded in the
    # default step name.
    def initialize(script)
      @script = script
      @name = "Elasticity Hive Step (#{script})"
      @variables = { }
      @action_on_failure = 'TERMINATE_JOB_FLOW'
    end

    # Returns the step as a hash. Variables are emitted as "-d name=value"
    # pairs in sorted key order, after the "-f <script>" pair.
    # The job_flow argument is not read by this step.
    def to_aws_step(job_flow)
      hive_args = [
        's3://elasticmapreduce/libs/hive/hive-script',
        '--run-hive-script',
        '--args',
        '-f',
        @script
      ]
      @variables.keys.sort.each do |key|
        hive_args.push('-d', "#{key}=#{@variables[key]}")
      end
      jar_step = {
        :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
        :args => hive_args
      }
      {
        :name => @name,
        :action_on_failure => @action_on_failure,
        :hadoop_jar_step => jar_step
      }
    end

    # Hive steps need an installation step before they can run.
    def self.requires_installation?
      true
    end

    # Returns the hash describing the step that installs Hive.
    def self.aws_installation_step
      install_args = [
        's3://elasticmapreduce/libs/hive/hive-script',
        '--base-path',
        's3://elasticmapreduce/libs/hive/',
        '--install-hive'
      ]
      {
        :action_on_failure => 'TERMINATE_JOB_FLOW',
        :hadoop_jar_step => {
          :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
          :args => install_args
        },
        :name => 'Elasticity - Install Hive'
      }
    end

  end

end
|
data/lib/elasticity/job_flow.rb
CHANGED
@@ -1,51 +1,121 @@
|
|
1
1
|
module Elasticity
|
2
2
|
|
3
|
+
# Raised by JobFlow when an operation is not allowed once the job flow has an ID
# (adding bootstrap actions, running a second time).
class JobFlowRunningError < StandardError
end

# Raised by JobFlow#shutdown and JobFlow#status when the job flow has not been run.
class JobFlowNotStartedError < StandardError
end

# Raised by JobFlow#run when no steps have been added.
class JobFlowMissingStepsError < StandardError
end
|
6
|
+
|
3
7
|
# Collects the settings, bootstrap actions and steps of a job flow and submits
# them through Elasticity::EMR.
#
# Before #run, steps and bootstrap actions are queued locally. After #run
# (i.e. once a job flow ID has been stored), #add_step submits the step
# immediately and bootstrap actions can no longer be added.
class JobFlow

  attr_accessor :action_on_failure
  attr_accessor :ec2_key_name
  attr_accessor :name
  attr_accessor :hadoop_version
  # The writer is defined explicitly below so the value can be validated.
  attr_reader :instance_count
  attr_accessor :log_uri
  attr_accessor :master_instance_type
  attr_accessor :slave_instance_type
  attr_accessor :ami_version
  attr_accessor :keep_job_flow_alive_when_no_steps
  attr_accessor :ec2_subnet_id

  # access and secret are passed straight to Elasticity::EMR.new.
  def initialize(access, secret)
    @action_on_failure = 'TERMINATE_JOB_FLOW'
    @ec2_key_name = 'default'
    @hadoop_version = '0.20.205'
    @instance_count = 2
    @master_instance_type = 'm1.small'
    @name = 'Elasticity Job Flow'
    @slave_instance_type = 'm1.small'
    @ami_version = 'latest'
    @keep_job_flow_alive_when_no_steps = false

    @emr = Elasticity::EMR.new(access, secret)

    @bootstrap_actions = []
    @jobflow_steps = []
    @installed_steps = []
  end

  # Sets the instance count.
  # Raises ArgumentError unless count is greater than 1; the message reports
  # the value that was actually requested.
  def instance_count=(count)
    raise ArgumentError, "Instance count cannot be set to less than 2 (requested #{count})" unless count > 1
    @instance_count = count
  end

  # Queues a bootstrap action.
  # Raises JobFlowRunningError once the job flow has been run.
  def add_bootstrap_action(bootstrap_action)
    raise_if is_jobflow_running?, JobFlowRunningError, 'To modify bootstrap actions, please create a new job flow.'
    @bootstrap_actions << bootstrap_action
  end

  # Adds a step. Before #run the step is queued (the queue is returned).
  # After #run the step is submitted immediately, preceded by its class's
  # installation step if that class requires installation and has not been
  # installed yet; the result of the submission is returned.
  def add_step(jobflow_step)
    return @jobflow_steps << jobflow_step unless is_jobflow_running?

    step_class = jobflow_step.class
    needs_installation = step_class.send(:requires_installation?) && !@installed_steps.include?(step_class)

    aws_steps = []
    aws_steps << step_class.send(:aws_installation_step) if needs_installation
    aws_steps << jobflow_step.to_aws_step(self)

    response = @emr.add_jobflow_steps(@jobflow_id, {:steps => aws_steps})
    # Record the installation only after the submission went through, so later
    # steps of the same class do not submit the installation step again.
    @installed_steps << step_class if needs_installation
    response
  end

  # Submits the job flow and stores the returned job flow ID.
  # Raises JobFlowMissingStepsError when no steps were added and
  # JobFlowRunningError when the job flow was already run.
  def run
    raise_if @jobflow_steps.empty?, JobFlowMissingStepsError, 'Cannot run a job flow without adding steps. Please use #add_step.'
    raise_if is_jobflow_running?, JobFlowRunningError, 'Cannot run a job flow multiple times. To do more with this job flow, please use #add_step.'

    # Building the config marks step classes as installed. Roll that back if
    # the submission fails, so a retry includes the installation steps again.
    installed_before = @installed_steps.dup
    begin
      @jobflow_id ||= @emr.run_job_flow(jobflow_config)
    rescue
      @installed_steps = installed_before
      raise
    end
  end

  # Terminates the job flow.
  # Raises JobFlowNotStartedError when the job flow has not been run.
  def shutdown
    raise_unless is_jobflow_running?, JobFlowNotStartedError, 'Cannot #shutdown a job flow that has not yet been #run.'
    @emr.terminate_jobflows(@jobflow_id)
  end

  # Returns whatever Elasticity::EMR#describe_jobflow returns for this job flow.
  # Raises JobFlowNotStartedError when the job flow has not been run.
  def status
    raise_unless is_jobflow_running?, JobFlowNotStartedError, 'Please #run this job flow before attempting to retrieve status.'
    @emr.describe_jobflow(@jobflow_id)
  end

  private

  # Truthy once #run has stored a job flow ID.
  def is_jobflow_running?
    @jobflow_id
  end

  # Full configuration for the run request: preamble, steps and, when set,
  # the log URI and bootstrap actions.
  def jobflow_config
    config = jobflow_preamble
    config[:steps] = jobflow_steps
    config[:log_uri] = @log_uri if @log_uri
    config[:bootstrap_actions] = @bootstrap_actions.map { |action| action.to_aws_bootstrap_action } unless @bootstrap_actions.empty?
    config
  end

  # Name, AMI version and instance settings.
  def jobflow_preamble
    instances = {
      :keep_job_flow_alive_when_no_steps => @keep_job_flow_alive_when_no_steps,
      :ec2_key_name => @ec2_key_name,
      :hadoop_version => @hadoop_version,
      :instance_count => @instance_count,
      :master_instance_type => @master_instance_type,
      :slave_instance_type => @slave_instance_type
    }
    # The subnet is an instance-level setting (Instances.Ec2SubnetId in the
    # EMR RunJobFlow API), so it goes under :instances rather than top level.
    instances[:ec2_subnet_id] = @ec2_subnet_id if @ec2_subnet_id
    {
      :name => @name,
      :ami_version => @ami_version,
      :instances => instances
    }
  end

  # Converts the queued steps, inserting one installation step ahead of the
  # first step of each class that requires installation. Marks those classes
  # as installed.
  def jobflow_steps
    steps = []
    @jobflow_steps.each do |step|
      if step.class.send(:requires_installation?) && !@installed_steps.include?(step.class)
        steps << step.class.send(:aws_installation_step)
        @installed_steps << step.class
      end
      steps << step.to_aws_step(self)
    end
    steps
  end

end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Elasticity

  # Status of a job flow as read from a DescribeJobFlows response.
  class JobFlowStatus

    attr_accessor :name
    attr_accessor :jobflow_id
    attr_accessor :state
    attr_accessor :steps
    attr_accessor :created_at
    attr_accessor :started_at
    attr_accessor :ready_at
    attr_accessor :instance_count
    attr_accessor :master_instance_type
    attr_accessor :slave_instance_type
    attr_accessor :last_state_change_reason

    def initialize
      @steps = []
    end

    # Create a jobflow from an AWS <member> (Nokogiri::XML::Element):
    #   /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member
    #
    # Text values are stripped and kept as strings (including instance_count).
    # Timestamps are parsed with Time.parse; an empty timestamp becomes nil.
    def self.from_member_element(xml_element)
      jobflow = new
      jobflow.name = text_at(xml_element, './Name')
      jobflow.jobflow_id = text_at(xml_element, './JobFlowId')
      jobflow.state = text_at(xml_element, './ExecutionStatusDetail/State')
      jobflow.last_state_change_reason = text_at(xml_element, './ExecutionStatusDetail/LastStateChangeReason')
      jobflow.steps = JobFlowStatusStep.from_members_nodeset(xml_element.xpath('./Steps/member'))
      jobflow.created_at = time_at(xml_element, './ExecutionStatusDetail/CreationDateTime')
      jobflow.started_at = time_at(xml_element, './ExecutionStatusDetail/StartDateTime')
      jobflow.ready_at = time_at(xml_element, './ExecutionStatusDetail/ReadyDateTime')
      jobflow.instance_count = text_at(xml_element, './Instances/InstanceCount')
      jobflow.master_instance_type = text_at(xml_element, './Instances/MasterInstanceType')
      jobflow.slave_instance_type = text_at(xml_element, './Instances/SlaveInstanceType')
      jobflow
    end

    # Create JobFlows from a collection of AWS <member> nodes (Nokogiri::XML::NodeSet):
    #   /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows
    #
    # Returns an Array with one JobFlowStatus per member, in order.
    def self.from_members_nodeset(members_nodeset)
      members_nodeset.map { |member| from_member_element(member) }
    end

    # Stripped text found at the given XPath.
    def self.text_at(xml_element, path)
      xml_element.xpath(path).text.strip
    end

    # Time parsed from the text at the given XPath, or nil when it is empty.
    def self.time_at(xml_element, path)
      raw = text_at(xml_element, path)
      raw.empty? ? nil : Time.parse(raw)
    end

    private_class_method :text_at, :time_at

  end

end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Elasticity

  # Status of a single job flow step as read from a DescribeJobFlows response.
  class JobFlowStatusStep

    attr_accessor :name
    attr_accessor :state
    attr_accessor :started_at
    attr_accessor :ended_at

    # Create a job flow step from an AWS <member> (Nokogiri::XML::Element):
    #   /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member
    #
    # Name and state are stripped strings. Timestamps are parsed with
    # Time.parse; an empty timestamp becomes nil.
    def self.from_member_element(xml_element)
      job_flow_step = new
      job_flow_step.name = text_at(xml_element, "./StepConfig/Name")
      job_flow_step.state = text_at(xml_element, "./ExecutionStatusDetail/State")
      job_flow_step.started_at = time_at(xml_element, "./ExecutionStatusDetail/StartDateTime")
      job_flow_step.ended_at = time_at(xml_element, "./ExecutionStatusDetail/EndDateTime")
      job_flow_step
    end

    # Create JobFlowSteps from a collection of AWS <member> nodes (Nokogiri::XML::NodeSet):
    #   /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member
    #
    # Returns an Array with one JobFlowStatusStep per member, in order.
    def self.from_members_nodeset(members_nodeset)
      members_nodeset.map { |member| from_member_element(member) }
    end

    # Stripped text found at the given XPath.
    def self.text_at(xml_element, path)
      xml_element.xpath(path).text.strip
    end

    # Time parsed from the text at the given XPath, or nil when it is empty.
    def self.time_at(xml_element, path)
      raw = text_at(xml_element, path)
      raw.empty? ? nil : Time.parse(raw)
    end

    private_class_method :text_at, :time_at

  end

end
|
@@ -1,33 +1,25 @@
|
|
1
1
|
module Elasticity
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
attr_accessor :started_at
|
8
|
-
attr_accessor :ended_at
|
9
|
-
|
10
|
-
# Create a job flow from an AWS <member> (Nokogiri::XML::Element):
|
11
|
-
# /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member
|
12
|
-
def self.from_member_element(xml_element)
|
13
|
-
job_flow_step = JobFlowStep.new
|
14
|
-
job_flow_step.name = xml_element.xpath("./StepConfig/Name").text.strip
|
15
|
-
job_flow_step.state = xml_element.xpath("./ExecutionStatusDetail/State").text.strip
|
16
|
-
started_at = xml_element.xpath("./ExecutionStatusDetail/StartDateTime").text.strip
|
17
|
-
job_flow_step.started_at = (started_at == "") ? (nil) : (Time.parse(started_at))
|
18
|
-
ended_at = xml_element.xpath("./ExecutionStatusDetail/EndDateTime").text.strip
|
19
|
-
job_flow_step.ended_at = (ended_at == "") ? (nil) : (Time.parse(ended_at))
|
20
|
-
job_flow_step
|
3
|
+
# Mixin for job flow steps. Including it supplies a default #to_aws_step that
# raises, and extends the including class with ClassMethods.
module JobFlowStep

  # Extend the including class so the ClassMethods defaults become class methods.
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Default implementation: always raises RuntimeError.
  def to_aws_step(jobflow_step)
    raise '#to_aws_step is required to be defined on all job flow steps.'
  end

  # Class-level defaults for including classes.
  module ClassMethods

    # By default a step needs no installation.
    def requires_installation?
      false
    end

    # Default implementation: always raises RuntimeError.
    def aws_installation_step
      raise '.aws_installation_step is required to be defined when a step requires installation (e.g. Pig, Hive).'
    end

  end

end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module Elasticity

  # A job flow step that runs a Pig script via Amazon's script-runner jar.
  class PigStep

    include JobFlowStep

    attr_accessor :name, :script, :variables, :action_on_failure

    # script is the location of the Pig script; it is also embedded in the
    # default step name.
    def initialize(script)
      @script = script
      @name = "Elasticity Pig Step (#{script})"
      @variables = { }
      @action_on_failure = 'TERMINATE_JOB_FLOW'
    end

    # Returns the step as a hash. Variables are emitted as "-p name=value"
    # pairs in sorted key order, followed by an E_PARALLELS value computed
    # from the job flow's slave instance type and instance count, and finally
    # the script itself.
    def to_aws_step(job_flow)
      pig_args = ['s3://elasticmapreduce/libs/pig/pig-script', '--run-pig-script', '--args']
      @variables.keys.sort.each do |key|
        pig_args.push('-p', "#{key}=#{@variables[key]}")
      end
      pig_args.push('-p', "E_PARALLELS=#{parallels(job_flow.slave_instance_type, job_flow.instance_count)}")
      pig_args.push(@script)
      jar_step = {
        :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
        :args => pig_args
      }
      {
        :action_on_failure => @action_on_failure,
        :hadoop_jar_step => jar_step,
        :name => @name
      }
    end

    # Pig steps need an installation step before they can run.
    def self.requires_installation?
      true
    end

    # Returns the hash describing the step that installs Pig.
    def self.aws_installation_step
      install_args = [
        's3://elasticmapreduce/libs/pig/pig-script',
        '--base-path',
        's3://elasticmapreduce/libs/pig/',
        '--install-pig'
      ]
      {
        :action_on_failure => 'TERMINATE_JOB_FLOW',
        :hadoop_jar_step => {
          :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
          :args => install_args
        },
        :name => 'Elasticity - Install Pig'
      }
    end

    private

    # Default PARALLELS value, using the formula from the Pig Cookbook:
    #
    #   <num machines> * <num reduce slots per machine> * 0.9
    #
    # rounded up, where <num machines> is instance_count - 1. Reduce slots
    # per instance type (from an AWS forum post); unlisted types count as 1:
    #
    #   m1.small  1
    #   m1.large  2
    #   m1.xlarge 4
    #   c1.medium 2
    #   c1.xlarge 4
    def parallels(slave_instance_type, instance_count)
      slots_by_type = {
        'm1.small' => 1,
        'm1.large' => 2,
        'm1.xlarge' => 4,
        'c1.medium' => 2,
        'c1.xlarge' => 4
      }
      slots = slots_by_type.fetch(slave_instance_type, 1)
      machines = instance_count - 1
      (machines.to_f * slots.to_f * 0.9).ceil
    end

  end

end
|