elasticity 1.5 → 2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.rspec +2 -1
- data/.rvmrc +1 -1
- data/HISTORY.md +47 -24
- data/LICENSE +1 -1
- data/README.md +165 -317
- data/Rakefile +4 -3
- data/elasticity.gemspec +3 -5
- data/lib/elasticity.rb +10 -5
- data/lib/elasticity/aws_request.rb +81 -20
- data/lib/elasticity/custom_jar_step.rb +33 -0
- data/lib/elasticity/emr.rb +45 -117
- data/lib/elasticity/hadoop_bootstrap_action.rb +27 -0
- data/lib/elasticity/hive_step.rb +57 -0
- data/lib/elasticity/job_flow.rb +109 -39
- data/lib/elasticity/job_flow_status.rb +53 -0
- data/lib/elasticity/job_flow_status_step.rb +35 -0
- data/lib/elasticity/job_flow_step.rb +17 -25
- data/lib/elasticity/pig_step.rb +82 -0
- data/lib/elasticity/support/conditional_raise.rb +23 -0
- data/lib/elasticity/version.rb +1 -1
- data/spec/lib/elasticity/aws_request_spec.rb +159 -51
- data/spec/lib/elasticity/custom_jar_step_spec.rb +59 -0
- data/spec/lib/elasticity/emr_spec.rb +231 -762
- data/spec/lib/elasticity/hadoop_bootstrap_action_spec.rb +26 -0
- data/spec/lib/elasticity/hive_step_spec.rb +74 -0
- data/spec/lib/elasticity/job_flow_integration_spec.rb +197 -0
- data/spec/lib/elasticity/job_flow_spec.rb +369 -138
- data/spec/lib/elasticity/job_flow_status_spec.rb +147 -0
- data/spec/lib/elasticity/job_flow_status_step_spec.rb +73 -0
- data/spec/lib/elasticity/job_flow_step_spec.rb +26 -64
- data/spec/lib/elasticity/pig_step_spec.rb +104 -0
- data/spec/lib/elasticity/support/conditional_raise_spec.rb +35 -0
- data/spec/spec_helper.rb +1 -50
- data/spec/support/be_a_hash_including_matcher.rb +35 -0
- metadata +101 -119
- data/.autotest +0 -2
- data/lib/elasticity/custom_jar_job.rb +0 -38
- data/lib/elasticity/hive_job.rb +0 -69
- data/lib/elasticity/pig_job.rb +0 -109
- data/lib/elasticity/simple_job.rb +0 -51
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +0 -44
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +0 -41
- data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +0 -266
- data/spec/fixtures/vcr_cassettes/custom_jar_job/cloudburst.yml +0 -41
- data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +0 -75
- data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +0 -38
- data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +0 -41
- data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +0 -38
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +0 -41
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +0 -41
- data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +0 -41
- data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +0 -41
- data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +0 -38
- data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +0 -38
- data/spec/lib/elasticity/custom_jar_job_spec.rb +0 -118
- data/spec/lib/elasticity/hive_job_spec.rb +0 -90
- data/spec/lib/elasticity/pig_job_spec.rb +0 -226
@@ -0,0 +1,27 @@
|
|
1
|
+
module Elasticity
|
2
|
+
|
3
|
+
# Bootstrap action that invokes the configure-hadoop script with a single
# option/value pair.
class HadoopBootstrapAction

  attr_accessor :name, :option, :value

  # option and value are handed to the script, in that order, as its arguments.
  def initialize(option, value)
    @option, @value = option, value
    @name = 'Elasticity Bootstrap Action (Configure Hadoop)'
  end

  # Returns the hash form of this action: its name plus the script path and
  # the [option, value] argument list.
  def to_aws_bootstrap_action
    script = {
      :path => 's3n://elasticmapreduce/bootstrap-actions/configure-hadoop',
      :args => [@option, @value]
    }
    { :name => @name, :script_bootstrap_action => script }
  end

end
|
26
|
+
|
27
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Elasticity
|
2
|
+
|
3
|
+
# A job flow step that runs a Hive script through the EMR script-runner jar.
class HiveStep

  include JobFlowStep

  attr_accessor :name, :script, :variables, :action_on_failure

  # script is the location of the Hive script; it is also embedded in the
  # default step name.
  def initialize(script)
    @script = script
    @name = "Elasticity Hive Step (#{script})"
    @variables = { }
    @action_on_failure = 'TERMINATE_JOB_FLOW'
  end

  # Builds the step hash. Variables are emitted as "-d name=value" pairs in
  # sorted key order so the argument list is deterministic. The job_flow
  # argument is accepted but not used here.
  def to_aws_step(job_flow)
    script_args = [
      's3://elasticmapreduce/libs/hive/hive-script',
      '--run-hive-script',
      '--args',
      '-f',
      @script
    ]
    @variables.keys.sort.each do |key|
      script_args.push('-d', "#{key}=#{@variables[key]}")
    end
    jar_step = {
      :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
      :args => script_args
    }
    {
      :name => @name,
      :action_on_failure => @action_on_failure,
      :hadoop_jar_step => jar_step
    }
  end

  class << self

    # Hive steps always report that an installation step is needed.
    def requires_installation?
      true
    end

    # The step hash that installs Hive via the script-runner jar.
    def aws_installation_step
      install_args = [
        's3://elasticmapreduce/libs/hive/hive-script',
        '--base-path',
        's3://elasticmapreduce/libs/hive/',
        '--install-hive'
      ]
      {
        :action_on_failure => 'TERMINATE_JOB_FLOW',
        :hadoop_jar_step => {
          :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
          :args => install_args
        },
        :name => 'Elasticity - Install Hive'
      }
    end

  end

end
|
56
|
+
|
57
|
+
end
|
data/lib/elasticity/job_flow.rb
CHANGED
@@ -1,51 +1,121 @@
|
|
1
1
|
module Elasticity
|
2
2
|
|
3
|
+
# Raised when an operation is not allowed on a job flow that has been run.
class JobFlowRunningError < StandardError
end

# Raised when an operation needs a job flow that has already been run.
class JobFlowNotStartedError < StandardError
end

# Raised when a job flow is run without any steps.
class JobFlowMissingStepsError < StandardError
end
|
6
|
+
|
3
7
|
# Collects the settings, bootstrap actions and steps of a job flow and submits
# them through Elasticity::EMR.
#
# Steps added before #run are queued and submitted with the job flow; steps
# added afterwards are sent to the running job flow immediately.
class JobFlow

  attr_accessor :action_on_failure
  attr_accessor :ec2_key_name
  attr_accessor :name
  attr_accessor :hadoop_version
  attr_reader :instance_count # writer is defined below, with validation
  attr_accessor :log_uri
  attr_accessor :master_instance_type
  attr_accessor :slave_instance_type
  attr_accessor :ami_version
  attr_accessor :keep_job_flow_alive_when_no_steps
  attr_accessor :ec2_subnet_id

  # access and secret are passed straight to Elasticity::EMR.new.
  def initialize(access, secret)
    @action_on_failure = 'TERMINATE_JOB_FLOW'
    @ec2_key_name = 'default'
    @hadoop_version = '0.20.205'
    @instance_count = 2
    @master_instance_type = 'm1.small'
    @name = 'Elasticity Job Flow'
    @slave_instance_type = 'm1.small'
    @ami_version = 'latest'
    @keep_job_flow_alive_when_no_steps = false

    @emr = Elasticity::EMR.new(access, secret)

    @bootstrap_actions = []
    @jobflow_steps = []
    @installed_steps = []
  end

  # Sets the instance count.
  #
  # Raises ArgumentError when count is not greater than 1; the message reports
  # the value that was actually requested.
  def instance_count=(count)
    raise ArgumentError, "Instance count cannot be set to less than 2 (requested #{count})" unless count > 1
    @instance_count = count
  end

  # Queues a bootstrap action. Raises JobFlowRunningError once the job flow
  # has been run.
  def add_bootstrap_action(bootstrap_action)
    raise_if is_jobflow_running?, JobFlowRunningError, 'To modify bootstrap actions, please create a new job flow.'
    @bootstrap_actions << bootstrap_action
  end

  # Adds a step. Before #run the step is queued (the queue is returned).
  # After #run the step is submitted to the running job flow, preceded by its
  # class's installation step if one is required and has not been submitted
  # yet; the result of the EMR call is returned.
  def add_step(jobflow_step)
    if is_jobflow_running?
      step_class = jobflow_step.class
      needs_install = installation_pending?(step_class, @installed_steps)
      aws_steps = []
      aws_steps << step_class.aws_installation_step if needs_install
      aws_steps << jobflow_step.to_aws_step(self)
      result = @emr.add_jobflow_steps(@jobflow_id, {:steps => aws_steps})
      # Remember the installation only after the call went through, so later
      # steps of the same class do not install again.
      @installed_steps << step_class if needs_install
      result
    else
      @jobflow_steps << jobflow_step
    end
  end

  # Submits the job flow and returns the value of Elasticity::EMR#run_job_flow,
  # which is kept as the job flow id.
  #
  # Raises JobFlowMissingStepsError when no steps were added and
  # JobFlowRunningError when the job flow has already been run.
  def run
    raise_if @jobflow_steps.empty?, JobFlowMissingStepsError, 'Cannot run a job flow without adding steps. Please use #add_step.'
    raise_if is_jobflow_running?, JobFlowRunningError, 'Cannot run a job flow multiple times. To do more with this job flow, please use #add_step.'
    @jobflow_id = @emr.run_job_flow(jobflow_config)
    # Installers are recorded only now, so a failed submission can be retried
    # with its installation steps intact.
    @jobflow_steps.each do |step|
      step_class = step.class
      @installed_steps << step_class if installation_pending?(step_class, @installed_steps)
    end
    @jobflow_id
  end

  # Terminates the job flow. Raises JobFlowNotStartedError before #run.
  def shutdown
    raise_unless is_jobflow_running?, JobFlowNotStartedError, 'Cannot #shutdown a job flow that has not yet been #run.'
    @emr.terminate_jobflows(@jobflow_id)
  end

  # Returns Elasticity::EMR#describe_jobflow for this job flow.
  # Raises JobFlowNotStartedError before #run.
  def status
    raise_unless is_jobflow_running?, JobFlowNotStartedError, 'Please #run this job flow before attempting to retrieve status.'
    @emr.describe_jobflow(@jobflow_id)
  end

  private

  # A job flow counts as running as soon as #run has stored an id.
  def is_jobflow_running?
    !!@jobflow_id
  end

  # True when step_class asks for an installation step that is not listed in
  # installed yet.
  def installation_pending?(step_class, installed)
    step_class.requires_installation? && !installed.include?(step_class)
  end

  # The complete request hash handed to Elasticity::EMR#run_job_flow.
  def jobflow_config
    config = jobflow_preamble
    config[:steps] = jobflow_steps
    config[:log_uri] = @log_uri if @log_uri
    config[:bootstrap_actions] = @bootstrap_actions.map { |action| action.to_aws_bootstrap_action } unless @bootstrap_actions.empty?
    config
  end

  # Name, AMI version and instance settings of the request.
  def jobflow_preamble
    instances = {
      :keep_job_flow_alive_when_no_steps => @keep_job_flow_alive_when_no_steps,
      :ec2_key_name => @ec2_key_name,
      :hadoop_version => @hadoop_version,
      :instance_count => @instance_count,
      :master_instance_type => @master_instance_type,
      :slave_instance_type => @slave_instance_type,
    }
    # The EMR API defines Ec2SubnetId on the Instances structure, not on the
    # top level of the RunJobFlow request.
    instances[:ec2_subnet_id] = @ec2_subnet_id if @ec2_subnet_id
    {
      :name => @name,
      :ami_version => @ami_version,
      :instances => instances
    }
  end

  # Converts the queued steps to their hash form, inserting each required
  # installation step once, ahead of the first step that needs it. Works on a
  # copy of @installed_steps; #run records the result after submission.
  def jobflow_steps
    installed = @installed_steps.dup
    @jobflow_steps.inject([]) do |aws_steps, step|
      step_class = step.class
      if installation_pending?(step_class, installed)
        aws_steps << step_class.aws_installation_step
        installed << step_class
      end
      aws_steps << step.to_aws_step(self)
    end
  end

end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Elasticity
|
2
|
+
|
3
|
+
# Snapshot of a job flow as described by an AWS DescribeJobFlows response.
class JobFlowStatus

  attr_accessor :name, :jobflow_id, :state, :steps,
                :created_at, :started_at, :ready_at,
                :instance_count, :master_instance_type, :slave_instance_type,
                :last_state_change_reason

  def initialize
    @steps = []
  end

  # Build a status from a single AWS <member> (Nokogiri::XML::Element):
  #   /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member
  #
  # Text values are stripped. The start and ready times become nil when the
  # element is empty; the creation time is always parsed. The instance count
  # is stored as the stripped text, not converted to a number.
  def self.from_member_element(xml_element)
    text_at = lambda { |path| xml_element.xpath(path).text.strip }
    optional_time_at = lambda do |path|
      raw = text_at.call(path)
      raw == '' ? nil : Time.parse(raw)
    end

    status = JobFlowStatus.new
    status.name = text_at.call('./Name')
    status.jobflow_id = text_at.call('./JobFlowId')
    status.state = text_at.call('./ExecutionStatusDetail/State')
    status.last_state_change_reason = text_at.call('./ExecutionStatusDetail/LastStateChangeReason')
    status.steps = JobFlowStatusStep.from_members_nodeset(xml_element.xpath('./Steps/member'))
    status.created_at = Time.parse(text_at.call('./ExecutionStatusDetail/CreationDateTime'))
    status.started_at = optional_time_at.call('./ExecutionStatusDetail/StartDateTime')
    status.ready_at = optional_time_at.call('./ExecutionStatusDetail/ReadyDateTime')
    status.instance_count = text_at.call('./Instances/InstanceCount')
    status.master_instance_type = text_at.call('./Instances/MasterInstanceType')
    status.slave_instance_type = text_at.call('./Instances/SlaveInstanceType')
    status
  end

  # Build one status per AWS <member> node (Nokogiri::XML::NodeSet):
  #   /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows
  def self.from_members_nodeset(members_nodeset)
    [].tap do |statuses|
      members_nodeset.each { |member| statuses << from_member_element(member) }
    end
  end

end
|
52
|
+
|
53
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Elasticity
|
2
|
+
|
3
|
+
# Status of one step of a job flow, as described by AWS.
class JobFlowStatusStep

  attr_accessor :name, :state, :started_at, :ended_at

  # Build a step status from a single AWS <member> (Nokogiri::XML::Element):
  #   /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member
  #
  # Text values are stripped; a start or end time whose element is empty
  # becomes nil.
  def self.from_member_element(xml_element)
    text_at = lambda { |path| xml_element.xpath(path).text.strip }
    time_at = lambda do |path|
      raw = text_at.call(path)
      raw == "" ? nil : Time.parse(raw)
    end

    step_status = JobFlowStatusStep.new
    step_status.name = text_at.call("./StepConfig/Name")
    step_status.state = text_at.call("./ExecutionStatusDetail/State")
    step_status.started_at = time_at.call("./ExecutionStatusDetail/StartDateTime")
    step_status.ended_at = time_at.call("./ExecutionStatusDetail/EndDateTime")
    step_status
  end

  # Build one step status per AWS <member> node (Nokogiri::XML::NodeSet):
  #   /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member
  def self.from_members_nodeset(members_nodeset)
    [].tap do |step_statuses|
      members_nodeset.each { |member| step_statuses << from_member_element(member) }
    end
  end

end
|
34
|
+
|
35
|
+
end
|
@@ -1,33 +1,25 @@
|
|
1
1
|
module Elasticity
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
attr_accessor :started_at
|
8
|
-
attr_accessor :ended_at
|
9
|
-
|
10
|
-
# Create a job flow from an AWS <member> (Nokogiri::XML::Element):
|
11
|
-
# /DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member/Steps/member
|
12
|
-
def self.from_member_element(xml_element)
|
13
|
-
job_flow_step = JobFlowStep.new
|
14
|
-
job_flow_step.name = xml_element.xpath("./StepConfig/Name").text.strip
|
15
|
-
job_flow_step.state = xml_element.xpath("./ExecutionStatusDetail/State").text.strip
|
16
|
-
started_at = xml_element.xpath("./ExecutionStatusDetail/StartDateTime").text.strip
|
17
|
-
job_flow_step.started_at = (started_at == "") ? (nil) : (Time.parse(started_at))
|
18
|
-
ended_at = xml_element.xpath("./ExecutionStatusDetail/EndDateTime").text.strip
|
19
|
-
job_flow_step.ended_at = (ended_at == "") ? (nil) : (Time.parse(ended_at))
|
20
|
-
job_flow_step
|
3
|
+
# Mixin for job flow steps. Including it also extends the including class
# with ClassMethods, which supplies overridable class-level defaults.
module JobFlowStep

  # Hook run on include: adds the class-level defaults to the includer.
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Placeholder that always raises RuntimeError; including classes are
  # expected to define their own #to_aws_step.
  def to_aws_step(jobflow_step)
    raise '#to_aws_step is required to be defined on all job flow steps.'
  end

  module ClassMethods

    # By default a step needs no installation step.
    def requires_installation?
      false
    end

    # Placeholder that always raises RuntimeError; classes that require
    # installation are expected to define their own .aws_installation_step.
    def aws_installation_step
      raise '.aws_installation_step is required to be defined when a step requires installation (e.g. Pig, Hive).'
    end

  end

end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module Elasticity
|
2
|
+
|
3
|
+
# A job flow step that runs a Pig script through the EMR script-runner jar.
class PigStep

  include JobFlowStep

  attr_accessor :name, :script, :variables, :action_on_failure

  # script is the location of the Pig script; it is also embedded in the
  # default step name.
  def initialize(script)
    @script = script
    @name = "Elasticity Pig Step (#{script})"
    @variables = { }
    @action_on_failure = 'TERMINATE_JOB_FLOW'
  end

  # Builds the step hash. Variables are emitted as "-p name=value" pairs in
  # sorted key order, followed by an E_PARALLELS value derived from the job
  # flow's slave instance type and instance count, and finally the script.
  def to_aws_step(job_flow)
    script_args = [
      's3://elasticmapreduce/libs/pig/pig-script',
      '--run-pig-script',
      '--args'
    ]
    @variables.keys.sort.each do |key|
      script_args.push('-p', "#{key}=#{@variables[key]}")
    end
    e_parallels = parallels(job_flow.slave_instance_type, job_flow.instance_count)
    script_args.push('-p', "E_PARALLELS=#{e_parallels}")
    script_args.push(@script)
    {
      :action_on_failure => @action_on_failure,
      :hadoop_jar_step => {
        :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
        :args => script_args,
      },
      :name => @name
    }
  end

  class << self

    # Pig steps always report that an installation step is needed.
    def requires_installation?
      true
    end

    # The step hash that installs Pig via the script-runner jar.
    def aws_installation_step
      install_args = [
        's3://elasticmapreduce/libs/pig/pig-script',
        '--base-path',
        's3://elasticmapreduce/libs/pig/',
        '--install-pig'
      ]
      {
        :action_on_failure => 'TERMINATE_JOB_FLOW',
        :hadoop_jar_step => {
          :jar => 's3://elasticmapreduce/libs/script-runner/script-runner.jar',
          :args => install_args,
        },
        :name => 'Elasticity - Install Pig'
      }
    end

  end

  private

  # Default PARALLELS value, following the Pig Cookbook formula
  #
  #   <num machines> * <num reduce slots per machine> * 0.9
  #
  # rounded up. One instance is subtracted from instance_count before
  # multiplying. Reduce slots per instance type (from an AWS forum post):
  #
  #   m1.small  1
  #   m1.large  2
  #   m1.xlarge 4
  #   c1.medium 2
  #   c1.xlarge 4
  #
  # Any other instance type counts as 1 slot.
  def parallels(slave_instance_type, instance_count)
    slots_by_type = {
      'm1.small' => 1,
      'm1.large' => 2,
      'm1.xlarge' => 4,
      'c1.medium' => 2,
      'c1.xlarge' => 4
    }
    slots = slots_by_type.fetch(slave_instance_type, 1)
    machines = instance_count - 1
    (machines.to_f * slots.to_f * 0.9).ceil
  end

end
|
81
|
+
|
82
|
+
end
|