wakoopa-elasticity 1.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +2 -0
- data/.gitignore +5 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/Gemfile +4 -0
- data/HISTORY.mediawiki +30 -0
- data/LICENSE +202 -0
- data/README.mediawiki +332 -0
- data/Rakefile +11 -0
- data/elasticity.gemspec +29 -0
- data/lib/elasticity.rb +16 -0
- data/lib/elasticity/aws_request.rb +52 -0
- data/lib/elasticity/emr.rb +282 -0
- data/lib/elasticity/hive_job.rb +71 -0
- data/lib/elasticity/job_flow.rb +53 -0
- data/lib/elasticity/job_flow_step.rb +36 -0
- data/lib/elasticity/pig_job.rb +112 -0
- data/lib/elasticity/simple_job.rb +50 -0
- data/lib/elasticity/version.rb +3 -0
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_successful.yml +38 -0
- data/spec/fixtures/vcr_cassettes/add_instance_groups/one_group_unsuccessful.yml +35 -0
- data/spec/fixtures/vcr_cassettes/add_jobflow_steps/add_multiple_steps.yml +252 -0
- data/spec/fixtures/vcr_cassettes/describe_jobflows/all_jobflows.yml +69 -0
- data/spec/fixtures/vcr_cassettes/direct/terminate_jobflow.yml +32 -0
- data/spec/fixtures/vcr_cassettes/hive_job/hive_ads.yml +35 -0
- data/spec/fixtures/vcr_cassettes/modify_instance_groups/set_instances_to_3.yml +32 -0
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports.yml +35 -0
- data/spec/fixtures/vcr_cassettes/pig_job/apache_log_reports_with_bootstrap.yml +35 -0
- data/spec/fixtures/vcr_cassettes/run_jobflow/word_count.yml +35 -0
- data/spec/fixtures/vcr_cassettes/set_termination_protection/nonexistent_job_flows.yml +35 -0
- data/spec/fixtures/vcr_cassettes/set_termination_protection/protect_multiple_job_flows.yml +32 -0
- data/spec/fixtures/vcr_cassettes/terminate_jobflows/one_jobflow.yml +32 -0
- data/spec/lib/elasticity/aws_request_spec.rb +62 -0
- data/spec/lib/elasticity/emr_spec.rb +794 -0
- data/spec/lib/elasticity/hive_job_spec.rb +96 -0
- data/spec/lib/elasticity/job_flow_spec.rb +139 -0
- data/spec/lib/elasticity/job_flow_step_spec.rb +76 -0
- data/spec/lib/elasticity/pig_job_spec.rb +211 -0
- data/spec/spec_helper.rb +43 -0
- metadata +253 -0
data/Rakefile
ADDED
data/elasticity.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.push(lib_dir)
require "elasticity/version"

# Gem specification for wakoopa-elasticity. The version comes from
# Elasticity::VERSION and the packaged file lists come from `git ls-files`,
# so the gem has to be built from inside a git checkout.
Gem::Specification.new do |spec|
  spec.name        = "wakoopa-elasticity"
  spec.version     = Elasticity::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ["Robert Slifka"]
  spec.homepage    = "http://www.github.com/rslifka/elasticity"
  spec.summary     = "Programmatic access to Amazon's Elastic Map Reduce service."
  spec.description = "Programmatic access to Amazon's Elastic Map Reduce service."

  # Runtime dependencies (no version constraints).
  ["rest-client", "nokogiri"].each do |gem_name|
    spec.add_dependency(gem_name)
  end

  # Development dependencies: gem name followed by an optional requirement.
  [
    ["autotest-fsevent"],
    ["autotest-growl"],
    ["rake"],
    ["rspec", ">= 2.5.0"],
    ["vcr", ">= 1.5.1"],
    ["webmock", ">= 1.6.2"],
    ["ZenTest"]
  ].each do |name_and_requirement|
    spec.add_development_dependency(*name_and_requirement)
  end

  spec.files         = %x(git ls-files).split("\n")
  spec.test_files    = %x(git ls-files -- {test,spec,features}/*).split("\n")
  spec.executables   = %x(git ls-files -- bin/*).split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]
end
|
data/lib/elasticity.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'base64'
|
2
|
+
|
3
|
+
require 'rest_client'
|
4
|
+
require 'nokogiri'
|
5
|
+
|
6
|
+
require 'elasticity/aws_request'
|
7
|
+
require 'elasticity/emr'
|
8
|
+
require 'elasticity/job_flow'
|
9
|
+
require 'elasticity/job_flow_step'
|
10
|
+
|
11
|
+
require 'elasticity/simple_job'
|
12
|
+
require 'elasticity/hive_job'
|
13
|
+
require 'elasticity/pig_job'
|
14
|
+
|
15
|
+
# Namespace for the Elasticity library. It is intentionally empty here; the
# files required above reopen this module to define their classes
# (e.g. AwsRequest, EMR, HiveJob).
module Elasticity
|
16
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Elasticity

  # Builds, signs (AWS Signature Version 2, HmacSHA256) and issues query-style
  # GET requests against the Amazon Elastic MapReduce endpoint.
  class AwsRequest

    # aws_access_key_id     - access key sent as the AWSAccessKeyId parameter.
    # aws_secret_access_key - secret used as the HMAC key when signing.
    # options               - optional settings:
    #   :secure => use https when truthy (default: true), http otherwise
    #   :region => when set, requests go to
    #              elasticmapreduce.<region>.amazonaws.com instead of
    #              elasticmapreduce.amazonaws.com
    def initialize(aws_access_key_id, aws_secret_access_key, options = {})
      @access_key = aws_access_key_id
      @secret_key = aws_secret_access_key
      @options = {:secure => true}.merge(options)
    end

    # Signs +params+ (a hash of AWS parameter names to values) and performs
    # an HTTP GET against the EMR endpoint selected by the options given to
    # #initialize. Returns whatever RestClient.get returns; RestClient
    # exceptions are not rescued here.
    def aws_emr_request(params)
      host = @options[:region] ? "elasticmapreduce.#{@options[:region]}.amazonaws.com" : "elasticmapreduce.amazonaws.com"
      protocol = @options[:secure] ? "https" : "http"

      signed_params = sign_params(params, "GET", host, "/")
      signed_request = "#{protocol}://#{host}?#{signed_params}"
      RestClient.get signed_request
    end

    # (Used from RightScale's right_aws gem.)
    # EC2, SQS, SDB and EMR requests must be signed by this guy.
    # See: http://docs.amazonwebservices.com/AmazonSimpleDB/2007-11-07/DeveloperGuide/index.html?REST_RESTAuth.html
    # http://developer.amazonwebservices.com/connect/entry.jspa?externalID=1928
    #
    # service_hash - request parameters; keys must be mutually sortable
    #                (e.g. all Strings). The hash is NOT modified.
    # http_verb    - HTTP method the request will be sent with.
    # host, uri    - host name and path that go into the string to sign.
    #
    # Returns the escaped query string, with keys in sorted order and the
    # Signature parameter appended last.
    def sign_params(service_hash, http_verb, host, uri)
      # Merge into a copy so the caller's hash (which may come straight from
      # user code) is not polluted with the signing parameters.
      signed_hash = service_hash.merge(
        "AWSAccessKeyId"   => @access_key,
        "Timestamp"        => Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z"),
        "SignatureVersion" => "2",
        "SignatureMethod"  => "HmacSHA256"
      )
      canonical_string = signed_hash.keys.sort.map do |key|
        "#{AwsRequest.aws_escape(key)}=#{AwsRequest.aws_escape(signed_hash[key])}"
      end.join('&')
      string_to_sign = "#{http_verb.to_s.upcase}\n#{host.downcase}\n#{uri}\n#{canonical_string}"
      signature = AwsRequest.aws_escape(Base64.encode64(OpenSSL::HMAC.digest("sha256", @secret_key, string_to_sign)).strip)
      "#{canonical_string}&Signature=#{signature}"
    end

    class << self

      # (Used from RightScale's right_aws gem)
      # Escape a string according to Amazon's rules.
      # See: http://docs.amazonwebservices.com/AmazonSimpleDB/2007-11-07/DeveloperGuide/index.html?REST_RESTAuth.html
      #
      # Every character outside of [a-zA-Z0-9._~-] is replaced by the
      # upper-case %XX escape of each of its bytes. +param+ is converted
      # with #to_s first, so symbols, numbers and booleans are accepted.
      def aws_escape(param)
        param.to_s.gsub(/[^a-zA-Z0-9._~-]+/) do |unsafe|
          # Work on bytes, not characters: a multibyte character must
          # produce one %XX group per byte.
          unsafe.unpack('C*').map { |byte| '%%%02X' % byte }.join
        end
      end

    end

  end

end
|
@@ -0,0 +1,282 @@
|
|
1
|
+
module Elasticity

  # Client for Amazon Elastic MapReduce operations. The operation methods
  # build a Ruby-style params hash, translate it to AWS parameter names with
  # EMR.convert_ruby_to_aws and send it through an Elasticity::AwsRequest.
  #
  # Every operation method except #direct accepts an optional block, which
  # is yielded the raw AWS response.
  class EMR

    # Credentials and options are handed unchanged to Elasticity::AwsRequest.
    def initialize(aws_access_key_id, aws_secret_access_key, options = {})
      @aws_request = Elasticity::AwsRequest.new(aws_access_key_id, aws_secret_access_key, options)
    end

    # Lists all jobflows in all states. Any +params+ given are merged into
    # the DescribeJobFlows request. Returns the result of
    # JobFlow.from_members_nodeset for the <member> nodes of the response.
    def describe_jobflows(params = {})
      aws_params = EMR.convert_ruby_to_aws(params.merge(:operation => "DescribeJobFlows"))
      aws_result = @aws_request.aws_emr_request(aws_params)
      xml_doc = Nokogiri::XML(aws_result)
      xml_doc.remove_namespaces!
      yield aws_result if block_given?
      JobFlow.from_members_nodeset(xml_doc.xpath("/DescribeJobFlowsResponse/DescribeJobFlowsResult/JobFlows/member"))
    end

    # Adds a new group of instances to the specified jobflow.  Elasticity maps a
    # more Ruby-like syntax to the Amazon options.  An exhaustive hash follows although
    # not all of these options are required (or valid!) at once.  Please see the
    # EMR docs for details although even then you're going to need to experiment :)
    #
    #   instance_group_config = {
    #     :bid_price => 5,
    #     :instance_count => 1,
    #     :instance_role => "TASK",
    #     :market => "SPOT",
    #     :name => "Go Canucks Go!",
    #     :type => "m1.small"
    #   }
    #
    # add_instance_groups takes an array of {}.  Returns an array of the instance
    # group IDs that were created by the specified configs.
    #
    #   ["ig-2GOVEN6HVJZID", "ig-1DU9M2UQMM051", "ig-3DZRW4Y2X4S", ...]
    #
    # Raises ArgumentError (with the AWS error message) when AWS answers
    # with a 400 Bad Request.
    def add_instance_groups(jobflow_id, instance_group_configs)
      params = {
        :operation => "AddInstanceGroups",
        :job_flow_id => jobflow_id,
        :instance_groups => instance_group_configs
      }
      aws_result = @aws_request.aws_emr_request(EMR.convert_ruby_to_aws(params))
      xml_doc = Nokogiri::XML(aws_result)
      xml_doc.remove_namespaces!
      instance_group_ids = xml_doc.xpath("/AddInstanceGroupsResponse/AddInstanceGroupsResult/InstanceGroupIds/member").map do |member|
        member.text
      end
      yield aws_result if block_given?
      instance_group_ids
    rescue RestClient::BadRequest => e
      raise ArgumentError, EMR.parse_error_response(e.http_body)
    end

    # Add a step (or steps) to the specified job flow.
    #
    #   emr.add_jobflow_steps("j-123", {
    #     :steps => [
    #       {
    #         :action_on_failure => "TERMINATE_JOB_FLOW",
    #         :hadoop_jar_step => {
    #           :args => [
    #             "s3://elasticmapreduce/libs/pig/pig-script",
    #             "--base-path",
    #             "s3://elasticmapreduce/libs/pig/",
    #             "--install-pig"
    #           ],
    #           :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar"
    #         },
    #         :name => "Setup Pig"
    #       }
    #     ]
    #   })
    #
    # Returns the value of the given block, or nil without a block.  Raises
    # ArgumentError (with the AWS error message) on a 400 Bad Request.
    def add_jobflow_steps(jobflow_id, steps_config)
      params = {
        :operation => "AddJobFlowSteps",
        :job_flow_id => jobflow_id
      }.merge!(steps_config)
      aws_result = @aws_request.aws_emr_request(EMR.convert_ruby_to_aws(params))
      yield aws_result if block_given?
    rescue RestClient::BadRequest => e
      raise ArgumentError, EMR.parse_error_response(e.http_body)
    end

    # Set the number of instances in the specified instance groups to the
    # specified counts.  Note that this modifies the *request* count, which
    # is not the same as the *running* count.  I.e. you request instances
    # and then wait for them to be created.
    #
    # Takes a {} of instance group IDs => desired instance count.
    #
    #   {"ig-1" => 40, "ig-2" => 5, ...}
    #
    # Raises ArgumentError (with the AWS error message) on a 400 Bad Request.
    def modify_instance_groups(instance_group_config)
      params = {
        :operation => "ModifyInstanceGroups",
        :instance_groups => instance_group_config.map { |k, v| {:instance_group_id => k, :instance_count => v} }
      }
      aws_result = @aws_request.aws_emr_request(EMR.convert_ruby_to_aws(params))
      yield aws_result if block_given?
    rescue RestClient::BadRequest => e
      raise ArgumentError, EMR.parse_error_response(e.http_body)
    end

    # Start a job flow with the specified configuration.  This is a very thin
    # wrapper around the AWS API, so in order to use it directly you'll need
    # to have the PDF API reference handy, which can be found here:
    #
    # http://awsdocs.s3.amazonaws.com/ElasticMapReduce/20090331/emr-api-20090331.pdf
    #
    # Here is a sample job flow configuration that should help.  This job flow
    # starts by installing Pig then running a Pig script.  It is based off of the
    # Pig demo script from Amazon.
    #
    #   emr.run_job_flow({
    #     :name => "Elasticity Test Flow (EMR Pig Script)",
    #     :instances => {
    #       :ec2_key_name => "sharethrough-dev",
    #       :hadoop_version => "0.20",
    #       :instance_count => 2,
    #       :master_instance_type => "m1.small",
    #       :placement => {
    #         :availability_zone => "us-east-1a"
    #       },
    #       :slave_instance_type => "m1.small",
    #     },
    #     :steps => [
    #       {
    #         :action_on_failure => "TERMINATE_JOB_FLOW",
    #         :hadoop_jar_step => {
    #           :args => [
    #             "s3://elasticmapreduce/libs/pig/pig-script",
    #             "--base-path",
    #             "s3://elasticmapreduce/libs/pig/",
    #             "--install-pig"
    #           ],
    #           :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar"
    #         },
    #         :name => "Setup Pig"
    #       },
    #       {
    #         :action_on_failure => "TERMINATE_JOB_FLOW",
    #         :hadoop_jar_step => {
    #           :args => [
    #             "s3://elasticmapreduce/libs/pig/pig-script",
    #             "--run-pig-script",
    #             "--args",
    #             "-p",
    #             "INPUT=s3n://elasticmapreduce/samples/pig-apache/input",
    #             "-p",
    #             "OUTPUT=s3n://slif-elasticity/pig-apache/output/2011-04-19",
    #             "s3n://elasticmapreduce/samples/pig-apache/do-reports.pig"
    #           ],
    #           :jar => "s3://elasticmapreduce/libs/script-runner/script-runner.jar"
    #         },
    #         :name => "Run Pig Script"
    #       }
    #     ]
    #   })
    #
    # Returns the text of the JobFlowId element of the response.  Raises
    # ArgumentError (with the AWS error message) on a 400 Bad Request.
    def run_job_flow(job_flow_config)
      params = {
        :operation => "RunJobFlow",
      }.merge!(job_flow_config)
      aws_result = @aws_request.aws_emr_request(EMR.convert_ruby_to_aws(params))
      yield aws_result if block_given?
      xml_doc = Nokogiri::XML(aws_result)
      xml_doc.remove_namespaces!
      xml_doc.xpath("/RunJobFlowResponse/RunJobFlowResult/JobFlowId").text
    rescue RestClient::BadRequest => e
      raise ArgumentError, EMR.parse_error_response(e.http_body)
    end

    # Enable or disable "termination protection" on the specified job flows.
    # Termination protection prevents a job flow from being terminated by a
    # user initiated action, although the job flow will still terminate
    # naturally.
    #
    # Takes an [] of job flow IDs.
    #
    #   ["j-1B4D1XP0C0A35", "j-1YG2MYL0HVYS5", ...]
    #
    # Raises ArgumentError (with the AWS error message) on a 400 Bad Request.
    def set_termination_protection(jobflow_ids, protection_enabled=true)
      params = {
        :operation => "SetTerminationProtection",
        :termination_protected => protection_enabled,
        :job_flow_ids => jobflow_ids
      }
      aws_result = @aws_request.aws_emr_request(EMR.convert_ruby_to_aws(params))
      yield aws_result if block_given?
    rescue RestClient::BadRequest => e
      raise ArgumentError, EMR.parse_error_response(e.http_body)
    end

    # Terminate the specified jobflow.  Amazon does not define a return value
    # for this operation, so you'll need to poll #describe_jobflows to see
    # the state of the jobflow.  Raises ArgumentError if the specified job
    # flow does not exist (any 400 Bad Request is reported this way).
    def terminate_jobflows(jobflow_id)
      params = {
        :operation => "TerminateJobFlows",
        :job_flow_ids => [jobflow_id]
      }
      aws_result = @aws_request.aws_emr_request(EMR.convert_ruby_to_aws(params))
      yield aws_result if block_given?
    rescue RestClient::BadRequest
      raise ArgumentError, "Job flow '#{jobflow_id}' does not exist."
    end

    # Pass the specified params hash directly through to the AWS request URL.
    # Use this if you want to perform an operation that hasn't yet been wrapped
    # by Elasticity or you just want to see the response XML for yourself :)
    # No key conversion is applied, so keys must already be AWS parameter names.
    def direct(params)
      @aws_request.aws_emr_request(params)
    end

    # NOTE: the helpers below are public class methods; the instance methods
    # above call them with an explicit receiver (EMR.convert_ruby_to_aws).
    class << self

      # AWS error responses all follow the same form.  Extract the message from
      # the error document.
      def parse_error_response(error_xml)
        xml_doc = Nokogiri::XML(error_xml)
        xml_doc.remove_namespaces!
        xml_doc.xpath("/ErrorResponse/Error/Message").text
      end

      # Since we use the same structure as AWS, we can generate AWS param names
      # from the Ruby versions of those names (and the param nesting).
      #
      #   {:job_flow_ids => ["j-1"]}         => {"JobFlowIds.member.1" => "j-1"}
      #   {:instances => {:ec2_key_name => "k"}} => {"Instances.Ec2KeyName" => "k"}
      #
      # Arrays become 1-based "<Key>.member.<n>" entries.  Hash members are
      # flattened recursively; any other member (String, Integer, Symbol, ...)
      # is used as the value as-is.
      def convert_ruby_to_aws(params)
        result = {}
        params.each do |key, value|
          aws_key = camelize(key.to_s)
          case value
            when Array
              value.each_with_index do |item, index|
                member_key = "#{aws_key}.member.#{index + 1}"
                if item.is_a?(Hash)
                  convert_ruby_to_aws(item).each do |nested_key, nested_value|
                    result["#{member_key}.#{nested_key}"] = nested_value
                  end
                else
                  # Scalars of any type are passed through; only hashes
                  # have nested keys to expand.
                  result[member_key] = item
                end
              end
            when Hash
              convert_ruby_to_aws(value).each do |nested_key, nested_value|
                result["#{aws_key}.#{nested_key}"] = nested_value
              end
            else
              result[aws_key] = value
          end
        end
        result
      end

      # (Used from Rails' ActiveSupport)
      #
      #   camelize("job_flow_id")        => "JobFlowId"
      #   camelize("job_flow_id", false) => "jobFlowId"
      def camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true)
        word = lower_case_and_underscored_word.to_s
        if first_letter_in_uppercase
          word.gsub(/\/(.?)/) { "::" + $1.upcase }.gsub(/(^|_)(.)/) { $2.upcase }
        else
          # String#first only exists with ActiveSupport loaded, so slice the
          # first character with plain Ruby.  #to_s covers the empty string,
          # for which [1..-1] is nil.
          word[0, 1] + camelize(word)[1..-1].to_s
        end
      end

    end

  end

end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Elasticity

  # HiveJob lets you quickly and easily kick off a Hive jobflow without
  # having to understand the entirety of the EMR API.
  class HiveJob < Elasticity::SimpleJob

    # Passes the credentials on to SimpleJob and sets the default job name.
    def initialize(aws_access_key_id, aws_secret_access_key)
      super(aws_access_key_id, aws_secret_access_key)
      @name = "Elasticity Hive Job"
    end

    # Run the specified Hive script with the specified variables.
    #
    #   hive = Elasticity::HiveJob.new("access", "secret")
    #   jobflow_id = hive.run('s3n://slif-hive/test.q', {
    #     'SCRIPTS' => 's3n://slif-test/scripts',
    #     'OUTPUT'  => 's3n://slif-test/output',
    #     'XREFS'   => 's3n://slif-test/xrefs'
    #   })
    #
    # The variables are accessible within your Hive scripts by using the
    # standard ${NAME} syntax.  E.g.
    #
    #   ADD JAR ${SCRIPTS}/jsonserde.jar;
    #
    # The job flow has two steps: "Setup Hive" (installs Hive) followed by
    # "Run Hive Script".  Returns whatever @emr.run_job_flow returns.
    def run(hive_script, hive_variables={})
      script_runner_jar = "s3://elasticmapreduce/libs/script-runner/script-runner.jar"

      # Arguments for the second step: the script, then one "-d NAME=value"
      # pair per variable.
      run_args = [
        "s3://elasticmapreduce/libs/hive/hive-script",
        "--run-hive-script",
        "--args",
        "-f", hive_script
      ]
      hive_variables.each do |variable_name, value|
        run_args << "-d" << "#{variable_name}=#{value}"
      end

      setup_step = {
        :action_on_failure => "TERMINATE_JOB_FLOW",
        :hadoop_jar_step => {
          :jar => script_runner_jar,
          :args => [
            "s3://elasticmapreduce/libs/hive/hive-script",
            "--base-path", "s3://elasticmapreduce/libs/hive/",
            "--install-hive"
          ]
        },
        :name => "Setup Hive"
      }

      run_step = {
        :action_on_failure => @action_on_failure,
        :hadoop_jar_step => {
          :jar => script_runner_jar,
          :args => run_args
        },
        :name => "Run Hive Script"
      }

      instances = {
        :ec2_key_name => @ec2_key_name,
        :hadoop_version => @hadoop_version,
        :instance_count => @instance_count,
        :master_instance_type => @master_instance_type,
        :slave_instance_type => @slave_instance_type
      }

      jobflow_config = {
        :name => @name,
        :instances => instances,
        :steps => [setup_step, run_step]
      }

      # Only send a log URI when one has been configured.
      jobflow_config[:log_uri] = @log_uri if @log_uri

      @emr.run_job_flow(jobflow_config)
    end

  end

end
|