elastic-mapreduce 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +51 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +16 -0
- data/LICENSE.txt +393 -0
- data/NOTICE.txt +26 -0
- data/README +1007 -0
- data/Rakefile +35 -0
- data/VERSION +1 -0
- data/bin/elastic-mapreduce +27 -0
- data/cacert.pem +280 -0
- data/elastic-mapreduce.gemspec +104 -0
- data/lib/amazon/aws/exceptions.rb +211 -0
- data/lib/amazon/coral/awsquery.rb +128 -0
- data/lib/amazon/coral/awsquerychainhelper.rb +92 -0
- data/lib/amazon/coral/awsqueryhandler.rb +170 -0
- data/lib/amazon/coral/awsqueryurihandler.rb +34 -0
- data/lib/amazon/coral/call.rb +68 -0
- data/lib/amazon/coral/dispatcher.rb +33 -0
- data/lib/amazon/coral/ec2client.rb +91 -0
- data/lib/amazon/coral/elasticmapreduceclient.rb +198 -0
- data/lib/amazon/coral/handler.rb +20 -0
- data/lib/amazon/coral/httpdelegationhelper.rb +27 -0
- data/lib/amazon/coral/httpdestinationhandler.rb +36 -0
- data/lib/amazon/coral/httphandler.rb +124 -0
- data/lib/amazon/coral/identityhandler.rb +32 -0
- data/lib/amazon/coral/job.rb +25 -0
- data/lib/amazon/coral/logfactory.rb +35 -0
- data/lib/amazon/coral/option.rb +70 -0
- data/lib/amazon/coral/orchestrator.rb +49 -0
- data/lib/amazon/coral/querystringmap.rb +93 -0
- data/lib/amazon/coral/service.rb +130 -0
- data/lib/amazon/coral/simplelog.rb +98 -0
- data/lib/amazon/coral/urlencoding.rb +19 -0
- data/lib/amazon/coral/v0signaturehandler.rb +33 -0
- data/lib/amazon/coral/v0signaturehelper.rb +83 -0
- data/lib/amazon/coral/v1signaturehandler.rb +32 -0
- data/lib/amazon/coral/v1signaturehelper.rb +58 -0
- data/lib/amazon/coral/v2signaturehandler.rb +46 -0
- data/lib/amazon/coral/v2signaturehelper.rb +76 -0
- data/lib/amazon/retry_delegator.rb +66 -0
- data/lib/amazon/stderr_logger.rb +23 -0
- data/lib/client.rb +117 -0
- data/lib/commands.rb +1690 -0
- data/lib/credentials.rb +86 -0
- data/lib/ec2_client_wrapper.rb +73 -0
- data/lib/json/lexer.rb +294 -0
- data/lib/json/objects.rb +200 -0
- data/lib/json.rb +58 -0
- data/lib/simple_executor.rb +11 -0
- data/lib/simple_logger.rb +38 -0
- data/lib/uuidtools/version.rb +32 -0
- data/lib/uuidtools.rb +655 -0
- data/run_tests.rb +8 -0
- data/samples/freebase/code/freebase_jobflow.json +44 -0
- data/samples/similarity/lastfm_jobflow.json +78 -0
- data/samples/wordSplitter.py +18 -0
- data/tests/commands_test.rb +587 -0
- data/tests/credentials.json +7 -0
- data/tests/example.json +14 -0
- metadata +154 -0
data/lib/commands.rb
ADDED
|
@@ -0,0 +1,1690 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright 2008-2010 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
|
|
4
|
+
require 'set'
|
|
5
|
+
require 'credentials'
|
|
6
|
+
require 'optparse'
|
|
7
|
+
require 'client'
|
|
8
|
+
require 'ec2_client_wrapper'
|
|
9
|
+
require 'open3'
|
|
10
|
+
|
|
11
|
+
module Commands

  # API version string sent to the Elastic MapReduce service.
  ELASTIC_MAPREDUCE_CLIENT_VERSION = "2010-11-11"

  # Registry of parsed command objects plus the global options shared by
  # all of them. Also owns the OptionParser (@opts) that the parse_*
  # helpers register callbacks on.
  class Commands
    attr_accessor :opts, :global_options, :commands, :logger, :executor

    # logger   - object responding to puts/info, used for user output
    # executor - object responding to exec(cmd), used to run shell commands
    def initialize(logger, executor)
      @commands = []
      @opts = nil
      @global_options = {
        :jobflow => []
      }
      @logger = logger
      @executor = executor
    end

    # Most recently added command (options attach to it).
    def last
      @commands.last
    end

    # Append a parsed command.
    def <<(value)
      @commands << value
    end

    def size
      @commands.size
    end

    # Run each command's validation; commands raise RuntimeError on bad input.
    def validate
      @commands.each { |x| x.validate }
    end

    # Execute every command in the order parsed against the given client.
    def enact(client)
      @commands.each { |x| x.enact(client) }
    end

    def each(&block)
      @commands.each(&block)
    end

    # Register a top-level command flag: when seen on the command line,
    # a new instance of klass is appended to this registry.
    def parse_command(klass, name, description)
      @opts.on(name, description) do |arg|
        self << klass.new(name, description, arg, self)
      end
    end

    # Register an option flag: when seen, the option object attaches itself
    # to the most recent matching parent command (see CommandOption#attach).
    def parse_option(klass, name, description, parent_commands, *args)
      @opts.on(name, description) do |arg|
        klass.new(name, description, arg, parent_commands, self, *args).attach(commands)
      end
    end

    # Bulk form of parse_option. Each entry is
    # [klass, name, description, *extra_args].
    def parse_options(parent_commands, options)
      for option in options do
        klass, name, description = option[0..2]
        args = option[3..-1]
        self.parse_option(klass, name, description, parent_commands, *args)
      end
    end

    # Collect bare jobflow ids (j-XXXXX...) from leftover args into the
    # global :jobflow list.
    def parse_jobflows(args)
      for arg in args do
        if arg =~ /^j-\w{5,20}$/ then
          @global_options[:jobflow] << arg
        end
      end
    end

    # True if the global option was supplied (non-nil).
    def have(field_symbol)
      return @global_options[field_symbol] != nil
    end

    # Global option lookup with an optional default.
    def get_field(field_symbol, default_value=nil)
      value = @global_options[field_symbol]
      if ( value == nil ) then
        return default_value
      else
        return value
      end
    end

    # Delegate shell execution to the injected executor (testable seam).
    def exec(cmd)
      @executor.exec(cmd)
    end
  end
|
|
97
|
+
|
|
98
|
+
  # Base class for all CLI commands. Provides option storage and the layered
  # field-resolution used throughout: own attribute -> global option ->
  # default_<field> method -> supplied default.
  class Command
    attr_accessor :name, :description, :arg, :commands, :logger

    def initialize(name, description, arg, commands)
      @name = name
      @description = description
      @arg = arg
      @commands = commands
      @logger = commands.logger
    end

    # test any constraints that the command has
    def validate
    end

    # action the command
    def enact(client)
    end

    # Store an option value on this command via its accessor. Array-valued
    # fields accumulate; scalar fields may only be set once.
    def option(argument_name, argument_symbol, value)
      var = self.send(argument_symbol)
      if var == nil then
        self.send((argument_symbol.to_s + "=").to_sym, value)
      elsif var.is_a?(Array) then
        var << value
      else
        raise RuntimeError, "Repeating #{argument_name} is not allowed, previous value was #{var.inspect}"
      end
    end

    # Resolve a field by checking, in order: this command's own accessor,
    # the registry's global options, a default_<field> method if defined,
    # and finally default_value. First non-nil wins.
    def get_field(field_symbol, default_value=nil)
      value = nil
      if respond_to?(field_symbol) then
        value = self.send(field_symbol)
      end
      if value == nil then
        value = @commands.global_options[field_symbol]
      end
      default_field_symbol = ("default_" + field_symbol.to_s).to_sym
      if value == nil && respond_to?(default_field_symbol) then
        value = self.send(default_field_symbol)
      end
      if value == nil then
        value = default_value
      end
      return value
    end

    # Like get_field but raises RuntimeError(error_msg) when unresolved.
    # NOTE: shadows Kernel#require inside command instances.
    def require(field_symbol, error_msg)
      value = get_field(field_symbol)
      if value == nil then
        raise RuntimeError, error_msg
      end
      return value
    end

    # True when the field resolves to a non-nil value.
    def have(field_symbol)
      value = get_field(field_symbol)
      return value != nil
    end

    # Walk nested hashes/arrays by the leading keys and compare the found
    # element against the final argument. Nil-safe at every hop.
    def has_value(obj, *args)
      while obj != nil && args.size > 1 do
        obj = obj[args.shift]
      end
      return obj == args[0]
    end

    # Nil-safe nested lookup: resolve(h, 'a', 'b') ~ h['a'] && h['a']['b'].
    def resolve(obj, *args)
      while obj != nil && args.size > 0 do
        obj = obj[args.shift]
      end
      return obj
    end

    # Return the single jobflow id from the global :jobflow list, raising
    # if zero or more than one were supplied.
    def require_single_jobflow
      jobflow_ids = get_field(:jobflow)
      if jobflow_ids.size == 0 then
        raise RuntimeError, "A jobflow is required to use option #{name}"
      elsif jobflow_ids.size > 1 then
        raise RuntimeError, "The option #{name} can only act on a single jobflow"
      end
      return jobflow_ids.first
    end

  end
|
|
184
|
+
|
|
185
|
+
class CommandOption
|
|
186
|
+
attr_accessor :name, :description, :arg, :parent_commands, :commands
|
|
187
|
+
|
|
188
|
+
def initialize(name, description, arg, parent_commands, commands, field_symbol=nil, pattern=nil)
|
|
189
|
+
@name = name
|
|
190
|
+
@description = description
|
|
191
|
+
@arg = arg
|
|
192
|
+
@parent_commands = parent_commands
|
|
193
|
+
@commands = commands
|
|
194
|
+
@field_symbol = field_symbol
|
|
195
|
+
@pattern = pattern
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
def attach(commands)
|
|
199
|
+
for command in commands.reverse do
|
|
200
|
+
command_name = command.name.split(/\s+/).first
|
|
201
|
+
if @parent_commands.include?(command_name) || @parent_commands.include?(command.class) then
|
|
202
|
+
return command
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
raise RuntimeError, "Expected argument #{name} to follow one of #{parent_commands.join(", ")}"
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
  # Base class for commands that contribute jobflow steps. Provides default
  # S3 paths for the bundled helper scripts (all rooted at :apps_path) and
  # the logic that injects an interactive "install" step ahead of script
  # steps that need it.
  class StepCommand < Command
    attr_accessor :args, :step_name, :step_action, :apps_path, :beta_path
    attr_accessor :script_runner_path, :pig_path, :hive_path, :pig_cmd, :hive_cmd, :enable_debugging_path

    def initialize(*args)
      super(*args)
      @args = []
    end

    # Defaults below feed Command#get_field's default_<field> lookup.

    def default_script_runner_path
      File.join(get_field(:apps_path), "libs/script-runner/script-runner.jar")
    end

    def default_pig_path
      File.join(get_field(:apps_path), "libs/pig/")
    end

    def default_pig_cmd
      [ File.join(get_field(:pig_path), "pig-script"), "--base-path",
        get_field(:pig_path) ]
    end

    def default_hive_path
      File.join(get_field(:apps_path), "libs/hive/")
    end

    def default_hive_cmd
      [ File.join(get_field(:hive_path), "hive-script"), "--base-path",
        get_field(:hive_path) ]
    end

    def default_resize_jobflow_cmd
      File.join(get_field(:apps_path), "libs/resize-job-flow/0.1/resize-job-flow.jar")
    end

    def default_enable_debugging_path
      File.join(get_field(:apps_path), "libs/state-pusher/0.1")
    end

    def validate
      super
      require(:apps_path, "--apps-path path must be defined")
    end

    # The option's own argument (if any) prepended to the accumulated --arg list.
    def script_args
      if @arg then
        [ @arg ] + @args
      else
        @args
      end
    end

    # Accumulated extra args in "--args a b c" form, or [] when none.
    def extra_args
      if @args != nil && @args.size > 0 then
        return ["--args"] + @args
      else
        return []
      end
    end

    # Ensure an install step (e.g. PigInteractiveCommand) precedes the first
    # step of self's class in sc. If the jobflow already ran a non-FAILED
    # install step, sc is returned untouched. Otherwise duplicate install
    # commands in sc are dropped and a fresh install_step is inserted just
    # before the first command of self's class.
    def ensure_install_cmd(jobflow, sc, install_step_class)
      has_install = false
      install_step = install_step_class.new_from_commands(commands, self)
      if install_step.jobflow_has_install_step(jobflow) then
        return sc
      else
        new_sc = []
        has_install_pi = false
        for sc_cmd in sc do
          if sc_cmd.is_a?(install_step_class) then
            if has_install_pi then
              next  # drop duplicate install commands
            else
              has_install_pi = true
            end
          end
          if sc_cmd.is_a?(self.class) then
            if ! has_install_pi then
              has_install_pi = true
              new_sc << install_step
              install_step.validate
            end
          end
          new_sc << sc_cmd
        end
      end
      return new_sc
    end

    # Hook: subclasses may reorder/augment the step command list. Default is
    # the identity.
    def reorder_steps(jobflow, sc)
      return sc
    end
  end
|
|
302
|
+
|
|
303
|
+
class ResizeJobflowCommand < StepCommand
|
|
304
|
+
def validate
|
|
305
|
+
super
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
def steps
|
|
309
|
+
step = {
|
|
310
|
+
"Name" => get_field(:step_name, "Resize Job Flow Command"),
|
|
311
|
+
"ActionOnFailure" => get_field(:step_action, "CANCEL_AND_WAIT"),
|
|
312
|
+
"HadoopJarStep" => {
|
|
313
|
+
"Jar" => get_field(:resize_jobflow_cmd),
|
|
314
|
+
"Args" => @args
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
return [ step ]
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
class EnableDebuggingCommand < StepCommand
|
|
323
|
+
def steps
|
|
324
|
+
step = {
|
|
325
|
+
"Name" => get_field(:step_name, "Setup Hadoop Debugging"),
|
|
326
|
+
"ActionOnFailure" => get_field(:step_action, "TERMINATE_JOB_FLOW"),
|
|
327
|
+
"HadoopJarStep" => {
|
|
328
|
+
"Jar" => get_field(:script_runner_path),
|
|
329
|
+
"Args" => [ File.join(get_field(:enable_debugging_path), "fetch") ]
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
return [ step ]
|
|
333
|
+
end
|
|
334
|
+
|
|
335
|
+
def reorder_steps(jobflow, sc)
|
|
336
|
+
# remove enable debugging steps and add self at start
|
|
337
|
+
new_sc = []
|
|
338
|
+
for step_cmd in sc do
|
|
339
|
+
if ! step_cmd.is_a?(EnableDebuggingCommand) then
|
|
340
|
+
new_sc << step_cmd
|
|
341
|
+
end
|
|
342
|
+
end
|
|
343
|
+
return [ self ] + new_sc
|
|
344
|
+
end
|
|
345
|
+
end
|
|
346
|
+
|
|
347
|
+
class PigScriptCommand < StepCommand
|
|
348
|
+
def steps
|
|
349
|
+
mandatory_args = [ "--run-pig-script", "--args", "-f" ]
|
|
350
|
+
if @arg then
|
|
351
|
+
mandatory_args << @arg
|
|
352
|
+
end
|
|
353
|
+
step = {
|
|
354
|
+
"Name" => get_field(:step_name, "Run Pig Script"),
|
|
355
|
+
"ActionOnFailure" => get_field(:step_action, "CANCEL_AND_WAIT"),
|
|
356
|
+
"HadoopJarStep" => {
|
|
357
|
+
"Jar" => get_field(:script_runner_path),
|
|
358
|
+
"Args" => get_field(:pig_cmd) + mandatory_args + @args
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
return [ step ]
|
|
362
|
+
end
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def reorder_steps(jobflow, sc)
|
|
366
|
+
return ensure_install_cmd(jobflow, sc, PigInteractiveCommand)
|
|
367
|
+
end
|
|
368
|
+
end
|
|
369
|
+
|
|
370
|
+
class PigInteractiveCommand < StepCommand
|
|
371
|
+
def self.new_from_commands(commands, parent)
|
|
372
|
+
sc = self.new("--pig-interactive", "Run a jobflow with Pig Installed", nil, commands)
|
|
373
|
+
sc.step_action = parent.step_action
|
|
374
|
+
return sc
|
|
375
|
+
end
|
|
376
|
+
|
|
377
|
+
def steps
|
|
378
|
+
step = {
|
|
379
|
+
"Name" => get_field(:step_name, "Setup Pig"),
|
|
380
|
+
"ActionOnFailure" => get_field(:step_action, "TERMINATE_JOB_FLOW"),
|
|
381
|
+
"HadoopJarStep" => {
|
|
382
|
+
"Jar" => get_field(:script_runner_path),
|
|
383
|
+
"Args" => get_field(:pig_cmd) + ["--install-pig"] + extra_args
|
|
384
|
+
}
|
|
385
|
+
}
|
|
386
|
+
return [ step ]
|
|
387
|
+
end
|
|
388
|
+
|
|
389
|
+
def jobflow_has_install_step(jobflow)
|
|
390
|
+
install_steps = jobflow['Steps'].select do |step|
|
|
391
|
+
step["ExecutionStatusDetail"]["State"] != "FAILED" &&
|
|
392
|
+
has_value(step, 'StepConfig', 'HadoopJarStep', 'Jar', get_field(:script_runner_path)) &&
|
|
393
|
+
has_value(step, 'StepConfig', 'HadoopJarStep', 'Args', 3, "--install-pig")
|
|
394
|
+
end
|
|
395
|
+
return install_steps.size > 0
|
|
396
|
+
end
|
|
397
|
+
end
|
|
398
|
+
|
|
399
|
+
class HiveCommand < StepCommand
|
|
400
|
+
attr_accessor :hive_versions
|
|
401
|
+
|
|
402
|
+
def get_version_args(require_single_version)
|
|
403
|
+
versions = get_field(:hive_versions, nil)
|
|
404
|
+
if versions == nil then
|
|
405
|
+
return []
|
|
406
|
+
end
|
|
407
|
+
if require_single_version then
|
|
408
|
+
if versions.split(",").size != 1 then
|
|
409
|
+
raise RuntimeError, "Only one version my be specified for --hive-script"
|
|
410
|
+
end
|
|
411
|
+
end
|
|
412
|
+
return ["--hive-versions", versions]
|
|
413
|
+
end
|
|
414
|
+
|
|
415
|
+
end
|
|
416
|
+
|
|
417
|
+
class HiveSiteCommand < HiveCommand
|
|
418
|
+
|
|
419
|
+
def steps
|
|
420
|
+
step = {
|
|
421
|
+
"Name" => get_field(:step_name, "Install Hive Site Configuration"),
|
|
422
|
+
"ActionOnFailure" => get_field(:step_action, "CANCEL_AND_WAIT"),
|
|
423
|
+
"HadoopJarStep" => {
|
|
424
|
+
"Jar" => get_field(:script_runner_path),
|
|
425
|
+
"Args" => get_field(:hive_cmd) + [ "--install-hive-site", "--hive-site=#{@arg}" ] +
|
|
426
|
+
extra_args + get_version_args(true)
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
return [ step ]
|
|
430
|
+
end
|
|
431
|
+
|
|
432
|
+
def reorder_steps(jobflow, sc)
|
|
433
|
+
return ensure_install_cmd(jobflow, sc, HiveInteractiveCommand)
|
|
434
|
+
end
|
|
435
|
+
end
|
|
436
|
+
|
|
437
|
+
class HiveScriptCommand < HiveCommand
|
|
438
|
+
|
|
439
|
+
def steps
|
|
440
|
+
mandatory_args = [ "--run-hive-script", "--args", "-f" ]
|
|
441
|
+
if @arg then
|
|
442
|
+
mandatory_args << @arg
|
|
443
|
+
end
|
|
444
|
+
step = {
|
|
445
|
+
"Name" => get_field(:step_name, "Run Hive Script"),
|
|
446
|
+
"ActionOnFailure" => get_field(:step_action, "CANCEL_AND_WAIT"),
|
|
447
|
+
"HadoopJarStep" => {
|
|
448
|
+
"Jar" => get_field(:script_runner_path),
|
|
449
|
+
"Args" => get_field(:hive_cmd) + get_version_args(true) + mandatory_args + @args
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
[ step ]
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
def reorder_steps(jobflow, sc)
|
|
456
|
+
return ensure_install_cmd(jobflow, sc, HiveInteractiveCommand)
|
|
457
|
+
end
|
|
458
|
+
end
|
|
459
|
+
|
|
460
|
+
  # Install step that sets up Hive on the cluster (also used standalone for
  # interactive Hive jobflows).
  class HiveInteractiveCommand < HiveCommand

    def steps
      step = {
        "Name" => get_field(:step_name, "Setup Hive"),
        "ActionOnFailure" => get_field(:step_action, "TERMINATE_JOB_FLOW"),
        "HadoopJarStep" => {
          "Jar" => get_field(:script_runner_path),
          "Args" => get_field(:hive_cmd) + [ "--install-hive" ] +
            get_version_args(false) + extra_args
        }
      }
      [ step ]
    end

    # True when the jobflow already contains a non-FAILED step that invoked
    # script-runner with --install-hive for the same Hive version.
    # NOTE(review): matches by positional Args indices 3 and 5, which assumes
    # hive_cmd contributed exactly three leading args — confirm if hive_cmd
    # is customized. get_version_args(true)[1] is nil when no version was
    # requested, in which case the index-5 probe compares against nil.
    def jobflow_has_install_step(jobflow)
      install_steps = jobflow['Steps'].select do |step|
        step["ExecutionStatusDetail"]["State"] != "FAILED" &&
        has_value(step, 'StepConfig', 'HadoopJarStep', 'Jar', get_field(:script_runner_path)) &&
        has_value(step, 'StepConfig', 'HadoopJarStep', 'Args', 3, "--install-hive") &&
        has_value(step, 'StepConfig', 'HadoopJarStep', 'Args', 5, get_version_args(true)[1])
      end
      return install_steps.size > 0
    end

    # Factory used by ensure_install_cmd: clone the parent's version request
    # and step_action.
    def self.new_from_commands(commands, parent)
      sc = self.new("--hive-interactive", "Run a jobflow with Hive Installed", nil, commands)
      sc.hive_versions = parent.hive_versions
      sc.step_action = parent.step_action
      return sc
    end
  end
|
|
492
|
+
|
|
493
|
+
class JarStepCommand < StepCommand
|
|
494
|
+
attr_accessor :main_class
|
|
495
|
+
|
|
496
|
+
def steps
|
|
497
|
+
step = {
|
|
498
|
+
"Name" => get_field(:step_name, "Example Jar Step"),
|
|
499
|
+
"ActionOnFailure" => get_field(:step_action, "CANCEL_AND_WAIT"),
|
|
500
|
+
"HadoopJarStep" => {
|
|
501
|
+
"Jar" => get_field(:arg),
|
|
502
|
+
"Args" => get_field(:args, [])
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
if get_field(:main_class) then
|
|
506
|
+
step["HadoopJarStep"]["MainClass"] = get_field(:main_class)
|
|
507
|
+
end
|
|
508
|
+
return [ step ]
|
|
509
|
+
end
|
|
510
|
+
end
|
|
511
|
+
|
|
512
|
+
  # Adds a Hadoop Streaming step. Cache files/archives and jobconf entries
  # accumulate into streaming options; input/output/mapper/reducer fall back
  # to the word-count sample defaults.
  class StreamStepCommand < StepCommand
    attr_accessor :input, :output, :mapper, :cache, :cache_archive, :jobconf, :reducer, :args

    # Hadoop generic options that take a value and must precede command options.
    GENERIC_OPTIONS = Set.new(%w(-conf -D -fs -jt -files -libjars -archives))

    def steps
      timestr = Time.now.strftime("%Y-%m-%dT%H%M%S")
      stream_options = []
      for ca in get_field(:cache, []) do
        stream_options << "-cacheFile" << ca
      end

      for ca in get_field(:cache_archive, []) do
        stream_options << "-cacheArchive" << ca
      end

      for jc in get_field(:jobconf, []) do
        stream_options << "-jobconf" << jc
      end

      # Note that the streaming options should go before command options for
      # Hadoop 0.20
      step = {
        "Name" => get_field(:step_name, "Example Streaming Step"),
        "ActionOnFailure" => get_field(:step_action, "CANCEL_AND_WAIT"),
        "HadoopJarStep" => {
          "Jar" => "/home/hadoop/contrib/streaming/hadoop-streaming.jar",
          "Args" => (sort_streaming_args(get_field(:args))) + (stream_options) + [
            "-input", get_field(:input, "s3n://elasticmapreduce/samples/wordcount/input"),
            "-output", get_field(:output, "hdfs:///examples/output/#{timestr}"),
            "-mapper", get_field(:mapper, "s3n://elasticmapreduce/samples/wordcount/wordSplitter.py"),
            "-reducer", get_field(:reducer, "aggregate")
          ]
        }
      }
      return [ step ]
    end

    # Move generic option/value pairs (see GENERIC_OPTIONS) to the front of
    # the arg list, leaving other args in their original order. Raises when
    # a generic option has no following value.
    # NOTE(review): because pairs are unshifted, multiple generic-option
    # pairs end up in reverse of their original relative order — confirm
    # this is intentional before changing it.
    def sort_streaming_args(streaming_args)
      sorted_streaming_args = []
      i=0
      while streaming_args && i < streaming_args.length
        if GENERIC_OPTIONS.include?(streaming_args[i]) then
          if i+1 < streaming_args.length
            sorted_streaming_args.unshift(streaming_args[i+1])
            sorted_streaming_args.unshift(streaming_args[i])
            i=i+2
          else
            raise RuntimeError, "Missing value for argument #{streaming_args[i]}"
          end
        else
          sorted_streaming_args << streaming_args[i]
          i=i+1
        end
      end
      return sorted_streaming_args
    end
  end
|
|
570
|
+
|
|
571
|
+
  # Base class for commands that ssh/scp to the jobflow master node.
  # enact resolves the single jobflow, optionally waits for it to become
  # reachable, then records its master hostname and the key pair file.
  class AbstractSSHCommand < Command
    attr_accessor :no_wait, :dest, :hostname, :key_pair_file, :jobflow_id, :jobflow_detail

    # Terminal / shutting-down states: waiting for ssh is pointless here.
    CLOSED_DOWN_STATES = Set.new(%w(TERMINATED SHUTTING_DOWN COMPLETED FAILED))
    # States in which the master node is expected to accept connections.
    WAITING_OR_RUNNING_STATES = Set.new(%w(WAITING RUNNING))

    def exec(cmd)
      commands.exec(cmd)
    end

    # Poll (30s interval) until the jobflow is WAITING/RUNNING; raises
    # RuntimeError if it enters a closed-down state first. Refreshes
    # jobflow_detail on each iteration.
    def wait_for_jobflow(client)
      while true do
        state = resolve(self.jobflow_detail, "ExecutionStatusDetail", "State")
        if WAITING_OR_RUNNING_STATES.include?(state) then
          break
        elsif CLOSED_DOWN_STATES.include?(state) then
          raise RuntimeError, "Jobflow entered #{state} while waiting to ssh"
        else
          logger.info("Jobflow is in state #{state}, waiting....")
          sleep(30)
          self.jobflow_detail = client.describe_jobflow_with_id(jobflow_id)
        end
      end
    end

    # Subclasses call super first, then run their ssh/scp work using
    # hostname and key_pair_file set here.
    def enact(client)
      self.jobflow_id = require_single_jobflow
      self.jobflow_detail = client.describe_jobflow_with_id(self.jobflow_id)
      if ! get_field(:no_wait) then
        wait_for_jobflow(client)
      end
      self.hostname = self.jobflow_detail['Instances']['MasterPublicDnsName']
      self.key_pair_file = require(:key_pair_file, "Missing required option --key-pair-file for #{name}")
    end
  end
|
|
606
|
+
|
|
607
|
+
class SSHCommand < AbstractSSHCommand
|
|
608
|
+
attr_accessor :cmd
|
|
609
|
+
|
|
610
|
+
def initialize(*args)
|
|
611
|
+
super(*args)
|
|
612
|
+
if @arg =~ /j-[A-Z0-9]{8,20}/ then
|
|
613
|
+
commands.global_options[:jobflow] << @arg
|
|
614
|
+
else
|
|
615
|
+
self.cmd = @arg
|
|
616
|
+
end
|
|
617
|
+
end
|
|
618
|
+
|
|
619
|
+
def enact(client)
|
|
620
|
+
super(client)
|
|
621
|
+
exec "ssh -i #{key_pair_file} hadoop@#{hostname} #{get_field(:cmd, "")}"
|
|
622
|
+
end
|
|
623
|
+
end
|
|
624
|
+
|
|
625
|
+
class PutCommand < AbstractSSHCommand
|
|
626
|
+
def enact(client)
|
|
627
|
+
super(client)
|
|
628
|
+
if get_field(:dest) then
|
|
629
|
+
exec "scp -i #{key_pair_file} #{@arg} hadoop@#{hostname}:#{get_field(:dest)}"
|
|
630
|
+
else
|
|
631
|
+
exec "scp -i #{key_pair_file} #{@arg} hadoop@#{hostname}:#{File.basename(@arg)}"
|
|
632
|
+
end
|
|
633
|
+
end
|
|
634
|
+
end
|
|
635
|
+
|
|
636
|
+
class GetCommand < AbstractSSHCommand
|
|
637
|
+
def enact(client)
|
|
638
|
+
super(client)
|
|
639
|
+
if get_field(:dest) then
|
|
640
|
+
exec "scp -i #{key_pair_file} hadoop@#{hostname}:#{@arg} #{get_field(:dest)}"
|
|
641
|
+
else
|
|
642
|
+
exec "scp -i #{key_pair_file} hadoop@#{hostname}:#{@arg} #{File.basename(@arg)}"
|
|
643
|
+
end
|
|
644
|
+
end
|
|
645
|
+
end
|
|
646
|
+
|
|
647
|
+
class PrintHiveVersionCommand < AbstractSSHCommand
|
|
648
|
+
def enact(client)
|
|
649
|
+
super(client)
|
|
650
|
+
stdin, stdout, stderr = Open3.popen3("ssh -i #{key_pair_file} hadoop@#{hostname} '/home/hadoop/bin/hive -v'")
|
|
651
|
+
version = stdout.readlines.join
|
|
652
|
+
err = stderr.readlines.join
|
|
653
|
+
if version.length > 0
|
|
654
|
+
puts version
|
|
655
|
+
elsif err =~ /Unrecognised option/ or err =~ /Error while determing Hive version/
|
|
656
|
+
stdin, stdout, stderr = Open3.popen3("ssh -i #{key_pair_file} hadoop@#{hostname} 'ls -l /home/hadoop/bin/hive'")
|
|
657
|
+
version = stdout.readlines.join
|
|
658
|
+
version =~ /hive-(.*)\/bin\/hive/
|
|
659
|
+
puts "Hive version " + $1
|
|
660
|
+
else
|
|
661
|
+
puts "Unable to determine Hive version"
|
|
662
|
+
end
|
|
663
|
+
end
|
|
664
|
+
end
|
|
665
|
+
|
|
666
|
+
class LogsCommand < AbstractSSHCommand
|
|
667
|
+
attr_accessor :step_index
|
|
668
|
+
|
|
669
|
+
INTERESTING_STEP_STATES = ['RUNNING', 'COMPLETED', 'FAILED']
|
|
670
|
+
|
|
671
|
+
def enact(client)
|
|
672
|
+
super(client)
|
|
673
|
+
|
|
674
|
+
# find the last interesting step if that exists
|
|
675
|
+
if get_field(:step_index) == nil then
|
|
676
|
+
steps = resolve(jobflow_detail, "Steps")
|
|
677
|
+
self.step_index = (0 ... steps.size).select { |index|
|
|
678
|
+
INTERESTING_STEP_STATES.include?(resolve(steps, index, 'ExecutionStatusDetail', 'State'))
|
|
679
|
+
}.last + 1
|
|
680
|
+
end
|
|
681
|
+
|
|
682
|
+
if get_field(:step_index) then
|
|
683
|
+
logger.puts "Listing steps for step #{get_field(:step_index)}"
|
|
684
|
+
exec "ssh -i #{key_pair_file} hadoop@#{hostname} cat /mnt/var/log/hadoop/steps/#{get_field(:step_index)}/{syslog,stderr,stdout}"
|
|
685
|
+
else
|
|
686
|
+
raise RuntimeError, "No steps that could have logs found in jobflow"
|
|
687
|
+
end
|
|
688
|
+
end
|
|
689
|
+
end
|
|
690
|
+
|
|
691
|
+
class GlobalOption < CommandOption
|
|
692
|
+
def attach(commands)
|
|
693
|
+
global_options = @commands.global_options
|
|
694
|
+
value = global_options[@field_symbol]
|
|
695
|
+
if value.is_a?(Array) then
|
|
696
|
+
value << @arg
|
|
697
|
+
elsif value == nil then
|
|
698
|
+
global_options[@field_symbol] = @arg
|
|
699
|
+
else
|
|
700
|
+
raise RuntimeError, "You may not specify #{@name} twice"
|
|
701
|
+
end
|
|
702
|
+
return nil
|
|
703
|
+
end
|
|
704
|
+
end
|
|
705
|
+
|
|
706
|
+
class GlobalFlagOption < CommandOption
|
|
707
|
+
def attach(command)
|
|
708
|
+
global_options = @commands.global_options
|
|
709
|
+
value = global_options[@field_symbol]
|
|
710
|
+
if value == nil then
|
|
711
|
+
global_options[@field_symbol] = @arg
|
|
712
|
+
else
|
|
713
|
+
raise RuntimeError, "You may not specify #{@name} twice"
|
|
714
|
+
end
|
|
715
|
+
end
|
|
716
|
+
end
|
|
717
|
+
|
|
718
|
+
class StepProcessingCommand < Command
|
|
719
|
+
attr_accessor :step_commands
|
|
720
|
+
|
|
721
|
+
def initialize(*args)
|
|
722
|
+
super(*args)
|
|
723
|
+
@step_commands = []
|
|
724
|
+
end
|
|
725
|
+
|
|
726
|
+
def reorder_steps(jobflow, sc)
|
|
727
|
+
new_step_commands = sc.dup
|
|
728
|
+
for step_command in sc do
|
|
729
|
+
new_step_commands = step_command.reorder_steps(jobflow, new_step_commands)
|
|
730
|
+
end
|
|
731
|
+
|
|
732
|
+
return new_step_commands
|
|
733
|
+
end
|
|
734
|
+
end
|
|
735
|
+
|
|
736
|
+
class AddJobFlowStepsCommand < StepProcessingCommand
|
|
737
|
+
|
|
738
|
+
def add_step_command(step)
|
|
739
|
+
@step_commands << step
|
|
740
|
+
end
|
|
741
|
+
|
|
742
|
+
def validate
|
|
743
|
+
for cmd in step_commands do
|
|
744
|
+
cmd.validate
|
|
745
|
+
end
|
|
746
|
+
end
|
|
747
|
+
|
|
748
|
+
def enact(client)
|
|
749
|
+
jobflow_id = require_single_jobflow
|
|
750
|
+
jobflow = client.describe_jobflow_with_id(jobflow_id)
|
|
751
|
+
self.step_commands = reorder_steps(jobflow, self.step_commands)
|
|
752
|
+
jobflow_steps = step_commands.map { |x| x.steps }.flatten
|
|
753
|
+
client.add_steps(jobflow_id, jobflow_steps)
|
|
754
|
+
logger.puts("Added jobflow steps")
|
|
755
|
+
end
|
|
756
|
+
end
|
|
757
|
+
|
|
758
|
+
class CreateJobFlowCommand < StepProcessingCommand
|
|
759
|
+
attr_accessor :jobflow_name, :alive, :with_termination_protection, :instance_count, :slave_instance_type,
|
|
760
|
+
:master_instance_type, :key_pair, :key_pair_file, :log_uri, :az, :ainfo,
|
|
761
|
+
:hadoop_version, :plain_output, :instance_type,
|
|
762
|
+
:instance_group_commands, :bootstrap_commands
|
|
763
|
+
|
|
764
|
+
|
|
765
|
+
OLD_OPTIONS = [:instance_count, :slave_instance_type, :master_instance_type]
|
|
766
|
+
# FIXME: add code to setup collapse instance group commands
|
|
767
|
+
|
|
768
|
+
DEFAULT_HADOOP_VERSION = "0.20"
|
|
769
|
+
|
|
770
|
+
def initialize(*args)
|
|
771
|
+
super(*args)
|
|
772
|
+
@instance_group_commands = []
|
|
773
|
+
@bootstrap_commands = []
|
|
774
|
+
end
|
|
775
|
+
|
|
776
|
+
def add_step_command(step)
|
|
777
|
+
@step_commands << step
|
|
778
|
+
end
|
|
779
|
+
|
|
780
|
+
def add_bootstrap_command(bootstrap_command)
|
|
781
|
+
@bootstrap_commands << bootstrap_command
|
|
782
|
+
end
|
|
783
|
+
|
|
784
|
+
def add_instance_group_command(instance_group_command)
|
|
785
|
+
@instance_group_commands << instance_group_command
|
|
786
|
+
end
|
|
787
|
+
|
|
788
|
+
def validate
|
|
789
|
+
for step in step_commands do
|
|
790
|
+
if step.is_a?(EnableDebuggingCommand) then
|
|
791
|
+
require(:log_uri, "You must supply a logUri if you enable debugging when creating a job flow")
|
|
792
|
+
end
|
|
793
|
+
end
|
|
794
|
+
|
|
795
|
+
for cmd in step_commands + instance_group_commands + bootstrap_commands do
|
|
796
|
+
cmd.validate
|
|
797
|
+
end
|
|
798
|
+
|
|
799
|
+
if ! have(:hadoop_version) then
|
|
800
|
+
@hadoop_version = DEFAULT_HADOOP_VERSION
|
|
801
|
+
end
|
|
802
|
+
end
|
|
803
|
+
|
|
804
|
+
# Build the RunJobFlow request from the parsed options and sub-commands,
# submit it, record the new jobflow id globally, and report it.
def enact(client)
  @jobflow = create_jobflow

  # Copy optional command-line fields into the request structure.
  apply_jobflow_option(:ainfo, "AdditionalInfo")
  apply_jobflow_option(:key_pair, "Instances", "Ec2KeyName")
  apply_jobflow_option(:hadoop_version, "Instances", "HadoopVersion")
  apply_jobflow_option(:az, "Instances", "Placement", "AvailabilityZone")
  apply_jobflow_option(:log_uri, "LogUri")

  self.step_commands = reorder_steps(@jobflow, self.step_commands)
  @jobflow["Steps"] = step_commands.map { |sc| sc.steps }.flatten

  setup_instance_groups
  @jobflow["Instances"]["InstanceGroups"] = instance_group_commands.map { |igc| igc.instance_group }

  # Bootstrap actions are numbered from 1 for their default names.
  bootstrap_commands.each_with_index do |bc, idx|
    @jobflow["BootstrapActions"] << bc.bootstrap_action(idx + 1)
  end

  run_result = client.run_jobflow(@jobflow)
  jobflow_id = run_result['JobFlowId']
  commands.global_options[:jobflow] << jobflow_id

  if have(:plain_output)
    logger.puts jobflow_id
  else
    logger.puts "Created job flow " + jobflow_id
  end
end
|
|
836
|
+
|
|
837
|
+
# Copy the parsed value of +field_symbol+ into @jobflow under the nested
# key path +keys+, creating intermediate hashes as needed. Does nothing
# when the field is unset.
def apply_jobflow_option(field_symbol, *keys)
  value = get_field(field_symbol)
  return if value.nil?

  target = @jobflow
  keys[0..-2].each do |key|
    target[key] = {} if target[key].nil?
    target = target[key]
  end
  target[keys.last] = value
end
|
|
852
|
+
|
|
853
|
+
# Build a CreateInstanceGroupCommand for +role+ with the supplied count
# and type, as if the user had passed --instance-group on the command line.
def new_instance_group_command(role, instance_count, instance_type)
  command = CreateInstanceGroupCommand.new(
    "--instance-group ROLE", "Specify an instance group", role, commands
  )
  command.instance_count = instance_count
  command.instance_type = instance_type
  command
end
|
|
861
|
+
|
|
862
|
+
# True when any instance-group command in the list targets the given
# role (role comparison is case-insensitive on the command side).
def have_role(instance_group_commands, role)
  instance_group_commands.any? { |igc| igc.instance_role.upcase == role }
end
|
|
867
|
+
|
|
868
|
+
# Ensure the jobflow has the MASTER and CORE instance groups implied by
# the --num-instances / --instance-type style options.
#
# FIX: removed the unused local `instance_groups = []` that was assigned
# and never read.
def setup_instance_groups
  unless have_role(instance_group_commands, "MASTER")
    # Master type defaults to --master-instance-type, then --instance-type,
    # then m1.small.
    mit = get_field(:master_instance_type, get_field(:instance_type, "m1.small"))
    instance_group_commands << new_instance_group_command("MASTER", 1, mit)
  end

  if have_role(instance_group_commands, "CORE")
    # Verify that user has not specified both --instance-group core and --num-instances
    if get_field(:instance_count) != nil
      raise RuntimeError, "option --num-instances cannot be used when a core instance group is specified."
    end
  else
    ni = get_field(:instance_count, 1).to_i
    if ni > 1
      # One instance is the master; the remainder become the core group.
      sit = get_field(:slave_instance_type, get_field(:instance_type, "m1.small"))
      slave_instance_group = new_instance_group_command("CORE", ni - 1, sit)
      slave_instance_group.instance_role = "CORE"
      instance_group_commands << slave_instance_group
    end
  end
end
|
|
890
|
+
|
|
891
|
+
# Build and store the skeleton RunJobFlow request hash; steps, instance
# groups and bootstrap actions are filled in later by enact.
def create_jobflow
  @jobflow = {
    "Name"             => get_field(:jobflow_name, default_job_flow_name),
    "Instances"        => {
      "KeepJobFlowAliveWhenNoSteps" => (get_field(:alive) ? "true" : "false"),
      "TerminationProtected"        => (get_field(:with_termination_protection) ? "true" : "false"),
      "InstanceGroups"              => []
    },
    "Steps"            => [],
    "BootstrapActions" => []
  }
end
|
|
903
|
+
|
|
904
|
+
# Default display name for a new job flow; notes when --alive means the
# flow must be terminated by hand.
def default_job_flow_name
  suffix = get_field(:alive) ? " (requires manual termination)" : ""
  "Development Job Flow" + suffix
end
|
|
911
|
+
end
|
|
912
|
+
|
|
913
|
+
# Command representing a --bootstrap-action script run on every instance.
class BootstrapActionCommand < Command
  attr_accessor :bootstrap_name, :args

  def initialize(*args)
    super(*args)
    @args = []
  end

  # Build the ScriptBootstrapAction request structure; +index+ is used
  # only for the default action name.
  def bootstrap_action(index)
    {
      "Name" => get_field(:bootstrap_name, "Bootstrap Action #{index}"),
      "ScriptBootstrapAction" => {
        "Path" => @arg,
        "Args" => @args
      }
    }
  end
end
|
|
932
|
+
|
|
933
|
+
# Shared base for --list / --describe: selects job flows by explicit id,
# by state, or by creation age, and records the described jobflow ids in
# the global --jobflow list for later commands.
class AbstractListCommand < Command
  attr_accessor :state, :max_results, :active, :all, :no_steps

  def enact(client)
    options = {}
    states = []
    if get_field(:jobflow, []).size > 0 then
      # Explicit jobflow ids take precedence over every filter.
      options = { 'JobFlowIds' => get_field(:jobflow) }
    else
      if get_field(:active) then
        states = %w(RUNNING SHUTTING_DOWN STARTING WAITING BOOTSTRAPPING)
      end
      if get_field(:states) then
        # BUG FIX: was `get_field(states)` — it passed the local states
        # array instead of the :states field symbol, so user-supplied
        # states were never added to the filter.
        states += get_field(:states)
      end
      if get_field(:active) || get_field(:states) then
        options = { 'JobFlowStates' => states }
      elsif get_field(:all) then
        options = { }
      else
        # Default: job flows created in the last two days.
        options = { 'CreatedAfter' => (Time.now - (2 * 24 * 3600)).xmlschema }
      end
    end
    result = client.describe_jobflow(options)
    # add the described jobflow to the supplied jobflows
    commands.global_options[:jobflow] += result['JobFlows'].map { |x| x['JobFlowId'] }
    commands.global_options[:jobflow].uniq!

    return result
  end
end
|
|
964
|
+
|
|
965
|
+
# --list: print a fixed-width table of job flows and (optionally) their steps.
class ListActionCommand < AbstractListCommand

  # Render the requested fields of +map+ as left-justified columns.
  # Each element of +fields+ is [dot_separated_key_path, column_width].
  def format(map, *fields)
    columns = fields.map do |key_path, width|
      value = key_path.split(".").inject(map) { |node, key| node[key] }
      sprintf("%-#{width}s", value)
    end
    columns.join("")
  end

  def enact(client)
    result = super(client)
    count = 0
    result['JobFlows'].each do |job_flow|
      # Honour --max-results by stopping once the limit is exceeded.
      if get_field(:max_results) && (count += 1) > get_field(:max_results)
        break
      end
      logger.puts format(job_flow, ['JobFlowId', 20], ['ExecutionStatusDetail.State', 15],
                         ['Instances.MasterPublicDnsName', 50]) + job_flow['Name']
      next if get_field(:no_steps)
      job_flow['Steps'].each do |step|
        logger.puts " " + format(step, ['ExecutionStatusDetail.State', 15], ['StepConfig.Name', 30])
      end
    end
  end
end
|
|
999
|
+
|
|
1000
|
+
# --describe: dump the raw DescribeJobFlows response as pretty-printed JSON.
class DescribeActionCommand < AbstractListCommand
  def enact(client)
    logger.puts(JSON.pretty_generate(super(client)))
  end
end
|
|
1006
|
+
|
|
1007
|
+
# --set-termination-protection: toggle protection on the supplied job
# flows. @arg is the literal string "true" or "false". The message
# reports the state of *termination* (disabled when protection is on).
class SetTerminationProtection < Command
  def enact(client)
    flows = get_field(:jobflow)
    termination_protected = (@arg == 'true')
    client.set_termination_protection(flows, termination_protected)
    logger.puts "#{termination_protected ? "Disabled":"Enabled"} job flow termination " + flows.join(" ")
  end
end
|
|
1015
|
+
|
|
1016
|
+
# --terminate: terminate every job flow in the --jobflow list.
class TerminateActionCommand < Command
  def enact(client)
    flows = get_field(:jobflow)
    client.terminate_jobflows(flows)
    logger.puts "Terminated job flow " + flows.join(" ")
  end
end
|
|
1023
|
+
|
|
1024
|
+
# --version: print the client version string.
class VersionCommand < Command
  def enact(client)
    logger.puts "Version #{ELASTIC_MAPREDUCE_CLIENT_VERSION}"
  end
end
|
|
1029
|
+
|
|
1030
|
+
# --help / -h: print the assembled option parser usage text.
class HelpCommand < Command
  def enact(client)
    logger.puts commands.opts
  end
end
|
|
1035
|
+
|
|
1036
|
+
# --args: split a comma separated list and append each piece to the
# current command's argument list.
class ArgsOption < CommandOption
  def attach(commands)
    command = super(commands)
    command.args += @arg.split(",")
    command
  end
end
|
|
1043
|
+
|
|
1044
|
+
# --arg: append a single argument to the current command's argument list.
class ArgOption < CommandOption
  def attach(commands)
    command = super(commands)
    command.args.push(@arg)
    command
  end
end
|
|
1051
|
+
|
|
1052
|
+
# Base for commands that target an instance group, either by id (ig-...)
# or by role name (master/core/task).
class AbstractInstanceGroupCommand < Command
  attr_accessor :instance_group_id, :instance_type, :instance_role,
    :instance_count, :instance_group_name

  def initialize(*args)
    super(*args)
    # The argument is an instance group id when it starts with "ig-",
    # otherwise it names a role.
    if @arg =~ /^ig-/
      @instance_group_id = @arg
    else
      @instance_role = @arg.upcase
    end
  end

  # e.g. "Core Instance Group"
  def default_instance_group_name
    get_field(:instance_role).downcase.capitalize + " Instance Group"
  end

  # The InstanceGroupConfig hash for the request.
  def instance_group
    {
      "Name"          => get_field(:instance_group_name),
      "Market"        => get_field(:instance_group_market, "ON_DEMAND"),
      "InstanceRole"  => get_field(:instance_role),
      "InstanceCount" => get_field(:instance_count),
      "InstanceType"  => get_field(:instance_type)
    }
  end

  # Raise unless +arr+ holds exactly one element.
  def require_singleton_array(arr, msg)
    if arr.size != 1
      raise RuntimeError, "Expected to find one " + msg + " but found #{arr.size}."
    end
  end

end
|
|
1086
|
+
|
|
1087
|
+
# --add-instance-group: add a (TASK-only) instance group to a running jobflow.
class AddInstanceGroupCommand < AbstractInstanceGroupCommand
  def validate
    unless ["TASK"].include?(get_field(:instance_role))
      raise RuntimeError, "Invalid argument to #{name}, expected 'task'"
    end
    require(:instance_type, "Option #{name} is missing --instance-type")
    require(:instance_count, "Option #{name} is missing --instance-count")
  end

  def enact(client)
    client.add_instance_groups(
      'JobFlowId' => require_single_jobflow, 'InstanceGroups' => [instance_group]
    )
    logger.puts("Added instance group " + get_field(:instance_role))
  end
end
|
|
1103
|
+
|
|
1104
|
+
# --instance-group (at --create time): describe one instance group of a
# new job flow.
class CreateInstanceGroupCommand < AbstractInstanceGroupCommand
  def validate
    unless %w(MASTER CORE TASK).include?(get_field(:instance_role))
      raise RuntimeError, "Invalid argument to #{name}, expected master, core or task"
    end
    require(:instance_type, "Option #{name} is missing --instance-type")
    require(:instance_count, "Option #{name} is missing --instance-count")
  end
end
|
|
1113
|
+
|
|
1114
|
+
# --modify-instance-group: change the instance count of an existing
# group, addressed either by id or by role within a single jobflow.
class ModifyInstanceGroupCommand < AbstractInstanceGroupCommand
  attr_accessor :jobflow_detail, :jobflow_id

  def validate
    if get_field(:instance_group_id).nil?
      # Selecting by role requires a valid role and exactly one jobflow.
      unless ["CORE", "TASK"].include?(get_field(:instance_role))
        raise RuntimeError, "Invalid argument to #{name}, #{@arg} is not valid"
      end
      if get_field(:jobflow, []).size == 0
        raise RuntimeError, "You must specify a jobflow when using #{name} and specifying a role #{instance_role}"
      end
    end
    require(:instance_count, "Option #{name} is missing --instance-count")
  end

  def enact(client)
    if get_field(:instance_group_id).nil?
      # Resolve the group id from the role on the single target jobflow.
      self.jobflow_id = require_single_jobflow
      self.jobflow_detail = client.describe_jobflow_with_id(self.jobflow_id)
      matching =
        jobflow_detail['Instances']['InstanceGroups'].select { |ig| ig['InstanceRole'] == instance_role }
      require_singleton_array(matching, "instance group with role #{instance_role}")
      self.instance_group_id = matching.first['InstanceGroupId']
    end
    client.modify_instance_groups(
      'InstanceGroups' => [{
        'InstanceGroupId' => get_field(:instance_group_id),
        'InstanceCount'   => get_field(:instance_count)
      }]
    )
    ig_modified = get_field(:instance_role) || get_field(:instance_group_id)
    logger.puts("Modified instance group " + ig_modified)
  end
end
|
|
1154
|
+
|
|
1155
|
+
# --unarrest-instance-group: unarrest a group by resubmitting its
# requested instance count via ModifyInstanceGroups.
class UnarrestInstanceGroupCommand < AbstractInstanceGroupCommand

  attr_accessor :jobflow_id, :jobflow_detail

  def validate
    require_single_jobflow
    if get_field(:instance_group_id).nil?
      unless ["CORE", "TASK"].include?(get_field(:instance_role))
        raise RuntimeError, "Invalid argument to #{name}, #{@arg} is not valid"
      end
    end
  end

  def enact(client)
    self.jobflow_id = require_single_jobflow
    self.jobflow_detail = client.describe_jobflow_with_id(self.jobflow_id)

    groups = jobflow_detail['Instances']['InstanceGroups']
    matching_instance_groups =
      if get_field(:instance_group_id).nil?
        groups.select { |ig| ig['InstanceRole'] == instance_role }
      else
        groups.select { |ig| ig['InstanceGroupId'] == get_field(:instance_group_id) }
      end

    require_singleton_array(matching_instance_groups, "instance group with role #{instance_role}")
    instance_group_detail = matching_instance_groups.first
    self.instance_group_id = instance_group_detail['InstanceGroupId']
    self.instance_count = instance_group_detail['InstanceRequestCount']

    client.modify_instance_groups(
      'InstanceGroups' => [{
        'InstanceGroupId' => get_field(:instance_group_id),
        'InstanceCount'   => get_field(:instance_count)
      }]
    )
    logger.puts "Unarrested instance group #{get_field(:instance_group_id)}."
  end
end
|
|
1196
|
+
|
|
1197
|
+
# --instance-count: set the (integer) instance count on the current command.
class InstanceCountOption < CommandOption
  def attach(commands)
    command = super(commands)
    command.instance_count = @arg.to_i
    command
  end
end
|
|
1204
|
+
|
|
1205
|
+
# --instance-type: set the EC2 instance type on the current command.
class InstanceTypeOption < CommandOption
  def attach(commands)
    command = super(commands)
    command.instance_type = @arg
    command
  end
end
|
|
1212
|
+
|
|
1213
|
+
# A command option that takes an argument, optionally validated against
# a regexp supplied at registration time.
class OptionWithArg < CommandOption
  def attach(commands)
    command = super(commands)
    if @pattern && !@arg.match(@pattern)
      raise RuntimeError, "Expected argument to #{@name} to match #{@pattern.inspect}, but it didn't"
    end
    command.option(@name, @field_symbol, @arg)
    command
  end
end
|
|
1223
|
+
|
|
1224
|
+
# A boolean command option: sets its field to true on the current command.
class FlagOption < CommandOption

  def initialize(name, description, arg, parent_commands, commands, field_symbol)
    super(name, description, arg, parent_commands, commands)
    @field_symbol = field_symbol
  end

  def attach(commands)
    command = super(commands)
    command.option(@name, @field_symbol, true)
    # FIX: return the command, consistent with every other *Option#attach;
    # previously this returned whatever Command#option returned.
    command
  end
end
|
|
1236
|
+
|
|
1237
|
+
# --json FILE: read steps from a JSON file, substituting --param
# VARIABLE=VALUE pairs into the raw text before parsing.
class JsonStepCommand < StepCommand
  attr_accessor :variables

  def initialize(*args)
    super(*args)
    @variables = []
  end

  # Returns the parsed steps, always as an array.
  # Raises RuntimeError when the file cannot be read or parsed.
  def steps
    content = steps = nil
    filename = get_field(:arg)
    begin
      content = File.read(filename)
    rescue Exception => e
      # FIX: interpolate the offending filename into the error message
      # (the literal had lost its #{...} interpolation).
      raise RuntimeError, "Couldn't read json file #{filename}"
    end
    # Apply --param substitutions as plain text replacement.
    get_field(:variables, []).each do |var|
      content.gsub!(var[:key], var[:value])
    end
    begin
      steps = JSON.parse(content)
    rescue Exception => e
      raise RuntimeError, "Error parsing json from file #{filename}"
    end
    if steps.is_a?(Array)
      steps
    else
      [steps]
    end
  end
end
|
|
1268
|
+
|
|
1269
|
+
# --param VARIABLE=VALUE: record a text substitution for --json step files.
class ParamOption < CommandOption
  def initialize(*args)
    super(*args)
    @params = []
  end

  def attach(commands)
    command = super(commands)
    match = @arg.match(/([^=]+)=(.*)/)
    unless match
      raise RuntimeError, "Expected '#{@arg}' to be in the form VARIABLE=VALUE"
    end
    command.option(@name, @field_symbol, { :key => match[1], :value => match[2] })
    command
  end
end
|
|
1285
|
+
|
|
1286
|
+
# --eip [ElasticIP]: associate an Elastic IP with the master node of a
# jobflow, allocating a fresh address when none is supplied.
class EipCommand < Command
  attr_accessor :no_wait, :instance_id, :key_pair_file, :jobflow_id, :jobflow_detail

  CLOSED_DOWN_STATES        = Set.new(%w(TERMINATED SHUTTING_DOWN COMPLETED FAILED))
  WAITING_OR_RUNNING_STATES = Set.new(%w(WAITING RUNNING))

  # Regions with dedicated EC2 endpoints; anything else falls back to
  # the generic endpoint.
  EC2_REGIONS = %w(us-east-1 us-west-1 eu-west-1 ap-southeast-1 ap-northeast-1)

  def initialize(*args)
    super(*args)
  end

  def exec(cmd)
    commands.exec(cmd)
  end

  # Poll (every 30s) until the jobflow is WAITING/RUNNING; raise if it
  # shuts down first.
  def wait_for_jobflow(client)
    while true do
      state = resolve(self.jobflow_detail, "ExecutionStatusDetail", "State")
      if WAITING_OR_RUNNING_STATES.include?(state) then
        break
      elsif CLOSED_DOWN_STATES.include?(state) then
        raise RuntimeError, "Jobflow entered #{state} while waiting to assign Elastic IP"
      else
        logger.info("Jobflow is in state #{state}, waiting....")
        sleep(30)
        self.jobflow_detail = client.describe_jobflow_with_id(jobflow_id)
      end
    end
  end

  def enact(client)
    self.jobflow_id = require_single_jobflow
    self.jobflow_detail = client.describe_jobflow_with_id(self.jobflow_id)
    if ! get_field(:no_wait) then
      wait_for_jobflow(client)
    end
    self.instance_id = self.jobflow_detail['Instances']['MasterInstanceId']
    if ! self.instance_id then
      logger.error("The master instance is not available yet for jobflow #{self.jobflow_id}. It might still be starting.")
      exit(-1)
    end

    # Pick the EC2 endpoint matching the jobflow's availability zone.
    # BUG FIX: the prefix length was fixed at "us-east-1".length (9), so
    # the 14-character ap-* region names could never match and those
    # regions silently fell back to the default endpoint.
    az = self.jobflow_detail['Instances']['Placement']['AvailabilityZone']
    region = EC2_REGIONS.find { |r| az[0, r.length] == r }
    ec2_endpoint = region ? "https://ec2.#{region}.amazonaws.com" : "https://ec2.amazonaws.com"
    commands.global_options[:ec2_endpoint] = ec2_endpoint

    self.key_pair_file = require(:key_pair_file, "Missing required option --key-pair-file for #{name}")
    eip = get_field(:arg)

    ec2_client = Ec2ClientWrapper.new(commands, logger)

    if ! eip then
      begin
        response = ec2_client.allocate_address()
      rescue Exception => e
        # FIX: corrected "AllocateAddres" typo in the log message.
        logger.error("Error during AllocateAddress: " + e.message)
        if get_field(:trace) then
          logger.puts(e.backtrace.join("\n"))
        end
        exit(-1)
      end

      eip = response['publicIp']
      logger.info("Allocated Public IP: #{eip}...")
    end

    begin
      response = ec2_client.associate_address(self.instance_id, eip)
      logger.info("Public IP: #{eip} was assigned to jobflow #{self.jobflow_id}")
    rescue Exception => e
      # FIX: corrected "AssociateAddres" typo in the log message.
      logger.error("Error during AssociateAddress: " + e.to_s)
      if get_field(:trace) then
        logger.puts(e.backtrace.join("\n"))
      end
      exit(-1)
    end

  end
end
|
|
1376
|
+
|
|
1377
|
+
# Register every command and option with the command parser, grouped
# under the help-text section separators.
#
# FIXES (help text only): "--args" said "command separated" instead of
# "comma separated"; "--state" and "--endpoint" carried descriptions
# copy-pasted from unrelated options.
def self.add_commands(commands, opts)
  # FIXME: add --wait-for-step function

  commands.opts = opts

  step_commands = ["--jar", "--resize-jobflow", "--enable-debugging", "--hive-interactive", "--pig-interactive", "--hive-script", "--pig-script"]

  opts.separator "\n Creating Job Flows\n"

  commands.parse_command(CreateJobFlowCommand, "--create", "Create a new job flow")
  commands.parse_options(["--create"], [
    [ OptionWithArg, "--name NAME", "The name of the job flow being created", :jobflow_name ],
    [ FlagOption, "--alive", "Create a job flow that stays running even though it has executed all its steps", :alive ],
    [ OptionWithArg, "--with-termination-protection", "Create a job with termination protection (default is no termination protection)", :with_termination_protection ],
    [ OptionWithArg, "--num-instances NUM", "Number of instances in the job flow", :instance_count ],
    [ OptionWithArg, "--slave-instance-type TYPE", "The type of the slave instances to launch", :slave_instance_type ],
    [ OptionWithArg, "--master-instance-type TYPE", "The type of the master instance to launch", :master_instance_type ],
    [ OptionWithArg, "--key-pair KEY_PAIR", "The name of your Amazon EC2 Keypair", :key_pair ],
    [ OptionWithArg, "--availability-zone A_Z", "Specify the Availability Zone in which to launch the job flow", :az ],
    [ OptionWithArg, "--info INFO", "Specify additional info to job flow creation", :ainfo ],
    [ OptionWithArg, "--hadoop-version INFO", "Specify the Hadoop Version to install", :hadoop_version ],
    [ FlagOption, "--plain-output", "Return the job flow id from create step as simple text", :plain_output ],
  ])
  commands.parse_command(CreateInstanceGroupCommand, "--instance-group ROLE", "Specify an instance group while creating a jobflow")

  opts.separator "\n Passing arguments to steps\n"

  commands.parse_options(step_commands + ["--bootstrap-action", "--stream"], [
    # FIX: was "A command separated list".
    [ ArgsOption, "--args ARGS", "A comma separated list of arguments to pass to the step" ],
    [ ArgOption, "--arg ARG", "An argument to pass to the step" ],
    [ OptionWithArg, "--step-name STEP_NAME", "Set name for the step", :step_name ],
    [ OptionWithArg, "--step-action STEP_ACTION", "Action to take when step finishes. One of CANCEL_AND_WAIT, TERMINATE_JOB_FLOW or CONTINUE", :step_action ],
  ])

  opts.separator "\n Specific Steps\n"

  commands.parse_command(ResizeJobflowCommand, "--resize-jobflow", "Add a step to resize the job flow")
  commands.parse_command(EnableDebuggingCommand, "--enable-debugging", "Enable job flow debugging (you must be signed up to SimpleDB for this to work)")

  opts.separator "\n Adding Steps from a Json File to Job Flows\n"

  commands.parse_command(JsonStepCommand, "--json FILE", "Add a sequence of steps stored in the json file FILE")
  commands.parse_options(["--json"], [
    [ ParamOption, "--param VARIABLE=VALUE ARGS", "Substitute the string VARIABLE with the string VALUE in the json file", :variables ],
  ])

  opts.separator "\n Pig Steps\n"

  commands.parse_command(PigScriptCommand, "--pig-script [SCRIPT]", "Add a step that runs a Pig script")
  commands.parse_command(PigInteractiveCommand, "--pig-interactive", "Add a step that sets up the job flow for an interactive (via SSH) pig session")

  opts.separator "\n Hive Steps\n"

  commands.parse_command(HiveScriptCommand, "--hive-script [SCRIPT]", "Add a step that runs a Hive script")
  commands.parse_command(HiveInteractiveCommand, "--hive-interactive", "Add a step that sets up the job flow for an interactive (via SSH) hive session")
  commands.parse_command(HiveSiteCommand, "--hive-site HIVE_SITE", "Override Hive configuration with configuration from HIVE_SITE")
  commands.parse_options(["--hive-script", "--hive-interactive", "--hive-site"], [
    [ OptionWithArg, "--hive-versions VERSIONS", "A comma separated list of Hive version", :hive_versions],
  ])

  opts.separator "\n Adding Jar Steps to Job Flows\n"

  commands.parse_command(JarStepCommand, "--jar JAR", "Run a Hadoop Jar in a step")
  commands.parse_options(["--jar"], [
    [ OptionWithArg, "--main-class MAIN_CLASS", "The main class of the jar", :main_class ]
  ])

  opts.separator "\n Adding Streaming Steps to Job Flows\n"

  commands.parse_command(StreamStepCommand, "--stream", "Add a step that performs hadoop streaming")
  commands.parse_options(["--stream"], [
    [ OptionWithArg, "--input INPUT", "Input to the steps, e.g. s3n://mybucket/input", :input],
    [ OptionWithArg, "--output OUTPUT", "The output to the steps, e.g. s3n://mybucket/output", :output],
    [ OptionWithArg, "--mapper MAPPER", "The mapper program or class", :mapper],
    [ OptionWithArg, "--cache CACHE_FILE", "A file to load into the cache, e.g. s3n://mybucket/sample.py#sample.py", :cache ],
    [ OptionWithArg, "--cache-archive CACHE_FILE", "A file to unpack into the cache, e.g. s3n://mybucket/sample.jar", :cache_archive, ],
    [ OptionWithArg, "--jobconf KEY=VALUE", "Specify jobconf arguments to pass to streaming, e.g. mapred.task.timeout=800000", :jobconf],
    [ OptionWithArg, "--reducer REDUCER", "The reducer program or class", :reducer],
  ])

  opts.separator "\n Adding and Modifying Instance Groups\n"

  commands.parse_command(ModifyInstanceGroupCommand, "--modify-instance-group INSTANCE_GROUP", "Modify an existing instance group")
  commands.parse_command(AddInstanceGroupCommand, "--add-instance-group ROLE", "Add an instance group to an existing jobflow")
  commands.parse_command(UnarrestInstanceGroupCommand, "--unarrest-instance-group ROLE", "Unarrest an instance group of the supplied jobflow")
  commands.parse_options(["--instance-group", "--modify-instance-group", "--add-instance-group", "--create"], [
    [ InstanceCountOption, "--instance-count INSTANCE_COUNT", "Set the instance count of an instance group", :instance_count ]
  ])
  commands.parse_options(["--instance-group", "--add-instance-group", "--create"], [
    [ InstanceTypeOption, "--instance-type INSTANCE_TYPE", "Set the instance type of an instance group", :instance_type ],
  ])

  opts.separator "\n Contacting the Master Node\n"

  # commands.parse_options(["--ssh", "--scp", "--eip"], [
  #  [ FlagOption, "--no-wait", "Don't wait for the Master node to start before executing scp or ssh or assigning EIP", :no_wait ],
  # ])

  commands.parse_command(SSHCommand, "--ssh [COMMAND]", "SSH to the master node and optionally run a command")
  commands.parse_command(PutCommand, "--put SRC", "Copy a file to the job flow using scp")
  commands.parse_command(GetCommand, "--get SRC", "Copy a file from the job flow using scp")
  commands.parse_command(PutCommand, "--scp SRC", "Copy a file to the job flow using scp")

  commands.parse_options(["--get", "--put", "--scp"], [
    [ OptionWithArg, "--to DEST", "Destination location when copying files", :dest ],
  ])

  commands.parse_command(LogsCommand, "--logs", "Display the step logs for the last executed step")

  opts.separator "\n Assigning Elastic IP to Master Node\n"

  commands.parse_command(EipCommand, "--eip [ElasticIP]", "Associate ElasticIP to master node. If no ElasticIP is specified, allocate and associate a new one.")

  opts.separator "\n Settings common to all step types\n"

  commands.parse_options(["--ssh", "--scp", "--eip"], [
    [ FlagOption, "--no-wait", "Don't wait for the Master node to start before executing scp or ssh or assigning EIP", :no_wait ],
    [ GlobalOption, "--key-pair-file FILE_PATH", "Path to your local pem file for your EC2 key pair", :key_pair_file ],
  ])

  opts.separator "\n Specifying Bootstrap Actions\n"

  commands.parse_command(BootstrapActionCommand, "--bootstrap-action SCRIPT", "Run a bootstrap action script on all instances")
  commands.parse_options(["--bootstrap-action"], [
    [ OptionWithArg, "--bootstrap-name NAME", "Set the name of the bootstrap action", :bootstrap_name ],
  ])


  opts.separator "\n Listing and Describing Job flows\n"

  commands.parse_command(ListActionCommand, "--list", "List all job flows created in the last 2 days")
  commands.parse_command(DescribeActionCommand, "--describe", "Dump a JSON description of the supplied job flows")
  commands.parse_command(PrintHiveVersionCommand, "--print-hive-version", "Prints the version of Hive that's currently active on the job flow")
  commands.parse_options(["--list", "--describe"], [
    # FIX: description was copy-pasted from --bootstrap-name.
    [ OptionWithArg, "--state NAME", "List job flows in the state NAME", :state ],
    [ FlagOption, "--active", "List running, starting or shutting down job flows", :active ],
    [ FlagOption, "--all", "List all job flows in the last 2 months", :all ],
    [ FlagOption, "--no-steps", "Do not list steps when listing jobs", :no_steps ],
  ])

  opts.separator "\n Terminating Job Flows\n"

  commands.parse_command(SetTerminationProtection, "--set-termination-protection BOOL", "Enable or disable job flow termination protection. Either true or false")

  commands.parse_command(TerminateActionCommand, "--terminate", "Terminate job flows")

  opts.separator "\n Common Options\n"

  commands.parse_options(["--jobflow", "--describe"], [
    [ GlobalOption, "--jobflow JOB_FLOW_ID", "The job flow to act on", :jobflow, /^j-[A-Z0-9]+$/],
  ])

  commands.parse_options(:global, [
    [ GlobalFlagOption, "--verbose", "Turn on verbose logging of program interaction", :verbose ],
    [ GlobalFlagOption, "--trace", "Trace commands made to the webservice", :trace ],
    [ GlobalOption, "--credentials CRED_FILE", "File containing access-id and private-key", :credentials],
    [ GlobalOption, "--access-id ACCESS_ID", "AWS Access Id", :aws_access_id],
    [ GlobalOption, "--private-key PRIVATE_KEY", "AWS Private Key", :aws_secret_key],
    [ GlobalOption, "--log-uri LOG_URI", "Location in S3 to store logs from the job flow, e.g. s3n://mybucket/logs", :log_uri ],
  ])
  commands.parse_command(VersionCommand, "--version", "Print version string")
  commands.parse_command(HelpCommand, "--help", "Show help message")

  opts.separator "\n Uncommon Options\n"

  commands.parse_options(:global, [
    [ GlobalFlagOption, "--debug", "Print stack traces when exceptions occur", :debug],
    # FIX: description was copy-pasted from --credentials.
    [ GlobalOption, "--endpoint ENDPOINT", "Specify the webservice endpoint to talk to", :endpoint],
    [ GlobalOption, "--region REGION", "The region to use for the endpoint", :region],
    [ GlobalOption, "--apps-path APPS_PATH", "Specify s3:// path to the base of the emr public bucket to use. e.g s3://us-east-1.elasticmapreduce", :apps_path],
    [ GlobalOption, "--beta-path BETA_PATH", "Specify s3:// path to the base of the emr public bucket to use for beta apps. e.g s3://beta.elasticmapreduce", :beta_path],
  ])

  opts.separator "\n Short Options\n"
  commands.parse_command(HelpCommand, "-h", "Show help message")
  commands.parse_options(:global, [
    [ GlobalFlagOption, "-v", "Turn on verbose logging of program interaction", :verbose ],
    [ GlobalOption, "-c CRED_FILE", "File containing access-id and private-key", :credentials ],
    [ GlobalOption, "-a ACCESS_ID", "AWS Access Id", :aws_access_id],
    [ GlobalOption, "-p PRIVATE_KEY", "AWS Private Key", :aws_secret_key],
    [ GlobalOption, "-j JOB_FLOW_ID", "The job flow to act on", :jobflow, /^j-[A-Z0-9]+$/],
  ])

end
|
|
1561
|
+
|
|
1562
|
+
# True when cmd is a command that is only meaningful as a child of a job-flow
# --create (a step, a bootstrap action, or an instance-group command).
def self.is_create_child_command(cmd)
  child_command_types = [
    StepCommand,
    BootstrapActionCommand,
    AddInstanceGroupCommand,
    CreateInstanceGroupCommand
  ]
  child_command_types.any? { |command_type| cmd.is_a?(command_type) }
end
|
|
1568
|
+
|
|
1569
|
+
# this function pull out steps if there is a create command that preceeds them
#
# Rewrites commands.commands in place: every child command (step, bootstrap
# action, instance-group command) that follows a CreateJobFlowCommand is
# folded into that create command; StepCommands with no preceding --create are
# gathered into a synthesized AddJobFlowStepsCommand.  Raises RuntimeError for
# child commands that require a --create but have none.
def self.fold_commands(commands)
  last_create_command = nil   # most recent --create (or synthesized add-steps) command
  new_commands = []           # top-level commands that survive the fold
  for cmd in commands do
    if cmd.is_a?(CreateJobFlowCommand) then
      last_create_command = cmd
    elsif is_create_child_command(cmd) then
      # Child command seen before any --create: decide per type.
      if last_create_command == nil then
        if cmd.is_a?(StepCommand) then
          # Steps without a --create become a standalone AddJobFlowSteps call.
          last_create_command = AddJobFlowStepsCommand.new(
            "--add-steps", "Add job flow steps", nil, commands
          )
          new_commands << last_create_command
        elsif cmd.is_a?(BootstrapActionCommand) then
          raise RuntimeError, "the option #{cmd.name} must come after the --create option"
        elsif cmd.is_a?(CreateInstanceGroupCommand) then
          raise RuntimeError, "the option #{cmd.name} must come after the --create option"
        elsif cmd.is_a?(AddInstanceGroupCommand) then
          # Add-instance-group is valid standalone; keep it at top level.
          new_commands << cmd
          next
        else
          next
        end
      end

      # Attach the child command to the current create/add-steps parent.
      if cmd.is_a?(StepCommand) then
        if ! last_create_command.respond_to?(:add_step_command) then
          # Parent cannot hold steps; synthesize an add-steps command.
          # NOTE(review): this replacement is not pushed onto new_commands —
          # presumably unreachable because every parent reaching here accepts
          # steps; otherwise the synthesized command would be dropped. Verify.
          last_create_command = AddJobFlowStepsCommand.new(
            "--add-steps", "Add job flow steps", nil, commands
          )
        end
        last_create_command.add_step_command(cmd)
      elsif cmd.is_a?(BootstrapActionCommand) then
        if ! last_create_command.respond_to?(:add_bootstrap_command) then
          raise RuntimeError, "Bootstrap actions must follow a --create command"
        end
        last_create_command.add_bootstrap_command(cmd)
      elsif cmd.is_a?(CreateInstanceGroupCommand) || cmd.is_a?(AddInstanceGroupCommand) then
        if last_create_command.respond_to?(:add_instance_group_command) then
          last_create_command.add_instance_group_command(cmd)
        else
          # Parent cannot take instance groups; keep the command at top level.
          new_commands << cmd
        end
      else
        raise RuntimeError, "Unknown child command #{cmd.name} following #{last_create_command.name}"
      end
      # Child commands were folded into a parent; do not re-add them below.
      next
    end
    new_commands << cmd
  end

  commands.commands = new_commands
end
|
|
1623
|
+
|
|
1624
|
+
# Parses command-line args into commands, resolves credentials and global
# options, then executes every parsed command against a freshly built client.
#
# args          - command-line argument array (consumed destructively by parse!)
# client_class  - class handed to EmrClient to talk to the service
# logger        - object responding to puts and level=
# executor      - executor passed through to Commands
# exit_on_error - when true (default) a RuntimeError terminates the process
#                 with status -1; otherwise the error is re-raised to the caller
#
# Returns the populated Commands instance.
def self.create_and_execute_commands(args, client_class, logger, executor, exit_on_error=true)
  commands = Commands.new(logger, executor)

  begin
    # Distinct block parameter so it does not shadow the outer local `opts`
    # (the original `do |opts|` triggered Ruby's shadowing warning).
    opts = OptionParser.new do |parser|
      add_commands(commands, parser)
    end
    opts.parse!(args)

    if commands.get_field(:trace) then
      logger.level = :trace
    end

    # Remaining bare arguments are treated as job flow ids.
    commands.parse_jobflows(args)

    # No commands at all: behave as if --help had been passed.
    if commands.commands.size == 0 then
      commands.commands << HelpCommand.new("--help", "Print help text", nil, commands)
    end

    credentials = Credentials.new(commands)
    credentials.parse_credentials(commands.get_field(:credentials, "credentials.json"),
                                  commands.global_options)

    work_out_globals(commands)
    fold_commands(commands)
    commands.validate
    client = EmrClient.new(commands, logger, client_class)
    commands.enact(client)
  rescue RuntimeError => e
    logger.puts("Error: " + e.message)
    if commands.get_field(:trace) then
      logger.puts(e.backtrace.join("\n"))
    end
    if exit_on_error then
      exit(-1)
    else
      raise e
    end
  end
  return commands
end
|
|
1665
|
+
|
|
1666
|
+
# Derives global options that depend on one another:
#   * --region expands to the matching regional --endpoint (the two options
#     are mutually exclusive)
#   * a regional endpoint implies the matching regional --apps-path
#   * apps_path / beta_path fall back to defaults and lose any trailing "/"
#
# Raises RuntimeError if both --region and --endpoint were supplied.
def self.work_out_globals(commands)
  options = commands.global_options
  if commands.have(:region) then
    if commands.have(:endpoint) then
      raise RuntimeError, "You may not specify --region together with --endpoint"
    end

    endpoint = "https://#{options[:region]}.elasticmapreduce.amazonaws.com"
    commands.global_options[:endpoint] = endpoint
  end

  if commands.have(:endpoint) then
    # Regexp literal so the dot is genuinely escaped: the previous
    # double-quoted String form ("...\.elasticmapreduce") silently dropped the
    # backslash and matched any character; "https*" also admitted malformed
    # schemes like "httpss" — "https?" is the intent.
    region_match = commands.get_field(:endpoint).match(%r{\Ahttps?://(.*)\.elasticmapreduce})
    if !commands.have(:apps_path) && region_match then
      options[:apps_path] = "s3://#{region_match[1]}.elasticmapreduce"
    end
  end

  options[:apps_path] ||= "s3://us-east-1.elasticmapreduce"
  options[:beta_path] ||= "s3://beta.elasticmapreduce"
  # Normalize away trailing slashes so path concatenation stays clean.
  [:apps_path, :beta_path].each { |key| options[key].chomp!("/") }
end
|
|
1690
|
+
end
|