awscli 0.1.2 → 0.1.3

@@ -0,0 +1,472 @@
+ module Awscli
+   module Emr
+     class EMR
+       def initialize(connection)
+         @conn = connection
+       end
+
+       def list(options)
+         validate_job_ids options[:job_flow_ids] if options[:job_flow_ids]
+         opts = Marshal.load(Marshal.dump(options))
+         opts.reject! { |k| k == 'table' } if options[:table]
+         if job_flow_ids = opts.delete(:job_flow_ids)
+           opts.merge!('JobFlowIds' => job_flow_ids)
+         end
+         if job_flow_status = opts.delete(:job_flow_status)
+           opts.merge!('JobFlowStates' => job_flow_status)
+         end
+         if options[:table]
+           puts "For detailed information, don't pass the --table option"
+           job_flows = @conn.describe_job_flows(opts).body['JobFlows']
+           table_data = []
+           unless job_flows.empty?
+             job_flows.each do |job_flow|
+               table_data << {
+                 :job_flow_id => job_flow['JobFlowId'],
+                 :name => job_flow['Name'],
+                 :instance_count => job_flow['Instances']['InstanceCount'],
+                 :master_dns => job_flow['Instances']['MasterPublicDnsName'],
+                 :ec2_key_name => job_flow['Instances']['Ec2KeyName'],
+                 :state => job_flow['ExecutionStatusDetail']['State']
+               }
+             end
+           end
+           Formatador.display_table(table_data, [:job_flow_id, :name, :state, :instance_count, :master_dns, :ec2_key_name])
+         else
+           puts 'For less information, pass the --table option'
+           puts @conn.describe_job_flows(opts).body['JobFlows'].to_yaml
+         end
+       end
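+       # Illustrative usage of #list (sketch; the job flow id is hypothetical,
+       # assuming a Fog-style EMR connection object was passed to #initialize):
+       #   emr = Awscli::Emr::EMR.new(connection)
+       #   emr.list(:job_flow_ids => ['j-EXAMPLEID'], :table => true)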
+
+       def create_job_flow(options)
+         # => BOOTSTRAP ACTIONS
+         boot_strap_actions = []
+         if options[:bootstrap_actions]
+           options[:bootstrap_actions].each do |step|
+             boot_strap_actions << parse_boot_strap_actions(step)
+           end
+         end
+
+         # => STEPS
+         steps = []
+         if options[:custom_jar_steps]
+           options[:custom_jar_steps].each do |step|
+             steps << parse_custom_jar(step)
+           end
+         end
+         if options[:hive_interactive]
+           steps << hive_install(options[:hadoop_version])
+         end
+         if options[:pig_interactive]
+           steps << pig_install
+         end
+         if options[:hive_steps]
+           steps << hive_install(options[:hadoop_version]) unless options[:hive_interactive]
+           options[:hive_steps].each do |step|
+             steps << parse_hive_steps(step)
+           end
+         end
+         if options[:pig_steps]
+           steps << pig_install unless options[:pig_interactive]
+           options[:pig_steps].each do |step|
+             steps << parse_pig_steps(step, options[:hadoop_version])
+           end
+         end
+         if options[:streaming_steps]
+           options[:streaming_steps].each do |step|
+             steps << parse_streaming_steps(step)
+           end
+         end
+         if options[:hbase_install]
+           boot_strap_actions << hbase_install_boot_strap
+           steps << hbase_install_steps
+           # validate hadoop version and instance size
+           abort "Invalid hadoop version #{options[:hadoop_version]}, supported Hadoop versions for HBase are: #{Awscli::EMR::HBASE_SUPPORTED_HADOOP_VERSIONS.join(',')}" unless Awscli::EMR::HBASE_SUPPORTED_HADOOP_VERSIONS.include?(options[:hadoop_version])
+           if options[:instance_groups]
+             parse_instance_groups(options[:instance_groups]).each do |group|
+               unless is_valid_instance_type?(group['InstanceType'])
+                 abort "Instance type #{group['InstanceType']} is not compatible with HBase, instance size should be equal to or greater than m1.large"
+               end
+             end
+           end
+           if options[:master_instance_type]
+             unless is_valid_instance_type?(options[:master_instance_type])
+               abort "Instance type #{options[:master_instance_type]} is not compatible with HBase, instance size should be equal to or greater than m1.large"
+             end
+           end
+           if options[:slave_instance_type]
+             unless is_valid_instance_type?(options[:slave_instance_type])
+               abort "Instance type #{options[:slave_instance_type]} is not compatible with HBase, instance size should be equal to or greater than m1.large"
+             end
+           end
+           # => HBase backups
+           if options[:hbase_backup_schedule]
+             # Backup
+             if options[:hbase_consistent_backup]
+               steps << parse_hbase_backup(options[:hbase_backup_schedule], true)
+             else
+               steps << parse_hbase_backup(options[:hbase_backup_schedule])
+             end
+           elsif options[:hbase_backup_restore]
+             # Restore
+             steps << parse_hbase_restore(options[:hbase_backup_restore])
+           end
+         end
+
+         # => INSTANCES
+         instances = Hash.new
+         instances['HadoopVersion'] = options[:hadoop_version]
+         if options[:hive_interactive] or options[:pig_interactive] or options[:hbase_install] # then the job flow should not be terminated automatically
+           instances['KeepJobFlowAliveWhenNoSteps'] = true
+         else
+           instances['KeepJobFlowAliveWhenNoSteps'] = options[:alive]
+         end
+         instances['Ec2KeyName'] = options[:instance_ec2_key_name] if options[:instance_ec2_key_name]
+         instances['InstanceCount'] = options[:instance_count] if options[:instance_count]
+         instances['MasterInstanceType'] = options[:master_instance_type] if options[:master_instance_type]
+         instances['SlaveInstanceType'] = options[:slave_instance_type] if options[:slave_instance_type]
+         instances['TerminationProtected'] = options[:termination_protection] if options[:termination_protection]
+         # => Instance Groups
+         instances['InstanceGroups'] = parse_instance_groups(options[:instance_groups]) if options[:instance_groups]
+
+         # => Build final request
+         job_flow = Hash.new
+         job_flow['AmiVersion'] = Awscli::EMR::HADOOP_AMI_MAPPING[options[:hadoop_version]]
+         job_flow['LogUri'] = options[:log_uri] if options[:log_uri]
+         job_flow['BootstrapActions'] = boot_strap_actions if options[:bootstrap_actions] or options[:hbase_install]
+         job_flow['Instances'] = instances
+         job_flow['Steps'] = steps
+         if options[:alive] or options[:hive_interactive] or options[:pig_interactive] or options[:hbase_install]
+           @conn.run_job_flow("#{options[:name]} (requires manual termination)", job_flow)
+         else
+           @conn.run_job_flow(options[:name], job_flow)
+         end
+         puts "Created job flow '#{options[:name]}' successfully!"
+       end
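+       # Illustrative options for #create_job_flow (sketch; bucket, key pair and
+       # hadoop version are hypothetical values):
+       #   emr.create_job_flow(
+       #     :name => 'wordcount',
+       #     :hadoop_version => '1.0.3',
+       #     :instance_ec2_key_name => 'my-keypair',
+       #     :instance_count => 3,
+       #     :master_instance_type => 'm1.large',
+       #     :slave_instance_type => 'm1.large',
+       #     :log_uri => 's3://my-bucket/emr-logs',
+       #     :custom_jar_steps => ['s3://my-bucket/wc.jar,wordcount,org.example.WC,CONTINUE,in=out,,']
+       #   )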
+
+       def add_instance_group(options)
+         opts = Marshal.load(Marshal.dump(options))
+         opts.reject! { |key| key == 'job_flow_id' }
+         opts.reject! { |key| key == 'region' }
+         abort 'invalid job id' unless @conn.describe_job_flows.body['JobFlows'].map { |job| job['JobFlowId'] }.include?(options[:job_flow_id])
+         abort 'invalid instance type' unless Awscli::Instances::INSTANCE_SIZES.include?(options[:instance_type])
+         if instance_count = opts.delete(:instance_count)
+           opts.merge!('InstanceCount' => instance_count)
+         end
+         if instance_type = opts.delete(:instance_type)
+           opts.merge!('InstanceType' => instance_type)
+         end
+         if instance_role = opts.delete(:instance_role)
+           opts.merge!('InstanceRole' => instance_role)
+         end
+         if name = opts.delete(:name)
+           opts.merge!('Name' => name)
+         end
+         if bid_price = opts.delete(:bid_price)
+           opts.merge!('BidPrice' => bid_price)
+           opts.merge!('MarketType' => 'SPOT')
+         else
+           opts.merge!('MarketType' => 'ON_DEMAND')
+         end
+         instance_groups = [opts]
+         @conn.add_instance_groups(options[:job_flow_id], 'InstanceGroups' => instance_groups)
+         puts "Added instance group to job flow: #{options[:job_flow_id]}"
+       end
+
+       def add_steps(job_flow_id, job_steps)
+         validate_job_ids job_flow_id
+         @conn.add_job_flow_steps(job_flow_id, 'Steps' => job_steps.map { |step| parse_custom_jar(step) })
+         puts "Added step(s) to job flow id: #{job_flow_id}"
+       end
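+       # Illustrative call (hypothetical job flow id and jar path; the step
+       # string uses the format parse_custom_jar expects below):
+       #   emr.add_steps('j-EXAMPLEID', ['s3://my-bucket/wc.jar,wordcount,org.example.WC,CONTINUE,in=out,,'])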
+
+       def modify_instance_group(options)
+         abort "Invalid instance group id: #{options[:instance_group_id]}" unless validate_instance_group_id?(options[:instance_group_id])
+         @conn.modify_instance_groups(
+           'InstanceGroups' => [
+             'InstanceCount' => options[:instance_count],
+             'InstanceGroupId' => options[:instance_group_id]
+           ]
+         )
+       rescue Excon::Errors::BadRequest
+         puts "[Error]: #{$!}"
+       else
+         puts "Modified instance group #{options[:instance_group_id]} size to #{options[:instance_count]}"
+       end
+
+       def set_termination_protection(job_flow_ids, terminate_protection)
+         validate_job_ids job_flow_ids
+         @conn.set_termination_protection(
+           terminate_protection,
+           {
+             'JobFlowIds' => job_flow_ids
+           }
+         )
+         if terminate_protection
+           puts "Termination protection flag added to job flows: #{job_flow_ids.join(',')}"
+         else
+           puts "Termination protection flag removed from job flows: #{job_flow_ids.join(',')}"
+         end
+       end
+
+       def add_instance_groups(job_flow_id, groups)
+         validate_job_ids job_flow_id
+         instance_groups = parse_instance_groups(groups)
+         @conn.add_instance_groups(job_flow_id, 'InstanceGroups' => instance_groups)
+       end
+
+       def delete(job_ids)
+         validate_job_ids job_ids
+         @conn.terminate_job_flows('JobFlowIds' => job_ids)
+         puts "Terminated job flows: #{job_ids.join(',')}"
+       end
+
+       private
+
+       def validate_job_ids(job_ids)
+         job_ids = Array(job_ids)
+         available_job_ids = @conn.describe_job_flows.body['JobFlows'].map { |job| job['JobFlowId'] }
+         abort 'invalid job id(s)' unless (job_ids - available_job_ids).empty?
+       end
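+       # The set difference above treats the ids as an unordered subset, e.g.
+       #   (%w(j-1 j-9) - %w(j-1 j-2)).empty? # => false, so j-9 is rejected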
+
+       def validate_instance_group_id?(group_id)
+         @conn.describe_job_flows.body['JobFlows'].map { |j| j['Instances']['InstanceGroups'].map { |g| g['InstanceGroupId'] } }.flatten.include?(group_id)
+       end
+
+       def is_valid_instance_type?(instance_type)
+         !Awscli::EMR::HBASE_INVALID_INSTANCES.member?(instance_type)
+       end
+
+       def parse_instance_groups(groups)
+         # parse instance_groups => instance_count,instance_role(MASTER | CORE | TASK),instance_type,name,bid_price
+         instance_groups = []
+         groups.each do |group|
+           instance_count, instance_role, instance_size, name, bid_price = group.split(',')
+           if instance_count.to_s.empty? or instance_role.to_s.empty? or instance_size.to_s.empty?
+             abort 'instance_count, instance_role and instance_size are required'
+           end
+           abort "Invalid instance role: #{instance_role}" unless %w(MASTER CORE TASK).include?(instance_role.upcase)
+           abort "Invalid instance type: #{instance_size}" unless Awscli::Instances::INSTANCE_SIZES.include?(instance_size)
+           if bid_price
+             instance_groups << {
+               'BidPrice' => bid_price,
+               'InstanceCount' => instance_count.to_i,
+               'InstanceRole' => instance_role,
+               'InstanceType' => instance_size,
+               'MarketType' => 'SPOT',
+               'Name' => name || "awscli-emr-#{instance_role}-group",
+             }
+           else
+             instance_groups << {
+               'InstanceCount' => instance_count.to_i,
+               'InstanceRole' => instance_role,
+               'InstanceType' => instance_size,
+               'MarketType' => 'ON_DEMAND',
+               'Name' => name || "awscli-emr-#{instance_role}-group",
+             }
+           end
+         end
+         instance_groups
+       end
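+       # Illustrative (hypothetical values): a group spec like
+       #   '2,CORE,m1.large,analytics,0.08'
+       # parses to a SPOT group:
+       #   { 'BidPrice' => '0.08', 'InstanceCount' => 2, 'InstanceRole' => 'CORE',
+       #     'InstanceType' => 'm1.large', 'MarketType' => 'SPOT', 'Name' => 'analytics' }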
265
+
266
+ def parse_boot_strap_actions(step)
267
+ #parse => name,bootstrap_action_path,bootstrap_action_args
268
+ name, path, *args = step.split(',')
269
+ if name.empty? or path.empty?
270
+ abort 'name and path are required'
271
+ end
272
+ boot_strap_actions = {
273
+ 'Name' => name,
274
+ 'ScriptBootstrapAction' => {
275
+ 'Args' => args || [],
276
+ 'Path' => path
277
+ }
278
+ }
279
+ boot_strap_actions
280
+ end
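+       # Illustrative (hypothetical script path): a bootstrap action spec like
+       #   'configure-heap,s3://my-bucket/scripts/setup.sh,--large'
+       # yields:
+       #   { 'Name' => 'configure-heap',
+       #     'ScriptBootstrapAction' => { 'Args' => ['--large'], 'Path' => 's3://my-bucket/scripts/setup.sh' } }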
+
+       def parse_custom_jar(step)
+         # parse jar_path(s3)*,name_of_step*,main_class,action_on_failure(TERMINATE_JOB_FLOW | CANCEL_AND_WAIT | CONTINUE),arg1=arg2=arg3,properties(k=v,k=v)
+         abort "invalid step pattern, expecting 'jar_path(s3)*,name_of_step*,main_class,action_on_failure,arg1=arg2=arg3,prop_k1=prop_v1,prop_k2=prop_v2'" unless step =~ /(.*),(.*),(.*),(.*),(.*),(.*),(.*)/
+         jar, name, main_class, action_on_failure, extra_args, *job_conf = step.split(',')
+         if jar.to_s.empty? or name.to_s.empty?
+           abort 'jar and name are required for a step'
+         end
+         step_to_run = {
+           'ActionOnFailure' => action_on_failure.to_s.empty? ? 'TERMINATE_JOB_FLOW' : action_on_failure,
+           'Name' => name,
+           'HadoopJarStep' => {
+             'Jar' => jar,
+             'Args' => extra_args.to_s.empty? ? [] : extra_args.split('='),
+             'Properties' => []
+           }
+         }
+         step_to_run['HadoopJarStep']['MainClass'] = main_class unless main_class.to_s.empty?
+         unless job_conf.empty?
+           job_conf.each do |kv_pair|
+             properties = {}
+             properties['Key'], properties['Value'] = kv_pair.split('=')
+             step_to_run['HadoopJarStep']['Properties'] << properties
+           end
+         end
+         step_to_run
+       end
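+       # Illustrative (hypothetical bucket and class): the step spec
+       #   's3://my-bucket/wc.jar,wordcount,org.example.WC,CONTINUE,in=out,mapred.reduce.tasks=2,'
+       # produces a step with that Jar and MainClass, Args ['in', 'out'] and one
+       # Key/Value property (mapred.reduce.tasks => 2).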
309
+
310
+ def parse_hive_steps(step)
311
+ #parse script_path(s3)*,input_path(s3),output_path(s3),'-d','args1','-d','args2','-d','arg3'
312
+ path, input_path, output_path, *args = step.split(',')
313
+ abort 'path to the hive script is required' if path.empty?
314
+ hive_step = {
315
+ 'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
316
+ 'Name' => 'awscli-emr-hive-step',
317
+ 'HadoopJarStep' => {
318
+ "Jar" => 's3://us-west-1.elasticmapreduce/libs/script-runner/script-runner.jar',
319
+ "Args" => [
320
+ 's3://us-west-1.elasticmapreduce/libs/hive/hive-script',
321
+ '--base-path',
322
+ 's3://us-west-1.elasticmapreduce/libs/hive/',
323
+ '--run-hive-script',
324
+ '--args',
325
+ '-f',
326
+ path
327
+ ]
328
+ }
329
+ }
330
+ hive_step['HadoopJarStep']['Args'] << '-d' << "INPUT=#{input_path}" unless input_path.empty?
331
+ hive_step['HadoopJarStep']['Args'] << '-d' << "OUTPUT=#{output_path}" unless output_path.empty?
332
+ hive_step['HadoopJarStep']['Args'] += args unless args.empty?
333
+ hive_step
334
+ end
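+       # Illustrative (hypothetical paths): a hive step spec like
+       #   's3://my-bucket/q.hql,s3://my-bucket/in,s3://my-bucket/out'
+       # runs q.hql via script-runner with -d INPUT=... -d OUTPUT=... appended.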
+
+       def parse_pig_steps(step, hadoop_version)
+         # parse script_path(s3)*,input_path(s3),output_path(s3),'-p','arg1','-p','arg2','-p','arg3'
+         path, input_path, output_path, *args = step.split(',')
+         abort 'path to the pig script is required' if path.to_s.empty?
+         pig_step = {
+           'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
+           'Name' => 'awscli-emr-pig-step',
+           'HadoopJarStep' => {
+             'Jar' => 's3://us-west-1.elasticmapreduce/libs/script-runner/script-runner.jar',
+             'Args' => %w(s3://us-west-1.elasticmapreduce/libs/pig/pig-script --base-path s3://us-west-1.elasticmapreduce/libs/pig/ --run-pig-script --pig-versions latest --args)
+           }
+         }
+         pig_step['HadoopJarStep']['Args'] << '-p' << "INPUT=#{input_path}" unless input_path.to_s.empty?
+         pig_step['HadoopJarStep']['Args'] << '-p' << "OUTPUT=#{output_path}" unless output_path.to_s.empty?
+         pig_step['HadoopJarStep']['Args'] += args unless args.empty?
+         pig_step['HadoopJarStep']['Args'] << path
+         pig_step
+       end
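+       # Illustrative (hypothetical paths): a pig step spec like
+       #   's3://my-bucket/wc.pig,s3://my-bucket/in,s3://my-bucket/out'
+       # appends -p INPUT=... / -p OUTPUT=... and the script path after --args.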
+
+       def parse_streaming_steps(step)
+         # parse input*,output*,mapper*,reducer*,extra_arg1,extra_arg2
+         input, output, mapper, reducer, *args = step.split(',')
+         if input.to_s.empty? or output.to_s.empty? or mapper.to_s.empty? or reducer.to_s.empty?
+           abort 'input, output, mapper and reducer are required'
+         end
+         streaming_step = {
+           'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
+           'Name' => 'awscli-emr-streaming-step',
+           'HadoopJarStep' => {
+             'Jar' => '/home/hadoop/contrib/streaming/hadoop-streaming.jar',
+             'Args' => [
+               '-input', input,
+               '-output', output,
+               '-mapper', mapper,
+               '-reducer', reducer
+             ]
+           }
+         }
+         streaming_step['HadoopJarStep']['Args'] += args unless args.empty?
+         # TODO: Add -jobconf params as k=v,k=v,k=v
+         streaming_step
+       end
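+       # Illustrative (hypothetical paths): a streaming step spec like
+       #   's3://my-bucket/in,s3://my-bucket/out,s3://my-bucket/map.py,s3://my-bucket/reduce.py'
+       # becomes -input/-output/-mapper/-reducer args for hadoop-streaming.jar.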
+
+       def hive_install(hadoop_version)
+         {
+           'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
+           'Name' => 'awscli-emr-hive-setup',
+           'HadoopJarStep' => {
+             'Args' => [
+               's3://us-east-1.elasticmapreduce/libs/hive/hive-script',
+               '--base-path',
+               's3://us-east-1.elasticmapreduce/libs/hive/',
+               '--install-hive',
+               '--hive-versions',
+               Awscli::EMR::HADOOP_HIVE_COMPATIBILITY[hadoop_version]
+             ],
+             'Jar' => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar'
+           }
+         }
+       end
+
+       def pig_install
+         {
+           'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
+           'Name' => 'awscli-emr-pig-setup',
+           'HadoopJarStep' => {
+             'Args' => %w(s3://us-east-1.elasticmapreduce/libs/pig/pig-script --base-path s3://us-east-1.elasticmapreduce/libs/pig/ --install-pig --pig-versions latest),
+             'Jar' => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar'
+           }
+         }
+       end
+
+       def hbase_install_boot_strap
+         {
+           'Name' => 'awscli-emr-install-hbase',
+           'ScriptBootstrapAction' => {
+             'Args' => [],
+             'Path' => 's3://us-west-1.elasticmapreduce/bootstrap-actions/setup-hbase'
+           }
+         }
+       end
+
+       def hbase_install_steps
+         {
+           'ActionOnFailure' => 'CANCEL_AND_WAIT',
+           'Name' => 'awscli-emr-start-hbase',
+           'HadoopJarStep' => {
+             'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
+             'Args' => %w(emr.hbase.backup.Main --start-master)
+           }
+         }
+       end
+
+       def parse_hbase_backup(backup_step, consistent=false)
+         # parse frequency*,frequency_unit*(days|hrs|mins),path(s3)*,start_time*(now|iso-format)
+         abort 'Invalid backup step pattern, expecting frequency,frequency_unit(days|hrs|mins),path(s3),start_time(now|iso-format)' unless backup_step =~ /(.*),(.*),(.*),(.*)/
+         frequency, frequency_unit, path, start_time = backup_step.split(',')
+         if frequency.to_s.empty? or frequency_unit.to_s.empty? or path.to_s.empty? or start_time.to_s.empty?
+           abort 'frequency, frequency_unit, path and start_time are required to perform a backup'
+         end
+         abort "Invalid frequency unit: #{frequency_unit}" unless %w(days hrs mins).include?(frequency_unit)
+         hbase_backup_step = {
+           'Name' => 'awscli-emr-schedule-hbase-backup',
+           'ActionOnFailure' => 'CANCEL_AND_WAIT',
+           'HadoopJarStep' => {
+             'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
+             'Args' => ['emr.hbase.backup.Main', '--backup-dir', path, '--set-scheduled-backup', 'true', '--full-backup-time-interval',
+                        frequency, '--incremental-backup-time-unit', frequency_unit, '--start-time', start_time]
+           }
+         }
+         hbase_backup_step['HadoopJarStep']['Args'] << '--consistent' if consistent
+         hbase_backup_step
+       end
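+       # Illustrative (hypothetical bucket): '7,days,s3://my-bucket/hbase-backups,now'
+       # schedules a backup every 7 days starting immediately; pass
+       # consistent=true to append --consistent.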
+
+       def parse_hbase_restore(restore_step)
+         # parse path(s3)*,version
+         path, version = restore_step.split(',')
+         if path.to_s.empty?
+           abort 'path is required'
+         end
+         hbase_restore_step = {
+           'Name' => 'awscli-emr-restore-hbase-backup',
+           'ActionOnFailure' => 'CANCEL_AND_WAIT',
+           'HadoopJarStep' => {
+             'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
+             'Args' => ['emr.hbase.backup.Main', '--restore', '--backup-dir', path]
+           }
+         }
+         hbase_restore_step['HadoopJarStep']['Args'] << '--backup-version' << version unless version.to_s.empty?
+         hbase_restore_step
+       end
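+       # Illustrative (hypothetical bucket and version id):
+       #   's3://my-bucket/hbase-backups,20130412'
+       # restores from the given backup dir, adding --backup-version 20130412.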
+     end
+   end
+ end