awscli 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,472 @@
1
module Awscli
  module Emr
    # Wraps an EMR (Elastic MapReduce) API connection and exposes the
    # operations used by the awscli command layer: listing, creating and
    # terminating job flows, and managing instance groups and steps.
    class EMR
      # @param connection [Object] an already-authenticated EMR API handle
      #   (presumably a Fog-style connection — verify against the caller);
      #   stored and used for every request issued by this class.
      def initialize(connection)
        @conn = connection
      end
7
+
8
# Prints job flows: a compact Formatador table when options[:table] is set,
# otherwise the full DescribeJobFlows payload as YAML.
#
# Recognised option keys: :job_flow_ids, :job_flow_status, :table.
def list options
  validate_job_ids options[:job_flow_ids] if options[:job_flow_ids]
  # Deep copy so we can rewrite CLI keys into API keys without mutating
  # the caller's options hash.
  opts = Marshal.load(Marshal.dump(options))
  # BUG FIX: option keys may be symbols (plain hashes) or strings (Thor
  # options); the original compared only against the string 'table', which
  # never matched symbol keys and leaked :table into the API request.
  opts.reject! { |k| k.to_s == 'table' } if options[:table]
  if job_flow_ids = opts.delete(:job_flow_ids)
    opts.merge!('JobFlowIds' => job_flow_ids)
  end
  if job_flow_status = opts.delete(:job_flow_status)
    opts.merge!('JobFlowStates' => job_flow_status)
  end
  if options[:table]
    puts 'For detailed information, dont pass --table option'
    job_flows = @conn.describe_job_flows(opts).body['JobFlows']
    table_data = Array.new
    unless job_flows.empty?
      job_flows.each do |job_flow|
        table_data << {
          :job_flow_id => job_flow['JobFlowId'],
          :name => job_flow['Name'],
          :instance_count => job_flow['Instances']['InstanceCount'],
          :master_dns => job_flow['Instances']['MasterPublicDnsName'],
          :ec2_key_name => job_flow['Instances']['Ec2KeyName'],
          :state => job_flow['ExecutionStatusDetail']['State']
        }
      end
    end
    Formatador.display_table(table_data, [:job_flow_id, :name, :state, :instance_count, :master_dns, :ec2_key_name])
  else
    puts 'For less information, pass --table option'
    puts @conn.describe_job_flows(opts).body['JobFlows'].to_yaml
  end
end
40
+
41
# Builds and submits a RunJobFlow request from CLI options.
#
# Assembles, in order: bootstrap actions, steps (custom jar / hive / pig /
# streaming / hbase), the Instances hash, and the final job_flow request,
# then calls @conn.run_job_flow. Interactive hive/pig or an HBase install
# force the cluster to stay alive (manual termination required).
#
# NOTE(review): assumes `options` supports symbol key access (e.g. Thor
# options) and that Awscli::EMR::* constants are defined elsewhere — verify.
def create_job_flow(options)
  # => BOOTSTRAP ACTIONS
  boot_strap_actions = []
  if options[:bootstrap_actions]
    options[:bootstrap_actions].each do |step|
      boot_strap_actions << parse_boot_strap_actions(step)
    end
  end

  # => STEPS
  steps = []
  if options[:custom_jar_steps]
    options[:custom_jar_steps].each do |step|
      steps << parse_custom_jar(step)
    end
  end
  if options[:hive_interactive]
    steps << hive_install(options[:hadoop_version])
  end
  if options[:pig_interactive]
    steps << pig_install
  end
  if options[:hive_steps]
    # Install hive first unless the interactive flag already queued the install step.
    steps << hive_install(options[:hadoop_version]) unless options[:hive_interactive]
    options[:hive_steps].each do |step|
      steps << parse_hive_steps(step)
    end
  end
  if options[:pig_steps]
    steps << pig_install unless options[:pig_interactive]
    options[:pig_steps].each do |step|
      steps << parse_pig_steps(step, options[:hadoop_version])
    end
  end
  if options[:streaming_steps]
    options[:streaming_steps].each do |step|
      steps << parse_streaming_steps(step)
    end
  end
  if options[:hbase_install]
    boot_strap_actions << hbase_install_boot_strap
    steps << hbase_install_steps
    #validate hadoop version and instance size
    abort "Invalid hadoop version #{options[:hadoop_version]}, supported Hadoop Versions for HBase are: #{Awscli::EMR::HBASE_SUPPORTED_HADOOP_VERSIONS.join(',')}" unless Awscli::EMR::HBASE_SUPPORTED_HADOOP_VERSIONS.include?(options[:hadoop_version])
    # Every explicitly requested instance group must use an HBase-capable size.
    options[:instance_groups] && parse_instance_groups(options[:instance_groups]).each do |group|
      unless is_valid_instance_type?(group['InstanceType'])
        abort "Instance type #{group['InstanceType']} is not compatible with HBase, instance size should be equal or greater than m1.large"
      end
    end
    if options[:master_instance_type]
      unless is_valid_instance_type?(options[:master_instance_type])
        abort "Instance type #{options[:master_instance_type]} is not compatible with HBase, instance size should be equal or greater than m1.large"
      end
    end
    if options[:slave_instance_type]
      unless is_valid_instance_type?(options[:slave_instance_type])
        abort "Instance type #{options[:slave_instance_type]} is not compatible with HBase, instance size should be equal or greater than m1.large"
      end
    end
    # => HBase backups (schedule takes precedence over restore)
    if options[:hbase_backup_schedule]
      # Backup
      if options[:hbase_consistent_backup]
        steps << parse_hbase_backup(options[:hbase_backup_schedule], true)
      else
        steps << parse_hbase_backup(options[:hbase_backup_schedule])
      end
    elsif options[:hbase_backup_restore]
      # Restore
      steps << parse_hbase_restore(options[:hbase_backup_restore])
    end
  end

  # => INSTANCES
  instances = Hash.new
  instances['HadoopVersion'] = options[:hadoop_version]
  if options[:hive_interactive] or options[:pig_interactive] or options[:hbase_install] #then job flow should not be terminated
    instances['KeepJobFlowAliveWhenNoSteps'] = true
  else
    instances['KeepJobFlowAliveWhenNoSteps'] = options[:alive]
  end
  instances['Ec2KeyName'] = options[:instance_ec2_key_name] if options[:instance_ec2_key_name]
  instances['InstanceCount'] = options[:instance_count] if options[:instance_count]
  instances['MasterInstanceType'] = options[:master_instance_type] if options[:master_instance_type]
  instances['SlaveInstanceType'] = options[:slave_instance_type] if options[:slave_instance_type]
  instances['TerminationProtected'] = options[:termination_protection] if options[:termination_protection]
  # => Instance Groups
  instances['InstanceGroups'] = parse_instance_groups(options[:instance_groups]) if options[:instance_groups]

  # => Build final request
  job_flow = Hash.new
  job_flow['AmiVersion'] = Awscli::EMR::HADOOP_AMI_MAPPING[options[:hadoop_version]]
  job_flow['LogUri'] = options[:log_uri] if options[:log_uri]
  job_flow['BootstrapActions'] = boot_strap_actions if options[:bootstrap_actions] or options[:hbase_install]
  job_flow['Instances'] = instances
  job_flow['Steps'] = steps
  if options[:alive] or options[:hive_interactive] or options[:pig_interactive] or options[:hbase_install]
    @conn.run_job_flow("#{options[:name]} (requires manual termination)", job_flow)
  else
    @conn.run_job_flow(options[:name], job_flow)
  end
  puts "Create JobFlow '#{options[:name]}' Successfully!"
end
144
+
145
# Adds a single instance group to a running job flow.
#
# Recognised option keys: :job_flow_id (required), :instance_count,
# :instance_type, :instance_role, :name, :bid_price (presence of
# :bid_price requests a SPOT group, otherwise ON_DEMAND).
def add_instance_group(options)
  # Deep copy so the caller's hash is not mutated while CLI keys are
  # rewritten into API keys.
  opts = Marshal.load(Marshal.dump(options))
  # BUG FIX: keys may be symbols or strings depending on the caller
  # (plain hash vs Thor options); compare via to_s so :job_flow_id and
  # :region are stripped reliably instead of leaking into the request.
  opts.reject! { |key| key.to_s == 'job_flow_id' }
  opts.reject! { |key| key.to_s == 'region' }
  abort 'invalid job id' unless @conn.describe_job_flows.body['JobFlows'].map { |job| job['JobFlowId'] }.include?(options[:job_flow_id])
  abort 'invalid instance type' unless Awscli::Instances::INSTANCE_SIZES.include?(options[:instance_type])
  if instance_count = opts.delete(:instance_count)
    opts.merge!('InstanceCount' => instance_count)
  end
  if instance_type = opts.delete(:instance_type)
    opts.merge!('InstanceType' => instance_type)
  end
  if instance_role = opts.delete(:instance_role)
    opts.merge!('InstanceRole' => instance_role)
  end
  if name = opts.delete(:name)
    opts.merge!('Name' => name)
  end
  if bid_price = opts.delete(:bid_price)
    # A bid price implies a spot-market request.
    opts.merge!('BidPrice' => bid_price)
    opts.merge!('MarketType' => 'SPOT')
  else
    opts.merge!('MarketType' => 'ON_DEMAND')
  end
  instance_groups = [opts]
  @conn.add_instance_groups(options[:job_flow_id], 'InstanceGroups' => instance_groups)
  puts "Added instance group to job flow(with id): #{options[:job_flow_id]}"
end
173
+
174
# Adds a custom-jar step to an already-running job flow.
def add_steps(job_flow_id, job_steps)
  validate_job_ids job_flow_id
  # NOTE(review): parse_custom_jar returns a single step hash, so 'Steps'
  # is sent as a hash rather than an array here — verify the API accepts it.
  parsed_step = parse_custom_jar(job_steps)
  @conn.add_job_flow_steps(job_flow_id, 'Steps' => parsed_step)
  puts "Added step to job flow id: #{job_flow_id}"
end
179
+
180
# Resizes an existing instance group to options[:instance_count];
# reports (instead of raising) API BadRequest errors.
def modify_instance_group(options)
  abort "Invalid instance group id: #{options[:instance_group_id]}" unless validate_instance_group_id?(options[:instance_group_id])
  begin
    @conn.modify_instance_groups(
      'InstanceGroups' => [
        {
          'InstanceCount' => options[:instance_count],
          'InstanceGroupId' => options[:instance_group_id]
        }
      ]
    )
  rescue Excon::Errors::BadRequest
    puts "[Error]: #{$!}"
  else
    puts "Modified instance group #{options[:instance_group_id]} size to #{options[:instance_count]}"
  end
end
193
+
194
# Toggles the termination-protection flag on the given job flows.
def set_termination_protection(job_flow_ids, terminate_protection)
  validate_job_ids job_flow_ids
  payload = { 'JobFlowIds' => job_flow_ids }
  @conn.set_termination_protection(terminate_protection, payload)
  if terminate_protection
    puts "Termination protection flag added to job_flows: #{job_flow_ids.join(',')}"
  else
    puts "Termination protection flag removed from job_flows: #{job_flow_ids.join(',')}"
  end
end
206
+
207
# Parses one or more instance-group CLI specs and attaches them to the
# given job flow.
def add_instance_groups(job_flow_id, groups)
  validate_job_ids job_flow_id
  parsed_groups = parse_instance_groups(groups)
  @conn.add_instance_groups(job_flow_id, 'InstanceGroups' => parsed_groups)
end
212
+
213
# Terminates the given job flows after validating their ids.
def delete(job_ids)
  validate_job_ids job_ids
  payload = { 'JobFlowIds' => job_ids }
  @conn.terminate_job_flows(payload)
  puts "Terminated Job Flows: #{job_ids.join(',')}"
end
218
+
219
+ private
220
+
221
# Aborts unless every given job id exists in the account's job flows.
# Accepts a single id or an array of ids.
def validate_job_ids(job_ids)
  # BUG FIX: the original used available.each_cons(job_ids.size).include?,
  # which only matched when the requested ids appeared consecutively and in
  # order in the API listing (and misbehaved for a scalar id, where .size is
  # the string length). Set difference checks plain membership instead.
  job_ids = Array(job_ids)
  available_job_ids = @conn.describe_job_flows.body['JobFlows'].map { |job| job['JobFlowId'] }
  abort 'invalid job id\'s' unless (job_ids - available_job_ids).empty?
end
225
+
226
# True when group_id belongs to any instance group of any known job flow.
def validate_instance_group_id?(group_id)
  all_group_ids = @conn.describe_job_flows.body['JobFlows'].flat_map do |flow|
    flow['Instances']['InstanceGroups'].map { |grp| grp['InstanceGroupId'] }
  end
  all_group_ids.include?(group_id)
end
229
+
230
# True unless the type is on the HBase-incompatible blacklist
# (sizes smaller than m1.large per the callers' abort messages).
def is_valid_instance_type?(instance_type)
  !Awscli::EMR::HBASE_INVALID_INSTANCES.include?(instance_type)
end
233
+
234
# Parses instance-group CLI specs into API hashes.
# Each spec: instance_count,instance_role(MASTER|CORE|TASK),instance_type[,name[,bid_price]]
# A bid_price requests a SPOT group; otherwise ON_DEMAND.
def parse_instance_groups(groups)
  instance_groups = []
  groups.each do |group|
    # BUG FIX: the original split an undefined variable `ig` instead of the
    # block variable `group`, raising NameError on every call.
    instance_count, instance_role, instance_size, name, bid_price = group.split(',')
    # to_s guards against nil when fewer than three fields are supplied.
    if instance_count.to_s.empty? or instance_role.to_s.empty? or instance_size.to_s.empty?
      abort 'instance_count, instance_role and instance_size are required'
    end
    abort "Invalid instance role: #{instance_role}" unless %w(MASTER CORE TASK).include?(instance_role.upcase)
    abort "Invalid instance type: #{instance_size}" unless Awscli::Instances::INSTANCE_SIZES.include?(instance_size)
    group_hash = {
      'InstanceCount' => instance_count.to_i,
      'InstanceRole' => instance_role,
      'InstanceType' => instance_size,
      'Name' => name || "awscli-emr-#{instance_role}-group",
    }
    if bid_price
      group_hash['BidPrice'] = bid_price
      group_hash['MarketType'] = 'SPOT'
    else
      group_hash['MarketType'] = 'ON_DEMAND'
    end
    instance_groups << group_hash
  end
  instance_groups
end
265
+
266
# Parses a bootstrap-action CLI spec into an API hash.
# Spec: name,bootstrap_action_path[,arg1,arg2,...]
def parse_boot_strap_actions(step)
  name, path, *args = step.split(',')
  # BUG FIX: to_s guards against nil.empty? crashing when fewer than two
  # fields are supplied; we now abort with the intended message instead.
  if name.to_s.empty? or path.to_s.empty?
    abort 'name and path are required'
  end
  boot_strap_actions = {
    'Name' => name,
    'ScriptBootstrapAction' => {
      # A splat always yields an array (never nil), so the original
      # `args || []` fallback was redundant.
      'Args' => args,
      'Path' => path
    }
  }
  boot_strap_actions
end
281
+
282
# Parses a custom-jar step CLI spec into an API hash.
# Spec: jar_path(s3)*,name_of_step*,main_class,action_on_failure(TERMINATE_JOB_FLOW | CANCEL_AND_WAIT | CONTINUE),arg1=agr2=arg3,properties(k=v,k=v)
#
# BUG FIX: the original declared the parameter as `steps` but referenced
# `step` throughout the body, raising NameError on every call.
def parse_custom_jar(step)
  abort "invalid step pattern, expecting 'jar_path(s3)*,name_of_step*,main_class,action_on_failure,arg1=agr2=arg3,prop_k1=prop_v1,prop_k2=prop_v2)'" unless step =~ /(.*),(.*),(.*),(.*),(.*),(.*),(.*)/
  jar, name, main_class, action_on_failure, extra_args, *job_conf = step.split(',')
  # to_s guards: split drops trailing empty fields, leaving nils for the
  # optional positions, which would otherwise crash on .empty?.
  if jar.to_s.empty? or name.to_s.empty?
    abort 'jar and name are required for a step'
  end
  step_to_run = {
    'ActionOnFailure' => action_on_failure.to_s.empty? ? 'TERMINATE_JOB_FLOW' : action_on_failure,
    'Name' => name,
    'HadoopJarStep' => {
      'Jar' => jar,
      # args are packed into one field separated by '=' (see spec above)
      'Args' => extra_args.to_s.empty? ? [] : extra_args.split('='),
      'Properties' => []
    }
  }
  step_to_run['HadoopJarStep']['MainClass'] = main_class unless main_class.to_s.empty?
  unless job_conf.empty?
    job_conf.each do |kv_pair|
      properties = {}
      properties['Key'], properties['Value'] = kv_pair.split('=')
      step_to_run['HadoopJarStep']['Properties'] << properties
    end
  end
  step_to_run
end
309
+
310
# Parses a hive-step CLI spec into a script-runner step hash.
# Spec: script_path(s3)*[,input_path(s3)][,output_path(s3)][,-d,arg1,-d,arg2,...]
def parse_hive_steps(step)
  path, input_path, output_path, *args = step.split(',')
  # BUG FIX: to_s guards — when only the script path is given, input_path
  # and output_path are nil and the original crashed on nil.empty?.
  abort 'path to the hive script is required' if path.to_s.empty?
  hive_step = {
    'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
    'Name' => 'awscli-emr-hive-step',
    'HadoopJarStep' => {
      "Jar" => 's3://us-west-1.elasticmapreduce/libs/script-runner/script-runner.jar',
      "Args" => [
        's3://us-west-1.elasticmapreduce/libs/hive/hive-script',
        '--base-path',
        's3://us-west-1.elasticmapreduce/libs/hive/',
        '--run-hive-script',
        '--args',
        '-f',
        path
      ]
    }
  }
  hive_step['HadoopJarStep']['Args'] << '-d' << "INPUT=#{input_path}" unless input_path.to_s.empty?
  hive_step['HadoopJarStep']['Args'] << '-d' << "OUTPUT=#{output_path}" unless output_path.to_s.empty?
  hive_step['HadoopJarStep']['Args'] += args unless args.empty?
  hive_step
end
335
+
336
# Parses a pig-step CLI spec into a script-runner step hash.
# Spec: script_path(s3)*[,input_path(s3)][,output_path(s3)][,-p,arg1,-p,arg2,...]
# NOTE(review): hadoop_version is currently unused (pig version is pinned
# to 'latest'); kept for interface compatibility with create_job_flow.
def parse_pig_steps(step, hadoop_version)
  path, input_path, output_path, *args = step.split(',')
  # BUG FIX: the abort message wrongly said "hive script"; also to_s guards
  # against nil.empty? when optional fields are omitted.
  abort 'path to the pig script is required' if path.to_s.empty?
  pig_step = {
    'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
    'Name' => 'awscli-emr-pig-step',
    'HadoopJarStep' => {
      "Jar" => 's3://us-west-1.elasticmapreduce/libs/script-runner/script-runner.jar',
      "Args" => %w(s3://us-west-1.elasticmapreduce/libs/pig/pig-script --base-path s3://us-west-1.elasticmapreduce/libs/pig/ --run-pig-script --pig-versions latest --args)
    }
  }
  pig_step['HadoopJarStep']['Args'] << '-p' << "INPUT=#{input_path}" unless input_path.to_s.empty?
  pig_step['HadoopJarStep']['Args'] << '-p' << "OUTPUT=#{output_path}" unless output_path.to_s.empty?
  pig_step['HadoopJarStep']['Args'] += args unless args.empty?
  # the script path itself goes last, after all -p parameters
  pig_step['HadoopJarStep']['Args'] << path
  pig_step
end
354
+
355
# Parses a streaming-step CLI spec into a hadoop-streaming step hash.
# Spec (comma separated): input*,output*,mapper*,reducer*[,extra_arg1,...]
def parse_streaming_steps(step)
  input, output, mapper, reducer, *args = step.split(',')
  # to_s guards against nil.empty? when fewer than four fields are supplied
  if input.to_s.empty? or output.to_s.empty? or mapper.to_s.empty? or reducer.to_s.empty?
    abort 'input, output, mapper and reducer are required'
  end
  streaming_step = {
    'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
    'Name' => 'awscli-emr-streaming-step',
    'HadoopJarStep' => {
      "Jar" => '/home/hadoop/contrib/streaming/hadoop-streaming.jar',
      "Args" => [
        '-input', input,
        '-output', output,
        '-mapper', mapper,
        '-reducer', reducer
      ]
    }
  }
  # BUG FIX: the original computed `Args + args` and discarded the result,
  # so extra args were silently dropped; concat mutates in place.
  streaming_step['HadoopJarStep']['Args'].concat(args) unless args.empty?
  #TODO: Add -jobconf params as k=v,k=v,k=v
  streaming_step
end
380
+
381
# Returns the script-runner step that installs Hive, pinning the hive
# version via the HADOOP_HIVE_COMPATIBILITY table for the given hadoop
# version.
def hive_install(hadoop_version)
  hive_version = Awscli::EMR::HADOOP_HIVE_COMPATIBILITY[hadoop_version]
  install_args = [
    's3://us-east-1.elasticmapreduce/libs/hive/hive-script',
    '--base-path',
    's3://us-east-1.elasticmapreduce/libs/hive/',
    '--install-hive',
    '--hive-versions',
    hive_version
  ]
  {
    'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
    'Name' => 'awscli-emr-hive-setup',
    'HadoopJarStep' => {
      'Args' => install_args,
      'Jar' => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar'
    }
  }
end
397
+
398
# Returns the script-runner step that installs Pig (latest version).
def pig_install
  install_args = [
    's3://us-east-1.elasticmapreduce/libs/pig/pig-script',
    '--base-path',
    's3://us-east-1.elasticmapreduce/libs/pig/',
    '--install-pig',
    '--pig-versions',
    'latest'
  ]
  {
    'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
    'Name' => 'awscli-emr-pig-setup',
    'HadoopJarStep' => {
      'Args' => install_args,
      'Jar' => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar'
    }
  }
end
408
+
409
# Returns the bootstrap action that installs HBase on the cluster nodes.
def hbase_install_boot_strap
  setup_script = 's3://us-west-1.elasticmapreduce/bootstrap-actions/setup-hbase'
  {
    'Name' => 'awscli-emr-install-hbase',
    'ScriptBootstrapAction' => {
      'Args' => [],
      'Path' => setup_script
    }
  }
end
418
+
419
# Returns the step that starts the HBase master once the install
# bootstrap action has run.
def hbase_install_steps
  {
    'ActionOnFailure' => 'CANCEL_AND_WAIT',
    'Name' => 'awscli-emr-start-hbase',
    'HadoopJarStep' => {
      'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
      'Args' => ['emr.hbase.backup.Main', '--start-master']
    }
  }
end
429
+
430
# Parses an HBase backup-schedule CLI spec into a step hash.
# Spec: frequency*,frequency_unit*(days|hrs|mins),path(s3)*,start_time*(now|iso-format)
# Pass consistent=true to request a consistent backup (--consistent flag).
def parse_hbase_backup(backup_step, consistent=false)
  abort 'Invalid backup step pattern, expecting frequency,frequency_unit(days|hrs|mins),path(s3),start_time(now|iso-format)' unless backup_step =~ /(.*),(.*),(.*),(.*)/
  frequency, frequency_unit, path, start_time = backup_step.split(',')
  # BUG FIX: to_s guards — split drops trailing empty fields, so e.g.
  # "1,days,s3://x," left start_time nil and crashed on nil.empty?.
  if frequency.to_s.empty? or frequency_unit.to_s.empty? or path.to_s.empty? or start_time.to_s.empty?
    abort 'frequency, frequency_unit, path, start_time are required to perform a backup'
  end
  abort "Invalid frequency unit : #{frequency_unit}" unless %w(days hrs mins).include?(frequency_unit)
  hbase_backup_step = {
    'Name' => 'awscli-emr-schedule-hbase-backup',
    'ActionOnFailure' => 'CANCEL_AND_WAIT',
    'HadoopJarStep' => {
      'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
      # NOTE(review): `true` is passed as a boolean while the surrounding
      # args are strings — verify the API client serialises it as intended.
      'Args' => ['emr.hbase.backup.Main', '--backup-dir', path, '--set-scheduled-backup', true, '--full-backup-time-interval',
                 frequency, '--incremental-backup-time-unit', frequency_unit, '--start-time', start_time]
    }
  }
  hbase_backup_step['HadoopJarStep']['Args'] << '--consistent' if consistent
  hbase_backup_step
end
450
+
451
# Parses an HBase restore CLI spec into a step hash.
# Spec: path(s3)*[,version]
def parse_hbase_restore(restore_step)
  path, version = restore_step.split(',')
  # to_s guards against nil when the spec is empty
  if path.to_s.empty?
    abort 'path is required'
  end
  hbase_restore_step = {
    'Name' => 'awscli-emr-restore-hbase-backup',
    'ActionOnFailure' => 'CANCEL_AND_WAIT',
    'HadoopJarStep' => {
      'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
      'Args' => ['emr.hbase.backup.Main', '--restore', '--backup-dir', path]
    }
  }
  # BUG FIX: the original guarded with `if defined?(version).nil?`, which is
  # always false for a local assigned just above, so --backup-version was
  # never appended even when a version was supplied.
  unless version.to_s.empty?
    hbase_restore_step['HadoopJarStep']['Args'] << '--backup-version' << version
  end
  hbase_restore_step
end
470
+ end
471
+ end
472
+ end