awscli 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/bin/awscli +7 -7
- data/lib/awscli.rb +3 -1
- data/lib/awscli/as.rb +56 -56
- data/lib/awscli/cli.rb +1 -1
- data/lib/awscli/cli/UsageExamples/emr +49 -0
- data/lib/awscli/cli/emr.rb +157 -0
- data/lib/awscli/cli/iam/user.rb +18 -7
- data/lib/awscli/cli/s3/files.rb +43 -37
- data/lib/awscli/connection.rb +34 -31
- data/lib/awscli/ec2.rb +234 -234
- data/lib/awscli/emr.rb +472 -0
- data/lib/awscli/errors.rb +1 -1
- data/lib/awscli/helper.rb +17 -0
- data/lib/awscli/iam.rb +281 -218
- data/lib/awscli/s3.rb +51 -39
- data/lib/awscli/version.rb +1 -1
- metadata +5 -2
data/lib/awscli/emr.rb
ADDED
@@ -0,0 +1,472 @@
module Awscli
  module Emr
    class EMR
      def initialize(connection)
        @conn = connection
      end

      def list(options)
        validate_job_ids options[:job_flow_ids] if options[:job_flow_ids]
        opts = Marshal.load(Marshal.dump(options))
        opts.reject! { |k| k == 'table' } if options[:table]
        if job_flow_ids = opts.delete(:job_flow_ids)
          opts.merge!('JobFlowIds' => job_flow_ids)
        end
        if job_flow_status = opts.delete(:job_flow_status)
          opts.merge!('JobFlowStates' => job_flow_status)
        end
        if options[:table]
          puts "For detailed information, don't pass --table option"
          job_flows = @conn.describe_job_flows(opts).body['JobFlows']
          table_data = Array.new
          unless job_flows.empty?
            job_flows.each do |job_flow|
              table_data << {
                :job_flow_id => job_flow['JobFlowId'],
                :name => job_flow['Name'],
                :instance_count => job_flow['Instances']['InstanceCount'],
                :master_dns => job_flow['Instances']['MasterPublicDnsName'],
                :ec2_key_name => job_flow['Instances']['Ec2KeyName'],
                :state => job_flow['ExecutionStatusDetail']['State']
              }
            end
          end
          Formatador.display_table(table_data, [:job_flow_id, :name, :state, :instance_count, :master_dns, :ec2_key_name])
        else
          puts 'For less information, pass --table option'
          puts @conn.describe_job_flows(opts).body['JobFlows'].to_yaml
        end
      end

      def create_job_flow(options)
        # => BOOTSTRAP ACTIONS
        boot_strap_actions = []
        if options[:bootstrap_actions]
          options[:bootstrap_actions].each do |step|
            boot_strap_actions << parse_boot_strap_actions(step)
          end
        end

        # => STEPS
        steps = []
        if options[:custom_jar_steps]
          options[:custom_jar_steps].each do |step|
            steps << parse_custom_jar(step)
          end
        end
        if options[:hive_interactive]
          steps << hive_install(options[:hadoop_version])
        end
        if options[:pig_interactive]
          steps << pig_install
        end
        if options[:hive_steps]
          steps << hive_install(options[:hadoop_version]) unless options[:hive_interactive]
          options[:hive_steps].each do |step|
            steps << parse_hive_steps(step)
          end
        end
        if options[:pig_steps]
          steps << pig_install unless options[:pig_interactive]
          options[:pig_steps].each do |step|
            steps << parse_pig_steps(step, options[:hadoop_version])
          end
        end
        if options[:streaming_steps]
          options[:streaming_steps].each do |step|
            steps << parse_streaming_steps(step)
          end
        end
        if options[:hbase_install]
          boot_strap_actions << hbase_install_boot_strap
          steps << hbase_install_steps
          # validate hadoop version and instance size
          abort "Invalid hadoop version #{options[:hadoop_version]}, supported Hadoop versions for HBase are: #{Awscli::EMR::HBASE_SUPPORTED_HADOOP_VERSIONS.join(',')}" unless Awscli::EMR::HBASE_SUPPORTED_HADOOP_VERSIONS.include?(options[:hadoop_version])
          options[:instance_groups] && parse_instance_groups(options[:instance_groups]).each do |group|
            unless is_valid_instance_type?(group['InstanceType'])
              abort "Instance type #{group['InstanceType']} is not compatible with HBase, instance size should be equal to or greater than m1.large"
            end
          end
          if options[:master_instance_type]
            unless is_valid_instance_type?(options[:master_instance_type])
              abort "Instance type #{options[:master_instance_type]} is not compatible with HBase, instance size should be equal to or greater than m1.large"
            end
          end
          if options[:slave_instance_type]
            unless is_valid_instance_type?(options[:slave_instance_type])
              abort "Instance type #{options[:slave_instance_type]} is not compatible with HBase, instance size should be equal to or greater than m1.large"
            end
          end
          # => HBase backups
          if options[:hbase_backup_schedule]
            # Backup
            if options[:hbase_consistent_backup]
              steps << parse_hbase_backup(options[:hbase_backup_schedule], true)
            else
              steps << parse_hbase_backup(options[:hbase_backup_schedule])
            end
          elsif options[:hbase_backup_restore]
            # Restore
            steps << parse_hbase_restore(options[:hbase_backup_restore])
          end
        end

        # => INSTANCES
        instances = Hash.new
        instances['HadoopVersion'] = options[:hadoop_version]
        if options[:hive_interactive] or options[:pig_interactive] or options[:hbase_install] # then job flow should not be terminated
          instances['KeepJobFlowAliveWhenNoSteps'] = true
        else
          instances['KeepJobFlowAliveWhenNoSteps'] = options[:alive]
        end
        instances['Ec2KeyName'] = options[:instance_ec2_key_name] if options[:instance_ec2_key_name]
        instances['InstanceCount'] = options[:instance_count] if options[:instance_count]
        instances['MasterInstanceType'] = options[:master_instance_type] if options[:master_instance_type]
        instances['SlaveInstanceType'] = options[:slave_instance_type] if options[:slave_instance_type]
        instances['TerminationProtected'] = options[:termination_protection] if options[:termination_protection]
        # => Instance Groups
        instances['InstanceGroups'] = parse_instance_groups(options[:instance_groups]) if options[:instance_groups]

        # => Build final request
        job_flow = Hash.new
        job_flow['AmiVersion'] = Awscli::EMR::HADOOP_AMI_MAPPING[options[:hadoop_version]]
        job_flow['LogUri'] = options[:log_uri] if options[:log_uri]
        job_flow['BootstrapActions'] = boot_strap_actions if options[:bootstrap_actions] or options[:hbase_install]
        job_flow['Instances'] = instances
        job_flow['Steps'] = steps
        if options[:alive] or options[:hive_interactive] or options[:pig_interactive] or options[:hbase_install]
          @conn.run_job_flow("#{options[:name]} (requires manual termination)", job_flow)
        else
          @conn.run_job_flow(options[:name], job_flow)
        end
        puts "Created job flow '#{options[:name]}' successfully!"
      end

      def add_instance_group(options)
        opts = Marshal.load(Marshal.dump(options))
        opts.reject! { |key| key == 'job_flow_id' }
        opts.reject! { |key| key == 'region' }
        abort 'invalid job id' unless @conn.describe_job_flows.body['JobFlows'].map { |job| job['JobFlowId'] }.include?(options[:job_flow_id])
        abort 'invalid instance type' unless Awscli::Instances::INSTANCE_SIZES.include?(options[:instance_type])
        if instance_count = opts.delete(:instance_count)
          opts.merge!('InstanceCount' => instance_count)
        end
        if instance_type = opts.delete(:instance_type)
          opts.merge!('InstanceType' => instance_type)
        end
        if instance_role = opts.delete(:instance_role)
          opts.merge!('InstanceRole' => instance_role)
        end
        if name = opts.delete(:name)
          opts.merge!('Name' => name)
        end
        if bid_price = opts.delete(:bid_price)
          opts.merge!('BidPrice' => bid_price)
          opts.merge!('MarketType' => 'SPOT')
        else
          opts.merge!('MarketType' => 'ON_DEMAND')
        end
        (instance_groups ||= []) << opts
        @conn.add_instance_groups(options[:job_flow_id], 'InstanceGroups' => instance_groups)
        puts "Added instance group to job flow (with id): #{options[:job_flow_id]}"
      end

      def add_steps(job_flow_id, job_steps)
        validate_job_ids job_flow_id
        @conn.add_job_flow_steps(job_flow_id, 'Steps' => [parse_custom_jar(job_steps)])
        puts "Added step to job flow id: #{job_flow_id}"
      end

      def modify_instance_group(options)
        abort "Invalid instance group id: #{options[:instance_group_id]}" unless validate_instance_group_id?(options[:instance_group_id])
        @conn.modify_instance_groups(
          'InstanceGroups' => [
            'InstanceCount' => options[:instance_count],
            'InstanceGroupId' => options[:instance_group_id]
          ]
        )
      rescue Excon::Errors::BadRequest
        puts "[Error]: #{$!}"
      else
        puts "Modified instance group #{options[:instance_group_id]} size to #{options[:instance_count]}"
      end

      def set_termination_protection(job_flow_ids, terminate_protection)
        validate_job_ids job_flow_ids
        @conn.set_termination_protection(
          terminate_protection,
          {
            'JobFlowIds' => job_flow_ids
          }
        )
        if terminate_protection
          puts "Termination protection flag added to job_flows: #{job_flow_ids.join(',')}"
        else
          puts "Termination protection flag removed from job_flows: #{job_flow_ids.join(',')}"
        end
      end

      def add_instance_groups(job_flow_id, groups)
        validate_job_ids job_flow_id
        instance_groups = parse_instance_groups(groups)
        @conn.add_instance_groups(job_flow_id, 'InstanceGroups' => instance_groups)
      end

      def delete(job_ids)
        validate_job_ids job_ids
        @conn.terminate_job_flows('JobFlowIds' => job_ids)
        puts "Terminated Job Flows: #{job_ids.join(',')}"
      end

      private

      def validate_job_ids(job_ids)
        available_job_ids = @conn.describe_job_flows.body['JobFlows'].map { |job| job['JobFlowId'] }
        abort 'invalid job ids' unless (Array(job_ids) - available_job_ids).empty?
      end

      def validate_instance_group_id?(group_id)
        @conn.describe_job_flows.body['JobFlows'].map { |j| j['Instances']['InstanceGroups'].map { |g| g['InstanceGroupId'] } }.flatten.include?(group_id)
      end

      def is_valid_instance_type?(instance_type)
        !Awscli::EMR::HBASE_INVALID_INSTANCES.member?(instance_type)
      end

      def parse_instance_groups(groups)
        # parse instance_groups => instance_count,instance_role(MASTER | CORE | TASK),instance_type,name,bid_price
        instance_groups = []
        groups.each do |group|
          instance_count, instance_role, instance_size, name, bid_price = group.split(',')
          if instance_count.empty? or instance_role.empty? or instance_size.empty?
            abort 'instance_count, instance_role and instance_size are required'
          end
          abort "Invalid instance role: #{instance_role}" unless %w(MASTER CORE TASK).include?(instance_role.upcase)
          abort "Invalid instance type: #{instance_size}" unless Awscli::Instances::INSTANCE_SIZES.include?(instance_size)
          if bid_price
            instance_groups << {
              'BidPrice' => bid_price,
              'InstanceCount' => instance_count.to_i,
              'InstanceRole' => instance_role,
              'InstanceType' => instance_size,
              'MarketType' => 'SPOT',
              'Name' => name || "awscli-emr-#{instance_role}-group",
            }
          else
            instance_groups << {
              'InstanceCount' => instance_count.to_i,
              'InstanceRole' => instance_role,
              'InstanceType' => instance_size,
              'MarketType' => 'ON_DEMAND',
              'Name' => name || "awscli-emr-#{instance_role}-group",
            }
          end
        end
        instance_groups
      end

      def parse_boot_strap_actions(step)
        # parse => name,bootstrap_action_path,bootstrap_action_args
        name, path, *args = step.split(',')
        if name.empty? or path.empty?
          abort 'name and path are required'
        end
        boot_strap_actions = {
          'Name' => name,
          'ScriptBootstrapAction' => {
            'Args' => args || [],
            'Path' => path
          }
        }
        boot_strap_actions
      end

      def parse_custom_jar(step)
        # parse jar_path(s3)*,name_of_step*,main_class,action_on_failure(TERMINATE_JOB_FLOW | CANCEL_AND_WAIT | CONTINUE),arg1=arg2=arg3,properties(k=v,k=v)
        abort "invalid step pattern, expecting 'jar_path(s3)*,name_of_step*,main_class,action_on_failure,arg1=arg2=arg3,prop_k1=prop_v1,prop_k2=prop_v2'" unless step =~ /(.*),(.*),(.*),(.*),(.*),(.*),(.*)/
        jar, name, main_class, action_on_failure, extra_args, *job_conf = step.split(',')
        if jar.empty? or name.empty?
          abort 'jar and name are required for a step'
        end
        step_to_run = {
          'ActionOnFailure' => action_on_failure.empty? ? 'TERMINATE_JOB_FLOW' : action_on_failure,
          'Name' => name,
          'HadoopJarStep' => {
            'Jar' => jar,
            'Args' => extra_args.empty? ? [] : extra_args.split('='),
            'Properties' => []
          }
        }
        step_to_run['HadoopJarStep']['MainClass'] = main_class unless main_class.empty?
        unless job_conf.empty?
          job_conf.each do |kv_pair|
            properties = {}
            properties['Key'], properties['Value'] = kv_pair.split('=')
            step_to_run['HadoopJarStep']['Properties'] << properties
          end
        end
        step_to_run
      end

      def parse_hive_steps(step)
        # parse script_path(s3)*,input_path(s3),output_path(s3),'-d','args1','-d','args2','-d','arg3'
        path, input_path, output_path, *args = step.split(',')
        abort 'path to the hive script is required' if path.empty?
        hive_step = {
          'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
          'Name' => 'awscli-emr-hive-step',
          'HadoopJarStep' => {
            'Jar' => 's3://us-west-1.elasticmapreduce/libs/script-runner/script-runner.jar',
            'Args' => [
              's3://us-west-1.elasticmapreduce/libs/hive/hive-script',
              '--base-path',
              's3://us-west-1.elasticmapreduce/libs/hive/',
              '--run-hive-script',
              '--args',
              '-f',
              path
            ]
          }
        }
        hive_step['HadoopJarStep']['Args'] << '-d' << "INPUT=#{input_path}" unless input_path.to_s.empty?
        hive_step['HadoopJarStep']['Args'] << '-d' << "OUTPUT=#{output_path}" unless output_path.to_s.empty?
        hive_step['HadoopJarStep']['Args'] += args unless args.empty?
        hive_step
      end

      def parse_pig_steps(step, hadoop_version)
        # parse script_path(s3)*,input_path(s3),output_path(s3),'-p','args1','-p','args2','-p','arg3'
        path, input_path, output_path, *args = step.split(',')
        abort 'path to the pig script is required' if path.empty?
        pig_step = {
          'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
          'Name' => 'awscli-emr-pig-step',
          'HadoopJarStep' => {
            'Jar' => 's3://us-west-1.elasticmapreduce/libs/script-runner/script-runner.jar',
            'Args' => %w(s3://us-west-1.elasticmapreduce/libs/pig/pig-script --base-path s3://us-west-1.elasticmapreduce/libs/pig/ --run-pig-script --pig-versions latest --args)
          }
        }
        pig_step['HadoopJarStep']['Args'] << '-p' << "INPUT=#{input_path}" unless input_path.to_s.empty?
        pig_step['HadoopJarStep']['Args'] << '-p' << "OUTPUT=#{output_path}" unless output_path.to_s.empty?
        pig_step['HadoopJarStep']['Args'] += args unless args.empty?
        pig_step['HadoopJarStep']['Args'] << path
        pig_step
      end

      def parse_streaming_steps(step)
        # parse input*,output*,mapper*,reducer*,extra_arg1,extra_arg2
        input, output, mapper, reducer, *args = step.split(',')
        if input.to_s.empty? or output.to_s.empty? or mapper.to_s.empty? or reducer.to_s.empty?
          abort 'input, output, mapper and reducer are required'
        end
        streaming_step = {
          'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
          'Name' => 'awscli-emr-streaming-step',
          'HadoopJarStep' => {
            'Jar' => '/home/hadoop/contrib/streaming/hadoop-streaming.jar',
            'Args' => [
              '-input', input,
              '-output', output,
              '-mapper', mapper,
              '-reducer', reducer
            ]
          }
        }
        streaming_step['HadoopJarStep']['Args'] += args unless args.empty?
        # TODO: Add -jobconf params as k=v,k=v,k=v
        streaming_step
      end

      def hive_install(hadoop_version)
        {
          'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
          'Name' => 'awscli-emr-hive-setup',
          'HadoopJarStep' => {
            'Args' => ['s3://us-east-1.elasticmapreduce/libs/hive/hive-script',
                       '--base-path',
                       's3://us-east-1.elasticmapreduce/libs/hive/',
                       '--install-hive',
                       '--hive-versions',
                       Awscli::EMR::HADOOP_HIVE_COMPATIBILITY[hadoop_version]
            ],
            'Jar' => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar'
          }
        }
      end

      def pig_install
        {
          'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
          'Name' => 'awscli-emr-pig-setup',
          'HadoopJarStep' => {
            'Args' => %w(s3://us-east-1.elasticmapreduce/libs/pig/pig-script --base-path s3://us-east-1.elasticmapreduce/libs/pig/ --install-pig --pig-versions latest),
            'Jar' => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar'
          }
        }
      end

      def hbase_install_boot_strap
        {
          'Name' => 'awscli-emr-install-hbase',
          'ScriptBootstrapAction' => {
            'Args' => [],
            'Path' => 's3://us-west-1.elasticmapreduce/bootstrap-actions/setup-hbase'
          }
        }
      end

      def hbase_install_steps
        {
          'ActionOnFailure' => 'CANCEL_AND_WAIT',
          'Name' => 'awscli-emr-start-hbase',
          'HadoopJarStep' => {
            'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
            'Args' => %w(emr.hbase.backup.Main --start-master)
          }
        }
      end

      def parse_hbase_backup(backup_step, consistent=false)
        # parse frequency*,frequency_unit*(days|hrs|mins),path(s3)*,start_time*(now|iso-format)
        frequency, frequency_unit, path, start_time = backup_step.split(',')
        abort 'Invalid backup step pattern, expecting frequency,frequency_unit(days|hrs|mins),path(s3),start_time(now|iso-format)' unless backup_step =~ /(.*),(.*),(.*),(.*)/
        if frequency.empty? or frequency_unit.empty? or path.empty? or start_time.empty?
          abort 'frequency, frequency_unit, path and start_time are required to perform a backup'
        end
        abort "Invalid frequency unit: #{frequency_unit}" unless %w(days hrs mins).include?(frequency_unit)
        hbase_backup_step = {
          'Name' => 'awscli-emr-schedule-hbase-backup',
          'ActionOnFailure' => 'CANCEL_AND_WAIT',
          'HadoopJarStep' => {
            'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
            'Args' => ['emr.hbase.backup.Main', '--backup-dir', path, '--set-scheduled-backup', true, '--full-backup-time-interval',
                       frequency, '--incremental-backup-time-unit', frequency_unit, '--start-time', start_time]
          }
        }
        hbase_backup_step['HadoopJarStep']['Args'] << '--consistent' if consistent
        hbase_backup_step
      end

      def parse_hbase_restore(restore_step)
        # parse path(s3)*,version
        path, version = restore_step.split(',')
        if path.empty?
          abort 'path is required'
        end
        hbase_restore_step = {
          'Name' => 'awscli-emr-restore-hbase-backup',
          'ActionOnFailure' => 'CANCEL_AND_WAIT',
          'HadoopJarStep' => {
            'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
            'Args' => ['emr.hbase.backup.Main', '--restore', '--backup-dir', path]
          }
        }
        hbase_restore_step['HadoopJarStep']['Args'] << '--backup-version' << version unless version.to_s.empty?
        hbase_restore_step
      end
    end
  end
end
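The new EMR class is normally driven through the gem's CLI layer (data/lib/awscli/cli/emr.rb, added in this same release). As a minimal sketch of exercising it directly, assuming the connection object is a fog EMR client (Fog::AWS::EMR), which is consistent with the describe_job_flows/run_job_flow/terminate_job_flows calls the class makes; the credentials, region, and job flow id below are placeholders:

# Hypothetical usage sketch; not part of the released gem source above.
require 'fog'
require 'awscli'

# Placeholder credentials and region.
conn = Fog::AWS::EMR.new(
  :aws_access_key_id     => 'AKIA...',
  :aws_secret_access_key => 'secret',
  :region                => 'us-west-1'
)

emr = Awscli::Emr::EMR.new(conn)

# With an empty options hash, list takes the YAML branch and dumps all job flows.
emr.list({})

# Terminate a job flow; the id is validated against describe_job_flows first.
emr.delete(['j-XXXXXXXXXXXXX'])

Note that the --table branch of list expects option keys exactly as the CLI layer supplies them, so the sketch sticks to the plain YAML path.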