awscli 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/bin/awscli +7 -7
- data/lib/awscli.rb +3 -1
- data/lib/awscli/as.rb +56 -56
- data/lib/awscli/cli.rb +1 -1
- data/lib/awscli/cli/UsageExamples/emr +49 -0
- data/lib/awscli/cli/emr.rb +157 -0
- data/lib/awscli/cli/iam/user.rb +18 -7
- data/lib/awscli/cli/s3/files.rb +43 -37
- data/lib/awscli/connection.rb +34 -31
- data/lib/awscli/ec2.rb +234 -234
- data/lib/awscli/emr.rb +472 -0
- data/lib/awscli/errors.rb +1 -1
- data/lib/awscli/helper.rb +17 -0
- data/lib/awscli/iam.rb +281 -218
- data/lib/awscli/s3.rb +51 -39
- data/lib/awscli/version.rb +1 -1
- metadata +5 -2
data/lib/awscli/emr.rb
ADDED
@@ -0,0 +1,472 @@
|
|
1
|
+
module Awscli
|
2
|
+
module Emr
|
3
|
+
class EMR
|
4
|
+
# Keeps the service connection used by every operation of this class.
#
# @param connection the object stored in @conn; the other methods call
#   describe_job_flows, run_job_flow, add_instance_groups, etc. on it
def initialize(connection)
  @conn = connection
end
|
7
|
+
|
8
|
+
# Prints the job flows returned by @conn.describe_job_flows, either as a
# summary table (options[:table] set) or as a full YAML dump.
#
# options[:job_flow_ids] and options[:job_flow_status] are renamed to the
# 'JobFlowIds' / 'JobFlowStates' request keys; given ids are validated first.
#
# @param options [Hash] command options
def list(options)
  validate_job_ids(options[:job_flow_ids]) if options[:job_flow_ids]

  # Deep copy so the caller's options are never mutated.
  request = Marshal.load(Marshal.dump(options))
  request.reject! { |key| key == 'table' } if options[:table]
  [[:job_flow_ids, 'JobFlowIds'], [:job_flow_status, 'JobFlowStates']].each do |option_key, api_key|
    value = request.delete(option_key)
    request.merge!(api_key => value) if value
  end

  unless options[:table]
    puts 'For less information, pass --table option'
    puts @conn.describe_job_flows(request).body['JobFlows'].to_yaml
    return
  end

  puts 'For detailed information, dont pass --table option'
  rows = @conn.describe_job_flows(request).body['JobFlows'].map do |flow|
    instances = flow['Instances']
    {
      :job_flow_id => flow['JobFlowId'],
      :name => flow['Name'],
      :instance_count => instances['InstanceCount'],
      :master_dns => instances['MasterPublicDnsName'],
      :ec2_key_name => instances['Ec2KeyName'],
      :state => flow['ExecutionStatusDetail']['State']
    }
  end
  Formatador.display_table(rows, [:job_flow_id, :name, :state, :instance_count, :master_dns, :ec2_key_name])
end
|
40
|
+
|
41
|
+
# Builds a job flow request from the command options and submits it through
# @conn.run_job_flow.
#
# Bootstrap actions and steps are parsed by the parse_* helpers. When
# options[:hbase_install] is set, the hadoop version and every requested
# instance type are validated first and the process aborts on a mismatch.
# Interactive hive/pig sessions and HBase force the job flow to stay alive,
# and such flows (or ones with options[:alive]) get
# " (requires manual termination)" appended to their name.
#
# @param options [Hash] command options
def create_job_flow(options)
  hadoop_version = options[:hadoop_version]
  hive_shell = options[:hive_interactive]
  pig_shell = options[:pig_interactive]
  hbase = options[:hbase_install]

  # => BOOTSTRAP ACTIONS
  boot_strap_actions = (options[:bootstrap_actions] || []).map { |action| parse_boot_strap_actions(action) }

  # => STEPS (installers are queued before the scripts that rely on them)
  steps = (options[:custom_jar_steps] || []).map { |jar_step| parse_custom_jar(jar_step) }
  steps << hive_install(hadoop_version) if hive_shell
  steps << pig_install if pig_shell
  if options[:hive_steps]
    steps << hive_install(hadoop_version) unless hive_shell
    steps.concat(options[:hive_steps].map { |script| parse_hive_steps(script) })
  end
  if options[:pig_steps]
    steps << pig_install unless pig_shell
    steps.concat(options[:pig_steps].map { |script| parse_pig_steps(script, hadoop_version) })
  end
  if options[:streaming_steps]
    steps.concat(options[:streaming_steps].map { |spec| parse_streaming_steps(spec) })
  end

  if hbase
    boot_strap_actions << hbase_install_boot_strap
    steps << hbase_install_steps

    # validate hadoop version and instance size
    unless Awscli::EMR::HBASE_SUPPORTED_HADOOP_VERSIONS.include?(hadoop_version)
      abort "Invalid hadoop version #{hadoop_version}, supported Hadoop Versions for HBase are: #{Awscli::EMR::HBASE_SUPPORTED_HADOOP_VERSIONS.join(',')}"
    end
    requested_types = []
    if options[:instance_groups]
      requested_types.concat(parse_instance_groups(options[:instance_groups]).map { |group| group['InstanceType'] })
    end
    requested_types << options[:master_instance_type] if options[:master_instance_type]
    requested_types << options[:slave_instance_type] if options[:slave_instance_type]
    requested_types.each do |type|
      next if is_valid_instance_type?(type)
      abort "Instance type #{type} is not compatible with HBase, instance size should be equal or greater than m1.large"
    end

    # => HBase backups (a schedule takes precedence over a restore)
    if options[:hbase_backup_schedule]
      steps << if options[:hbase_consistent_backup]
                 parse_hbase_backup(options[:hbase_backup_schedule], true)
               else
                 parse_hbase_backup(options[:hbase_backup_schedule])
               end
    elsif options[:hbase_backup_restore]
      steps << parse_hbase_restore(options[:hbase_backup_restore])
    end
  end

  # => INSTANCES
  # Interactive sessions and HBase must not terminate once steps finish.
  forced_alive = hive_shell || pig_shell || hbase
  instances = {
    'HadoopVersion' => hadoop_version,
    'KeepJobFlowAliveWhenNoSteps' => forced_alive ? true : options[:alive]
  }
  [
    ['Ec2KeyName', :instance_ec2_key_name],
    ['InstanceCount', :instance_count],
    ['MasterInstanceType', :master_instance_type],
    ['SlaveInstanceType', :slave_instance_type],
    ['TerminationProtected', :termination_protection]
  ].each do |api_key, option_key|
    instances[api_key] = options[option_key] if options[option_key]
  end
  # => Instance Groups
  instances['InstanceGroups'] = parse_instance_groups(options[:instance_groups]) if options[:instance_groups]

  # => Build final request
  job_flow = { 'AmiVersion' => Awscli::EMR::HADOOP_AMI_MAPPING[hadoop_version] }
  job_flow['LogUri'] = options[:log_uri] if options[:log_uri]
  job_flow['BootstrapActions'] = boot_strap_actions if options[:bootstrap_actions] || hbase
  job_flow['Instances'] = instances
  job_flow['Steps'] = steps

  flow_name = if options[:alive] || forced_alive
                "#{options[:name]} (requires manual termination)"
              else
                options[:name]
              end
  @conn.run_job_flow(flow_name, job_flow)
  puts "Create JobFlow '#{options[:name]}' Successfully!"
end
|
144
|
+
|
145
|
+
# Adds a single instance group to an existing job flow.
#
# Aborts when options[:job_flow_id] is not among the ids returned by
# @conn.describe_job_flows or when options[:instance_type] is not listed in
# Awscli::Instances::INSTANCE_SIZES. The group is requested as SPOT when a
# bid price is given and as ON_DEMAND otherwise.
#
# @param options [Hash] command options
def add_instance_group(options)
  # Deep copy: the copy becomes the instance group definition sent upstream.
  group = Marshal.load(Marshal.dump(options))
  group.reject! { |key| key == 'job_flow_id' }
  group.reject! { |key| key == 'region' }

  known_ids = @conn.describe_job_flows.body['JobFlows'].map { |flow| flow['JobFlowId'] }
  abort 'invalid job id' unless known_ids.include?(options[:job_flow_id])
  abort 'invalid instance type' unless Awscli::Instances::INSTANCE_SIZES.include?(options[:instance_type])

  # Rename option keys to the request keys.
  [
    [:instance_count, 'InstanceCount'],
    [:instance_type, 'InstanceType'],
    [:instance_role, 'InstanceRole'],
    [:name, 'Name']
  ].each do |option_key, api_key|
    value = group.delete(option_key)
    group.merge!(api_key => value) if value
  end

  bid_price = group.delete(:bid_price)
  group.merge!('BidPrice' => bid_price) if bid_price
  group.merge!('MarketType' => (bid_price ? 'SPOT' : 'ON_DEMAND'))

  @conn.add_instance_groups(options[:job_flow_id], 'InstanceGroups' => [group])
  puts "Added instance group to job flow(with id): #{options[:job_flow_id]}"
end
|
173
|
+
|
174
|
+
# Appends custom-jar steps to an existing job flow.
#
# The job flow id is wrapped in an array before validation because
# validate_job_ids works on a list of ids, and the parsed steps are always
# sent as a list under 'Steps'.
#
# @param job_flow_id [String] id of the job flow to extend
# @param job_steps [String, Array<String>] one step spec or several, each in
#   the format understood by parse_custom_jar
def add_steps(job_flow_id, job_steps)
  validate_job_ids(Array(job_flow_id))
  steps = Array(job_steps).map { |step| parse_custom_jar(step) }
  @conn.add_job_flow_steps(job_flow_id, 'Steps' => steps)
  puts "Added step to job flow id: #{job_flow_id}"
end
|
179
|
+
|
180
|
+
# Resizes an instance group to options[:instance_count].
#
# Aborts when validate_instance_group_id? rejects the group id. A
# BadRequest raised by the connection is reported on stdout rather than
# propagated.
#
# @param options [Hash] needs :instance_group_id and :instance_count
def modify_instance_group(options)
  group_id = options[:instance_group_id]
  abort "Invalid instance group id: #{group_id}" unless validate_instance_group_id?(group_id)

  resize = {
    'InstanceCount' => options[:instance_count],
    'InstanceGroupId' => group_id
  }
  @conn.modify_instance_groups('InstanceGroups' => [resize])
rescue Excon::Errors::BadRequest => error
  puts "[Error]: #{error}"
else
  puts "Modified instance group #{group_id} size to #{options[:instance_count]}"
end
|
193
|
+
|
194
|
+
# Turns termination protection on or off for the given job flows and
# reports the change on stdout.
#
# @param job_flow_ids [Array<String>] ids to update (validated first)
# @param terminate_protection truthy to add the flag, falsy to remove it
def set_termination_protection(job_flow_ids, terminate_protection)
  validate_job_ids(job_flow_ids)
  @conn.set_termination_protection(terminate_protection, { 'JobFlowIds' => job_flow_ids })

  if terminate_protection
    puts("Termination protection flag added to job_flows: #{job_flow_ids.join(',')}")
  else
    puts("Termination protection flag removed from job_flows: #{job_flow_ids.join(',')}")
  end
end
|
206
|
+
|
207
|
+
# Adds several instance groups to an existing job flow.
#
# The single job flow id is wrapped in an array for validation because
# validate_job_ids works on a list of ids; passing the bare id made the
# check fail for every id.
#
# @param job_flow_id [String] id of the job flow to extend
# @param groups [Array<String>] group specs understood by parse_instance_groups
# @return whatever @conn.add_instance_groups returns
def add_instance_groups(job_flow_id, groups)
  validate_job_ids(Array(job_flow_id))
  instance_groups = parse_instance_groups(groups)
  @conn.add_instance_groups(job_flow_id, 'InstanceGroups' => instance_groups)
end
|
212
|
+
|
213
|
+
# Terminates the given job flows after validating their ids, then reports
# the terminated ids on stdout.
#
# @param job_ids [Array<String>] ids of the job flows to terminate
def delete(job_ids)
  validate_job_ids(job_ids)
  termination_request = { 'JobFlowIds' => job_ids }
  @conn.terminate_job_flows(termination_request)
  puts "Terminated Job Flows: #{job_ids.join(',')}"
end
|
218
|
+
|
219
|
+
private
|
220
|
+
|
221
|
+
# Aborts unless every given job flow id is known to the service.
#
# The check is set-based, so ids pass regardless of their order or position
# in the service listing. A single id may be given without wrapping it in
# an array.
#
# @param job_ids [Array<String>, String] ids to check
def validate_job_ids(job_ids)
  known_ids = @conn.describe_job_flows.body['JobFlows'].map { |job| job['JobFlowId'] }
  unknown_ids = Array(job_ids) - known_ids
  abort 'invalid job id\'s' unless unknown_ids.empty?
end
|
225
|
+
|
226
|
+
# Tells whether group_id belongs to any instance group of any job flow
# returned by @conn.describe_job_flows.
#
# @param group_id [String] instance group id to look up
# @return [Boolean]
def validate_instance_group_id?(group_id)
  job_flows = @conn.describe_job_flows.body['JobFlows']
  known_group_ids = job_flows.flat_map do |flow|
    flow['Instances']['InstanceGroups'].map { |group| group['InstanceGroupId'] }
  end
  known_group_ids.include?(group_id)
end
|
229
|
+
|
230
|
+
# Tells whether an instance type may be used with HBase, i.e. it is not
# listed in Awscli::EMR::HBASE_INVALID_INSTANCES.
#
# @param instance_type [String]
# @return [Boolean]
def is_valid_instance_type?(instance_type)
  rejected_types = Awscli::EMR::HBASE_INVALID_INSTANCES
  !rejected_types.include?(instance_type)
end
|
233
|
+
|
234
|
+
# Converts instance group specs into the hashes sent under 'InstanceGroups'.
#
# Each spec is a comma-separated string:
#   instance_count*,instance_role*(MASTER | CORE | TASK),instance_type*,name,bid_price
# Fields marked * are required. The role is accepted in any letter case and
# stored upper-cased. A group with a bid price is requested as SPOT,
# otherwise as ON_DEMAND; an empty name or bid price counts as not given.
#
# Aborts on a missing required field, an unknown role, or an instance type
# not listed in Awscli::Instances::INSTANCE_SIZES.
#
# @param groups [Array<String>] instance group specs
# @return [Array<Hash>] one hash per spec, in input order
def parse_instance_groups(groups)
  groups.map do |group|
    instance_count, instance_role, instance_size, name, bid_price = group.split(',')
    if [instance_count, instance_role, instance_size].any? { |field| field.to_s.empty? }
      abort 'instance_count, instance_role and instance_size are required'
    end

    role = instance_role.upcase
    abort "Invalid instance role: #{instance_role}" unless %w(MASTER CORE TASK).include?(role)
    abort "Invalid instance type: #{instance_size}" unless Awscli::Instances::INSTANCE_SIZES.include?(instance_size)

    spot = !bid_price.to_s.empty?
    instance_group = {
      'InstanceCount' => instance_count.to_i,
      'InstanceRole' => role,
      'InstanceType' => instance_size,
      'MarketType' => spot ? 'SPOT' : 'ON_DEMAND',
      'Name' => name.to_s.empty? ? "awscli-emr-#{role}-group" : name
    }
    instance_group['BidPrice'] = bid_price if spot
    instance_group
  end
end
|
265
|
+
|
266
|
+
# Converts a bootstrap action spec into a bootstrap action hash.
#
# Spec format: name*,bootstrap_action_path*,arg1,arg2,...
# Aborts when the name or the path is missing or empty.
#
# @param step [String] comma-separated bootstrap action spec
# @return [Hash] with 'Name' and 'ScriptBootstrapAction' ('Args', 'Path')
def parse_boot_strap_actions(step)
  name, path, *args = step.split(',')
  # to_s guards against nil when the spec has fewer than two fields.
  abort 'name and path are required' if name.to_s.empty? || path.to_s.empty?

  {
    'Name' => name,
    'ScriptBootstrapAction' => {
      'Args' => args,
      'Path' => path
    }
  }
end
|
281
|
+
|
282
|
+
# Converts a custom jar step spec into a step hash.
#
# Spec format (fields marked * must be non-empty, the others may be left
# empty but their commas are required):
#   jar_path(s3)*,name_of_step*,main_class,action_on_failure(TERMINATE_JOB_FLOW | CANCEL_AND_WAIT | CONTINUE),arg1=arg2=arg3,prop_k1=prop_v1,prop_k2=prop_v2
#
# Aborts when the spec has too few fields or when jar/name are empty.
#
# @param steps [String, Array<String>] one step spec; an array of specs is
#   also accepted and parsed element by element
# @return [Hash, Array<Hash>] a step hash, or one hash per spec for an array
def parse_custom_jar(steps)
  return steps.map { |single_step| parse_custom_jar(single_step) } if steps.is_a?(Array)

  step = steps
  unless step =~ /(.*),(.*),(.*),(.*),(.*),(.*),(.*)/
    abort "invalid step pattern, expecting 'jar_path(s3)*,name_of_step*,main_class,action_on_failure,arg1=agr2=arg3,prop_k1=prop_v1,prop_k2=prop_v2)'"
  end

  # limit -1 keeps trailing empty fields, so every variable below is a String
  # once the pattern check above has passed.
  jar, name, main_class, action_on_failure, extra_args, *job_conf = step.split(',', -1)
  abort 'jar and name are required for a step' if jar.empty? || name.empty?

  # Split on the first '=' only, so property values may contain '='.
  properties = job_conf.reject(&:empty?).map do |kv_pair|
    key, value = kv_pair.split('=', 2)
    { 'Key' => key, 'Value' => value }
  end

  step_to_run = {
    'ActionOnFailure' => action_on_failure.empty? ? 'TERMINATE_JOB_FLOW' : action_on_failure,
    'Name' => name,
    'HadoopJarStep' => {
      'Jar' => jar,
      'Args' => extra_args.split('='),
      'Properties' => properties
    }
  }
  step_to_run['HadoopJarStep']['MainClass'] = main_class unless main_class.empty?
  step_to_run
end
|
309
|
+
|
310
|
+
# Converts a hive step spec into a script-runner step that runs the script.
#
# Spec format: script_path(s3)*,input_path(s3),output_path(s3),'-d','arg1','-d','arg2',...
# Only the script path is required; input and output, when given, are passed
# to the script as -d INPUT=... / -d OUTPUT=... definitions.
#
# NOTE(review): the script-runner locations here use the us-west-1 bucket
# while hive_install uses us-east-1 — confirm which one is intended.
#
# @param step [String] comma-separated hive step spec
# @return [Hash] step hash
def parse_hive_steps(step)
  path, input_path, output_path, *args = step.split(',')
  # to_s guards against nil when optional fields are left out.
  abort 'path to the hive script is required' if path.to_s.empty?

  hive_args = [
    's3://us-west-1.elasticmapreduce/libs/hive/hive-script',
    '--base-path',
    's3://us-west-1.elasticmapreduce/libs/hive/',
    '--run-hive-script',
    '--args',
    '-f',
    path
  ]
  hive_args.push('-d', "INPUT=#{input_path}") unless input_path.to_s.empty?
  hive_args.push('-d', "OUTPUT=#{output_path}") unless output_path.to_s.empty?
  hive_args.concat(args)

  {
    'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
    'Name' => 'awscli-emr-hive-step',
    'HadoopJarStep' => {
      'Jar' => 's3://us-west-1.elasticmapreduce/libs/script-runner/script-runner.jar',
      'Args' => hive_args
    }
  }
end
|
335
|
+
|
336
|
+
# Converts a pig step spec into a script-runner step that runs the script.
#
# Spec format: script_path(s3)*,input_path(s3),output_path(s3),'-p','arg1','-p','arg2',...
# Only the script path is required; input and output, when given, are passed
# as -p INPUT=... / -p OUTPUT=... parameters. The script path is appended
# last, after all parameters.
#
# @param step [String] comma-separated pig step spec
# @param hadoop_version accepted for signature compatibility; not used here
# @return [Hash] step hash
def parse_pig_steps(step, hadoop_version)
  path, input_path, output_path, *args = step.split(',')
  # to_s guards against nil when optional fields are left out.
  abort 'path to the pig script is required' if path.to_s.empty?

  pig_args = [
    's3://us-west-1.elasticmapreduce/libs/pig/pig-script',
    '--base-path',
    's3://us-west-1.elasticmapreduce/libs/pig/',
    '--run-pig-script',
    '--pig-versions',
    'latest',
    '--args'
  ]
  pig_args.push('-p', "INPUT=#{input_path}") unless input_path.to_s.empty?
  pig_args.push('-p', "OUTPUT=#{output_path}") unless output_path.to_s.empty?
  pig_args.concat(args)
  pig_args << path

  {
    'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
    'Name' => 'awscli-emr-pig-step',
    'HadoopJarStep' => {
      'Jar' => 's3://us-west-1.elasticmapreduce/libs/script-runner/script-runner.jar',
      'Args' => pig_args
    }
  }
end
|
354
|
+
|
355
|
+
# Converts a streaming step spec into a hadoop-streaming step.
#
# Spec format: input*,output*,mapper*,reducer*,extra_arg1,extra_arg2,...
# The first four fields are required; any further fields are appended to the
# streaming arguments unchanged.
#
# @param step [String] comma-separated streaming step spec
# @return [Hash] step hash
def parse_streaming_steps(step)
  input, output, mapper, reducer, *args = step.split(',')
  # to_s guards against nil when the spec has fewer than four fields.
  if [input, output, mapper, reducer].any? { |field| field.to_s.empty? }
    abort 'input, output, mapper and reducer are required'
  end

  streaming_args = [
    '-input', input,
    '-output', output,
    '-mapper', mapper,
    '-reducer', reducer
  ]
  # concat mutates the list; a bare `+` would build a new array and drop it.
  streaming_args.concat(args)
  #TODO: Add -jobconf params as k=v,k=v,k=v

  {
    'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
    'Name' => 'awscli-emr-streaming-step',
    'HadoopJarStep' => {
      'Jar' => '/home/hadoop/contrib/streaming/hadoop-streaming.jar',
      'Args' => streaming_args
    }
  }
end
|
380
|
+
|
381
|
+
# Builds the step that installs hive; the hive version is looked up in
# Awscli::EMR::HADOOP_HIVE_COMPATIBILITY by hadoop version.
#
# @param hadoop_version key into HADOOP_HIVE_COMPATIBILITY
# @return [Hash] step hash
def hive_install(hadoop_version)
  hive_version = Awscli::EMR::HADOOP_HIVE_COMPATIBILITY[hadoop_version]
  install_args = [
    's3://us-east-1.elasticmapreduce/libs/hive/hive-script',
    '--base-path',
    's3://us-east-1.elasticmapreduce/libs/hive/',
    '--install-hive',
    '--hive-versions',
    hive_version
  ]
  jar_step = {
    'Args' => install_args,
    'Jar' => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar'
  }
  {
    'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
    'Name' => 'awscli-emr-hive-setup',
    'HadoopJarStep' => jar_step
  }
end
|
397
|
+
|
398
|
+
# Builds the step that installs the latest pig version.
#
# @return [Hash] step hash
def pig_install
  install_args = [
    's3://us-east-1.elasticmapreduce/libs/pig/pig-script',
    '--base-path',
    's3://us-east-1.elasticmapreduce/libs/pig/',
    '--install-pig',
    '--pig-versions',
    'latest'
  ]
  jar_step = {
    'Args' => install_args,
    'Jar' => 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar'
  }
  {
    'ActionOnFailure' => 'TERMINATE_JOB_FLOW',
    'Name' => 'awscli-emr-pig-setup',
    'HadoopJarStep' => jar_step
  }
end
|
408
|
+
|
409
|
+
# Builds the bootstrap action that runs the setup-hbase script.
#
# @return [Hash] bootstrap action hash
def hbase_install_boot_strap
  script_action = {
    'Args' => [],
    'Path' => 's3://us-west-1.elasticmapreduce/bootstrap-actions/setup-hbase'
  }
  { 'Name' => 'awscli-emr-install-hbase', 'ScriptBootstrapAction' => script_action }
end
|
418
|
+
|
419
|
+
# Builds the step that starts the HBase master through the
# emr.hbase.backup.Main entry point.
#
# @return [Hash] step hash
def hbase_install_steps
  jar_step = {
    'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
    'Args' => ['emr.hbase.backup.Main', '--start-master']
  }
  {
    'ActionOnFailure' => 'CANCEL_AND_WAIT',
    'Name' => 'awscli-emr-start-hbase',
    'HadoopJarStep' => jar_step
  }
end
|
429
|
+
|
430
|
+
# Converts an HBase backup schedule spec into a scheduled-backup step.
#
# Spec format: frequency*,frequency_unit*(days|hrs|mins),path(s3)*,start_time*(now|iso-format)
# All four fields are required. Aborts on a malformed spec, an empty field,
# or an unknown frequency unit.
#
# NOTE(review): the interval is passed as --full-backup-time-interval while
# the unit is passed as --incremental-backup-time-unit; confirm this pairing
# against the EMR HBase backup documentation.
#
# @param backup_step [String] comma-separated backup spec
# @param consistent [Boolean] when truthy, '--consistent' is appended
# @return [Hash] step hash
def parse_hbase_backup(backup_step, consistent=false)
  unless backup_step =~ /(.*),(.*),(.*),(.*)/
    abort 'Invalid backup step pattern, expecting frequency,frequency_unit(days|hrs|mins),path(s3),start_time(now|iso-format)'
  end

  # limit -1 keeps trailing empty fields, so an empty start_time reaches the
  # required-field check below as '' instead of nil.
  frequency, frequency_unit, path, start_time = backup_step.split(',', -1)
  if [frequency, frequency_unit, path, start_time].any?(&:empty?)
    abort 'frequency, frequency_unit, path, start_time are required to perform a backup'
  end
  abort "Invalid frequency unit : #{frequency_unit}" unless %w(days hrs mins).include?(frequency_unit)

  backup_args = [
    'emr.hbase.backup.Main',
    '--backup-dir', path,
    '--set-scheduled-backup', 'true',
    '--full-backup-time-interval', frequency,
    '--incremental-backup-time-unit', frequency_unit,
    '--start-time', start_time
  ]
  backup_args << '--consistent' if consistent

  {
    'Name' => 'awscli-emr-schedule-hbase-backup',
    'ActionOnFailure' => 'CANCEL_AND_WAIT',
    'HadoopJarStep' => {
      'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
      'Args' => backup_args
    }
  }
end
|
450
|
+
|
451
|
+
# Converts an HBase restore spec into a restore step.
#
# Spec format: path(s3)*,version
# The backup version is optional; when given it is passed as
# --backup-version. Aborts when the path is missing or empty.
#
# @param restore_step [String] comma-separated restore spec
# @return [Hash] step hash
def parse_hbase_restore(restore_step)
  path, version = restore_step.split(',')
  # to_s guards against nil when a field is absent.
  abort 'path is required' if path.to_s.empty?

  restore_args = ['emr.hbase.backup.Main', '--restore', '--backup-dir', path]
  restore_args.push('--backup-version', version) unless version.to_s.empty?

  {
    'Name' => 'awscli-emr-restore-hbase-backup',
    'ActionOnFailure' => 'CANCEL_AND_WAIT',
    'HadoopJarStep' => {
      'Jar' => '/home/hadoop/lib/hbase-0.92.0.jar',
      'Args' => restore_args
    }
  }
end
|
470
|
+
end
|
471
|
+
end
|
472
|
+
end
|