mobilize-hive 1.29 → 1.31
- data/README.md +11 -0
- data/lib/mobilize-hive/handlers/hive.rb +65 -75
- data/lib/mobilize-hive/helpers/hive_helper.rb +63 -0
- data/lib/mobilize-hive/version.rb +1 -1
- data/mobilize-hive.gemspec +1 -1
- data/test/hive_job_rows.yml +9 -1
- data/test/mobilize-hive_test.rb +17 -1
- metadata +20 -9
- checksums.yaml +0 -7
data/README.md
CHANGED
@@ -142,6 +142,17 @@ Start
 * cluster and user are optional for all of the below.
 * cluster defaults to the first cluster listed;
 * user is treated the same way as in [mobilize-ssh][mobilize-ssh].
+* params are also optional for all of the below. They replace HQL in sources.
+* params are passed as a YML or JSON, as in:
+* `hive.run source:<source_path>, params:{'date': '2013-03-01', 'unit': 'widgets'}`
+* this example replaces all the keys, preceded by '@' in all source hqls with the value.
+* The preceding '@' is used to keep from replacing instances
+of "date" and "unit" in the HQL; you should have `@date` and `@unit` in your actual HQL
+if you'd like to replace those tokens.
+* in addition, the following params are substituted automatically:
+* `$utc_date` - replaced with YYYY-MM-DD date, UTC
+* `$utc_time` - replaced with HH:MM time, UTC
+* any occurrence of these values in HQL will be replaced at runtime.
 * hive.run `hql:<hql> || source:<gsheet_path>, user:<user>, cluster:<cluster>`, which executes the
 script in the hql or source sheet and returns any output specified at the
 end. If the cmd or last query in source is a select statement, column headers will be
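The substitution rules above can be illustrated with a short standalone sketch (plain Ruby, not gem code; the table and column names are made up, and `start_with?` stands in for the gem's `starts_with?` helper): user-supplied params are matched as `@key` in the HQL, while the automatic `$utc_date`/`$utc_time` params are matched literally.

```ruby
# Hypothetical illustration of the README's parameter-substitution rules.
hql    = "select * from mobilize.sales where date='@date' and unit='@unit' and loaded<='$utc_date'"
params = {'date' => '2013-03-01', 'unit' => 'widgets'}
params = params.merge({'$utc_date' => Time.now.utc.strftime("%Y-%m-%d")})

resolved = params.reduce(hql) do |q, (k, v)|
  # keys beginning with '$' are replaced as-is; all others expect an '@' prefix in the HQL
  k.start_with?("$") ? q.gsub(k, v) : q.gsub("@#{k}", v)
end
puts resolved
```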
data/lib/mobilize-hive/handlers/hive.rb
CHANGED
@@ -1,56 +1,7 @@
 module Mobilize
   module Hive
-    def Hive.config
-      Base.config('hive')
-    end
-
-    def Hive.exec_path(cluster)
-      Hive.clusters[cluster]['exec_path']
-    end
-
-    def Hive.output_db(cluster)
-      Hive.clusters[cluster]['output_db']
-    end
-
-    def Hive.output_db_user(cluster)
-      output_db_node = Hadoop.gateway_node(cluster)
-      output_db_user = Ssh.host(output_db_node)['user']
-      output_db_user
-    end
-
-    def Hive.clusters
-      Hive.config['clusters']
-    end
-
-    def Hive.slot_ids(cluster)
-      (1..Hive.clusters[cluster]['max_slots']).to_a.map{|s| "#{cluster}_#{s.to_s}"}
-    end
-
-    def Hive.slot_worker_by_cluster_and_path(cluster,path)
-      working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['hive_slot'];rescue;nil;end}.compact.uniq
-      Hive.slot_ids(cluster).each do |slot_id|
-        unless working_slots.include?(slot_id)
-          Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>slot_id})
-          return slot_id
-        end
-      end
-      #return false if none are available
-      return false
-    end
-
-    def Hive.unslot_worker_by_path(path)
-      begin
-        Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>nil})
-        return true
-      rescue
-        return false
-      end
-    end
-
-    def Hive.databases(cluster,user_name)
-      Hive.run(cluster,"show databases",user_name)['stdout'].split("\n")
-    end
-
+    #adds convenience methods
+    require "#{File.dirname(__FILE__)}/../helpers/hive_helper"
     # converts a source path or target path to a dst in the context of handler and stage
     def Hive.path_to_dst(path,stage_path,gdrive_slot)
       has_handler = true if path.index("://")
@@ -142,12 +93,25 @@ module Mobilize
     end
 
     #run a generic hive command, with the option of passing a file hash to be locally available
-    def Hive.run(cluster,hql,user_name,file_hash=nil)
+    def Hive.run(cluster,hql,user_name,params=nil,file_hash=nil)
       # no TempStatsStore
       hql = "set hive.stats.autogather=false;#{hql}"
       filename = hql.to_md5
       file_hash||= {}
       file_hash[filename] = hql
+      #add in default params
+      params ||= {}
+      params = params.merge(Hive.default_params)
+      #replace any params in the file_hash and command
+      params.each do |k,v|
+        file_hash.each do |name,data|
+          if k.starts_with?("$")
+            data.gsub!(k,v)
+          else
+            data.gsub!("@#{k}",v)
+          end
+        end
+      end
       #silent mode so we don't have logs in stderr; clip output
       #at hadoop read limit
       command = "#{Hive.exec_path(cluster)} -S -f #{filename} | head -c #{Hadoop.read_limit}"
@@ -192,7 +156,8 @@ module Mobilize
 
       #check for select at end
       hql_array = hql.split(";").map{|hc| hc.strip}.reject{|hc| hc.length==0}
-      if hql_array.last.downcase.starts_with?("select")
+      last_statement = hql_array.last.downcase.split("\n").reject{|l| l.starts_with?("-- ")}.first
+      if last_statement.to_s.starts_with?("select")
         #nil if no prior commands
         prior_hql = hql_array[0..-2].join(";") if hql_array.length > 1
         select_hql = hql_array.last
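The two lines added above change how the final statement is detected. A minimal standalone sketch (plain Ruby; `start_with?` in place of the gem's `starts_with?` helper, and made-up HQL) of the same check, which keeps a trailing `-- ` comment line from hiding a closing select:

```ruby
hql = "create table tmp as select 1;\n-- write the result out\nselect * from tmp"
hql_array = hql.split(";").map { |hc| hc.strip }.reject { |hc| hc.length == 0 }
# strip leading "-- " comment lines from the last statement before testing for select
last_statement = hql_array.last.downcase.split("\n").reject { |l| l.start_with?("-- ") }.first
puts "output will be captured" if last_statement.to_s.start_with?("select")
```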
@@ -200,10 +165,10 @@ module Mobilize
                            "drop table if exists #{output_path}",
                            "create table #{output_path} as #{select_hql};"].join(";")
         full_hql = [prior_hql, output_table_hql].compact.join(";")
-        result = Hive.run(cluster,full_hql, user_name)
+        result = Hive.run(cluster,full_hql, user_name,params['params'])
         Dataset.find_or_create_by_url(out_url)
       else
-        result = Hive.run(cluster, hql, user_name)
+        result = Hive.run(cluster, hql, user_name,params['params'])
         Dataset.find_or_create_by_url(out_url)
         Dataset.write_by_url(out_url,result['stdout'],user_name) if result['stdout'].to_s.length>0
       end
@@ -244,9 +209,10 @@ module Mobilize
       schema_hash
     end
 
-    def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop=false, schema_hash=nil)
+    def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop=false, schema_hash=nil, params=nil)
       table_path = [db,table].join(".")
       table_stats = Hive.table_stats(cluster, db, table, user_name)
+      url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
 
       source_hql_array = source_hql.split(";")
       last_select_i = source_hql_array.rindex{|hql| hql.downcase.strip.starts_with?("select")}
@@ -262,7 +228,8 @@ module Mobilize
       temp_set_hql = "set mapred.job.name=#{job_name} (temp table);"
       temp_drop_hql = "drop table if exists #{temp_table_path};"
       temp_create_hql = "#{temp_set_hql}#{prior_hql}#{temp_drop_hql}create table #{temp_table_path} as #{last_select_hql}"
-      Hive.run(cluster,temp_create_hql,user_name)
+      response = Hive.run(cluster,temp_create_hql,user_name,params)
+      raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
 
       source_table_stats = Hive.table_stats(cluster,temp_db,temp_table_name,user_name)
       source_fields = source_table_stats['field_defs']
@@ -300,10 +267,12 @@ module Mobilize
                            target_insert_hql,
                            temp_drop_hql].join
 
-        Hive.run(cluster, target_full_hql, user_name)
+        response = Hive.run(cluster, target_full_hql, user_name, params)
+
+        raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
 
       elsif part_array.length > 0 and
-        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']} == part_array}
+        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']}.sort == part_array.sort}
         #partitions and no target table or same partitions in both target table and user params
 
         target_headers = source_fields.map{|f| f['name']}.reject{|h| part_array.include?(h)}
@@ -350,9 +319,17 @@ module Mobilize
         part_set_hql = "set hive.cli.print.header=true;set mapred.job.name=#{job_name} (permutations);"
         part_select_hql = "select distinct #{target_part_stmt} from #{temp_table_path};"
         part_perm_hql = part_set_hql + part_select_hql
-        part_perm_tsv = Hive.run(cluster, part_perm_hql, user_name)['stdout']
+        response = Hive.run(cluster, part_perm_hql, user_name, params)
+        raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
+        part_perm_tsv = response['stdout']
         #having gotten the permutations, ensure they are dropped
         part_hash_array = part_perm_tsv.tsv_to_hash_array
+        #make sure there is data
+        if part_hash_array.first.nil? or part_hash_array.first.values.include?(nil)
+          #blank result set, return url
+          return url
+        end
+
         part_drop_hql = part_hash_array.map do |h|
           part_drop_stmt = h.map do |name,value|
             part_defs[name[1..-2]]=="string" ? "#{name}='#{value}'" : "#{name}=#{value}"
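The early return added above guards against a permutation query that comes back empty. `tsv_to_hash_array` is a mobilize core String extension; a hypothetical stand-in is inlined below just to show the shape of the check:

```ruby
# Hypothetical stand-in for mobilize's String#tsv_to_hash_array.
def tsv_to_hash_array(tsv)
  lines   = tsv.split("\n")
  headers = lines.first.to_s.split("\t")
  lines[1..-1].to_a.map { |l| Hash[headers.zip(l.split("\t"))] }
end

part_perm_tsv   = "date\tproduct\n"   # header only: the "select distinct" returned no rows
part_hash_array = tsv_to_hash_array(part_perm_tsv)
if part_hash_array.first.nil? || part_hash_array.first.values.include?(nil)
  puts "blank result set; return the hive:// url without touching partitions"
end
```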
@@ -368,12 +345,12 @@ module Mobilize
 
         target_full_hql = [target_set_hql, target_create_hql, target_insert_hql, temp_drop_hql].join
 
-        Hive.run(cluster, target_full_hql, user_name)
+        response = Hive.run(cluster, target_full_hql, user_name, params)
+        raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
       else
         error_msg = "Incompatible partition specs"
         raise error_msg
       end
-      url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
       return url
     end
 
@@ -381,6 +358,12 @@ module Mobilize
     #Accepts options to drop existing target if any
     #also schema with column datatype overrides
     def Hive.tsv_to_table(cluster, db, table, part_array, source_tsv, user_name, drop=false, schema_hash=nil)
+      return nil if source_tsv.strip.length==0
+      if source_tsv.index("\r\n")
+        source_tsv = source_tsv.gsub("\r\n","\n")
+      elsif source_tsv.index("\r")
+        source_tsv = source_tsv.gsub("\r","\n")
+      end
       source_headers = source_tsv.tsv_header_array
 
       table_path = [db,table].join(".")
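A small standalone sketch (sample TSV is made up) of the normalization added at the top of `tsv_to_table`: empty input short-circuits, and Windows (`\r\n`) or old Mac (`\r`) line endings are converted to `\n` before the TSV is parsed:

```ruby
source_tsv = "act_date\tproduct\tvalue\r\n2013-01-01\twidget\t9\r\n"
exit if source_tsv.strip.length == 0   # tsv_to_table returns nil for empty input
if source_tsv.index("\r\n")
  source_tsv = source_tsv.gsub("\r\n", "\n")
elsif source_tsv.index("\r")
  source_tsv = source_tsv.gsub("\r", "\n")
end
puts source_tsv.split("\n").length     # => 2 (header row plus one data row)
```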
@@ -388,6 +371,8 @@ module Mobilize
 
       schema_hash ||= {}
 
+      url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
+
       if part_array.length == 0 and
         table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].nil?}
         #no partitions in either user params or the target table
@@ -414,10 +399,11 @@ module Mobilize
 
         target_full_hql = [target_drop_hql,target_create_hql,target_insert_hql].join(";")
 
-        Hive.run(cluster, target_full_hql, user_name, file_hash)
+        response = Hive.run(cluster, target_full_hql, user_name, nil, file_hash)
+        raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
 
       elsif part_array.length > 0 and
-        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']} == part_array}
+        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']}.sort == part_array.sort}
         #partitions and no target table
         #or same partitions in both target table and user params
         #or drop and start fresh
@@ -441,13 +427,17 @@ module Mobilize
                               "partitioned by #{partition_defs}"
 
         #create target table early if not here
-        Hive.run(cluster, target_create_hql, user_name)
+        response = Hive.run(cluster, target_create_hql, user_name)
+        raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
+
+        #return url (operation complete) if there's no data
+        source_hash_array = source_tsv.tsv_to_hash_array
+        return url if source_hash_array.length==1 and source_hash_array.first.values.compact.length==0
 
         table_stats = Hive.table_stats(cluster, db, table, user_name)
 
         #create data hash from source hash array
         data_hash = {}
-        source_hash_array = source_tsv.tsv_to_hash_array
         source_hash_array.each do |ha|
           tpmk = part_array.map{|pn| "#{pn}=#{ha[pn]}"}.join("/")
           tpmv = ha.reject{|k,v| part_array.include?(k)}.values.join("\001")
@@ -480,7 +470,8 @@ module Mobilize
         #run actual partition adds all at once
         if target_part_hql.length>0
           puts "Adding partitions to #{cluster}/#{db}/#{table} for #{user_name} at #{Time.now.utc}"
-          Hive.run(cluster, target_part_hql, user_name)
+          response = Hive.run(cluster, target_part_hql, user_name)
+          raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
         end
       else
         error_msg = "Incompatible partition specs: " +
@@ -488,7 +479,7 @@ module Mobilize
                     "user_params:#{part_array.to_s}"
         raise error_msg
       end
-
+
       return url
     end
 
@@ -525,11 +516,11 @@ module Mobilize
         #source table
         cluster,source_path = source.path.split("/").ie{|sp| [sp.first, sp[1..-1].join(".")]}
         source_hql = "select * from #{source_path};"
-      elsif ['gsheet','gridfs','hdfs'].include?(source.handler)
+      elsif ['gsheet','gfile','gridfs','hdfs'].include?(source.handler)
         if source.path.ie{|sdp| sdp.index(/\.[A-Za-z]ql$/) or sdp.ends_with?(".ql")}
           source_hql = source.read(user_name,gdrive_slot)
         else
-          #tsv from sheet
+          #tsv from sheet or file
           source_tsv = source.read(user_name,gdrive_slot)
         end
       end
@@ -554,6 +545,8 @@ module Mobilize
         Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop, schema_hash)
       elsif source_tsv
         Hive.tsv_to_table(cluster, db, table, part_array, source_tsv, user_name, drop, schema_hash)
+      elsif source
+        #null sheet
       else
         raise "Unable to determine source tsv or source hql"
       end
@@ -580,11 +573,8 @@ module Mobilize
       select_hql = "select * from #{source_path};"
       hql = [set_hql,select_hql].join
       response = Hive.run(cluster, hql,user_name)
-      if response['
-
-      else
-        raise "Unable to read hive://#{dst_path} with error: #{response['stderr']}"
-      end
+      raise "Unable to read hive://#{dst_path} with error: #{response['stderr']}" if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
+      return response['stdout']
     end
 
     def Hive.write_by_dataset_path(dst_path,source_tsv,user_name,*args)
data/lib/mobilize-hive/helpers/hive_helper.rb
ADDED
@@ -0,0 +1,63 @@
+module Mobilize
+  module Hive
+    def self.config
+      Base.config('hive')
+    end
+
+    def self.exec_path(cluster)
+      self.clusters[cluster]['exec_path']
+    end
+
+    def self.output_db(cluster)
+      self.clusters[cluster]['output_db']
+    end
+
+    def self.output_db_user(cluster)
+      output_db_node = Hadoop.gateway_node(cluster)
+      output_db_user = Ssh.host(output_db_node)['user']
+      output_db_user
+    end
+
+    def self.clusters
+      self.config['clusters']
+    end
+
+    def self.slot_ids(cluster)
+      (1..self.clusters[cluster]['max_slots']).to_a.map{|s| "#{cluster}_#{s.to_s}"}
+    end
+
+    def self.slot_worker_by_cluster_and_path(cluster,path)
+      working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['hive_slot'];rescue;nil;end}.compact.uniq
+      self.slot_ids(cluster).each do |slot_id|
+        unless working_slots.include?(slot_id)
+          Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>slot_id})
+          return slot_id
+        end
+      end
+      #return false if none are available
+      return false
+    end
+
+    def self.unslot_worker_by_path(path)
+      begin
+        Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>nil})
+        return true
+      rescue
+        return false
+      end
+    end
+
+    def self.databases(cluster,user_name)
+      self.run(cluster,"show databases",user_name)['stdout'].split("\n")
+    end
+
+    def self.default_params
+      time = Time.now.utc
+      {
+        '$utc_date'=>time.strftime("%Y-%m-%d"),
+        '$utc_time'=>time.strftime("%H:%M"),
+      }
+    end
+  end
+end
+
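The `default_params` method added above supplies the automatic `$utc_date`/`$utc_time` values merged into every `Hive.run` call. Restated as a standalone snippet (not gem code) so the output format is visible:

```ruby
time = Time.now.utc
default_params = {
  '$utc_date' => time.strftime("%Y-%m-%d"),  # e.g. "2013-04-18"
  '$utc_time' => time.strftime("%H:%M")      # e.g. "17:42"
}
puts default_params.inspect
```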
data/mobilize-hive.gemspec
CHANGED
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.require_paths = ["lib"]
-  gem.add_runtime_dependency "mobilize-hdfs","1.
+  gem.add_runtime_dependency "mobilize-hdfs","1.31"
 end
data/test/hive_job_rows.yml
CHANGED
@@ -20,7 +20,15 @@
   active: true
   trigger: after hive_test_2
   status: ""
-  stage1: hive.run hql:"select
+  stage1: hive.run hql:"select '@date' as `date`,product,category,value from mobilize.hive_test_1;", params:{'date':'2013-01-01'}
   stage2: hive.write source:"stage1",target:"mobilize/hive_test_3", partitions:"date/product", drop:true
   stage3: hive.write hql:"select * from mobilize.hive_test_3;",target:"mobilize/hive_test_3", partitions:"date/product", drop:false
   stage4: gsheet.write source:"hive://mobilize/hive_test_3", target:"hive_test_3.out"
+- name: hive_test_4
+  active: true
+  trigger: after hive_test_3
+  status: ""
+  stage1: hive.write source:"hive_test_4_stage_1.in", target:"mobilize/hive_test_1", partitions:"act_date"
+  stage2: hive.write source:"hive_test_4_stage_2.in", target:"mobilize/hive_test_1", partitions:"act_date"
+  stage3: hive.run hql:"select '$utc_date $utc_time' as `date_time`,product,category,value from mobilize.hive_test_1;"
+  stage4: gsheet.write source:stage3, target:"hive_test_4.out"
data/test/mobilize-hive_test.rb
CHANGED
@@ -25,6 +25,18 @@ describe "Mobilize" do
     hive_1_in_tsv = YAML.load_file("#{Mobilize::Base.root}/test/hive_test_1_in.yml").hash_array_to_tsv
     hive_1_in_sheet.write(hive_1_in_tsv,Mobilize::Gdrive.owner_name)
 
+    #create blank sheet
+    hive_4_stage_1_in_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_1.in",gdrive_slot)
+    [hive_4_stage_1_in_sheet].each {|s| s.delete if s}
+    hive_4_stage_1_in_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_1.in",gdrive_slot)
+
+    #create sheet w just headers
+    hive_4_stage_2_in_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_2.in",gdrive_slot)
+    [hive_4_stage_2_in_sheet].each {|s| s.delete if s}
+    hive_4_stage_2_in_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_2.in",gdrive_slot)
+    hive_4_stage_2_in_sheet_header = hive_1_in_tsv.tsv_header_array.join("\t")
+    hive_4_stage_2_in_sheet.write(hive_4_stage_2_in_sheet_header,Mobilize::Gdrive.owner_name)
+
     hive_1_schema_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1.schema",gdrive_slot)
     [hive_1_schema_sheet].each {|s| s.delete if s}
     hive_1_schema_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1.schema",gdrive_slot)
@@ -51,21 +63,25 @@ describe "Mobilize" do
     [hive_2_target_sheet].each{|s| s.delete if s}
     hive_3_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_3.out",gdrive_slot)
     [hive_3_target_sheet].each{|s| s.delete if s}
+    hive_4_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4.out",gdrive_slot)
+    [hive_4_target_sheet].each{|s| s.delete if s}
 
     puts "job row added, force enqueued requestor, wait for stages"
     r.enqueue!
-    wait_for_stages(
+    wait_for_stages(2100)
 
     puts "jobtracker posted data to test sheet"
     hive_1_stage_2_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1_stage_2.out",gdrive_slot)
     hive_1_stage_3_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1_stage_3.out",gdrive_slot)
     hive_2_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_2.out",gdrive_slot)
     hive_3_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_3.out",gdrive_slot)
+    hive_4_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4.out",gdrive_slot)
 
     assert hive_1_stage_2_target_sheet.read(u.name).length == 219
     assert hive_1_stage_3_target_sheet.read(u.name).length > 3
     assert hive_2_target_sheet.read(u.name).length == 599
     assert hive_3_target_sheet.read(u.name).length == 347
+    assert hive_4_target_sheet.read(u.name).length == 432
   end
 
   def wait_for_stages(time_limit=600,stage_limit=120,wait_length=10)
metadata
CHANGED
@@ -1,29 +1,32 @@
 --- !ruby/object:Gem::Specification
 name: mobilize-hive
 version: !ruby/object:Gem::Version
-  version: '1.29'
+  version: '1.31'
+  prerelease:
 platform: ruby
 authors:
 - Cassio Paes-Leme
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-
+date: 2013-04-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mobilize-hdfs
   requirement: !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - '='
       - !ruby/object:Gem::Version
-        version: '1.
+        version: '1.31'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - '='
       - !ruby/object:Gem::Version
-        version: '1.
+        version: '1.31'
 description: Adds hive read, write, and run support to mobilize-hdfs
 email:
 - cpaesleme@dena.com
@@ -38,6 +41,7 @@ files:
 - Rakefile
 - lib/mobilize-hive.rb
 - lib/mobilize-hive/handlers/hive.rb
+- lib/mobilize-hive/helpers/hive_helper.rb
 - lib/mobilize-hive/tasks.rb
 - lib/mobilize-hive/version.rb
 - lib/samples/hive.yml
@@ -51,26 +55,33 @@ files:
 - test/test_helper.rb
 homepage: http://github.com/dena/mobilize-hive
 licenses: []
-metadata: {}
 post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
-  - - '>='
+  - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
+      segments:
+      - 0
+      hash: -4285752485316531029
 required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
-  - - '>='
+  - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
+      segments:
+      - 0
+      hash: -4285752485316531029
 requirements: []
 rubyforge_project:
-rubygems_version:
+rubygems_version: 1.8.25
 signing_key:
-specification_version:
+specification_version: 3
 summary: Adds hive read, write, and run support to mobilize-hdfs
 test_files:
 - test/hive_job_rows.yml
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
----
-SHA1:
-  metadata.gz: a7bf2935cac4914e2530e45a969942a5ae856e1c
-  data.tar.gz: 4b5b751411661d78e1ce3e4c65a8e979ffe3318b
-SHA512:
-  metadata.gz: b1a7f94de8452cb8aecdaba6e33b20dfeea208f86e046a8f9b48e2387758ef6fda9a74773775d1d7b7fe2e5631190d4958327fd747d526b4f2381c379f9a8b8d
-  data.tar.gz: 2a3e60b51db89a7e43ae465d9d0853f4cd875d9590b91d4b51ac7211debb7ea79c87b906a25eea1fbd8e4080ee60dac926dae3c69bf81f848d67c63b85cff407