mobilize-hive 1.29 → 1.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +11 -0
- data/lib/mobilize-hive/handlers/hive.rb +65 -75
- data/lib/mobilize-hive/helpers/hive_helper.rb +63 -0
- data/lib/mobilize-hive/version.rb +1 -1
- data/mobilize-hive.gemspec +1 -1
- data/test/hive_job_rows.yml +9 -1
- data/test/mobilize-hive_test.rb +17 -1
- metadata +20 -9
- checksums.yaml +0 -7
data/README.md
CHANGED
@@ -142,6 +142,17 @@ Start
   * cluster and user are optional for all of the below.
   * cluster defaults to the first cluster listed;
   * user is treated the same way as in [mobilize-ssh][mobilize-ssh].
+  * params are also optional for all of the below. They replace HQL in sources.
+    * params are passed as a YML or JSON, as in:
+      * `hive.run source:<source_path>, params:{'date': '2013-03-01', 'unit': 'widgets'}`
+    * this example replaces all the keys, preceded by '@' in all source hqls with the value.
+      * The preceding '@' is used to keep from replacing instances
+        of "date" and "unit" in the HQL; you should have `@date` and `@unit` in your actual HQL
+        if you'd like to replace those tokens.
+    * in addition, the following params are substituted automatically:
+      * `$utc_date` - replaced with YYYY-MM-DD date, UTC
+      * `$utc_time` - replaced with HH:MM time, UTC
+      * any occurrence of these values in HQL will be replaced at runtime.
 * hive.run `hql:<hql> || source:<gsheet_path>, user:<user>, cluster:<cluster>`, which executes the
   script in the hql or source sheet and returns any output specified at the
   end. If the cmd or last query in source is a select statement, column headers will be
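To make the substitution rules above concrete (the table name and values here are hypothetical, not part of the gem): a source sheet containing `select * from mobilize.sales where sale_date = '@date' and pulled_before = '$utc_date';`, run as `hive.run source:<source_path>, params:{'date': '2013-03-01'}`, would execute with `@date` replaced by `2013-03-01` and `$utc_date` replaced by the current UTC date.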
data/lib/mobilize-hive/handlers/hive.rb
CHANGED
@@ -1,56 +1,7 @@
 module Mobilize
   module Hive
-    def Hive.config
-      Base.config('hive')
-    end
-
-    def Hive.exec_path(cluster)
-      Hive.clusters[cluster]['exec_path']
-    end
-
-    def Hive.output_db(cluster)
-      Hive.clusters[cluster]['output_db']
-    end
-
-    def Hive.output_db_user(cluster)
-      output_db_node = Hadoop.gateway_node(cluster)
-      output_db_user = Ssh.host(output_db_node)['user']
-      output_db_user
-    end
-
-    def Hive.clusters
-      Hive.config['clusters']
-    end
-
-    def Hive.slot_ids(cluster)
-      (1..Hive.clusters[cluster]['max_slots']).to_a.map{|s| "#{cluster}_#{s.to_s}"}
-    end
-
-    def Hive.slot_worker_by_cluster_and_path(cluster,path)
-      working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['hive_slot'];rescue;nil;end}.compact.uniq
-      Hive.slot_ids(cluster).each do |slot_id|
-        unless working_slots.include?(slot_id)
-          Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>slot_id})
-          return slot_id
-        end
-      end
-      #return false if none are available
-      return false
-    end
-
-    def Hive.unslot_worker_by_path(path)
-      begin
-        Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>nil})
-        return true
-      rescue
-        return false
-      end
-    end
-
-    def Hive.databases(cluster,user_name)
-      Hive.run(cluster,"show databases",user_name)['stdout'].split("\n")
-    end
-
+    #adds convenience methods
+    require "#{File.dirname(__FILE__)}/../helpers/hive_helper"
     # converts a source path or target path to a dst in the context of handler and stage
     def Hive.path_to_dst(path,stage_path,gdrive_slot)
       has_handler = true if path.index("://")
@@ -142,12 +93,25 @@ module Mobilize
     end
 
     #run a generic hive command, with the option of passing a file hash to be locally available
-    def Hive.run(cluster,hql,user_name,file_hash=nil)
+    def Hive.run(cluster,hql,user_name,params=nil,file_hash=nil)
       # no TempStatsStore
       hql = "set hive.stats.autogather=false;#{hql}"
       filename = hql.to_md5
       file_hash||= {}
       file_hash[filename] = hql
+      #add in default params
+      params ||= {}
+      params = params.merge(Hive.default_params)
+      #replace any params in the file_hash and command
+      params.each do |k,v|
+        file_hash.each do |name,data|
+          if k.starts_with?("$")
+            data.gsub!(k,v)
+          else
+            data.gsub!("@#{k}",v)
+          end
+        end
+      end
       #silent mode so we don't have logs in stderr; clip output
       #at hadoop read limit
       command = "#{Hive.exec_path(cluster)} -S -f #{filename} | head -c #{Hadoop.read_limit}"
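The params handling added to `Hive.run` above merges the automatic defaults into the caller's params and rewrites every staged file before execution. A minimal standalone sketch of that substitution step, using plain Ruby `start_with?` in place of the gem's `starts_with?` core extension (the method and variable names are illustrative, not the gem's API):

```ruby
require 'time'

# '$'-prefixed keys (the automatic params) are replaced literally;
# user-supplied keys are matched with a leading '@'.
def substitute_params!(file_hash, params)
  defaults = {
    '$utc_date' => Time.now.utc.strftime('%Y-%m-%d'),
    '$utc_time' => Time.now.utc.strftime('%H:%M')
  }
  params = params.merge(defaults)
  params.each do |k, v|
    file_hash.each_value do |data|
      if k.start_with?('$')
        data.gsub!(k, v)
      else
        data.gsub!("@#{k}", v)
      end
    end
  end
  file_hash
end

file_hash = { 'query.hql' => "select * from t where `date` = '@date' and run_date = '$utc_date';" }
substitute_params!(file_hash, 'date' => '2013-01-01')
# file_hash now has '@date' replaced by '2013-01-01' and '$utc_date' by today's UTC date
```

Automatic params keep their `$` prefix in the HQL, while user params are matched with a leading `@`, which is why the README asks for `@date` rather than `date` in source HQL.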
@@ -192,7 +156,8 @@ module Mobilize
 
       #check for select at end
       hql_array = hql.split(";").map{|hc| hc.strip}.reject{|hc| hc.length==0}
-      if hql_array.last.downcase.starts_with?("select")
+      last_statement = hql_array.last.downcase.split("\n").reject{|l| l.starts_with?("-- ")}.first
+      if last_statement.to_s.starts_with?("select")
         #nil if no prior commands
         prior_hql = hql_array[0..-2].join(";") if hql_array.length > 1
         select_hql = hql_array.last
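The new `last_statement` logic above strips leading `-- ` comment lines from the final statement so a trailing comment cannot hide a terminal select. A quick standalone illustration, again with plain `start_with?` standing in for the gem's `starts_with?` extension:

```ruby
hql = "set hive.stats.autogather=false;\n-- final output\nselect * from mobilize.hive_test_1"

hql_array = hql.split(';').map(&:strip).reject(&:empty?)
# drop comment lines inside the last statement before testing for a select
last_statement = hql_array.last.downcase.split("\n").reject { |l| l.start_with?('-- ') }.first
puts 'ends in a select' if last_statement.to_s.start_with?('select')
```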
@@ -200,10 +165,10 @@ module Mobilize
                            "drop table if exists #{output_path}",
                            "create table #{output_path} as #{select_hql};"].join(";")
         full_hql = [prior_hql, output_table_hql].compact.join(";")
-        result = Hive.run(cluster,full_hql, user_name)
+        result = Hive.run(cluster,full_hql, user_name,params['params'])
         Dataset.find_or_create_by_url(out_url)
       else
-        result = Hive.run(cluster, hql, user_name)
+        result = Hive.run(cluster, hql, user_name,params['params'])
         Dataset.find_or_create_by_url(out_url)
         Dataset.write_by_url(out_url,result['stdout'],user_name) if result['stdout'].to_s.length>0
       end
@@ -244,9 +209,10 @@ module Mobilize
       schema_hash
     end
 
-    def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop=false, schema_hash=nil)
+    def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop=false, schema_hash=nil, params=nil)
       table_path = [db,table].join(".")
       table_stats = Hive.table_stats(cluster, db, table, user_name)
+      url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
 
       source_hql_array = source_hql.split(";")
       last_select_i = source_hql_array.rindex{|hql| hql.downcase.strip.starts_with?("select")}
@@ -262,7 +228,8 @@ module Mobilize
       temp_set_hql = "set mapred.job.name=#{job_name} (temp table);"
       temp_drop_hql = "drop table if exists #{temp_table_path};"
       temp_create_hql = "#{temp_set_hql}#{prior_hql}#{temp_drop_hql}create table #{temp_table_path} as #{last_select_hql}"
-      Hive.run(cluster,temp_create_hql,user_name)
+      response = Hive.run(cluster,temp_create_hql,user_name,params)
+      raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
 
       source_table_stats = Hive.table_stats(cluster,temp_db,temp_table_name,user_name)
       source_fields = source_table_stats['field_defs']
@@ -300,10 +267,12 @@ module Mobilize
                            target_insert_hql,
                            temp_drop_hql].join
 
-        Hive.run(cluster, target_full_hql, user_name)
+        response = Hive.run(cluster, target_full_hql, user_name, params)
+
+        raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
 
       elsif part_array.length > 0 and
-        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']} == part_array}
+        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']}.sort == part_array.sort}
         #partitions and no target table or same partitions in both target table and user params
 
         target_headers = source_fields.map{|f| f['name']}.reject{|h| part_array.include?(h)}
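The comparison change above sorts both sides, making the partition-spec check order-insensitive; previously the same partition names given in a different order would have been treated as incompatible. A trivial illustration with hypothetical partition names:

```ruby
existing_partitions  = ['date', 'product']   # as reported by the target table
requested_partitions = ['product', 'date']   # as given in the job's partitions param

existing_partitions == requested_partitions            #=> false
existing_partitions.sort == requested_partitions.sort  #=> true
```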
@@ -350,9 +319,17 @@ module Mobilize
         part_set_hql = "set hive.cli.print.header=true;set mapred.job.name=#{job_name} (permutations);"
         part_select_hql = "select distinct #{target_part_stmt} from #{temp_table_path};"
         part_perm_hql = part_set_hql + part_select_hql
-        part_perm_tsv = Hive.run(cluster, part_perm_hql, user_name)['stdout']
+        response = Hive.run(cluster, part_perm_hql, user_name, params)
+        raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
+        part_perm_tsv = response['stdout']
         #having gotten the permutations, ensure they are dropped
         part_hash_array = part_perm_tsv.tsv_to_hash_array
+        #make sure there is data
+        if part_hash_array.first.nil? or part_hash_array.first.values.include?(nil)
+          #blank result set, return url
+          return url
+        end
+
         part_drop_hql = part_hash_array.map do |h|
           part_drop_stmt = h.map do |name,value|
             part_defs[name[1..-2]]=="string" ? "#{name}='#{value}'" : "#{name}=#{value}"
@@ -368,12 +345,12 @@ module Mobilize
 
         target_full_hql = [target_set_hql, target_create_hql, target_insert_hql, temp_drop_hql].join
 
-        Hive.run(cluster, target_full_hql, user_name)
+        response = Hive.run(cluster, target_full_hql, user_name, params)
+        raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
       else
         error_msg = "Incompatible partition specs"
         raise error_msg
       end
-      url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
       return url
     end
 
@@ -381,6 +358,12 @@ module Mobilize
     #Accepts options to drop existing target if any
     #also schema with column datatype overrides
     def Hive.tsv_to_table(cluster, db, table, part_array, source_tsv, user_name, drop=false, schema_hash=nil)
+      return nil if source_tsv.strip.length==0
+      if source_tsv.index("\r\n")
+        source_tsv = source_tsv.gsub("\r\n","\n")
+      elsif source_tsv.index("\r")
+        source_tsv = source_tsv.gsub("\r","\n")
+      end
       source_headers = source_tsv.tsv_header_array
 
       table_path = [db,table].join(".")
@@ -388,6 +371,8 @@ module Mobilize
 
       schema_hash ||= {}
 
+      url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
+
       if part_array.length == 0 and
         table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].nil?}
         #no partitions in either user params or the target table
@@ -414,10 +399,11 @@ module Mobilize
 
         target_full_hql = [target_drop_hql,target_create_hql,target_insert_hql].join(";")
 
-        Hive.run(cluster, target_full_hql, user_name, file_hash)
+        response = Hive.run(cluster, target_full_hql, user_name, nil, file_hash)
+        raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
 
       elsif part_array.length > 0 and
-        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']} == part_array}
+        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']}.sort == part_array.sort}
         #partitions and no target table
         #or same partitions in both target table and user params
         #or drop and start fresh
@@ -441,13 +427,17 @@ module Mobilize
                                 "partitioned by #{partition_defs}"
 
         #create target table early if not here
-        Hive.run(cluster, target_create_hql, user_name)
+        response = Hive.run(cluster, target_create_hql, user_name)
+        raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
+
+        #return url (operation complete) if there's no data
+        source_hash_array = source_tsv.tsv_to_hash_array
+        return url if source_hash_array.length==1 and source_hash_array.first.values.compact.length==0
 
         table_stats = Hive.table_stats(cluster, db, table, user_name)
 
         #create data hash from source hash array
         data_hash = {}
-        source_hash_array = source_tsv.tsv_to_hash_array
         source_hash_array.each do |ha|
           tpmk = part_array.map{|pn| "#{pn}=#{ha[pn]}"}.join("/")
           tpmv = ha.reject{|k,v| part_array.include?(k)}.values.join("\001")
@@ -480,7 +470,8 @@ module Mobilize
         #run actual partition adds all at once
         if target_part_hql.length>0
           puts "Adding partitions to #{cluster}/#{db}/#{table} for #{user_name} at #{Time.now.utc}"
-          Hive.run(cluster, target_part_hql, user_name)
+          response = Hive.run(cluster, target_part_hql, user_name)
+          raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
         end
       else
         error_msg = "Incompatible partition specs: " +
@@ -488,7 +479,7 @@ module Mobilize
                     "user_params:#{part_array.to_s}"
         raise error_msg
       end
-
+
       return url
     end
 
@@ -525,11 +516,11 @@ module Mobilize
         #source table
         cluster,source_path = source.path.split("/").ie{|sp| [sp.first, sp[1..-1].join(".")]}
         source_hql = "select * from #{source_path};"
-      elsif ['gsheet','gridfs','hdfs'].include?(source.handler)
+      elsif ['gsheet','gfile','gridfs','hdfs'].include?(source.handler)
         if source.path.ie{|sdp| sdp.index(/\.[A-Za-z]ql$/) or sdp.ends_with?(".ql")}
           source_hql = source.read(user_name,gdrive_slot)
         else
-          #tsv from sheet
+          #tsv from sheet or file
           source_tsv = source.read(user_name,gdrive_slot)
         end
       end
@@ -554,6 +545,8 @@ module Mobilize
         Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop, schema_hash)
       elsif source_tsv
         Hive.tsv_to_table(cluster, db, table, part_array, source_tsv, user_name, drop, schema_hash)
+      elsif source
+        #null sheet
       else
         raise "Unable to determine source tsv or source hql"
       end
@@ -580,11 +573,8 @@ module Mobilize
       select_hql = "select * from #{source_path};"
       hql = [set_hql,select_hql].join
       response = Hive.run(cluster, hql,user_name)
-      if response['
-
-      else
-        raise "Unable to read hive://#{dst_path} with error: #{response['stderr']}"
-      end
+      raise "Unable to read hive://#{dst_path} with error: #{response['stderr']}" if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
+      return response['stdout']
     end
 
     def Hive.write_by_dataset_path(dst_path,source_tsv,user_name,*args)
data/lib/mobilize-hive/helpers/hive_helper.rb
ADDED
@@ -0,0 +1,63 @@
+module Mobilize
+  module Hive
+    def self.config
+      Base.config('hive')
+    end
+
+    def self.exec_path(cluster)
+      self.clusters[cluster]['exec_path']
+    end
+
+    def self.output_db(cluster)
+      self.clusters[cluster]['output_db']
+    end
+
+    def self.output_db_user(cluster)
+      output_db_node = Hadoop.gateway_node(cluster)
+      output_db_user = Ssh.host(output_db_node)['user']
+      output_db_user
+    end
+
+    def self.clusters
+      self.config['clusters']
+    end
+
+    def self.slot_ids(cluster)
+      (1..self.clusters[cluster]['max_slots']).to_a.map{|s| "#{cluster}_#{s.to_s}"}
+    end
+
+    def self.slot_worker_by_cluster_and_path(cluster,path)
+      working_slots = Mobilize::Resque.jobs.map{|j| begin j['args'][1]['hive_slot'];rescue;nil;end}.compact.uniq
+      self.slot_ids(cluster).each do |slot_id|
+        unless working_slots.include?(slot_id)
+          Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>slot_id})
+          return slot_id
+        end
+      end
+      #return false if none are available
+      return false
+    end
+
+    def self.unslot_worker_by_path(path)
+      begin
+        Mobilize::Resque.set_worker_args_by_path(path,{'hive_slot'=>nil})
+        return true
+      rescue
+        return false
+      end
+    end
+
+    def self.databases(cluster,user_name)
+      self.run(cluster,"show databases",user_name)['stdout'].split("\n")
+    end
+
+    def self.default_params
+      time = Time.now.utc
+      {
+        '$utc_date'=>time.strftime("%Y-%m-%d"),
+        '$utc_time'=>time.strftime("%H:%M"),
+      }
+    end
+  end
+end
+
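A hedged usage sketch for the extracted helper above, assuming the gem is loaded and a cluster named `dev_cluster` is configured with `max_slots: 3` in hive.yml (the cluster name, slot count, and returned values are illustrative):

```ruby
Mobilize::Hive.slot_ids('dev_cluster')
#=> ["dev_cluster_1", "dev_cluster_2", "dev_cluster_3"]

Mobilize::Hive.default_params
#=> {"$utc_date"=>"2013-04-18", "$utc_time"=>"11:30"}  (values depend on the current UTC time)
```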
data/mobilize-hive.gemspec
CHANGED
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.require_paths = ["lib"]
-  gem.add_runtime_dependency "mobilize-hdfs","1.
+  gem.add_runtime_dependency "mobilize-hdfs","1.31"
 end
data/test/hive_job_rows.yml
CHANGED
@@ -20,7 +20,15 @@
   active: true
   trigger: after hive_test_2
   status: ""
-  stage1: hive.run hql:"select
+  stage1: hive.run hql:"select '@date' as `date`,product,category,value from mobilize.hive_test_1;", params:{'date':'2013-01-01'}
   stage2: hive.write source:"stage1",target:"mobilize/hive_test_3", partitions:"date/product", drop:true
   stage3: hive.write hql:"select * from mobilize.hive_test_3;",target:"mobilize/hive_test_3", partitions:"date/product", drop:false
   stage4: gsheet.write source:"hive://mobilize/hive_test_3", target:"hive_test_3.out"
+- name: hive_test_4
+  active: true
+  trigger: after hive_test_3
+  status: ""
+  stage1: hive.write source:"hive_test_4_stage_1.in", target:"mobilize/hive_test_1", partitions:"act_date"
+  stage2: hive.write source:"hive_test_4_stage_2.in", target:"mobilize/hive_test_1", partitions:"act_date"
+  stage3: hive.run hql:"select '$utc_date $utc_time' as `date_time`,product,category,value from mobilize.hive_test_1;"
+  stage4: gsheet.write source:stage3, target:"hive_test_4.out"
data/test/mobilize-hive_test.rb
CHANGED
@@ -25,6 +25,18 @@ describe "Mobilize" do
     hive_1_in_tsv = YAML.load_file("#{Mobilize::Base.root}/test/hive_test_1_in.yml").hash_array_to_tsv
     hive_1_in_sheet.write(hive_1_in_tsv,Mobilize::Gdrive.owner_name)
 
+    #create blank sheet
+    hive_4_stage_1_in_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_1.in",gdrive_slot)
+    [hive_4_stage_1_in_sheet].each {|s| s.delete if s}
+    hive_4_stage_1_in_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_1.in",gdrive_slot)
+
+    #create sheet w just headers
+    hive_4_stage_2_in_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_2.in",gdrive_slot)
+    [hive_4_stage_2_in_sheet].each {|s| s.delete if s}
+    hive_4_stage_2_in_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_2.in",gdrive_slot)
+    hive_4_stage_2_in_sheet_header = hive_1_in_tsv.tsv_header_array.join("\t")
+    hive_4_stage_2_in_sheet.write(hive_4_stage_2_in_sheet_header,Mobilize::Gdrive.owner_name)
+
     hive_1_schema_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1.schema",gdrive_slot)
     [hive_1_schema_sheet].each {|s| s.delete if s}
     hive_1_schema_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1.schema",gdrive_slot)
@@ -51,21 +63,25 @@ describe "Mobilize" do
     [hive_2_target_sheet].each{|s| s.delete if s}
     hive_3_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_3.out",gdrive_slot)
     [hive_3_target_sheet].each{|s| s.delete if s}
+    hive_4_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4.out",gdrive_slot)
+    [hive_4_target_sheet].each{|s| s.delete if s}
 
     puts "job row added, force enqueued requestor, wait for stages"
     r.enqueue!
-    wait_for_stages(
+    wait_for_stages(2100)
 
     puts "jobtracker posted data to test sheet"
     hive_1_stage_2_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1_stage_2.out",gdrive_slot)
     hive_1_stage_3_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1_stage_3.out",gdrive_slot)
     hive_2_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_2.out",gdrive_slot)
     hive_3_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_3.out",gdrive_slot)
+    hive_4_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4.out",gdrive_slot)
 
     assert hive_1_stage_2_target_sheet.read(u.name).length == 219
     assert hive_1_stage_3_target_sheet.read(u.name).length > 3
     assert hive_2_target_sheet.read(u.name).length == 599
     assert hive_3_target_sheet.read(u.name).length == 347
+    assert hive_4_target_sheet.read(u.name).length == 432
   end
 
   def wait_for_stages(time_limit=600,stage_limit=120,wait_length=10)
metadata
CHANGED
@@ -1,29 +1,32 @@
 --- !ruby/object:Gem::Specification
 name: mobilize-hive
 version: !ruby/object:Gem::Version
-  version: '1.
+  version: '1.31'
+  prerelease:
 platform: ruby
 authors:
 - Cassio Paes-Leme
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-
+date: 2013-04-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mobilize-hdfs
   requirement: !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - '='
       - !ruby/object:Gem::Version
-        version: '1.
+        version: '1.31'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - '='
       - !ruby/object:Gem::Version
-        version: '1.
+        version: '1.31'
 description: Adds hive read, write, and run support to mobilize-hdfs
 email:
 - cpaesleme@dena.com
@@ -38,6 +41,7 @@ files:
 - Rakefile
 - lib/mobilize-hive.rb
 - lib/mobilize-hive/handlers/hive.rb
+- lib/mobilize-hive/helpers/hive_helper.rb
 - lib/mobilize-hive/tasks.rb
 - lib/mobilize-hive/version.rb
 - lib/samples/hive.yml
@@ -51,26 +55,33 @@ files:
 - test/test_helper.rb
 homepage: http://github.com/dena/mobilize-hive
 licenses: []
-metadata: {}
 post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
-  - - '>='
+  - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
+      segments:
+      - 0
+      hash: -4285752485316531029
 required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
-  - - '>='
+  - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
+      segments:
+      - 0
+      hash: -4285752485316531029
 requirements: []
 rubyforge_project:
-rubygems_version:
+rubygems_version: 1.8.25
 signing_key:
-specification_version:
+specification_version: 3
 summary: Adds hive read, write, and run support to mobilize-hdfs
 test_files:
 - test/hive_job_rows.yml
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
----
-SHA1:
-  metadata.gz: a7bf2935cac4914e2530e45a969942a5ae856e1c
-  data.tar.gz: 4b5b751411661d78e1ce3e4c65a8e979ffe3318b
-SHA512:
-  metadata.gz: b1a7f94de8452cb8aecdaba6e33b20dfeea208f86e046a8f9b48e2387758ef6fda9a74773775d1d7b7fe2e5631190d4958327fd747d526b4f2381c379f9a8b8d
-  data.tar.gz: 2a3e60b51db89a7e43ae465d9d0853f4cd875d9590b91d4b51ac7211debb7ea79c87b906a25eea1fbd8e4080ee60dac926dae3c69bf81f848d67c63b85cff407