mobilize-hive 1.376 → 1.377
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/mobilize-hive/handlers/hive.rb +21 -3
- data/lib/mobilize-hive/version.rb +1 -1
- data/mobilize-hive.gemspec +1 -1
- data/test/fixtures/hive5.in.yml +9 -0
- data/test/fixtures/integration_expected.yml +20 -0
- data/test/fixtures/integration_jobs.yml +11 -2
- data/test/integration/mobilize-hive_test.rb +2 -1
- metadata +6 -4
@@ -211,7 +211,7 @@ module Mobilize
|
|
211
211
|
schema_hash
|
212
212
|
end
|
213
213
|
|
214
|
-
def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path, drop=false, schema_hash=nil, run_params=nil)
|
214
|
+
def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path, drop=false, schema_hash=nil, run_params=nil,compress=false)
|
215
215
|
job_name = stage_path.sub("Runner_","")
|
216
216
|
table_path = [db,table].join(".")
|
217
217
|
table_stats = Hive.table_stats(cluster, db, table, user_name)
|
@@ -261,6 +261,12 @@ module Mobilize
|
|
261
261
|
#always drop when no partitions
|
262
262
|
target_name_hql = "set mapred.job.name=#{job_name};"
|
263
263
|
|
264
|
+
if compress
|
265
|
+
target_name_hql = target_name_hql+["set hive.exec.compress.output=true;",
|
266
|
+
"set mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;",
|
267
|
+
"set mapred.output.compression.type=BLOCK;"].join
|
268
|
+
end
|
269
|
+
|
264
270
|
target_drop_hql = "drop table if exists #{table_path};"
|
265
271
|
|
266
272
|
target_create_hql = "create table if not exists #{table_path} #{field_def_stmt};"
|
@@ -273,6 +279,10 @@ module Mobilize
|
|
273
279
|
target_insert_hql,
|
274
280
|
temp_drop_hql].join
|
275
281
|
|
282
|
+
|
283
|
+
puts "FULL HQL QUERY: " + target_full_hql
|
284
|
+
|
285
|
+
|
276
286
|
response = Hive.run(cluster, target_full_hql, user_name, run_params)
|
277
287
|
|
278
288
|
raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
|
@@ -313,6 +323,11 @@ module Mobilize
|
|
313
323
|
"set hive.exec.dynamic.partition=true;",
|
314
324
|
"set hive.exec.max.created.files = 200000;",
|
315
325
|
"set hive.max.created.files = 200000;"].join
|
326
|
+
if compress
|
327
|
+
target_set_hql = target_set_hql+["set hive.exec.compress.output=true;",
|
328
|
+
"set mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;",
|
329
|
+
"set mapred.output.compression.type=BLOCK;"].join
|
330
|
+
end
|
316
331
|
|
317
332
|
if drop or table_stats.nil?
|
318
333
|
target_drop_hql = "drop table if exists #{table_path};"
|
@@ -352,6 +367,8 @@ module Mobilize
|
|
352
367
|
|
353
368
|
target_full_hql = [target_set_hql, target_create_hql, target_insert_hql, temp_drop_hql].join
|
354
369
|
|
370
|
+
puts "FULL HQL QUERY: " + target_full_hql
|
371
|
+
|
355
372
|
response = Hive.run(cluster, target_full_hql, user_name, run_params)
|
356
373
|
raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
|
357
374
|
else
|
@@ -426,6 +443,7 @@ module Mobilize
|
|
426
443
|
end
|
427
444
|
#drop target before create/insert?
|
428
445
|
drop = params['drop']
|
446
|
+
compress = params['compress']
|
429
447
|
|
430
448
|
#determine source
|
431
449
|
source_tsv,source_hql = [nil]*2
|
@@ -464,7 +482,7 @@ module Mobilize
|
|
464
482
|
url = if source_hql
|
465
483
|
#include any params (or nil) at the end
|
466
484
|
run_params = params['params']
|
467
|
-
Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path,drop, schema_hash,run_params)
|
485
|
+
Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path,drop, schema_hash,run_params,compress)
|
468
486
|
elsif source_tsv
|
469
487
|
#first write tsv to temp table
|
470
488
|
temp_table_path = "#{Hive.output_db(cluster)}.temptsv_#{job_name.downcase.alphanunderscore}"
|
@@ -472,7 +490,7 @@ module Mobilize
|
|
472
490
|
if has_data
|
473
491
|
#then do the regular insert, with source hql being select * from temp table
|
474
492
|
source_hql = "select * from #{temp_table_path}"
|
475
|
-
Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path, drop, schema_hash)
|
493
|
+
Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path, drop, schema_hash,nil,compress)
|
476
494
|
else
|
477
495
|
nil
|
478
496
|
end
|
data/mobilize-hive.gemspec
CHANGED
@@ -17,5 +17,5 @@ Gem::Specification.new do |gem|
|
|
17
17
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
18
18
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
19
19
|
gem.require_paths = ["lib"]
|
20
|
-
gem.add_runtime_dependency "mobilize-hdfs","1.376"
|
20
|
+
gem.add_runtime_dependency "mobilize-hdfs","1.377"
|
21
21
|
end
|
@@ -67,3 +67,23 @@
|
|
67
67
|
state: working
|
68
68
|
count: 1
|
69
69
|
confirmed_ats: []
|
70
|
+
- path: "Runner_mobilize(test)/jobs/hive5/stage1"
|
71
|
+
state: working
|
72
|
+
count: 1
|
73
|
+
confirmed_ats: []
|
74
|
+
- path: "Runner_mobilize(test)/jobs/hive5/stage2"
|
75
|
+
state: working
|
76
|
+
count: 1
|
77
|
+
confirmed_ats: []
|
78
|
+
- path: "Runner_mobilize(test)/jobs/hive5/stage3"
|
79
|
+
state: working
|
80
|
+
count: 1
|
81
|
+
confirmed_ats: []
|
82
|
+
- path: "Runner_mobilize(test)/jobs/hive5/stage4"
|
83
|
+
state: working
|
84
|
+
count: 1
|
85
|
+
confirmed_ats: []
|
86
|
+
- path: "Runner_mobilize(test)/jobs/hive5/stage5"
|
87
|
+
state: working
|
88
|
+
count: 1
|
89
|
+
confirmed_ats: []
|
@@ -13,7 +13,7 @@
|
|
13
13
|
active: true
|
14
14
|
trigger: after hive1
|
15
15
|
status: ""
|
16
|
-
stage1: hive.write save_logs:true, retries:3, source:"hdfs://user/mobilize/test/hdfs1.out", target:"mobilize.hive2", drop:true
|
16
|
+
stage1: hive.write save_logs:true, retries:3, source:"hdfs://user/mobilize/test/hdfs1.out", target:"mobilize.hive2", drop:true, compress:true
|
17
17
|
stage2: hive.run save_logs:true, retries:3, hql:"select * from mobilize.hive2;"
|
18
18
|
stage3: gsheet.write source:"stage2", target:"hive2.out"
|
19
19
|
- name: hive3
|
@@ -22,7 +22,7 @@
|
|
22
22
|
status: ""
|
23
23
|
stage1: hive.run save_logs:true, retries:3, hql:"select '@date' as `date`,product,category,value from mobilize.hive1;", params:{'date':'2013-01-01'}
|
24
24
|
stage2: hive.write save_logs:true, retries:3, source:"stage1",target:"mobilize/hive3", partitions:"date/product", drop:true
|
25
|
-
stage3: hive.write save_logs:true, retries:3, hql:"select * from mobilize.hive3;",target:"mobilize/hive3", partitions:"date/product", drop:false
|
25
|
+
stage3: hive.write save_logs:true, retries:3, hql:"select * from mobilize.hive3;",target:"mobilize/hive3", partitions:"date/product", drop:false, compress:true
|
26
26
|
stage4: gsheet.write source:"hive://mobilize/hive3", target:"hive3.out"
|
27
27
|
- name: hive4
|
28
28
|
active: true
|
@@ -32,3 +32,12 @@
|
|
32
32
|
stage2: hive.write save_logs:true, retries:3, source:"hive4_stage2.in", target:"mobilize/hive1", partitions:"act_date"
|
33
33
|
stage3: hive.run save_logs:true, retries:3, hql:"select '@date $utc_time' as `date_time`,product,category,value from mobilize.hive1;", params:{'date':'$utc_date'}
|
34
34
|
stage4: gsheet.write source:stage3, target:"hive4.out"
|
35
|
+
- name: hive5
|
36
|
+
active: true
|
37
|
+
trigger: after hive4
|
38
|
+
status: ""
|
39
|
+
stage1: hive.write save_logs:true, retries:3, source:"hive5.in", target:"mobilize/hive5_in", drop:true
|
40
|
+
stage2: hive.write save_logs:true, retries:3, hql:"select act_date,product,category,value from mobilize.hive5_in where act_date='@date'", target:"mobilize/hive5", partitions:"act_date", params:{'date':'2013-01-01'}, drop:true
|
41
|
+
stage3: hive.write save_logs:true, retries:3, hql:"select act_date,product,category,value from mobilize.hive5_in where act_date='@date'", target:"mobilize/hive5", partitions:"act_date", params:{'date':'2013-01-02'}, compress:true
|
42
|
+
stage4: hive.run save_logs:true, retries:3, hql:"select act_date,product,category,value from mobilize.hive5;"
|
43
|
+
stage5: gsheet.write source:stage4, target:"hive5.out"
|
@@ -20,7 +20,7 @@ describe "Mobilize" do
|
|
20
20
|
assert Mobilize::Jobtracker.workers.length == Mobilize::Resque.config['max_workers'].to_i
|
21
21
|
|
22
22
|
puts "add test data"
|
23
|
-
["hive1.in","hive4_stage1.in","hive4_stage2.in","hive1.schema","hive1.sql"].each do |fixture_name|
|
23
|
+
["hive1.in","hive4_stage1.in","hive4_stage2.in","hive1.schema","hive1.sql", "hive5.in"].each do |fixture_name|
|
24
24
|
target_url = "gsheet://#{r.title}/#{fixture_name}"
|
25
25
|
TestHelper.write_fixture(fixture_name, target_url, 'replace')
|
26
26
|
end
|
@@ -47,5 +47,6 @@ describe "Mobilize" do
|
|
47
47
|
assert TestHelper.check_output("gsheet://#{r.title}/hive2.out", 'min_length' => 599) == true
|
48
48
|
assert TestHelper.check_output("gsheet://#{r.title}/hive3.out", 'min_length' => 347) == true
|
49
49
|
assert TestHelper.check_output("gsheet://#{r.title}/hive4.out", 'min_length' => 432) == true
|
50
|
+
assert TestHelper.check_output("gsheet://#{r.title}/hive5.out", 'min_length' => 500) == true
|
50
51
|
end
|
51
52
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mobilize-hive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '1.376'
|
4
|
+
version: '1.377'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2013-08-
|
13
|
+
date: 2013-08-28 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: mobilize-hdfs
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
requirements:
|
20
20
|
- - '='
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '1.376'
|
22
|
+
version: '1.377'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -27,7 +27,7 @@ dependencies:
|
|
27
27
|
requirements:
|
28
28
|
- - '='
|
29
29
|
- !ruby/object:Gem::Version
|
30
|
-
version: '1.376'
|
30
|
+
version: '1.377'
|
31
31
|
description: Adds hive read, write, and run support to mobilize-hdfs
|
32
32
|
email:
|
33
33
|
- cpaesleme@dena.com
|
@@ -54,6 +54,7 @@ files:
|
|
54
54
|
- test/fixtures/hive1.sql
|
55
55
|
- test/fixtures/hive4_stage1.in
|
56
56
|
- test/fixtures/hive4_stage2.in.yml
|
57
|
+
- test/fixtures/hive5.in.yml
|
57
58
|
- test/fixtures/integration_expected.yml
|
58
59
|
- test/fixtures/integration_jobs.yml
|
59
60
|
- test/integration/mobilize-hive_test.rb
|
@@ -91,6 +92,7 @@ test_files:
|
|
91
92
|
- test/fixtures/hive1.sql
|
92
93
|
- test/fixtures/hive4_stage1.in
|
93
94
|
- test/fixtures/hive4_stage2.in.yml
|
95
|
+
- test/fixtures/hive5.in.yml
|
94
96
|
- test/fixtures/integration_expected.yml
|
95
97
|
- test/fixtures/integration_jobs.yml
|
96
98
|
- test/integration/mobilize-hive_test.rb
|