mobilize-hive 1.376 → 1.377

Sign up to get free protection for your applications and to get access to all the features.
@@ -211,7 +211,7 @@ module Mobilize
211
211
  schema_hash
212
212
  end
213
213
 
214
- def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path, drop=false, schema_hash=nil, run_params=nil)
214
+ def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path, drop=false, schema_hash=nil, run_params=nil,compress=false)
215
215
  job_name = stage_path.sub("Runner_","")
216
216
  table_path = [db,table].join(".")
217
217
  table_stats = Hive.table_stats(cluster, db, table, user_name)
@@ -261,6 +261,12 @@ module Mobilize
261
261
  #always drop when no partitions
262
262
  target_name_hql = "set mapred.job.name=#{job_name};"
263
263
 
264
+ if compress
265
+ target_name_hql = target_name_hql+["set hive.exec.compress.output=true;",
266
+ "set mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;",
267
+ "set mapred.output.compression.type=BLOCK;"].join
268
+ end
269
+
264
270
  target_drop_hql = "drop table if exists #{table_path};"
265
271
 
266
272
  target_create_hql = "create table if not exists #{table_path} #{field_def_stmt};"
@@ -273,6 +279,10 @@ module Mobilize
273
279
  target_insert_hql,
274
280
  temp_drop_hql].join
275
281
 
282
+
283
+ puts "FULL HQL QUERY: " + target_full_hql
284
+
285
+
276
286
  response = Hive.run(cluster, target_full_hql, user_name, run_params)
277
287
 
278
288
  raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
@@ -313,6 +323,11 @@ module Mobilize
313
323
  "set hive.exec.dynamic.partition=true;",
314
324
  "set hive.exec.max.created.files = 200000;",
315
325
  "set hive.max.created.files = 200000;"].join
326
+ if compress
327
+ target_set_hql = target_set_hql+["set hive.exec.compress.output=true;",
328
+ "set mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;",
329
+ "set mapred.output.compression.type=BLOCK;"].join
330
+ end
316
331
 
317
332
  if drop or table_stats.nil?
318
333
  target_drop_hql = "drop table if exists #{table_path};"
@@ -352,6 +367,8 @@ module Mobilize
352
367
 
353
368
  target_full_hql = [target_set_hql, target_create_hql, target_insert_hql, temp_drop_hql].join
354
369
 
370
+ puts "FULL HQL QUERY: " + target_full_hql
371
+
355
372
  response = Hive.run(cluster, target_full_hql, user_name, run_params)
356
373
  raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
357
374
  else
@@ -426,6 +443,7 @@ module Mobilize
426
443
  end
427
444
  #drop target before create/insert?
428
445
  drop = params['drop']
446
+ compress = params['compress']
429
447
 
430
448
  #determine source
431
449
  source_tsv,source_hql = [nil]*2
@@ -464,7 +482,7 @@ module Mobilize
464
482
  url = if source_hql
465
483
  #include any params (or nil) at the end
466
484
  run_params = params['params']
467
- Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path,drop, schema_hash,run_params)
485
+ Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path,drop, schema_hash,run_params,compress)
468
486
  elsif source_tsv
469
487
  #first write tsv to temp table
470
488
  temp_table_path = "#{Hive.output_db(cluster)}.temptsv_#{job_name.downcase.alphanunderscore}"
@@ -472,7 +490,7 @@ module Mobilize
472
490
  if has_data
473
491
  #then do the regular insert, with source hql being select * from temp table
474
492
  source_hql = "select * from #{temp_table_path}"
475
- Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path, drop, schema_hash)
493
+ Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, stage_path, drop, schema_hash,nil,compress)
476
494
  else
477
495
  nil
478
496
  end
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Hive
3
- VERSION = "1.376"
3
+ VERSION = "1.377"
4
4
  end
5
5
  end
@@ -17,5 +17,5 @@ Gem::Specification.new do |gem|
17
17
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
18
18
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
19
19
  gem.require_paths = ["lib"]
20
- gem.add_runtime_dependency "mobilize-hdfs","1.376"
20
+ gem.add_runtime_dependency "mobilize-hdfs","1.377"
21
21
  end
@@ -0,0 +1,9 @@
1
+ ---
2
+ - act_date: 2013-01-01
3
+ product: product1
4
+ category: category1
5
+ value: 7
6
+ - act_date: 2013-01-02
7
+ product: product1
8
+ category: category1
9
+ value: 8
@@ -67,3 +67,23 @@
67
67
  state: working
68
68
  count: 1
69
69
  confirmed_ats: []
70
+ - path: "Runner_mobilize(test)/jobs/hive5/stage1"
71
+ state: working
72
+ count: 1
73
+ confirmed_ats: []
74
+ - path: "Runner_mobilize(test)/jobs/hive5/stage2"
75
+ state: working
76
+ count: 1
77
+ confirmed_ats: []
78
+ - path: "Runner_mobilize(test)/jobs/hive5/stage3"
79
+ state: working
80
+ count: 1
81
+ confirmed_ats: []
82
+ - path: "Runner_mobilize(test)/jobs/hive5/stage4"
83
+ state: working
84
+ count: 1
85
+ confirmed_ats: []
86
+ - path: "Runner_mobilize(test)/jobs/hive5/stage5"
87
+ state: working
88
+ count: 1
89
+ confirmed_ats: []
@@ -13,7 +13,7 @@
13
13
  active: true
14
14
  trigger: after hive1
15
15
  status: ""
16
- stage1: hive.write save_logs:true, retries:3, source:"hdfs://user/mobilize/test/hdfs1.out", target:"mobilize.hive2", drop:true
16
+ stage1: hive.write save_logs:true, retries:3, source:"hdfs://user/mobilize/test/hdfs1.out", target:"mobilize.hive2", drop:true, compress:true
17
17
  stage2: hive.run save_logs:true, retries:3, hql:"select * from mobilize.hive2;"
18
18
  stage3: gsheet.write source:"stage2", target:"hive2.out"
19
19
  - name: hive3
@@ -22,7 +22,7 @@
22
22
  status: ""
23
23
  stage1: hive.run save_logs:true, retries:3, hql:"select '@date' as `date`,product,category,value from mobilize.hive1;", params:{'date':'2013-01-01'}
24
24
  stage2: hive.write save_logs:true, retries:3, source:"stage1",target:"mobilize/hive3", partitions:"date/product", drop:true
25
- stage3: hive.write save_logs:true, retries:3, hql:"select * from mobilize.hive3;",target:"mobilize/hive3", partitions:"date/product", drop:false
25
+ stage3: hive.write save_logs:true, retries:3, hql:"select * from mobilize.hive3;",target:"mobilize/hive3", partitions:"date/product", drop:false, compress:true
26
26
  stage4: gsheet.write source:"hive://mobilize/hive3", target:"hive3.out"
27
27
  - name: hive4
28
28
  active: true
@@ -32,3 +32,12 @@
32
32
  stage2: hive.write save_logs:true, retries:3, source:"hive4_stage2.in", target:"mobilize/hive1", partitions:"act_date"
33
33
  stage3: hive.run save_logs:true, retries:3, hql:"select '@date $utc_time' as `date_time`,product,category,value from mobilize.hive1;", params:{'date':'$utc_date'}
34
34
  stage4: gsheet.write source:stage3, target:"hive4.out"
35
+ - name: hive5
36
+ active: true
37
+ trigger: after hive4
38
+ status: ""
39
+ stage1: hive.write save_logs:true, retries:3, source:"hive5.in", target:"mobilize/hive5_in", drop:true
40
+ stage2: hive.write save_logs:true, retries:3, hql:"select act_date,product,category,value from mobilize.hive5_in where act_date='@date'", target:"mobilize/hive5", partitions:"act_date", params:{'date':'2013-01-01'}, drop:true
41
+ stage3: hive.write save_logs:true, retries:3, hql:"select act_date,product,category,value from mobilize.hive5_in where act_date='@date'", target:"mobilize/hive5", partitions:"act_date", params:{'date':'2013-01-02'}, compress:true
42
+ stage4: hive.run save_logs:true, retries:3, hql:"select act_date,product,category,value from mobilize.hive5;"
43
+ stage5: gsheet.write source:stage4, target:"hive5.out"
@@ -20,7 +20,7 @@ describe "Mobilize" do
20
20
  assert Mobilize::Jobtracker.workers.length == Mobilize::Resque.config['max_workers'].to_i
21
21
 
22
22
  puts "add test data"
23
- ["hive1.in","hive4_stage1.in","hive4_stage2.in","hive1.schema","hive1.sql"].each do |fixture_name|
23
+ ["hive1.in","hive4_stage1.in","hive4_stage2.in","hive1.schema","hive1.sql", "hive5.in"].each do |fixture_name|
24
24
  target_url = "gsheet://#{r.title}/#{fixture_name}"
25
25
  TestHelper.write_fixture(fixture_name, target_url, 'replace')
26
26
  end
@@ -47,5 +47,6 @@ describe "Mobilize" do
47
47
  assert TestHelper.check_output("gsheet://#{r.title}/hive2.out", 'min_length' => 599) == true
48
48
  assert TestHelper.check_output("gsheet://#{r.title}/hive3.out", 'min_length' => 347) == true
49
49
  assert TestHelper.check_output("gsheet://#{r.title}/hive4.out", 'min_length' => 432) == true
50
+ assert TestHelper.check_output("gsheet://#{r.title}/hive5.out", 'min_length' => 500) == true
50
51
  end
51
52
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-hive
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.376'
4
+ version: '1.377'
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2013-08-21 00:00:00.000000000 Z
13
+ date: 2013-08-28 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: mobilize-hdfs
@@ -19,7 +19,7 @@ dependencies:
19
19
  requirements:
20
20
  - - '='
21
21
  - !ruby/object:Gem::Version
22
- version: '1.376'
22
+ version: '1.377'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -27,7 +27,7 @@ dependencies:
27
27
  requirements:
28
28
  - - '='
29
29
  - !ruby/object:Gem::Version
30
- version: '1.376'
30
+ version: '1.377'
31
31
  description: Adds hive read, write, and run support to mobilize-hdfs
32
32
  email:
33
33
  - cpaesleme@dena.com
@@ -54,6 +54,7 @@ files:
54
54
  - test/fixtures/hive1.sql
55
55
  - test/fixtures/hive4_stage1.in
56
56
  - test/fixtures/hive4_stage2.in.yml
57
+ - test/fixtures/hive5.in.yml
57
58
  - test/fixtures/integration_expected.yml
58
59
  - test/fixtures/integration_jobs.yml
59
60
  - test/integration/mobilize-hive_test.rb
@@ -91,6 +92,7 @@ test_files:
91
92
  - test/fixtures/hive1.sql
92
93
  - test/fixtures/hive4_stage1.in
93
94
  - test/fixtures/hive4_stage2.in.yml
95
+ - test/fixtures/hive5.in.yml
94
96
  - test/fixtures/integration_expected.yml
95
97
  - test/fixtures/integration_jobs.yml
96
98
  - test/integration/mobilize-hive_test.rb