mobilize-hive 1.292 → 1.294
- data/lib/mobilize-hive/handlers/hive.rb +11 -4
- data/lib/mobilize-hive/version.rb +1 -1
- data/mobilize-hive.gemspec +1 -1
- data/test/hive_job_rows.yml +6 -0
- data/test/mobilize-hive_test.rb +13 -1
- metadata +19 -9
- checksums.yaml +0 -7
data/lib/mobilize-hive/handlers/hive.rb
CHANGED
@@ -306,7 +306,7 @@ module Mobilize
         raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}

       elsif part_array.length > 0 and
-        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']} == part_array}
+        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']}.sort == part_array.sort}
         #partitions and no target table or same partitions in both target table and user params

         target_headers = source_fields.map{|f| f['name']}.reject{|h| part_array.include?(h)}
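Both partition checks in this file (this hunk and the one at old line 427 below) replace an exact, order-sensitive array comparison with an order-insensitive one. A minimal standalone Ruby sketch of the effect (the shape of table_stats['partitions'] is inferred from the diff; the Mobilize ie helper is left out and plain comparisons are used):

  # Hypothetical partition metadata matching the shape used in the diff
  existing = [{'name' => 'product'}, {'name' => 'date'}]
  requested = ['date', 'product']

  # 1.292 behavior: exact comparison fails when only the order differs
  puts existing.map{|p| p['name']} == requested              # => false

  # 1.294 behavior: sorting both sides ignores partition order
  puts existing.map{|p| p['name']}.sort == requested.sort    # => true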
@@ -387,6 +387,7 @@ module Mobilize
     #Accepts options to drop existing target if any
     #also schema with column datatype overrides
     def Hive.tsv_to_table(cluster, db, table, part_array, source_tsv, user_name, drop=false, schema_hash=nil)
+      return nil if source_tsv.strip.length==0
       source_headers = source_tsv.tsv_header_array

       table_path = [db,table].join(".")
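The new guard at new line 390 makes Hive.tsv_to_table return nil when the incoming TSV is blank, so an empty source sheet never reaches the header parsing below it. The condition itself is plain Ruby:

  # Whitespace-only input counts as empty once stripped
  puts "".strip.length == 0                    # => true
  puts " \t\n".strip.length == 0               # => true
  puts "act_date\tproduct".strip.length == 0   # => false, so a real TSV proceeds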
@@ -394,6 +395,8 @@ module Mobilize

       schema_hash ||= {}

+      url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
+
       if part_array.length == 0 and
         table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].nil?}
         #no partitions in either user params or the target table
@@ -424,7 +427,7 @@ module Mobilize
         raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}

       elsif part_array.length > 0 and
-        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']} == part_array}
+        table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']}.sort == part_array.sort}
         #partitions and no target table
         #or same partitions in both target table and user params
         #or drop and start fresh
@@ -451,11 +454,14 @@ module Mobilize
         response = Hive.run(cluster, target_create_hql, user_name)
         raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}

+        #return url (operation complete) if there's no data
+        source_hash_array = source_tsv.tsv_to_hash_array
+        return url if source_hash_array.length==1 and source_hash_array.first.values.compact.length==0
+
         table_stats = Hive.table_stats(cluster, db, table, user_name)

         #create data hash from source hash array
         data_hash = {}
-        source_hash_array = source_tsv.tsv_to_hash_array
         source_hash_array.each do |ha|
           tpmk = part_array.map{|pn| "#{pn}=#{ha[pn]}"}.join("/")
           tpmv = ha.reject{|k,v| part_array.include?(k)}.values.join("\001")
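Together with the hunk at new line 398, which now builds the hive:// url before any data is loaded, this block short-circuits when the source sheet holds only a header row: the target table has already been created, so the url is returned and the partition write logic below is skipped. A rough sketch of the check (tsv_to_hash_array is a Mobilize string extension not shown in this diff; the single-row-of-nils shape is an assumption implied by the guard):

  # Hypothetical parse of a header-only TSV: one row, every value nil
  source_hash_array = [{'act_date' => nil, 'product' => nil, 'value' => nil}]

  header_only = source_hash_array.length == 1 &&
                source_hash_array.first.values.compact.length == 0
  puts header_only   # => true, so the method returns the url early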
@@ -498,7 +504,6 @@ module Mobilize
         raise error_msg
       end

-      url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
       return url
     end

@@ -564,6 +569,8 @@ module Mobilize
         Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop, schema_hash)
       elsif source_tsv
         Hive.tsv_to_table(cluster, db, table, part_array, source_tsv, user_name, drop, schema_hash)
+      elsif source
+        #null sheet
       else
         raise "Unable to determine source tsv or source hql"
       end
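The new elsif source branch covers a "null sheet": a source was supplied but yielded neither HQL nor usable TSV, so the write stage now completes quietly instead of raising. A simplified, standalone sketch of the branch ordering (the resolve_write helper is illustrative only; the real handler sets these variables up earlier in the method, outside this hunk):

  # Hypothetical condensed version of the write dispatch after this change
  def resolve_write(source, source_hql, source_tsv)
    if source_hql
      :hql_to_table
    elsif source_tsv
      :tsv_to_table
    elsif source
      :noop_null_sheet   # blank source sheet: nothing to load, nothing to raise
    else
      raise "Unable to determine source tsv or source hql"
    end
  end

  puts resolve_write("hive_test_4_stage_1.in", nil, nil)   # => noop_null_sheet (raised in 1.292)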
data/mobilize-hive.gemspec
CHANGED
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
   gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
   gem.require_paths = ["lib"]
-  gem.add_runtime_dependency "mobilize-hdfs","1.
+  gem.add_runtime_dependency "mobilize-hdfs","1.294"
 end
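The dependency pin moves to 1.294 in lock-step with the gem version. A bare version string passed to add_runtime_dependency is an exact requirement, which is why the regenerated metadata below records it with '='. A quick standalone check against the RubyGems API:

  require 'rubygems'

  req = Gem::Requirement.new("1.294")
  puts req                                             # => = 1.294
  puts req.satisfied_by?(Gem::Version.new("1.294"))    # => true
  puts req.satisfied_by?(Gem::Version.new("1.295"))    # => false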
data/test/hive_job_rows.yml
CHANGED
@@ -24,3 +24,9 @@
   stage2: hive.write source:"stage1",target:"mobilize/hive_test_3", partitions:"date/product", drop:true
   stage3: hive.write hql:"select * from mobilize.hive_test_3;",target:"mobilize/hive_test_3", partitions:"date/product", drop:false
   stage4: gsheet.write source:"hive://mobilize/hive_test_3", target:"hive_test_3.out"
+- name: hive_test_4
+  active: true
+  trigger: after hive_test_2
+  status: ""
+  stage1: hive.write source:"hive_test_4_stage_1.in", target:"mobilize/hive_test_1", partitions:"act_date"
+  stage2: hive.write source:"hive_test_4_stage_2.in", target:"mobilize/hive_test_1", partitions:"act_date"
data/test/mobilize-hive_test.rb
CHANGED
@@ -25,6 +25,18 @@ describe "Mobilize" do
     hive_1_in_tsv = YAML.load_file("#{Mobilize::Base.root}/test/hive_test_1_in.yml").hash_array_to_tsv
     hive_1_in_sheet.write(hive_1_in_tsv,Mobilize::Gdrive.owner_name)

+    #create blank sheet
+    hive_4_stage_1_in_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_1.in",gdrive_slot)
+    [hive_4_stage_1_in_sheet].each {|s| s.delete if s}
+    hive_4_stage_1_in_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_1.in",gdrive_slot)
+
+    #create sheet w just headers
+    hive_4_stage_2_in_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_2.in",gdrive_slot)
+    [hive_4_stage_2_in_sheet].each {|s| s.delete if s}
+    hive_4_stage_2_in_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_2.in",gdrive_slot)
+    hive_4_stage_2_in_sheet_header = hive_1_in_tsv.tsv_header_array.join("\t")
+    hive_4_stage_2_in_sheet.write(hive_4_stage_2_in_sheet_header,Mobilize::Gdrive.owner_name)
+
     hive_1_schema_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1.schema",gdrive_slot)
     [hive_1_schema_sheet].each {|s| s.delete if s}
     hive_1_schema_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1.schema",gdrive_slot)
@@ -54,7 +66,7 @@ describe "Mobilize" do

     puts "job row added, force enqueued requestor, wait for stages"
     r.enqueue!
-    wait_for_stages(
+    wait_for_stages(2100)

     puts "jobtracker posted data to test sheet"
     hive_1_stage_2_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1_stage_2.out",gdrive_slot)
metadata
CHANGED
@@ -1,29 +1,32 @@
 --- !ruby/object:Gem::Specification
 name: mobilize-hive
 version: !ruby/object:Gem::Version
-  version: '1.
+  version: '1.294'
+  prerelease:
 platform: ruby
 authors:
 - Cassio Paes-Leme
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-
+date: 2013-04-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mobilize-hdfs
   requirement: !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - '='
       - !ruby/object:Gem::Version
-        version: '1.
+        version: '1.294'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
+    none: false
     requirements:
     - - '='
       - !ruby/object:Gem::Version
-        version: '1.
+        version: '1.294'
 description: Adds hive read, write, and run support to mobilize-hdfs
 email:
 - cpaesleme@dena.com
@@ -51,26 +54,33 @@ files:
 - test/test_helper.rb
 homepage: http://github.com/dena/mobilize-hive
 licenses: []
-metadata: {}
 post_install_message:
 rdoc_options: []
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
-  - - '>='
+  - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
+      segments:
+      - 0
+      hash: 2996483111251873179
 required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
   requirements:
-  - - '>='
+  - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
+      segments:
+      - 0
+      hash: 2996483111251873179
 requirements: []
 rubyforge_project:
-rubygems_version:
+rubygems_version: 1.8.25
 signing_key:
-specification_version:
+specification_version: 3
 summary: Adds hive read, write, and run support to mobilize-hdfs
 test_files:
 - test/hive_job_rows.yml
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
----
-SHA1:
-  metadata.gz: c6d52c5de86351ccf5ab3c4da09b7341f27e5163
-  data.tar.gz: 3e560d834babc1377d8e3c6c80b799e9c4655acf
-SHA512:
-  metadata.gz: d910e35cefda69f9640105949454b04b6d08df9069e2e9fdb61b9875806c921fb187701977afee7efcc3a5b84dadbbd91abf4d6bf9b4817bcf4f3c1945effbfb
-  data.tar.gz: 9a41441ce0ddf76b81fe63350a2ece3d1cc5ab0daccb1bc09f6db394dcb618078c4d01c1893d3a16090656dfdd36ec5fa8332aa5a699d096abcd85b726a1efa2