mobilize-hive 1.292 → 1.294
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/mobilize-hive/handlers/hive.rb +11 -4
- data/lib/mobilize-hive/version.rb +1 -1
- data/mobilize-hive.gemspec +1 -1
- data/test/hive_job_rows.yml +6 -0
- data/test/mobilize-hive_test.rb +13 -1
- metadata +19 -9
- checksums.yaml +0 -7
|
@@ -306,7 +306,7 @@ module Mobilize
|
|
|
306
306
|
raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
|
|
307
307
|
|
|
308
308
|
elsif part_array.length > 0 and
|
|
309
|
-
table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']} == part_array}
|
|
309
|
+
table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']}.sort == part_array.sort}
|
|
310
310
|
#partitions and no target table or same partitions in both target table and user params
|
|
311
311
|
|
|
312
312
|
target_headers = source_fields.map{|f| f['name']}.reject{|h| part_array.include?(h)}
|
|
@@ -387,6 +387,7 @@ module Mobilize
|
|
|
387
387
|
#Accepts options to drop existing target if any
|
|
388
388
|
#also schema with column datatype overrides
|
|
389
389
|
def Hive.tsv_to_table(cluster, db, table, part_array, source_tsv, user_name, drop=false, schema_hash=nil)
|
|
390
|
+
return nil if source_tsv.strip.length==0
|
|
390
391
|
source_headers = source_tsv.tsv_header_array
|
|
391
392
|
|
|
392
393
|
table_path = [db,table].join(".")
|
|
@@ -394,6 +395,8 @@ module Mobilize
|
|
|
394
395
|
|
|
395
396
|
schema_hash ||= {}
|
|
396
397
|
|
|
398
|
+
url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
|
|
399
|
+
|
|
397
400
|
if part_array.length == 0 and
|
|
398
401
|
table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].nil?}
|
|
399
402
|
#no partitions in either user params or the target table
|
|
@@ -424,7 +427,7 @@ module Mobilize
|
|
|
424
427
|
raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
|
|
425
428
|
|
|
426
429
|
elsif part_array.length > 0 and
|
|
427
|
-
table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']} == part_array}
|
|
430
|
+
table_stats.ie{|tts| tts.nil? || drop || tts['partitions'].to_a.map{|p| p['name']}.sort == part_array.sort}
|
|
428
431
|
#partitions and no target table
|
|
429
432
|
#or same partitions in both target table and user params
|
|
430
433
|
#or drop and start fresh
|
|
@@ -451,11 +454,14 @@ module Mobilize
|
|
|
451
454
|
response = Hive.run(cluster, target_create_hql, user_name)
|
|
452
455
|
raise response['stderr'] if response['stderr'].to_s.ie{|s| s.index("FAILED") or s.index("KILLED")}
|
|
453
456
|
|
|
457
|
+
#return url (operation complete) if there's no data
|
|
458
|
+
source_hash_array = source_tsv.tsv_to_hash_array
|
|
459
|
+
return url if source_hash_array.length==1 and source_hash_array.first.values.compact.length==0
|
|
460
|
+
|
|
454
461
|
table_stats = Hive.table_stats(cluster, db, table, user_name)
|
|
455
462
|
|
|
456
463
|
#create data hash from source hash array
|
|
457
464
|
data_hash = {}
|
|
458
|
-
source_hash_array = source_tsv.tsv_to_hash_array
|
|
459
465
|
source_hash_array.each do |ha|
|
|
460
466
|
tpmk = part_array.map{|pn| "#{pn}=#{ha[pn]}"}.join("/")
|
|
461
467
|
tpmv = ha.reject{|k,v| part_array.include?(k)}.values.join("\001")
|
|
@@ -498,7 +504,6 @@ module Mobilize
|
|
|
498
504
|
raise error_msg
|
|
499
505
|
end
|
|
500
506
|
|
|
501
|
-
url = "hive://" + [cluster,db,table,part_array.compact.join("/")].join("/")
|
|
502
507
|
return url
|
|
503
508
|
end
|
|
504
509
|
|
|
@@ -564,6 +569,8 @@ module Mobilize
|
|
|
564
569
|
Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop, schema_hash)
|
|
565
570
|
elsif source_tsv
|
|
566
571
|
Hive.tsv_to_table(cluster, db, table, part_array, source_tsv, user_name, drop, schema_hash)
|
|
572
|
+
elsif source
|
|
573
|
+
#null sheet
|
|
567
574
|
else
|
|
568
575
|
raise "Unable to determine source tsv or source hql"
|
|
569
576
|
end
|
data/mobilize-hive.gemspec
CHANGED
|
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
|
|
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
|
18
18
|
gem.require_paths = ["lib"]
|
|
19
|
-
gem.add_runtime_dependency "mobilize-hdfs","1.
|
|
19
|
+
gem.add_runtime_dependency "mobilize-hdfs","1.294"
|
|
20
20
|
end
|
data/test/hive_job_rows.yml
CHANGED
|
@@ -24,3 +24,9 @@
|
|
|
24
24
|
stage2: hive.write source:"stage1",target:"mobilize/hive_test_3", partitions:"date/product", drop:true
|
|
25
25
|
stage3: hive.write hql:"select * from mobilize.hive_test_3;",target:"mobilize/hive_test_3", partitions:"date/product", drop:false
|
|
26
26
|
stage4: gsheet.write source:"hive://mobilize/hive_test_3", target:"hive_test_3.out"
|
|
27
|
+
- name: hive_test_4
|
|
28
|
+
active: true
|
|
29
|
+
trigger: after hive_test_2
|
|
30
|
+
status: ""
|
|
31
|
+
stage1: hive.write source:"hive_test_4_stage_1.in", target:"mobilize/hive_test_1", partitions:"act_date"
|
|
32
|
+
stage2: hive.write source:"hive_test_4_stage_2.in", target:"mobilize/hive_test_1", partitions:"act_date"
|
data/test/mobilize-hive_test.rb
CHANGED
|
@@ -25,6 +25,18 @@ describe "Mobilize" do
|
|
|
25
25
|
hive_1_in_tsv = YAML.load_file("#{Mobilize::Base.root}/test/hive_test_1_in.yml").hash_array_to_tsv
|
|
26
26
|
hive_1_in_sheet.write(hive_1_in_tsv,Mobilize::Gdrive.owner_name)
|
|
27
27
|
|
|
28
|
+
#create blank sheet
|
|
29
|
+
hive_4_stage_1_in_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_1.in",gdrive_slot)
|
|
30
|
+
[hive_4_stage_1_in_sheet].each {|s| s.delete if s}
|
|
31
|
+
hive_4_stage_1_in_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_1.in",gdrive_slot)
|
|
32
|
+
|
|
33
|
+
#create sheet w just headers
|
|
34
|
+
hive_4_stage_2_in_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_2.in",gdrive_slot)
|
|
35
|
+
[hive_4_stage_2_in_sheet].each {|s| s.delete if s}
|
|
36
|
+
hive_4_stage_2_in_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_4_stage_2.in",gdrive_slot)
|
|
37
|
+
hive_4_stage_2_in_sheet_header = hive_1_in_tsv.tsv_header_array.join("\t")
|
|
38
|
+
hive_4_stage_2_in_sheet.write(hive_4_stage_2_in_sheet_header,Mobilize::Gdrive.owner_name)
|
|
39
|
+
|
|
28
40
|
hive_1_schema_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1.schema",gdrive_slot)
|
|
29
41
|
[hive_1_schema_sheet].each {|s| s.delete if s}
|
|
30
42
|
hive_1_schema_sheet = Mobilize::Gsheet.find_or_create_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1.schema",gdrive_slot)
|
|
@@ -54,7 +66,7 @@ describe "Mobilize" do
|
|
|
54
66
|
|
|
55
67
|
puts "job row added, force enqueued requestor, wait for stages"
|
|
56
68
|
r.enqueue!
|
|
57
|
-
wait_for_stages(
|
|
69
|
+
wait_for_stages(2100)
|
|
58
70
|
|
|
59
71
|
puts "jobtracker posted data to test sheet"
|
|
60
72
|
hive_1_stage_2_target_sheet = Mobilize::Gsheet.find_by_path("#{r.path.split("/")[0..-2].join("/")}/hive_test_1_stage_2.out",gdrive_slot)
|
metadata
CHANGED
|
@@ -1,29 +1,32 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mobilize-hive
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: '1.292'
|
|
4
|
+
version: '1.294'
|
|
5
|
+
prerelease:
|
|
5
6
|
platform: ruby
|
|
6
7
|
authors:
|
|
7
8
|
- Cassio Paes-Leme
|
|
8
9
|
autorequire:
|
|
9
10
|
bindir: bin
|
|
10
11
|
cert_chain: []
|
|
11
|
-
date: 2013-
|
|
12
|
+
date: 2013-04-01 00:00:00.000000000 Z
|
|
12
13
|
dependencies:
|
|
13
14
|
- !ruby/object:Gem::Dependency
|
|
14
15
|
name: mobilize-hdfs
|
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
|
17
|
+
none: false
|
|
16
18
|
requirements:
|
|
17
19
|
- - '='
|
|
18
20
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '1.
|
|
21
|
+
version: '1.294'
|
|
20
22
|
type: :runtime
|
|
21
23
|
prerelease: false
|
|
22
24
|
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
+
none: false
|
|
23
26
|
requirements:
|
|
24
27
|
- - '='
|
|
25
28
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '1.
|
|
29
|
+
version: '1.294'
|
|
27
30
|
description: Adds hive read, write, and run support to mobilize-hdfs
|
|
28
31
|
email:
|
|
29
32
|
- cpaesleme@dena.com
|
|
@@ -51,26 +54,33 @@ files:
|
|
|
51
54
|
- test/test_helper.rb
|
|
52
55
|
homepage: http://github.com/dena/mobilize-hive
|
|
53
56
|
licenses: []
|
|
54
|
-
metadata: {}
|
|
55
57
|
post_install_message:
|
|
56
58
|
rdoc_options: []
|
|
57
59
|
require_paths:
|
|
58
60
|
- lib
|
|
59
61
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
62
|
+
none: false
|
|
60
63
|
requirements:
|
|
61
|
-
- - '>='
|
|
64
|
+
- - ! '>='
|
|
62
65
|
- !ruby/object:Gem::Version
|
|
63
66
|
version: '0'
|
|
67
|
+
segments:
|
|
68
|
+
- 0
|
|
69
|
+
hash: 2996483111251873179
|
|
64
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
71
|
+
none: false
|
|
65
72
|
requirements:
|
|
66
|
-
- - '>='
|
|
73
|
+
- - ! '>='
|
|
67
74
|
- !ruby/object:Gem::Version
|
|
68
75
|
version: '0'
|
|
76
|
+
segments:
|
|
77
|
+
- 0
|
|
78
|
+
hash: 2996483111251873179
|
|
69
79
|
requirements: []
|
|
70
80
|
rubyforge_project:
|
|
71
|
-
rubygems_version:
|
|
81
|
+
rubygems_version: 1.8.25
|
|
72
82
|
signing_key:
|
|
73
|
-
specification_version:
|
|
83
|
+
specification_version: 3
|
|
74
84
|
summary: Adds hive read, write, and run support to mobilize-hdfs
|
|
75
85
|
test_files:
|
|
76
86
|
- test/hive_job_rows.yml
|
checksums.yaml
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
SHA1:
|
|
3
|
-
metadata.gz: c6d52c5de86351ccf5ab3c4da09b7341f27e5163
|
|
4
|
-
data.tar.gz: 3e560d834babc1377d8e3c6c80b799e9c4655acf
|
|
5
|
-
SHA512:
|
|
6
|
-
metadata.gz: d910e35cefda69f9640105949454b04b6d08df9069e2e9fdb61b9875806c921fb187701977afee7efcc3a5b84dadbbd91abf4d6bf9b4817bcf4f3c1945effbfb
|
|
7
|
-
data.tar.gz: 9a41441ce0ddf76b81fe63350a2ece3d1cc5ab0daccb1bc09f6db394dcb618078c4d01c1893d3a16090656dfdd36ec5fa8332aa5a699d096abcd85b726a1efa2
|