mobilize-hive 1.22 → 1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/mobilize-hive/handlers/hive.rb +8 -26
- data/lib/mobilize-hive/version.rb +1 -1
- data/mobilize-hive.gemspec +1 -1
- metadata +8 -18
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4c7e46212ccfe926790779712c8c2d5141d8343a
|
4
|
+
data.tar.gz: 099d652b673589e567e98d542cae516b4cfb1c3a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c7567c9d84ae911df3f586101314c32079abc512ce1b44b8d435a65de696b821531f1af3d63e5f39b4dca3a6e020d8f666837b58af8920d0b8a1110636635188
|
7
|
+
data.tar.gz: 8fe69c1b8a3f96eab10aa03e4d15a81437c337ef23d16d91668bd508bdf422d7b7e9c7a0d51b067fe5d445ea77aad034387f8fc807d784a9c46188d1ff040ad0
|
@@ -108,7 +108,7 @@ module Mobilize
|
|
108
108
|
def Hive.table_stats(cluster,db,table,user_name)
|
109
109
|
describe_sql = "use #{db};describe extended #{table};"
|
110
110
|
describe_response = Hive.run(cluster, describe_sql,user_name)
|
111
|
-
return
|
111
|
+
return nil if describe_response['stdout'].length==0
|
112
112
|
describe_output = describe_response['stdout']
|
113
113
|
describe_output.split("location:").last.split(",").first
|
114
114
|
#get location, fields, partitions
|
@@ -232,24 +232,9 @@ module Mobilize
|
|
232
232
|
schema_hash
|
233
233
|
end
|
234
234
|
|
235
|
-
def Hive.path_params(cluster, path, user_name)
|
236
|
-
db, table, partitions = path.gsub(".","/").split("/").ie{|sp| [sp.first, sp.second, sp[2..-1]]}
|
237
|
-
#get existing table stats if any
|
238
|
-
curr_stats = begin
|
239
|
-
Hive.table_stats(cluster, db, table, user_name)
|
240
|
-
rescue
|
241
|
-
nil
|
242
|
-
end
|
243
|
-
{"db"=>db,
|
244
|
-
"table"=>table,
|
245
|
-
"partitions"=>partitions,
|
246
|
-
"curr_stats"=>curr_stats}
|
247
|
-
end
|
248
|
-
|
249
235
|
def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop=false, schema_hash=nil)
|
250
236
|
table_path = [db,table].join(".")
|
251
|
-
|
252
|
-
table_stats = target_params['curr_stats']
|
237
|
+
table_stats = Hive.table_stats(cluster, db, table, user_name)
|
253
238
|
|
254
239
|
source_hql_array = source_hql.split(";")
|
255
240
|
last_select_i = source_hql_array.rindex{|hql| hql.downcase.strip.starts_with?("select")}
|
@@ -267,9 +252,7 @@ module Mobilize
|
|
267
252
|
temp_create_hql = "#{temp_set_hql}#{prior_hql}#{temp_drop_hql}create table #{temp_table_path} as #{last_select_hql}"
|
268
253
|
Hive.run(cluster,temp_create_hql,user_name)
|
269
254
|
|
270
|
-
|
271
|
-
source_table_path = ['db','table'].map{|k| source_params[k]}.join(".")
|
272
|
-
source_table_stats = source_params['curr_stats']
|
255
|
+
source_table_stats = Hive.table_stats(cluster,temp_db,temp_table_name,user_name)
|
273
256
|
source_fields = source_table_stats['field_defs']
|
274
257
|
|
275
258
|
if part_array.length == 0 and
|
@@ -297,7 +280,7 @@ module Mobilize
|
|
297
280
|
|
298
281
|
target_create_hql = "create table if not exists #{table_path} #{field_def_stmt};"
|
299
282
|
|
300
|
-
target_insert_hql = "insert overwrite table #{table_path} select #{target_field_stmt} from #{
|
283
|
+
target_insert_hql = "insert overwrite table #{table_path} select #{target_field_stmt} from #{temp_table_path};"
|
301
284
|
|
302
285
|
target_full_hql = [target_name_hql,
|
303
286
|
target_drop_hql,
|
@@ -353,7 +336,7 @@ module Mobilize
|
|
353
336
|
else
|
354
337
|
#get all the permutations of possible partitions
|
355
338
|
part_set_hql = "set hive.cli.print.header=true;set mapred.job.name=#{job_name} (permutations);"
|
356
|
-
part_select_hql = "select distinct #{target_part_stmt} from #{
|
339
|
+
part_select_hql = "select distinct #{target_part_stmt} from #{temp_table_path};"
|
357
340
|
part_perm_hql = part_set_hql + part_select_hql
|
358
341
|
part_perm_tsv = Hive.run(cluster, part_perm_hql, user_name)['stdout']
|
359
342
|
#having gotten the permutations, ensure they are dropped
|
@@ -369,7 +352,7 @@ module Mobilize
|
|
369
352
|
|
370
353
|
target_insert_hql = "insert overwrite table #{table_path} " +
|
371
354
|
"partition (#{target_part_stmt}) " +
|
372
|
-
"select #{target_field_stmt},#{target_part_stmt} from #{
|
355
|
+
"select #{target_field_stmt},#{target_part_stmt} from #{temp_table_path};"
|
373
356
|
|
374
357
|
target_full_hql = [target_set_hql, target_create_hql, target_insert_hql, temp_drop_hql].join
|
375
358
|
|
@@ -389,8 +372,7 @@ module Mobilize
|
|
389
372
|
source_headers = source_tsv.tsv_header_array
|
390
373
|
|
391
374
|
table_path = [db,table].join(".")
|
392
|
-
|
393
|
-
table_stats = target_params['curr_stats']
|
375
|
+
table_stats = Hive.table_stats(cluster, db, table, user_name)
|
394
376
|
|
395
377
|
schema_hash ||= {}
|
396
378
|
|
@@ -566,7 +548,7 @@ module Mobilize
|
|
566
548
|
end
|
567
549
|
{'stdout'=>url,'exit_code'=>0}
|
568
550
|
rescue => exc
|
569
|
-
{'stderr'=>exc.to_s, 'exit_code'=>500}
|
551
|
+
{'stderr'=>"#{exc.to_s}\n#{exc.backtrace.join("\n")}", 'exit_code'=>500}
|
570
552
|
end
|
571
553
|
|
572
554
|
#unslot worker and write result
|
data/mobilize-hive.gemspec
CHANGED
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
|
-
gem.add_runtime_dependency "mobilize-hdfs","1.
|
19
|
+
gem.add_runtime_dependency "mobilize-hdfs","1.22"
|
20
20
|
end
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mobilize-hive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '1.22'
|
5
|
-
prerelease:
|
4
|
+
version: '1.23'
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Cassio Paes-Leme
|
@@ -14,19 +13,17 @@ dependencies:
|
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: mobilize-hdfs
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - '='
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version: '1.
|
19
|
+
version: '1.22'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - '='
|
28
25
|
- !ruby/object:Gem::Version
|
29
|
-
version: '1.
|
26
|
+
version: '1.22'
|
30
27
|
description: Adds hive read, write, and run support to mobilize-hdfs
|
31
28
|
email:
|
32
29
|
- cpaesleme@dena.com
|
@@ -54,33 +51,26 @@ files:
|
|
54
51
|
- test/test_helper.rb
|
55
52
|
homepage: http://github.com/dena/mobilize-hive
|
56
53
|
licenses: []
|
54
|
+
metadata: {}
|
57
55
|
post_install_message:
|
58
56
|
rdoc_options: []
|
59
57
|
require_paths:
|
60
58
|
- lib
|
61
59
|
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
-
none: false
|
63
60
|
requirements:
|
64
|
-
- -
|
61
|
+
- - '>='
|
65
62
|
- !ruby/object:Gem::Version
|
66
63
|
version: '0'
|
67
|
-
segments:
|
68
|
-
- 0
|
69
|
-
hash: -4582611704376704444
|
70
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
-
none: false
|
72
65
|
requirements:
|
73
|
-
- -
|
66
|
+
- - '>='
|
74
67
|
- !ruby/object:Gem::Version
|
75
68
|
version: '0'
|
76
|
-
segments:
|
77
|
-
- 0
|
78
|
-
hash: -4582611704376704444
|
79
69
|
requirements: []
|
80
70
|
rubyforge_project:
|
81
|
-
rubygems_version:
|
71
|
+
rubygems_version: 2.0.3
|
82
72
|
signing_key:
|
83
|
-
specification_version:
|
73
|
+
specification_version: 4
|
84
74
|
summary: Adds hive read, write, and run support to mobilize-hdfs
|
85
75
|
test_files:
|
86
76
|
- test/hive_job_rows.yml
|