mobilize-hive 1.22 → 1.23
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/mobilize-hive/handlers/hive.rb +8 -26
- data/lib/mobilize-hive/version.rb +1 -1
- data/mobilize-hive.gemspec +1 -1
- metadata +8 -18
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4c7e46212ccfe926790779712c8c2d5141d8343a
|
4
|
+
data.tar.gz: 099d652b673589e567e98d542cae516b4cfb1c3a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c7567c9d84ae911df3f586101314c32079abc512ce1b44b8d435a65de696b821531f1af3d63e5f39b4dca3a6e020d8f666837b58af8920d0b8a1110636635188
|
7
|
+
data.tar.gz: 8fe69c1b8a3f96eab10aa03e4d15a81437c337ef23d16d91668bd508bdf422d7b7e9c7a0d51b067fe5d445ea77aad034387f8fc807d784a9c46188d1ff040ad0
|
@@ -108,7 +108,7 @@ module Mobilize
|
|
108
108
|
def Hive.table_stats(cluster,db,table,user_name)
|
109
109
|
describe_sql = "use #{db};describe extended #{table};"
|
110
110
|
describe_response = Hive.run(cluster, describe_sql,user_name)
|
111
|
-
return
|
111
|
+
return nil if describe_response['stdout'].length==0
|
112
112
|
describe_output = describe_response['stdout']
|
113
113
|
describe_output.split("location:").last.split(",").first
|
114
114
|
#get location, fields, partitions
|
@@ -232,24 +232,9 @@ module Mobilize
|
|
232
232
|
schema_hash
|
233
233
|
end
|
234
234
|
|
235
|
-
def Hive.path_params(cluster, path, user_name)
|
236
|
-
db, table, partitions = path.gsub(".","/").split("/").ie{|sp| [sp.first, sp.second, sp[2..-1]]}
|
237
|
-
#get existing table stats if any
|
238
|
-
curr_stats = begin
|
239
|
-
Hive.table_stats(cluster, db, table, user_name)
|
240
|
-
rescue
|
241
|
-
nil
|
242
|
-
end
|
243
|
-
{"db"=>db,
|
244
|
-
"table"=>table,
|
245
|
-
"partitions"=>partitions,
|
246
|
-
"curr_stats"=>curr_stats}
|
247
|
-
end
|
248
|
-
|
249
235
|
def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop=false, schema_hash=nil)
|
250
236
|
table_path = [db,table].join(".")
|
251
|
-
|
252
|
-
table_stats = target_params['curr_stats']
|
237
|
+
table_stats = Hive.table_stats(cluster, db, table, user_name)
|
253
238
|
|
254
239
|
source_hql_array = source_hql.split(";")
|
255
240
|
last_select_i = source_hql_array.rindex{|hql| hql.downcase.strip.starts_with?("select")}
|
@@ -267,9 +252,7 @@ module Mobilize
|
|
267
252
|
temp_create_hql = "#{temp_set_hql}#{prior_hql}#{temp_drop_hql}create table #{temp_table_path} as #{last_select_hql}"
|
268
253
|
Hive.run(cluster,temp_create_hql,user_name)
|
269
254
|
|
270
|
-
|
271
|
-
source_table_path = ['db','table'].map{|k| source_params[k]}.join(".")
|
272
|
-
source_table_stats = source_params['curr_stats']
|
255
|
+
source_table_stats = Hive.table_stats(cluster,temp_db,temp_table_name,user_name)
|
273
256
|
source_fields = source_table_stats['field_defs']
|
274
257
|
|
275
258
|
if part_array.length == 0 and
|
@@ -297,7 +280,7 @@ module Mobilize
|
|
297
280
|
|
298
281
|
target_create_hql = "create table if not exists #{table_path} #{field_def_stmt};"
|
299
282
|
|
300
|
-
target_insert_hql = "insert overwrite table #{table_path} select #{target_field_stmt} from #{source_table_path};"
|
283
|
+
target_insert_hql = "insert overwrite table #{table_path} select #{target_field_stmt} from #{temp_table_path};"
|
301
284
|
|
302
285
|
target_full_hql = [target_name_hql,
|
303
286
|
target_drop_hql,
|
@@ -353,7 +336,7 @@ module Mobilize
|
|
353
336
|
else
|
354
337
|
#get all the permutations of possible partitions
|
355
338
|
part_set_hql = "set hive.cli.print.header=true;set mapred.job.name=#{job_name} (permutations);"
|
356
|
-
part_select_hql = "select distinct #{target_part_stmt} from #{source_table_path};"
|
339
|
+
part_select_hql = "select distinct #{target_part_stmt} from #{temp_table_path};"
|
357
340
|
part_perm_hql = part_set_hql + part_select_hql
|
358
341
|
part_perm_tsv = Hive.run(cluster, part_perm_hql, user_name)['stdout']
|
359
342
|
#having gotten the permutations, ensure they are dropped
|
@@ -369,7 +352,7 @@ module Mobilize
|
|
369
352
|
|
370
353
|
target_insert_hql = "insert overwrite table #{table_path} " +
|
371
354
|
"partition (#{target_part_stmt}) " +
|
372
|
-
"select #{target_field_stmt},#{target_part_stmt} from #{source_table_path};"
|
355
|
+
"select #{target_field_stmt},#{target_part_stmt} from #{temp_table_path};"
|
373
356
|
|
374
357
|
target_full_hql = [target_set_hql, target_create_hql, target_insert_hql, temp_drop_hql].join
|
375
358
|
|
@@ -389,8 +372,7 @@ module Mobilize
|
|
389
372
|
source_headers = source_tsv.tsv_header_array
|
390
373
|
|
391
374
|
table_path = [db,table].join(".")
|
392
|
-
|
393
|
-
table_stats = target_params['curr_stats']
|
375
|
+
table_stats = Hive.table_stats(cluster, db, table, user_name)
|
394
376
|
|
395
377
|
schema_hash ||= {}
|
396
378
|
|
@@ -566,7 +548,7 @@ module Mobilize
|
|
566
548
|
end
|
567
549
|
{'stdout'=>url,'exit_code'=>0}
|
568
550
|
rescue => exc
|
569
|
-
{'stderr'=>exc.to_s, 'exit_code'=>500}
|
551
|
+
{'stderr'=>"#{exc.to_s}\n#{exc.backtrace.join("\n")}", 'exit_code'=>500}
|
570
552
|
end
|
571
553
|
|
572
554
|
#unslot worker and write result
|
data/mobilize-hive.gemspec
CHANGED
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
|
-
gem.add_runtime_dependency "mobilize-hdfs","1.
|
19
|
+
gem.add_runtime_dependency "mobilize-hdfs","1.22"
|
20
20
|
end
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mobilize-hive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '1.22'
|
5
|
-
prerelease:
|
4
|
+
version: '1.23'
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Cassio Paes-Leme
|
@@ -14,19 +13,17 @@ dependencies:
|
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: mobilize-hdfs
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - '='
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version: '1.
|
19
|
+
version: '1.22'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - '='
|
28
25
|
- !ruby/object:Gem::Version
|
29
|
-
version: '1.
|
26
|
+
version: '1.22'
|
30
27
|
description: Adds hive read, write, and run support to mobilize-hdfs
|
31
28
|
email:
|
32
29
|
- cpaesleme@dena.com
|
@@ -54,33 +51,26 @@ files:
|
|
54
51
|
- test/test_helper.rb
|
55
52
|
homepage: http://github.com/dena/mobilize-hive
|
56
53
|
licenses: []
|
54
|
+
metadata: {}
|
57
55
|
post_install_message:
|
58
56
|
rdoc_options: []
|
59
57
|
require_paths:
|
60
58
|
- lib
|
61
59
|
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
-
none: false
|
63
60
|
requirements:
|
64
|
-
- -
|
61
|
+
- - '>='
|
65
62
|
- !ruby/object:Gem::Version
|
66
63
|
version: '0'
|
67
|
-
segments:
|
68
|
-
- 0
|
69
|
-
hash: -4582611704376704444
|
70
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
-
none: false
|
72
65
|
requirements:
|
73
|
-
- -
|
66
|
+
- - '>='
|
74
67
|
- !ruby/object:Gem::Version
|
75
68
|
version: '0'
|
76
|
-
segments:
|
77
|
-
- 0
|
78
|
-
hash: -4582611704376704444
|
79
69
|
requirements: []
|
80
70
|
rubyforge_project:
|
81
|
-
rubygems_version:
|
71
|
+
rubygems_version: 2.0.3
|
82
72
|
signing_key:
|
83
|
-
specification_version:
|
73
|
+
specification_version: 4
|
84
74
|
summary: Adds hive read, write, and run support to mobilize-hdfs
|
85
75
|
test_files:
|
86
76
|
- test/hive_job_rows.yml
|