mobilize-hive 1.22 → 1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4c7e46212ccfe926790779712c8c2d5141d8343a
4
+ data.tar.gz: 099d652b673589e567e98d542cae516b4cfb1c3a
5
+ SHA512:
6
+ metadata.gz: c7567c9d84ae911df3f586101314c32079abc512ce1b44b8d435a65de696b821531f1af3d63e5f39b4dca3a6e020d8f666837b58af8920d0b8a1110636635188
7
+ data.tar.gz: 8fe69c1b8a3f96eab10aa03e4d15a81437c337ef23d16d91668bd508bdf422d7b7e9c7a0d51b067fe5d445ea77aad034387f8fc807d784a9c46188d1ff040ad0
@@ -108,7 +108,7 @@ module Mobilize
108
108
  def Hive.table_stats(cluster,db,table,user_name)
109
109
  describe_sql = "use #{db};describe extended #{table};"
110
110
  describe_response = Hive.run(cluster, describe_sql,user_name)
111
- return describe_response if describe_response['stdout'].length==0
111
+ return nil if describe_response['stdout'].length==0
112
112
  describe_output = describe_response['stdout']
113
113
  describe_output.split("location:").last.split(",").first
114
114
  #get location, fields, partitions
@@ -232,24 +232,9 @@ module Mobilize
232
232
  schema_hash
233
233
  end
234
234
 
235
- def Hive.path_params(cluster, path, user_name)
236
- db, table, partitions = path.gsub(".","/").split("/").ie{|sp| [sp.first, sp.second, sp[2..-1]]}
237
- #get existing table stats if any
238
- curr_stats = begin
239
- Hive.table_stats(cluster, db, table, user_name)
240
- rescue
241
- nil
242
- end
243
- {"db"=>db,
244
- "table"=>table,
245
- "partitions"=>partitions,
246
- "curr_stats"=>curr_stats}
247
- end
248
-
249
235
  def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop=false, schema_hash=nil)
250
236
  table_path = [db,table].join(".")
251
- target_params = Hive.path_params(cluster, table_path, user_name)
252
- table_stats = target_params['curr_stats']
237
+ table_stats = Hive.table_stats(cluster, db, table, user_name)
253
238
 
254
239
  source_hql_array = source_hql.split(";")
255
240
  last_select_i = source_hql_array.rindex{|hql| hql.downcase.strip.starts_with?("select")}
@@ -267,9 +252,7 @@ module Mobilize
267
252
  temp_create_hql = "#{temp_set_hql}#{prior_hql}#{temp_drop_hql}create table #{temp_table_path} as #{last_select_hql}"
268
253
  Hive.run(cluster,temp_create_hql,user_name)
269
254
 
270
- source_params = Hive.path_params(cluster, temp_table_path, user_name)
271
- source_table_path = ['db','table'].map{|k| source_params[k]}.join(".")
272
- source_table_stats = source_params['curr_stats']
255
+ source_table_stats = Hive.table_stats(cluster,temp_db,temp_table_name,user_name)
273
256
  source_fields = source_table_stats['field_defs']
274
257
 
275
258
  if part_array.length == 0 and
@@ -297,7 +280,7 @@ module Mobilize
297
280
 
298
281
  target_create_hql = "create table if not exists #{table_path} #{field_def_stmt};"
299
282
 
300
- target_insert_hql = "insert overwrite table #{table_path} select #{target_field_stmt} from #{source_table_path};"
283
+ target_insert_hql = "insert overwrite table #{table_path} select #{target_field_stmt} from #{temp_table_path};"
301
284
 
302
285
  target_full_hql = [target_name_hql,
303
286
  target_drop_hql,
@@ -353,7 +336,7 @@ module Mobilize
353
336
  else
354
337
  #get all the permutations of possible partititons
355
338
  part_set_hql = "set hive.cli.print.header=true;set mapred.job.name=#{job_name} (permutations);"
356
- part_select_hql = "select distinct #{target_part_stmt} from #{source_table_path};"
339
+ part_select_hql = "select distinct #{target_part_stmt} from #{temp_table_path};"
357
340
  part_perm_hql = part_set_hql + part_select_hql
358
341
  part_perm_tsv = Hive.run(cluster, part_perm_hql, user_name)['stdout']
359
342
  #having gotten the permutations, ensure they are dropped
@@ -369,7 +352,7 @@ module Mobilize
369
352
 
370
353
  target_insert_hql = "insert overwrite table #{table_path} " +
371
354
  "partition (#{target_part_stmt}) " +
372
- "select #{target_field_stmt},#{target_part_stmt} from #{source_table_path};"
355
+ "select #{target_field_stmt},#{target_part_stmt} from #{temp_table_path};"
373
356
 
374
357
  target_full_hql = [target_set_hql, target_create_hql, target_insert_hql, temp_drop_hql].join
375
358
 
@@ -389,8 +372,7 @@ module Mobilize
389
372
  source_headers = source_tsv.tsv_header_array
390
373
 
391
374
  table_path = [db,table].join(".")
392
- target_params = Hive.path_params(cluster, table_path, user_name)
393
- table_stats = target_params['curr_stats']
375
+ table_stats = Hive.table_stats(cluster, db, table, user_name)
394
376
 
395
377
  schema_hash ||= {}
396
378
 
@@ -566,7 +548,7 @@ module Mobilize
566
548
  end
567
549
  {'stdout'=>url,'exit_code'=>0}
568
550
  rescue => exc
569
- {'stderr'=>exc.to_s, 'exit_code'=>500}
551
+ {'stderr'=>"#{exc.to_s}\n#{exc.backtrace.join("\n")}", 'exit_code'=>500}
570
552
  end
571
553
 
572
554
  #unslot worker and write result
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Hive
3
- VERSION = "1.22"
3
+ VERSION = "1.23"
4
4
  end
5
5
  end
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
- gem.add_runtime_dependency "mobilize-hdfs","1.21"
19
+ gem.add_runtime_dependency "mobilize-hdfs","1.22"
20
20
  end
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-hive
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.22'
5
- prerelease:
4
+ version: '1.23'
6
5
  platform: ruby
7
6
  authors:
8
7
  - Cassio Paes-Leme
@@ -14,19 +13,17 @@ dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: mobilize-hdfs
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - '='
20
18
  - !ruby/object:Gem::Version
21
- version: '1.21'
19
+ version: '1.22'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - '='
28
25
  - !ruby/object:Gem::Version
29
- version: '1.21'
26
+ version: '1.22'
30
27
  description: Adds hive read, write, and run support to mobilize-hdfs
31
28
  email:
32
29
  - cpaesleme@dena.com
@@ -54,33 +51,26 @@ files:
54
51
  - test/test_helper.rb
55
52
  homepage: http://github.com/dena/mobilize-hive
56
53
  licenses: []
54
+ metadata: {}
57
55
  post_install_message:
58
56
  rdoc_options: []
59
57
  require_paths:
60
58
  - lib
61
59
  required_ruby_version: !ruby/object:Gem::Requirement
62
- none: false
63
60
  requirements:
64
- - - ! '>='
61
+ - - '>='
65
62
  - !ruby/object:Gem::Version
66
63
  version: '0'
67
- segments:
68
- - 0
69
- hash: -4582611704376704444
70
64
  required_rubygems_version: !ruby/object:Gem::Requirement
71
- none: false
72
65
  requirements:
73
- - - ! '>='
66
+ - - '>='
74
67
  - !ruby/object:Gem::Version
75
68
  version: '0'
76
- segments:
77
- - 0
78
- hash: -4582611704376704444
79
69
  requirements: []
80
70
  rubyforge_project:
81
- rubygems_version: 1.8.25
71
+ rubygems_version: 2.0.3
82
72
  signing_key:
83
- specification_version: 3
73
+ specification_version: 4
84
74
  summary: Adds hive read, write, and run support to mobilize-hdfs
85
75
  test_files:
86
76
  - test/hive_job_rows.yml