mobilize-hive 1.22 → 1.23

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4c7e46212ccfe926790779712c8c2d5141d8343a
4
+ data.tar.gz: 099d652b673589e567e98d542cae516b4cfb1c3a
5
+ SHA512:
6
+ metadata.gz: c7567c9d84ae911df3f586101314c32079abc512ce1b44b8d435a65de696b821531f1af3d63e5f39b4dca3a6e020d8f666837b58af8920d0b8a1110636635188
7
+ data.tar.gz: 8fe69c1b8a3f96eab10aa03e4d15a81437c337ef23d16d91668bd508bdf422d7b7e9c7a0d51b067fe5d445ea77aad034387f8fc807d784a9c46188d1ff040ad0
@@ -108,7 +108,7 @@ module Mobilize
108
108
  def Hive.table_stats(cluster,db,table,user_name)
109
109
  describe_sql = "use #{db};describe extended #{table};"
110
110
  describe_response = Hive.run(cluster, describe_sql,user_name)
111
- return describe_response if describe_response['stdout'].length==0
111
+ return nil if describe_response['stdout'].length==0
112
112
  describe_output = describe_response['stdout']
113
113
  describe_output.split("location:").last.split(",").first
114
114
  #get location, fields, partitions
@@ -232,24 +232,9 @@ module Mobilize
232
232
  schema_hash
233
233
  end
234
234
 
235
- def Hive.path_params(cluster, path, user_name)
236
- db, table, partitions = path.gsub(".","/").split("/").ie{|sp| [sp.first, sp.second, sp[2..-1]]}
237
- #get existing table stats if any
238
- curr_stats = begin
239
- Hive.table_stats(cluster, db, table, user_name)
240
- rescue
241
- nil
242
- end
243
- {"db"=>db,
244
- "table"=>table,
245
- "partitions"=>partitions,
246
- "curr_stats"=>curr_stats}
247
- end
248
-
249
235
  def Hive.hql_to_table(cluster, db, table, part_array, source_hql, user_name, job_name, drop=false, schema_hash=nil)
250
236
  table_path = [db,table].join(".")
251
- target_params = Hive.path_params(cluster, table_path, user_name)
252
- table_stats = target_params['curr_stats']
237
+ table_stats = Hive.table_stats(cluster, db, table, user_name)
253
238
 
254
239
  source_hql_array = source_hql.split(";")
255
240
  last_select_i = source_hql_array.rindex{|hql| hql.downcase.strip.starts_with?("select")}
@@ -267,9 +252,7 @@ module Mobilize
267
252
  temp_create_hql = "#{temp_set_hql}#{prior_hql}#{temp_drop_hql}create table #{temp_table_path} as #{last_select_hql}"
268
253
  Hive.run(cluster,temp_create_hql,user_name)
269
254
 
270
- source_params = Hive.path_params(cluster, temp_table_path, user_name)
271
- source_table_path = ['db','table'].map{|k| source_params[k]}.join(".")
272
- source_table_stats = source_params['curr_stats']
255
+ source_table_stats = Hive.table_stats(cluster,temp_db,temp_table_name,user_name)
273
256
  source_fields = source_table_stats['field_defs']
274
257
 
275
258
  if part_array.length == 0 and
@@ -297,7 +280,7 @@ module Mobilize
297
280
 
298
281
  target_create_hql = "create table if not exists #{table_path} #{field_def_stmt};"
299
282
 
300
- target_insert_hql = "insert overwrite table #{table_path} select #{target_field_stmt} from #{source_table_path};"
283
+ target_insert_hql = "insert overwrite table #{table_path} select #{target_field_stmt} from #{temp_table_path};"
301
284
 
302
285
  target_full_hql = [target_name_hql,
303
286
  target_drop_hql,
@@ -353,7 +336,7 @@ module Mobilize
353
336
  else
354
337
  #get all the permutations of possible partititons
355
338
  part_set_hql = "set hive.cli.print.header=true;set mapred.job.name=#{job_name} (permutations);"
356
- part_select_hql = "select distinct #{target_part_stmt} from #{source_table_path};"
339
+ part_select_hql = "select distinct #{target_part_stmt} from #{temp_table_path};"
357
340
  part_perm_hql = part_set_hql + part_select_hql
358
341
  part_perm_tsv = Hive.run(cluster, part_perm_hql, user_name)['stdout']
359
342
  #having gotten the permutations, ensure they are dropped
@@ -369,7 +352,7 @@ module Mobilize
369
352
 
370
353
  target_insert_hql = "insert overwrite table #{table_path} " +
371
354
  "partition (#{target_part_stmt}) " +
372
- "select #{target_field_stmt},#{target_part_stmt} from #{source_table_path};"
355
+ "select #{target_field_stmt},#{target_part_stmt} from #{temp_table_path};"
373
356
 
374
357
  target_full_hql = [target_set_hql, target_create_hql, target_insert_hql, temp_drop_hql].join
375
358
 
@@ -389,8 +372,7 @@ module Mobilize
389
372
  source_headers = source_tsv.tsv_header_array
390
373
 
391
374
  table_path = [db,table].join(".")
392
- target_params = Hive.path_params(cluster, table_path, user_name)
393
- table_stats = target_params['curr_stats']
375
+ table_stats = Hive.table_stats(cluster, db, table, user_name)
394
376
 
395
377
  schema_hash ||= {}
396
378
 
@@ -566,7 +548,7 @@ module Mobilize
566
548
  end
567
549
  {'stdout'=>url,'exit_code'=>0}
568
550
  rescue => exc
569
- {'stderr'=>exc.to_s, 'exit_code'=>500}
551
+ {'stderr'=>"#{exc.to_s}\n#{exc.backtrace.join("\n")}", 'exit_code'=>500}
570
552
  end
571
553
 
572
554
  #unslot worker and write result
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Hive
3
- VERSION = "1.22"
3
+ VERSION = "1.23"
4
4
  end
5
5
  end
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
- gem.add_runtime_dependency "mobilize-hdfs","1.21"
19
+ gem.add_runtime_dependency "mobilize-hdfs","1.22"
20
20
  end
metadata CHANGED
@@ -1,8 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-hive
3
3
  version: !ruby/object:Gem::Version
4
- version: '1.22'
5
- prerelease:
4
+ version: '1.23'
6
5
  platform: ruby
7
6
  authors:
8
7
  - Cassio Paes-Leme
@@ -14,19 +13,17 @@ dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: mobilize-hdfs
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - '='
20
18
  - !ruby/object:Gem::Version
21
- version: '1.21'
19
+ version: '1.22'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - '='
28
25
  - !ruby/object:Gem::Version
29
- version: '1.21'
26
+ version: '1.22'
30
27
  description: Adds hive read, write, and run support to mobilize-hdfs
31
28
  email:
32
29
  - cpaesleme@dena.com
@@ -54,33 +51,26 @@ files:
54
51
  - test/test_helper.rb
55
52
  homepage: http://github.com/dena/mobilize-hive
56
53
  licenses: []
54
+ metadata: {}
57
55
  post_install_message:
58
56
  rdoc_options: []
59
57
  require_paths:
60
58
  - lib
61
59
  required_ruby_version: !ruby/object:Gem::Requirement
62
- none: false
63
60
  requirements:
64
- - - ! '>='
61
+ - - '>='
65
62
  - !ruby/object:Gem::Version
66
63
  version: '0'
67
- segments:
68
- - 0
69
- hash: -4582611704376704444
70
64
  required_rubygems_version: !ruby/object:Gem::Requirement
71
- none: false
72
65
  requirements:
73
- - - ! '>='
66
+ - - '>='
74
67
  - !ruby/object:Gem::Version
75
68
  version: '0'
76
- segments:
77
- - 0
78
- hash: -4582611704376704444
79
69
  requirements: []
80
70
  rubyforge_project:
81
- rubygems_version: 1.8.25
71
+ rubygems_version: 2.0.3
82
72
  signing_key:
83
- specification_version: 3
73
+ specification_version: 4
84
74
  summary: Adds hive read, write, and run support to mobilize-hdfs
85
75
  test_files:
86
76
  - test/hive_job_rows.yml