mobilize-hive 1.0.07 → 1.0.08

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -102,6 +102,8 @@ in hadoop.yml. Each cluster has:
102
102
  * max_slots - defines the total number of simultaneous slots to be
103
103
  used for hive jobs on this cluster
104
104
  * output_db - defines the db which should be used to hold stage outputs.
105
+ * This db must have open permissions (777) so any user on the system can
106
+ write to it -- the tables inside will be owned by the users themselves.
105
107
  * exec_path - defines the path to the hive executable
106
108
 
107
109
  Sample hive.yml:
@@ -227,3 +229,4 @@ Cassio Paes-Leme :: cpaesleme@dena.com :: @cpaesleme
227
229
 
228
230
  [mobilize-base]: https://github.com/dena/mobilize-base
229
231
  [mobilize-ssh]: https://github.com/dena/mobilize-ssh
232
+ [mobilize-hdfs]: https://github.com/dena/mobilize-hdfs
@@ -84,6 +84,8 @@ module Mobilize
84
84
 
85
85
  #run a generic hive command, with the option of passing a file hash to be locally available
86
86
  def Hive.run(hql,cluster,user,file_hash=nil)
87
+ # turn off stats autogathering so hive does not create a TempStatsStore
88
+ hql = "set hive.stats.autogather=false;#{hql}"
87
89
  filename = hql.to_md5
88
90
  file_hash||= {}
89
91
  file_hash[filename] = hql
@@ -101,7 +103,6 @@ module Mobilize
101
103
  user = params['user']
102
104
  cluster = params['cluster'] || Hive.clusters.keys.first
103
105
  node = Hadoop.gateway_node(cluster)
104
- node_user = Ssh.host(node)['user']
105
106
  if user and !Ssh.sudoers(node).include?(u.name)
106
107
  raise "#{u.name} does not have su permissions for #{node}"
107
108
  elsif user.nil? and Ssh.su_all_users(node)
@@ -140,17 +141,12 @@ module Mobilize
140
141
  "create table #{output_path} as #{select_hql};"].join(";")
141
142
  full_hql = [prior_hql, output_table_hql].compact.join(";")
142
143
  Hive.run(full_hql, cluster, user)
143
- #make sure node user owns the stage result directory
144
- output_table_stats = Hive.table_stats(output_db,output_table,cluster,node_user)
145
- output_table_location = output_table_stats['location']
146
- chown_command = "#{Hadoop.exec_path(cluster)} fs -chown -R #{node_user} '#{output_table_location}'"
147
- Ssh.run(node,chown_command,node_user)
148
144
  #already populated, make sure dataset exists
149
145
  Dataset.find_or_create_by_url(out_url)
150
146
  else
151
147
  out_string = Hive.run(hql, cluster, user)
152
148
  out_string = "result\n#{out_string}"
153
- Dataset.write_by_url(out_url,out_string,node_user)
149
+ Dataset.write_by_url(out_url,out_string,user)
154
150
  end
155
151
  #unslot worker
156
152
  Hive.unslot_worker_by_path(stage_path)
@@ -442,7 +438,6 @@ module Mobilize
442
438
  return false unless slot_id
443
439
 
444
440
  node = Hadoop.gateway_node(cluster)
445
- node_user = Ssh.host(node)['user']
446
441
  if user and !Ssh.sudoers(node).include?(u.name)
447
442
  raise "#{u.name} does not have su permissions for #{node}"
448
443
  elsif user.nil? and Ssh.su_all_users(node)
@@ -498,7 +493,7 @@ module Mobilize
498
493
  out_string = "result\n#{out_string}"
499
494
  output_db,output_table = [Hive.output_db(cluster),stage_path.gridsafe]
500
495
  out_url = "hive://#{cluster}/#{output_db}/#{output_table}"
501
- Dataset.write_by_url(out_url,out_string,node_user)
496
+ Dataset.write_by_url(out_url,out_string,user)
502
497
  out_url
503
498
  end
504
499
 
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Hive
3
- VERSION = "1.0.07"
3
+ VERSION = "1.0.08"
4
4
  end
5
5
  end
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
- gem.add_runtime_dependency "mobilize-hdfs","1.0.07"
19
+ gem.add_runtime_dependency "mobilize-hdfs","1.0.08"
20
20
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-hive
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.07
4
+ version: 1.0.08
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -18,7 +18,7 @@ dependencies:
18
18
  requirements:
19
19
  - - '='
20
20
  - !ruby/object:Gem::Version
21
- version: 1.0.07
21
+ version: 1.0.08
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - '='
28
28
  - !ruby/object:Gem::Version
29
- version: 1.0.07
29
+ version: 1.0.08
30
30
  description: Adds hive read, write, and run support to mobilize-hdfs
31
31
  email:
32
32
  - cpaesleme@ngmoco.com