mobilize-hive 1.0.07 → 1.0.08

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -102,6 +102,8 @@ in hadoop.yml. Each cluster has:
102
102
  * max_slots - defines the total number of simultaneous slots to be
103
103
  used for hive jobs on this cluster
104
104
  * output_db - defines the db which should be used to hold stage outputs.
105
+ * This db must have open permissions (777) so any user on the system can
106
+ write to it -- the tables inside will be owned by the users themselves.
105
107
  * exec_path - defines the path to the hive executable
106
108
 
107
109
  Sample hive.yml:
@@ -227,3 +229,4 @@ Cassio Paes-Leme :: cpaesleme@dena.com :: @cpaesleme
227
229
 
228
230
  [mobilize-base]: https://github.com/dena/mobilize-base
229
231
  [mobilize-ssh]: https://github.com/dena/mobilize-ssh
232
+ [mobilize-hdfs]: https://github.com/dena/mobilize-hdfs
@@ -84,6 +84,8 @@ module Mobilize
84
84
 
85
85
  #run a generic hive command, with the option of passing a file hash to be locally available
86
86
  def Hive.run(hql,cluster,user,file_hash=nil)
87
+ # disable stats autogathering so hive does not create a TempStatsStore
88
+ hql = "set hive.stats.autogather=false;#{hql}"
87
89
  filename = hql.to_md5
88
90
  file_hash||= {}
89
91
  file_hash[filename] = hql
@@ -101,7 +103,6 @@ module Mobilize
101
103
  user = params['user']
102
104
  cluster = params['cluster'] || Hive.clusters.keys.first
103
105
  node = Hadoop.gateway_node(cluster)
104
- node_user = Ssh.host(node)['user']
105
106
  if user and !Ssh.sudoers(node).include?(u.name)
106
107
  raise "#{u.name} does not have su permissions for #{node}"
107
108
  elsif user.nil? and Ssh.su_all_users(node)
@@ -140,17 +141,12 @@ module Mobilize
140
141
  "create table #{output_path} as #{select_hql};"].join(";")
141
142
  full_hql = [prior_hql, output_table_hql].compact.join(";")
142
143
  Hive.run(full_hql, cluster, user)
143
- #make sure node user owns the stage result directory
144
- output_table_stats = Hive.table_stats(output_db,output_table,cluster,node_user)
145
- output_table_location = output_table_stats['location']
146
- chown_command = "#{Hadoop.exec_path(cluster)} fs -chown -R #{node_user} '#{output_table_location}'"
147
- Ssh.run(node,chown_command,node_user)
148
144
  #already populated, make sure dataset exists
149
145
  Dataset.find_or_create_by_url(out_url)
150
146
  else
151
147
  out_string = Hive.run(hql, cluster, user)
152
148
  out_string = "result\n#{out_string}"
153
- Dataset.write_by_url(out_url,out_string,node_user)
149
+ Dataset.write_by_url(out_url,out_string,user)
154
150
  end
155
151
  #unslot worker
156
152
  Hive.unslot_worker_by_path(stage_path)
@@ -442,7 +438,6 @@ module Mobilize
442
438
  return false unless slot_id
443
439
 
444
440
  node = Hadoop.gateway_node(cluster)
445
- node_user = Ssh.host(node)['user']
446
441
  if user and !Ssh.sudoers(node).include?(u.name)
447
442
  raise "#{u.name} does not have su permissions for #{node}"
448
443
  elsif user.nil? and Ssh.su_all_users(node)
@@ -498,7 +493,7 @@ module Mobilize
498
493
  out_string = "result\n#{out_string}"
499
494
  output_db,output_table = [Hive.output_db(cluster),stage_path.gridsafe]
500
495
  out_url = "hive://#{cluster}/#{output_db}/#{output_table}"
501
- Dataset.write_by_url(out_url,out_string,node_user)
496
+ Dataset.write_by_url(out_url,out_string,user)
502
497
  out_url
503
498
  end
504
499
 
@@ -1,5 +1,5 @@
1
1
  module Mobilize
2
2
  module Hive
3
- VERSION = "1.0.07"
3
+ VERSION = "1.0.08"
4
4
  end
5
5
  end
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
- gem.add_runtime_dependency "mobilize-hdfs","1.0.07"
19
+ gem.add_runtime_dependency "mobilize-hdfs","1.0.08"
20
20
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mobilize-hive
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.07
4
+ version: 1.0.08
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -18,7 +18,7 @@ dependencies:
18
18
  requirements:
19
19
  - - '='
20
20
  - !ruby/object:Gem::Version
21
- version: 1.0.07
21
+ version: 1.0.08
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - '='
28
28
  - !ruby/object:Gem::Version
29
- version: 1.0.07
29
+ version: 1.0.08
30
30
  description: Adds hive read, write, and run support to mobilize-hdfs
31
31
  email:
32
32
  - cpaesleme@ngmoco.com