mobilize-hive 1.0.07 → 1.0.08
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +3 -0
- data/lib/mobilize-hive/handlers/hive.rb +4 -9
- data/lib/mobilize-hive/version.rb +1 -1
- data/mobilize-hive.gemspec +1 -1
- metadata +3 -3
data/README.md
CHANGED
@@ -102,6 +102,8 @@ in hadoop.yml. Each cluster has:
|
|
102
102
|
* max_slots - defines the total number of simultaneous slots to be
|
103
103
|
used for hive jobs on this cluster
|
104
104
|
* output_db - defines the db which should be used to hold stage outputs.
|
105
|
+
* This db must have open permissions (777) so any user on the system can
|
106
|
+
write to it -- the tables inside will be owned by the users themselves.
|
105
107
|
* exec_path - defines the path to the hive executable
|
106
108
|
|
107
109
|
Sample hive.yml:
|
@@ -227,3 +229,4 @@ Cassio Paes-Leme :: cpaesleme@dena.com :: @cpaesleme
|
|
227
229
|
|
228
230
|
[mobilize-base]: https://github.com/dena/mobilize-base
|
229
231
|
[mobilize-ssh]: https://github.com/dena/mobilize-ssh
|
232
|
+
[mobilize-hdfs]: https://github.com/dena/mobilize-hdfs
|
data/lib/mobilize-hive/handlers/hive.rb
CHANGED
@@ -84,6 +84,8 @@ module Mobilize
|
|
84
84
|
|
85
85
|
#run a generic hive command, with the option of passing a file hash to be locally available
|
86
86
|
def Hive.run(hql,cluster,user,file_hash=nil)
|
87
|
+
# no TempStatsStore
|
88
|
+
hql = "set hive.stats.autogather=false;#{hql}"
|
87
89
|
filename = hql.to_md5
|
88
90
|
file_hash||= {}
|
89
91
|
file_hash[filename] = hql
|
@@ -101,7 +103,6 @@ module Mobilize
|
|
101
103
|
user = params['user']
|
102
104
|
cluster = params['cluster'] || Hive.clusters.keys.first
|
103
105
|
node = Hadoop.gateway_node(cluster)
|
104
|
-
node_user = Ssh.host(node)['user']
|
105
106
|
if user and !Ssh.sudoers(node).include?(u.name)
|
106
107
|
raise "#{u.name} does not have su permissions for #{node}"
|
107
108
|
elsif user.nil? and Ssh.su_all_users(node)
|
@@ -140,17 +141,12 @@ module Mobilize
|
|
140
141
|
"create table #{output_path} as #{select_hql};"].join(";")
|
141
142
|
full_hql = [prior_hql, output_table_hql].compact.join(";")
|
142
143
|
Hive.run(full_hql, cluster, user)
|
143
|
-
#make sure node user owns the stage result directory
|
144
|
-
output_table_stats = Hive.table_stats(output_db,output_table,cluster,node_user)
|
145
|
-
output_table_location = output_table_stats['location']
|
146
|
-
chown_command = "#{Hadoop.exec_path(cluster)} fs -chown -R #{node_user} '#{output_table_location}'"
|
147
|
-
Ssh.run(node,chown_command,node_user)
|
148
144
|
#already populated, make sure dataset exists
|
149
145
|
Dataset.find_or_create_by_url(out_url)
|
150
146
|
else
|
151
147
|
out_string = Hive.run(hql, cluster, user)
|
152
148
|
out_string = "result\n#{out_string}"
|
153
|
-
Dataset.write_by_url(out_url,out_string,node_user)
|
149
|
+
Dataset.write_by_url(out_url,out_string,user)
|
154
150
|
end
|
155
151
|
#unslot worker
|
156
152
|
Hive.unslot_worker_by_path(stage_path)
|
@@ -442,7 +438,6 @@ module Mobilize
|
|
442
438
|
return false unless slot_id
|
443
439
|
|
444
440
|
node = Hadoop.gateway_node(cluster)
|
445
|
-
node_user = Ssh.host(node)['user']
|
446
441
|
if user and !Ssh.sudoers(node).include?(u.name)
|
447
442
|
raise "#{u.name} does not have su permissions for #{node}"
|
448
443
|
elsif user.nil? and Ssh.su_all_users(node)
|
@@ -498,7 +493,7 @@ module Mobilize
|
|
498
493
|
out_string = "result\n#{out_string}"
|
499
494
|
output_db,output_table = [Hive.output_db(cluster),stage_path.gridsafe]
|
500
495
|
out_url = "hive://#{cluster}/#{output_db}/#{output_table}"
|
501
|
-
Dataset.write_by_url(out_url,out_string,node_user)
|
496
|
+
Dataset.write_by_url(out_url,out_string,user)
|
502
497
|
out_url
|
503
498
|
end
|
504
499
|
|
data/mobilize-hive.gemspec
CHANGED
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
|
-
gem.add_runtime_dependency "mobilize-hdfs","1.0.07"
|
19
|
+
gem.add_runtime_dependency "mobilize-hdfs","1.0.08"
|
20
20
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mobilize-hive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.07
|
4
|
+
version: 1.0.08
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -18,7 +18,7 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - '='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 1.0.07
|
21
|
+
version: 1.0.08
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - '='
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 1.0.07
|
29
|
+
version: 1.0.08
|
30
30
|
description: Adds hive read, write, and run support to mobilize-hdfs
|
31
31
|
email:
|
32
32
|
- cpaesleme@ngmoco.com
|