mobilize-hive 1.0.07 → 1.0.08
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +3 -0
- data/lib/mobilize-hive/handlers/hive.rb +4 -9
- data/lib/mobilize-hive/version.rb +1 -1
- data/mobilize-hive.gemspec +1 -1
- metadata +3 -3
data/README.md
CHANGED
@@ -102,6 +102,8 @@ in hadoop.yml. Each cluster has:
|
|
102
102
|
* max_slots - defines the total number of simultaneous slots to be
|
103
103
|
used for hive jobs on this cluster
|
104
104
|
* output_db - defines the db which should be used to hold stage outputs.
|
105
|
+
* This db must have open permissions (777) so any user on the system can
|
106
|
+
write to it -- the tables inside will be owned by the users themselves.
|
105
107
|
* exec_path - defines the path to the hive executable
|
106
108
|
|
107
109
|
Sample hive.yml:
|
@@ -227,3 +229,4 @@ Cassio Paes-Leme :: cpaesleme@dena.com :: @cpaesleme
|
|
227
229
|
|
228
230
|
[mobilize-base]: https://github.com/dena/mobilize-base
|
229
231
|
[mobilize-ssh]: https://github.com/dena/mobilize-ssh
|
232
|
+
[mobilize-hdfs]: https://github.com/dena/mobilize-hdfs
|
@@ -84,6 +84,8 @@ module Mobilize
|
|
84
84
|
|
85
85
|
#run a generic hive command, with the option of passing a file hash to be locally available
|
86
86
|
def Hive.run(hql,cluster,user,file_hash=nil)
|
87
|
+
# no TempStatsStore
|
88
|
+
hql = "set hive.stats.autogather=false;#{hql}"
|
87
89
|
filename = hql.to_md5
|
88
90
|
file_hash||= {}
|
89
91
|
file_hash[filename] = hql
|
@@ -101,7 +103,6 @@ module Mobilize
|
|
101
103
|
user = params['user']
|
102
104
|
cluster = params['cluster'] || Hive.clusters.keys.first
|
103
105
|
node = Hadoop.gateway_node(cluster)
|
104
|
-
node_user = Ssh.host(node)['user']
|
105
106
|
if user and !Ssh.sudoers(node).include?(u.name)
|
106
107
|
raise "#{u.name} does not have su permissions for #{node}"
|
107
108
|
elsif user.nil? and Ssh.su_all_users(node)
|
@@ -140,17 +141,12 @@ module Mobilize
|
|
140
141
|
"create table #{output_path} as #{select_hql};"].join(";")
|
141
142
|
full_hql = [prior_hql, output_table_hql].compact.join(";")
|
142
143
|
Hive.run(full_hql, cluster, user)
|
143
|
-
#make sure node user owns the stage result directory
|
144
|
-
output_table_stats = Hive.table_stats(output_db,output_table,cluster,node_user)
|
145
|
-
output_table_location = output_table_stats['location']
|
146
|
-
chown_command = "#{Hadoop.exec_path(cluster)} fs -chown -R #{node_user} '#{output_table_location}'"
|
147
|
-
Ssh.run(node,chown_command,node_user)
|
148
144
|
#already populated, make sure dataset exists
|
149
145
|
Dataset.find_or_create_by_url(out_url)
|
150
146
|
else
|
151
147
|
out_string = Hive.run(hql, cluster, user)
|
152
148
|
out_string = "result\n#{out_string}"
|
153
|
-
Dataset.write_by_url(out_url,out_string,node_user)
|
149
|
+
Dataset.write_by_url(out_url,out_string,user)
|
154
150
|
end
|
155
151
|
#unslot worker
|
156
152
|
Hive.unslot_worker_by_path(stage_path)
|
@@ -442,7 +438,6 @@ module Mobilize
|
|
442
438
|
return false unless slot_id
|
443
439
|
|
444
440
|
node = Hadoop.gateway_node(cluster)
|
445
|
-
node_user = Ssh.host(node)['user']
|
446
441
|
if user and !Ssh.sudoers(node).include?(u.name)
|
447
442
|
raise "#{u.name} does not have su permissions for #{node}"
|
448
443
|
elsif user.nil? and Ssh.su_all_users(node)
|
@@ -498,7 +493,7 @@ module Mobilize
|
|
498
493
|
out_string = "result\n#{out_string}"
|
499
494
|
output_db,output_table = [Hive.output_db(cluster),stage_path.gridsafe]
|
500
495
|
out_url = "hive://#{cluster}/#{output_db}/#{output_table}"
|
501
|
-
Dataset.write_by_url(out_url,out_string,node_user)
|
496
|
+
Dataset.write_by_url(out_url,out_string,user)
|
502
497
|
out_url
|
503
498
|
end
|
504
499
|
|
data/mobilize-hive.gemspec
CHANGED
@@ -16,5 +16,5 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
|
-
gem.add_runtime_dependency "mobilize-hdfs","1.0.07"
|
19
|
+
gem.add_runtime_dependency "mobilize-hdfs","1.0.08"
|
20
20
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mobilize-hive
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.07
|
4
|
+
version: 1.0.08
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -18,7 +18,7 @@ dependencies:
|
|
18
18
|
requirements:
|
19
19
|
- - '='
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 1.0.07
|
21
|
+
version: 1.0.08
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - '='
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 1.0.07
|
29
|
+
version: 1.0.08
|
30
30
|
description: Adds hive read, write, and run support to mobilize-hdfs
|
31
31
|
email:
|
32
32
|
- cpaesleme@ngmoco.com
|