patriot-hadoop 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/patriot_hadoop/command/hive.rb +15 -2
- data/lib/patriot_hadoop/ext/hive.rb +12 -3
- metadata +5 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3a75278502a8dd0d24ffca6600f006728ca4b97e
|
4
|
+
data.tar.gz: be5d6721503438d426a33b466bd9ccd7e6385c03
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 60b66f64532ad327acfa9a5804b1085788f4f07e2ac87b0cb8d2c210553287c8296d117add0ada7e60fbd4859f13a56109a27b3678e16f9667b8c48acaa037f7
|
7
|
+
data.tar.gz: d5ba491a269f4d55789c090fdb8eb88cd5c57a9b13e6cd6ff118c3c90b9781ede959db5720519733860d7c96fd2daa943933651c1423a924717a0d94574c3cb1
|
@@ -4,7 +4,7 @@ module PatriotHadoop
|
|
4
4
|
declare_command_name :hive
|
5
5
|
include PatriotHadoop::Ext::Hive
|
6
6
|
|
7
|
-
command_attr :hive_ql, :output_prefix, :exec_user, :props, :name_suffix
|
7
|
+
command_attr :hive_ql, :output_prefix, :compression, :exec_user, :props, :name_suffix
|
8
8
|
|
9
9
|
def job_id
|
10
10
|
job_id = "#{command_name}"
|
@@ -28,8 +28,8 @@ module PatriotHadoop
|
|
28
28
|
|
29
29
|
tmpfile = output_prefix + '.hql'
|
30
30
|
_create_hivequery_tmpfile(@hive_ql, tmpfile, opt)
|
31
|
+
output_file = _create_output_filename(output_prefix, @compression)
|
31
32
|
|
32
|
-
output_file = output_prefix + '.tsv'
|
33
33
|
execute_hivequery(tmpfile, output_file, @exec_user)
|
34
34
|
|
35
35
|
if File.zero?(output_file)
|
@@ -41,6 +41,19 @@ module PatriotHadoop
|
|
41
41
|
end
|
42
42
|
|
43
43
|
|
44
|
+
# true / 'gzip' / 'bzip2' are available.
|
45
|
+
def _create_output_filename(output_prefix, compression)
|
46
|
+
output_file = output_prefix + '.tsv'
|
47
|
+
case compression
|
48
|
+
when true, 'gzip'
|
49
|
+
output_file += '.gz'
|
50
|
+
when 'bzip2'
|
51
|
+
output_file += '.bz2'
|
52
|
+
end
|
53
|
+
return output_file
|
54
|
+
end
|
55
|
+
|
56
|
+
|
44
57
|
def _create_hivequery_tmpfile(hive_ql, tmpfile, opt={})
|
45
58
|
hive_ql = _add_udfs(hive_ql, opt[:udf]) if opt.has_key?(:udf)
|
46
59
|
hive_ql = "#{_set_hive_property_prefix(opt[:props])}#{hive_ql}" if opt.has_key?(:props)
|
@@ -16,7 +16,7 @@ module PatriotHadoop
|
|
16
16
|
|
17
17
|
|
18
18
|
def execute_hivequery(hql_file, output_file=nil, user=nil)
|
19
|
-
command = "hive -f \"#{hql_file}\""
|
19
|
+
command = "hive -f \"#{hql_file}\"#{_compress_option(File.extname(output_file))}"
|
20
20
|
unless user.nil?
|
21
21
|
if user !~ /^[a-z_][a-z0-9_]{0,30}$/
|
22
22
|
raise HiveException, "Invalid username"
|
@@ -27,8 +27,17 @@ module PatriotHadoop
|
|
27
27
|
end
|
28
28
|
|
29
29
|
|
30
|
+
def _compress_option(extension)
|
31
|
+
return case extension
|
32
|
+
when '.gz' then ' | gzip --stdout --force'
|
33
|
+
when '.bz2' then ' | bzip2 --stdout --force'
|
34
|
+
else ''
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
|
30
39
|
def _execute_hivequery_internal(command, output_file)
|
31
|
-
|
40
|
+
res = execute_command(command) do |status, so, se|
|
32
41
|
err_size = File.stat(se).size
|
33
42
|
err_msg = ""
|
34
43
|
max_err_size = HIVE_MAX_ERROR_MSG_SIZE
|
@@ -41,7 +50,7 @@ module PatriotHadoop
|
|
41
50
|
end
|
42
51
|
raise HiveException, "#{command}\n#{err_msg}"
|
43
52
|
end
|
44
|
-
File.rename(
|
53
|
+
File.rename(res, output_file) unless output_file.nil?
|
45
54
|
end
|
46
55
|
|
47
56
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: patriot-hadoop
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hitoshi Tsuda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-05-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: patriot-workflow-scheduler
|
@@ -38,9 +38,9 @@ files:
|
|
38
38
|
- lib/patriot_hadoop/ext.rb
|
39
39
|
- lib/patriot_hadoop.rb
|
40
40
|
- init.rb
|
41
|
-
homepage: https://github.com/CyberAgent/patriot-workflow-scheduler
|
41
|
+
homepage: https://github.com/CyberAgent/patriot-workflow-scheduler/tree/master/plugins/patriot-hadoop
|
42
42
|
licenses:
|
43
|
-
- Apache
|
43
|
+
- Apache-2.0
|
44
44
|
metadata: {}
|
45
45
|
post_install_message:
|
46
46
|
rdoc_options: []
|
@@ -58,9 +58,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
58
|
version: '0'
|
59
59
|
requirements: []
|
60
60
|
rubyforge_project: patriot-hadoop
|
61
|
-
rubygems_version: 2.0.14
|
61
|
+
rubygems_version: 2.0.14.1
|
62
62
|
signing_key:
|
63
63
|
specification_version: 4
|
64
64
|
summary: Hadoop plugin for Patriot Workflow Scheduler
|
65
65
|
test_files: []
|
66
|
-
has_rdoc:
|