patriot-hadoop 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 21e992800beb77008e49114125a7509e42820106
4
- data.tar.gz: fc571743a6fdb51e62e1b02f1ee2337e330b0f30
3
+ metadata.gz: 3a75278502a8dd0d24ffca6600f006728ca4b97e
4
+ data.tar.gz: be5d6721503438d426a33b466bd9ccd7e6385c03
5
5
  SHA512:
6
- metadata.gz: 993f0ad53ac7b18c264fa11d52bd0e0ab2c7fa15b9a9b805d99bfd05985e7c49d80dc5459058ec75bfc2339ca79edf9e55180a081c2785868e800b78391f4d23
7
- data.tar.gz: 537a5e6a57c39727a57bb8f909006fde86d6f3ad420eb78818b430e6a1ac0adc0e4d850ed4b5ee3c3b132bc1f6d468e00bc0fee212e71117cdd720ac80ae76d9
6
+ metadata.gz: 60b66f64532ad327acfa9a5804b1085788f4f07e2ac87b0cb8d2c210553287c8296d117add0ada7e60fbd4859f13a56109a27b3678e16f9667b8c48acaa037f7
7
+ data.tar.gz: d5ba491a269f4d55789c090fdb8eb88cd5c57a9b13e6cd6ff118c3c90b9781ede959db5720519733860d7c96fd2daa943933651c1423a924717a0d94574c3cb1
@@ -4,7 +4,7 @@ module PatriotHadoop
4
4
  declare_command_name :hive
5
5
  include PatriotHadoop::Ext::Hive
6
6
 
7
- command_attr :hive_ql, :output_prefix, :exec_user, :props, :name_suffix
7
+ command_attr :hive_ql, :output_prefix, :compression, :exec_user, :props, :name_suffix
8
8
 
9
9
  def job_id
10
10
  job_id = "#{command_name}"
@@ -28,8 +28,8 @@ module PatriotHadoop
28
28
 
29
29
  tmpfile = output_prefix + '.hql'
30
30
  _create_hivequery_tmpfile(@hive_ql, tmpfile, opt)
31
+ output_file = _create_output_filename(output_prefix, @compression)
31
32
 
32
- output_file = output_prefix + '.tsv'
33
33
  execute_hivequery(tmpfile, output_file, @exec_user)
34
34
 
35
35
  if File.zero?(output_file)
@@ -41,6 +41,19 @@ module PatriotHadoop
41
41
  end
42
42
 
43
43
 
44
# Builds the name of the file that receives the Hive query result.
#
# The name is always `output_prefix + '.tsv'`; a compression extension is
# appended when a supported compression is requested:
#
# * true, 'gzip' -> '.gz'
# * 'bzip2'      -> '.bz2'
#
# Symbols and differently cased strings (:gzip, 'GZIP', :bzip2, ...) are
# accepted as well. Any other value (nil, false, unknown names) leaves the
# plain '.tsv' name untouched.
#
# @param output_prefix [String] path prefix of the output file
# @param compression [true, String, Symbol, nil] requested compression
# @return [String] newly built output file name (output_prefix is not mutated)
def _create_output_filename(output_prefix, compression)
  output_file = output_prefix + '.tsv'

  # `true` is shorthand for gzip; everything else is compared by its
  # lower-cased string form so that symbols and upper-case names match too.
  codec = compression == true ? 'gzip' : compression.to_s.downcase

  case codec
  when 'gzip'  then output_file + '.gz'
  when 'bzip2' then output_file + '.bz2'
  else output_file
  end
end
55
+
56
+
44
57
  def _create_hivequery_tmpfile(hive_ql, tmpfile, opt={})
45
58
  hive_ql = _add_udfs(hive_ql, opt[:udf]) if opt.has_key?(:udf)
46
59
  hive_ql = "#{_set_hive_property_prefix(opt[:props])}#{hive_ql}" if opt.has_key?(:props)
@@ -16,7 +16,7 @@ module PatriotHadoop
16
16
 
17
17
 
18
18
  def execute_hivequery(hql_file, output_file=nil, user=nil)
19
- command = "hive -f \"#{hql_file}\""
19
+ command = "hive -f \"#{hql_file}\"#{_compress_option(File.extname(output_file))}"
20
20
  unless user.nil?
21
21
  if user !~ /^[a-z_][a-z0-9_]{0,30}$/
22
22
  raise HiveException, "Invalid username"
@@ -27,8 +27,17 @@ module PatriotHadoop
27
27
  end
28
28
 
29
29
 
30
# Returns the shell pipeline suffix that compresses the query output
# according to the extension of the output file.
#
# The extension is matched case-insensitively, so '.GZ' is treated like
# '.gz'. nil and unrecognized extensions yield an empty string, i.e. the
# command is left unchanged.
#
# NOTE(review): the returned suffix pipes the command into gzip/bzip2. If
# the command is run by a POSIX shell without `pipefail`, the pipeline's
# exit status is that of the compressor, which could hide a failure of the
# preceding command -- confirm how the caller detects failures.
#
# @param extension [String, nil] file extension including the leading dot
# @return [String] pipeline suffix starting with ' | ', or '' for no compression
def _compress_option(extension)
  case extension.to_s.downcase
  when '.gz'  then ' | gzip --stdout --force'
  when '.bz2' then ' | bzip2 --stdout --force'
  else ''
  end
end
37
+
38
+
30
39
  def _execute_hivequery_internal(command, output_file)
31
- so = execute_command(command) do |status, so, se|
40
+ res = execute_command(command) do |status, so, se|
32
41
  err_size = File.stat(se).size
33
42
  err_msg = ""
34
43
  max_err_size = HIVE_MAX_ERROR_MSG_SIZE
@@ -41,7 +50,7 @@ module PatriotHadoop
41
50
  end
42
51
  raise HiveException, "#{command}\n#{err_msg}"
43
52
  end
44
- File.rename(so, output_file) unless output_file.nil?
53
+ File.rename(res, output_file) unless output_file.nil?
45
54
  end
46
55
 
47
56
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patriot-hadoop
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hitoshi Tsuda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-19 00:00:00.000000000 Z
11
+ date: 2018-05-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: patriot-workflow-scheduler
@@ -38,9 +38,9 @@ files:
38
38
  - lib/patriot_hadoop/ext.rb
39
39
  - lib/patriot_hadoop.rb
40
40
  - init.rb
41
- homepage: https://github.com/CyberAgent/patriot-workflow-scheduler
41
+ homepage: https://github.com/CyberAgent/patriot-workflow-scheduler/tree/master/plugins/patriot-hadoop
42
42
  licenses:
43
- - Apache License, Version 2.0
43
+ - Apache-2.0
44
44
  metadata: {}
45
45
  post_install_message:
46
46
  rdoc_options: []
@@ -58,9 +58,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
58
58
  version: '0'
59
59
  requirements: []
60
60
  rubyforge_project: patriot-hadoop
61
- rubygems_version: 2.0.14
61
+ rubygems_version: 2.0.14.1
62
62
  signing_key:
63
63
  specification_version: 4
64
64
  summary: Hadoop plugin for Patriot Workflow Scheduler
65
65
  test_files: []
66
- has_rdoc: