patriot-hadoop 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 21e992800beb77008e49114125a7509e42820106
4
- data.tar.gz: fc571743a6fdb51e62e1b02f1ee2337e330b0f30
3
+ metadata.gz: 3a75278502a8dd0d24ffca6600f006728ca4b97e
4
+ data.tar.gz: be5d6721503438d426a33b466bd9ccd7e6385c03
5
5
  SHA512:
6
- metadata.gz: 993f0ad53ac7b18c264fa11d52bd0e0ab2c7fa15b9a9b805d99bfd05985e7c49d80dc5459058ec75bfc2339ca79edf9e55180a081c2785868e800b78391f4d23
7
- data.tar.gz: 537a5e6a57c39727a57bb8f909006fde86d6f3ad420eb78818b430e6a1ac0adc0e4d850ed4b5ee3c3b132bc1f6d468e00bc0fee212e71117cdd720ac80ae76d9
6
+ metadata.gz: 60b66f64532ad327acfa9a5804b1085788f4f07e2ac87b0cb8d2c210553287c8296d117add0ada7e60fbd4859f13a56109a27b3678e16f9667b8c48acaa037f7
7
+ data.tar.gz: d5ba491a269f4d55789c090fdb8eb88cd5c57a9b13e6cd6ff118c3c90b9781ede959db5720519733860d7c96fd2daa943933651c1423a924717a0d94574c3cb1
@@ -4,7 +4,7 @@ module PatriotHadoop
4
4
  declare_command_name :hive
5
5
  include PatriotHadoop::Ext::Hive
6
6
 
7
- command_attr :hive_ql, :output_prefix, :exec_user, :props, :name_suffix
7
+ command_attr :hive_ql, :output_prefix, :compression, :exec_user, :props, :name_suffix
8
8
 
9
9
  def job_id
10
10
  job_id = "#{command_name}"
@@ -28,8 +28,8 @@ module PatriotHadoop
28
28
 
29
29
  tmpfile = output_prefix + '.hql'
30
30
  _create_hivequery_tmpfile(@hive_ql, tmpfile, opt)
31
+ output_file = _create_output_filename(output_prefix, @compression)
31
32
 
32
- output_file = output_prefix + '.tsv'
33
33
  execute_hivequery(tmpfile, output_file, @exec_user)
34
34
 
35
35
  if File.zero?(output_file)
@@ -41,6 +41,19 @@ module PatriotHadoop
41
41
  end
42
42
 
43
43
 
44
+ # compression: true or 'gzip' appends '.gz', 'bzip2' appends '.bz2'; any other value leaves the name as output_prefix + '.tsv'.
45
+ def _create_output_filename(output_prefix, compression)
46
+ output_file = output_prefix + '.tsv'
47
+ case compression
48
+ when true, 'gzip'
49
+ output_file += '.gz'
50
+ when 'bzip2'
51
+ output_file += '.bz2'
52
+ end
53
+ return output_file
54
+ end
55
+
56
+
44
57
  def _create_hivequery_tmpfile(hive_ql, tmpfile, opt={})
45
58
  hive_ql = _add_udfs(hive_ql, opt[:udf]) if opt.has_key?(:udf)
46
59
  hive_ql = "#{_set_hive_property_prefix(opt[:props])}#{hive_ql}" if opt.has_key?(:props)
@@ -16,7 +16,7 @@ module PatriotHadoop
16
16
 
17
17
 
18
18
  def execute_hivequery(hql_file, output_file=nil, user=nil)
19
- command = "hive -f \"#{hql_file}\""
19
+ command = "hive -f \"#{hql_file}\"#{_compress_option(File.extname(output_file))}"
20
20
  unless user.nil?
21
21
  if user !~ /^[a-z_][a-z0-9_]{0,30}$/
22
22
  raise HiveException, "Invalid username"
@@ -27,8 +27,17 @@ module PatriotHadoop
27
27
  end
28
28
 
29
29
 
30
+ def _compress_option(extension)
31
+ return case extension
32
+ when '.gz' then ' | gzip --stdout --force'
33
+ when '.bz2' then ' | bzip2 --stdout --force'
34
+ else ''
35
+ end
36
+ end
37
+
38
+
30
39
  def _execute_hivequery_internal(command, output_file)
31
- so = execute_command(command) do |status, so, se|
40
+ res = execute_command(command) do |status, so, se|
32
41
  err_size = File.stat(se).size
33
42
  err_msg = ""
34
43
  max_err_size = HIVE_MAX_ERROR_MSG_SIZE
@@ -41,7 +50,7 @@ module PatriotHadoop
41
50
  end
42
51
  raise HiveException, "#{command}\n#{err_msg}"
43
52
  end
44
- File.rename(so, output_file) unless output_file.nil?
53
+ File.rename(res, output_file) unless output_file.nil?
45
54
  end
46
55
 
47
56
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: patriot-hadoop
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hitoshi Tsuda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-19 00:00:00.000000000 Z
11
+ date: 2018-05-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: patriot-workflow-scheduler
@@ -38,9 +38,9 @@ files:
38
38
  - lib/patriot_hadoop/ext.rb
39
39
  - lib/patriot_hadoop.rb
40
40
  - init.rb
41
- homepage: https://github.com/CyberAgent/patriot-workflow-scheduler
41
+ homepage: https://github.com/CyberAgent/patriot-workflow-scheduler/tree/master/plugins/patriot-hadoop
42
42
  licenses:
43
- - Apache License, Version 2.0
43
+ - Apache-2.0
44
44
  metadata: {}
45
45
  post_install_message:
46
46
  rdoc_options: []
@@ -58,9 +58,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
58
58
  version: '0'
59
59
  requirements: []
60
60
  rubyforge_project: patriot-hadoop
61
- rubygems_version: 2.0.14
61
+ rubygems_version: 2.0.14.1
62
62
  signing_key:
63
63
  specification_version: 4
64
64
  summary: Hadoop plugin for Patriot Workflow Scheduler
65
65
  test_files: []
66
- has_rdoc: