patriot-hadoop 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/init.rb +3 -0
- data/lib/patriot_hadoop.rb +4 -0
- data/lib/patriot_hadoop/command.rb +1 -0
- data/lib/patriot_hadoop/command/hive.rb +74 -0
- data/lib/patriot_hadoop/ext.rb +1 -0
- data/lib/patriot_hadoop/ext/hive.rb +49 -0
- metadata +66 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 21e992800beb77008e49114125a7509e42820106
|
4
|
+
data.tar.gz: fc571743a6fdb51e62e1b02f1ee2337e330b0f30
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 993f0ad53ac7b18c264fa11d52bd0e0ab2c7fa15b9a9b805d99bfd05985e7c49d80dc5459058ec75bfc2339ca79edf9e55180a081c2785868e800b78391f4d23
|
7
|
+
data.tar.gz: 537a5e6a57c39727a57bb8f909006fde86d6f3ad420eb78818b430e6a1ac0adc0e4d850ed4b5ee3c3b132bc1f6d468e00bc0fee212e71117cdd720ac80ae76d9
|
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "patriot_hadoop/command/hive"
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module PatriotHadoop
  module Command
    # A Patriot workflow command that executes a HiveQL query through the
    # `hive` CLI and stores the query result as a TSV file.
    #
    # Configurable attributes (via command_attr):
    #   hive_ql       : the HiveQL statement(s) to execute (required)
    #   output_prefix : path prefix for the generated .hql / .tsv files;
    #                   defaults to /tmp/<job_id> when nil
    #   exec_user     : OS user to run hive as (wrapped in sudo) -- optional
    #   props         : Hash of Hive properties, emitted as "set k=v;" prefixes
    #   udf           : UDF definition(s); a Hash (or Array of Hashes) with
    #                   'jar' and 'functions' keys -- see #_add_udfs
    #   name_suffix   : optional suffix appended to the job id
    class HiveCommand < Patriot::Command::Base
      declare_command_name :hive
      include PatriotHadoop::Ext::Hive

      # BUGFIX: :udf was read in #execute (as @udf) but never declared here,
      # so it could not be set from a job definition. Added to the list
      # (backward compatible -- existing configs are unaffected).
      command_attr :hive_ql, :output_prefix, :exec_user, :props, :name_suffix, :udf

      # @return [String] the job identifier: the command name, plus
      #   "_<name_suffix>" when a suffix is configured.
      def job_id
        id = command_name.to_s
        id = "#{id}_#{@name_suffix}" unless @name_suffix.nil?
        return id
      end

      # Writes the (decorated) HiveQL to <prefix>.hql, runs it via the
      # Ext::Hive mixin, and leaves the result in <prefix>.tsv.
      # Logs a warning and returns early when the query produced no output.
      def execute
        @logger.info "start hive"

        opt = {}
        opt[:udf]   = @udf   unless @udf.nil?
        opt[:props] = @props unless @props.nil?

        # Fall back to a per-job path under /tmp when no prefix is given.
        output_prefix    = @output_prefix.nil? ? File.join('/tmp', job_id()) : @output_prefix
        output_directory = File.dirname(output_prefix)
        FileUtils.mkdir_p(output_directory) unless Dir.exist?(output_directory)

        tmpfile = output_prefix + '.hql'
        _create_hivequery_tmpfile(@hive_ql, tmpfile, opt)

        output_file = output_prefix + '.tsv'
        execute_hivequery(tmpfile, output_file, @exec_user)

        if File.zero?(output_file)
          @logger.warn "#{@hive_ql} generated empty result"
          return
        end

        @logger.info "end hive"
      end

      # Writes +hive_ql+ to +tmpfile+, prefixed with any UDF registrations
      # (opt[:udf]) and Hive "set" properties (opt[:props]).
      def _create_hivequery_tmpfile(hive_ql, tmpfile, opt={})
        hive_ql = _add_udfs(hive_ql, opt[:udf]) if opt.has_key?(:udf)
        hive_ql = "#{_set_hive_property_prefix(opt[:props])}#{hive_ql}" if opt.has_key?(:props)
        File.write(tmpfile, hive_ql)
      end

      # @param props [Hash, nil] Hive properties
      # @return [String] "set k=v;" for each entry, or "" when props is nil
      def _set_hive_property_prefix(props={})
        return "" if props.nil?
        return props.map{|k,v| "set #{k}=#{v};"}.join
      end

      # Prepends "add jar"/"create temporary function" statements for each
      # UDF definition. +udfs+ may be a single Hash or an Array of Hashes,
      # each with a 'jar' path and one or more 'functions' entries
      # ({'name' => ..., 'class' => ...}).
      def _add_udfs(hive_ql, udfs)
        return hive_ql if udfs.nil?
        register = ""
        udfs = [udfs] unless udfs.is_a?(Array)
        udfs.each do |udf|
          register += "add jar #{udf['jar']};"
          functions = udf['functions']
          functions = [functions] unless functions.is_a?(Array)
          functions.each do |f|
            register += "create temporary function #{f['name']} as \"#{f['class']}\";"
          end
        end
        return "#{register}#{hive_ql}"
      end

    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require "patriot_hadoop/ext/hive"
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module PatriotHadoop
  module Ext
    # Mixin that runs HiveQL files through the `hive` CLI and captures the
    # query result / error output.
    module Hive

      # Maximum number of stderr bytes included in a HiveException message;
      # anything beyond this is truncated (a note records how many bytes).
      HIVE_MAX_ERROR_MSG_SIZE = 512

      include Patriot::Util::Logger
      include Patriot::Util::DBClient
      include Patriot::Util::System

      def self.included(cls)
        cls.send(:include, Patriot::Util::System)
      end

      # BUGFIX: subclass StandardError, not Exception. Raising a direct
      # Exception subclass escapes plain `rescue` clauses and can interfere
      # with signal/exit handling; StandardError is the conventional base.
      class HiveException < StandardError; end

      # Executes the HiveQL in +hql_file+, optionally as +user+ via sudo,
      # and (when +output_file+ is given) moves the captured stdout there.
      #
      # @param hql_file    [String] path to the .hql file to run
      # @param output_file [String, nil] destination for the query result
      # @param user        [String, nil] OS user to sudo to
      # @raise [HiveException] when the username is malformed or hive fails
      def execute_hivequery(hql_file, output_file=nil, user=nil)
        command = "hive -f \"#{hql_file}\""
        unless user.nil?
          # SECURITY FIX: anchor with \A..\z instead of ^..$. The ^/$ anchors
          # match per *line*, so a value like "valid\n; rm -rf /" passed the
          # old check and was interpolated into the sudo shell command
          # (command injection). \A..\z validates the whole string.
          if user !~ /\A[a-z_][a-z0-9_]{0,30}\z/
            raise HiveException, "Invalid username"
          end
          command = "sudo -u #{user} #{command}"
        end
        return _execute_hivequery_internal(command, output_file)
      end

      # Runs +command+ via the System mixin. On failure, raises a
      # HiveException carrying the tail of stderr (truncated to
      # HIVE_MAX_ERROR_MSG_SIZE bytes). On success, renames the captured
      # stdout file to +output_file+ unless it is nil.
      def _execute_hivequery_internal(command, output_file)
        so = execute_command(command) do |status, so, se|
          # The block is invoked on failure with paths to the stdout (so)
          # and stderr (se) capture files.
          err_size     = File.stat(se).size
          err_msg      = ""
          max_err_size = HIVE_MAX_ERROR_MSG_SIZE
          File.open(se) do |f|
            if err_size > max_err_size
              # Keep only the last max_err_size bytes of stderr.
              f.seek(-1 * max_err_size, IO::SEEK_END)
              err_msg = "\n(#{err_size - max_err_size} bytes are truncated)"
            end
            err_msg = "#{f.read}#{err_msg}"
          end
          raise HiveException, "#{command}\n#{err_msg}"
        end
        File.rename(so, output_file) unless output_file.nil?
      end

    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: patriot-hadoop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Hitoshi Tsuda
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-11-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: patriot-workflow-scheduler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.7'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.7'
|
27
|
+
description: a plugin for Patriot Workflow Scheduler, which deal with Hadoop-related
|
28
|
+
softwares.
|
29
|
+
email:
|
30
|
+
- tsuda_hitoshi@cyberagent.co.jp
|
31
|
+
executables: []
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- lib/patriot_hadoop/command/hive.rb
|
36
|
+
- lib/patriot_hadoop/command.rb
|
37
|
+
- lib/patriot_hadoop/ext/hive.rb
|
38
|
+
- lib/patriot_hadoop/ext.rb
|
39
|
+
- lib/patriot_hadoop.rb
|
40
|
+
- init.rb
|
41
|
+
homepage: https://github.com/CyberAgent/patriot-workflow-scheduler
|
42
|
+
licenses:
|
43
|
+
- Apache License, Version 2.0
|
44
|
+
metadata: {}
|
45
|
+
post_install_message:
|
46
|
+
rdoc_options: []
|
47
|
+
require_paths:
|
48
|
+
- lib
|
49
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
requirements: []
|
60
|
+
rubyforge_project: patriot-hadoop
|
61
|
+
rubygems_version: 2.0.14
|
62
|
+
signing_key:
|
63
|
+
specification_version: 4
|
64
|
+
summary: Hadoop plugin for Patriot Workflow Scheduler
|
65
|
+
test_files: []
|
66
|
+
has_rdoc:
|