patriot-hadoop 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/init.rb +3 -0
- data/lib/patriot_hadoop.rb +4 -0
- data/lib/patriot_hadoop/command.rb +1 -0
- data/lib/patriot_hadoop/command/hive.rb +74 -0
- data/lib/patriot_hadoop/ext.rb +1 -0
- data/lib/patriot_hadoop/ext/hive.rb +49 -0
- metadata +66 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 21e992800beb77008e49114125a7509e42820106
|
4
|
+
data.tar.gz: fc571743a6fdb51e62e1b02f1ee2337e330b0f30
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 993f0ad53ac7b18c264fa11d52bd0e0ab2c7fa15b9a9b805d99bfd05985e7c49d80dc5459058ec75bfc2339ca79edf9e55180a081c2785868e800b78391f4d23
|
7
|
+
data.tar.gz: 537a5e6a57c39727a57bb8f909006fde86d6f3ad420eb78818b430e6a1ac0adc0e4d850ed4b5ee3c3b132bc1f6d468e00bc0fee212e71117cdd720ac80ae76d9
|
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "patriot_hadoop/command/hive"
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module PatriotHadoop
  module Command
    # Patriot command that writes a HiveQL statement to a file, runs it through
    # PatriotHadoop::Ext::Hive#execute_hivequery and leaves the result in a
    # ".tsv" file next to the generated ".hql" file.
    class HiveCommand < Patriot::Command::Base
      declare_command_name :hive
      include PatriotHadoop::Ext::Hive

      # NOTE(review): #execute also reads @udf, which is not declared here.
      # Confirm whether :udf is meant to be a command attribute.
      command_attr :hive_ql, :output_prefix, :exec_user, :props, :name_suffix

      # Builds the identifier of this job.
      #
      # @return [String] the command name, followed by "_<name_suffix>" when a
      #   name suffix is set
      def job_id
        return command_name.to_s if @name_suffix.nil?

        "#{command_name}_#{@name_suffix}"
      end

      # Writes the query to "<output_prefix>.hql", executes it and stores the
      # output in "<output_prefix>.tsv". When no output prefix is configured,
      # "/tmp/<job_id>" is used.
      #
      # Logs a warning and returns early (without the "end hive" log line) when
      # the result file is empty.
      def execute
        @logger.info "start hive"

        opt = {}
        opt[:udf] = @udf unless @udf.nil?
        opt[:props] = @props unless @props.nil?

        output_prefix = @output_prefix.nil? ? File.join('/tmp', job_id) : @output_prefix
        # mkdir_p is a no-op for an existing directory, so no prior existence
        # check is needed (and none can race with a concurrent creator).
        FileUtils.mkdir_p(File.dirname(output_prefix))

        tmpfile = "#{output_prefix}.hql"
        _create_hivequery_tmpfile(@hive_ql, tmpfile, opt)

        output_file = "#{output_prefix}.tsv"
        execute_hivequery(tmpfile, output_file, @exec_user)

        if File.zero?(output_file)
          @logger.warn "#{@hive_ql} generated empty result"
          return
        end

        @logger.info "end hive"
      end

      # Writes the final HiveQL script to +tmpfile+.
      #
      # UDF registrations (opt[:udf]) are prepended to the query first, then
      # the "set k=v;" statements (opt[:props]) are prepended in front of
      # those, so the properties come first in the written file.
      #
      # @param hive_ql [String] the query body
      # @param tmpfile [String] path of the file to write
      # @param opt [Hash] optional :udf and :props entries
      def _create_hivequery_tmpfile(hive_ql, tmpfile, opt={})
        hive_ql = _add_udfs(hive_ql, opt[:udf]) if opt.key?(:udf)
        hive_ql = "#{_set_hive_property_prefix(opt[:props])}#{hive_ql}" if opt.key?(:props)
        File.write(tmpfile, hive_ql)
      end

      # Renders Hive properties as a sequence of "set key=value;" statements.
      #
      # @param props [Hash, nil] property names mapped to values
      # @return [String] the concatenated statements, or "" when props is nil
      def _set_hive_property_prefix(props={})
        return "" if props.nil?

        props.map { |key, value| "set #{key}=#{value};" }.join
      end

      # Prepends "add jar" / "create temporary function" statements to a query.
      #
      # @param hive_ql [String] the query body
      # @param udfs [Hash, Array<Hash>, nil] one UDF description or a list of
      #   them; each is read through the 'jar' and 'functions' keys, and each
      #   function through 'name' and 'class'
      # @return [String] the query with the registration statements in front;
      #   the unchanged query when udfs is nil
      def _add_udfs(hive_ql, udfs)
        return hive_ql if udfs.nil?

        # Array() is deliberately avoided: it would turn a single Hash into a
        # list of key/value pairs instead of wrapping it.
        udfs = [udfs] unless udfs.is_a?(Array)
        register = udfs.map do |udf|
          functions = udf['functions']
          # compact drops a missing 'functions' entry, so a jar-only UDF
          # description registers the jar without failing on nil.
          functions = [functions].compact unless functions.is_a?(Array)
          creations = functions.map do |f|
            "create temporary function #{f['name']} as \"#{f['class']}\";"
          end
          "add jar #{udf['jar']};#{creations.join}"
        end.join
        "#{register}#{hive_ql}"
      end

    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require "patriot_hadoop/ext/hive"
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module PatriotHadoop
  module Ext
    # Mixin that runs a HiveQL file through the "hive" command line client,
    # optionally as another user via sudo.
    module Hive

      # Maximum number of trailing stderr bytes quoted in a HiveException.
      HIVE_MAX_ERROR_MSG_SIZE = 512

      include Patriot::Util::Logger
      include Patriot::Util::DBClient
      include Patriot::Util::System

      # Also mixes Patriot::Util::System directly into the including class.
      def self.included(cls)
        cls.send(:include, Patriot::Util::System)
      end

      # Raised for an invalid user name or from the block given to
      # execute_command.
      # NOTE(review): this derives from Exception, so a bare `rescue` will not
      # catch it; left unchanged because callers may depend on that.
      class HiveException < Exception; end

      # Executes the given HiveQL file.
      #
      # @param hql_file [String] path of the file passed to "hive -f"
      # @param output_file [String, nil] where the command's output file is
      #   moved to; nothing is moved when nil
      # @param user [String, nil] user to run hive as through "sudo -u"
      # @return whatever _execute_hivequery_internal returns
      # @raise [HiveException] when user is not a valid user name
      def execute_hivequery(hql_file, output_file=nil, user=nil)
        command = "hive -f \"#{hql_file}\""
        unless user.nil?
          # \A and \z anchor the whole string. With ^ and $ a multi-line value
          # such as "hive\nrm -rf /" would pass, because one of its lines
          # matches, and would then be interpolated into the shell command.
          if user !~ /\A[a-z_][a-z0-9_]{0,30}\z/
            raise HiveException, "Invalid username"
          end
          command = "sudo -u #{user} #{command}"
        end
        _execute_hivequery_internal(command, output_file)
      end

      # Runs +command+ through execute_command and moves its output file to
      # +output_file+ unless that is nil.
      #
      # The block handed to execute_command always raises a HiveException
      # carrying the command and the tail of the stderr file (at most
      # HIVE_MAX_ERROR_MSG_SIZE bytes, plus a note on how many bytes were cut).
      # Presumably execute_command only yields when the command failed; verify
      # against Patriot::Util::System.
      #
      # @param command [String] the shell command to run
      # @param output_file [String, nil] destination of the output file
      # @raise [HiveException] when the block is invoked
      def _execute_hivequery_internal(command, output_file)
        stdout_path = execute_command(command) do |_status, _out_path, err_path|
          err_size = File.stat(err_path).size
          limit = HIVE_MAX_ERROR_MSG_SIZE
          truncation_note = ""
          err_tail = File.open(err_path) do |f|
            if err_size > limit
              # Skip everything but the last `limit` bytes of stderr.
              f.seek(-limit, IO::SEEK_END)
              truncation_note = "\n(#{err_size - limit} bytes are truncated)"
            end
            f.read
          end
          raise HiveException, "#{command}\n#{err_tail}#{truncation_note}"
        end
        File.rename(stdout_path, output_file) unless output_file.nil?
      end

    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: patriot-hadoop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Hitoshi Tsuda
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-11-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: patriot-workflow-scheduler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.7'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.7'
|
27
|
+
description: a plugin for Patriot Workflow Scheduler, which deal with Hadoop-related
|
28
|
+
softwares.
|
29
|
+
email:
|
30
|
+
- tsuda_hitoshi@cyberagent.co.jp
|
31
|
+
executables: []
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- lib/patriot_hadoop/command/hive.rb
|
36
|
+
- lib/patriot_hadoop/command.rb
|
37
|
+
- lib/patriot_hadoop/ext/hive.rb
|
38
|
+
- lib/patriot_hadoop/ext.rb
|
39
|
+
- lib/patriot_hadoop.rb
|
40
|
+
- init.rb
|
41
|
+
homepage: https://github.com/CyberAgent/patriot-workflow-scheduler
|
42
|
+
licenses:
|
43
|
+
- Apache License, Version 2.0
|
44
|
+
metadata: {}
|
45
|
+
post_install_message:
|
46
|
+
rdoc_options: []
|
47
|
+
require_paths:
|
48
|
+
- lib
|
49
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
requirements: []
|
60
|
+
rubyforge_project: patriot-hadoop
|
61
|
+
rubygems_version: 2.0.14
|
62
|
+
signing_key:
|
63
|
+
specification_version: 4
|
64
|
+
summary: Hadoop plugin for Patriot Workflow Scheduler
|
65
|
+
test_files: []
|
66
|
+
has_rdoc:
|