patriot-hadoop 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 21e992800beb77008e49114125a7509e42820106
4
+ data.tar.gz: fc571743a6fdb51e62e1b02f1ee2337e330b0f30
5
+ SHA512:
6
+ metadata.gz: 993f0ad53ac7b18c264fa11d52bd0e0ab2c7fa15b9a9b805d99bfd05985e7c49d80dc5459058ec75bfc2339ca79edf9e55180a081c2785868e800b78391f4d23
7
+ data.tar.gz: 537a5e6a57c39727a57bb8f909006fde86d6f3ad420eb78818b430e6a1ac0adc0e4d850ed4b5ee3c3b132bc1f6d468e00bc0fee212e71117cdd720ac80ae76d9
data/init.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'patriot'
2
+ require 'patriot_hadoop'
3
+
@@ -0,0 +1,4 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'patriot_hadoop/ext'
3
+ require 'patriot_hadoop/command'
4
+
@@ -0,0 +1 @@
1
+ require "patriot_hadoop/command/hive"
@@ -0,0 +1,74 @@
1
+ module PatriotHadoop
2
+ module Command
3
+ class HiveCommand < Patriot::Command::Base
4
+ declare_command_name :hive
5
+ include PatriotHadoop::Ext::Hive
6
+
7
+ command_attr :hive_ql, :output_prefix, :exec_user, :props, :name_suffix
8
+
9
# Builds the identifier of this job.
#
# The identifier is the command name; when @name_suffix is set, it is
# appended after an underscore ("<command_name>_<name_suffix>").
#
# @return [String] the job identifier
def job_id
  base = "#{command_name}"
  return base if @name_suffix.nil?

  "#{base}_#{@name_suffix}"
end
14
+
15
+
16
# Runs the configured Hive query.
#
# The query in @hive_ql is written to "<prefix>.hql" (together with any
# UDF registrations and property settings), executed through
# execute_hivequery as @exec_user, and its result is placed in
# "<prefix>.tsv". The prefix is @output_prefix, or "/tmp/<job_id>" when
# no prefix was given. A warning is logged when the result file is empty.
#
# NOTE(review): @udf is read here but is not among the attributes listed
# in this class's command_attr declaration -- confirm where it is assigned.
def execute
  @logger.info "start hive"

  # Pass along only the optional settings that were actually supplied.
  query_options = { :udf => @udf, :props => @props }.reject { |_, value| value.nil? }

  prefix = @output_prefix.nil? ? File.join('/tmp', job_id) : @output_prefix
  parent_dir = File.dirname(prefix)
  FileUtils.mkdir_p(parent_dir) unless Dir.exist?(parent_dir)

  hql_path = prefix + '.hql'
  _create_hivequery_tmpfile(@hive_ql, hql_path, query_options)

  tsv_path = prefix + '.tsv'
  execute_hivequery(hql_path, tsv_path, @exec_user)

  if File.zero?(tsv_path)
    @logger.warn "#{@hive_ql} generated empty result"
    return
  end

  @logger.info "end hive"
end
42
+
43
+
44
# Writes the HiveQL script that will be handed to the hive CLI.
#
# When opt has a :udf entry, UDF registration statements are prepended to
# the query; when it has a :props entry, "set" statements are prepended in
# front of that.
#
# @param hive_ql [String] the query text
# @param tmpfile [String] path of the script file to write
# @param opt [Hash] optional :udf and :props settings
# @return [Integer] the value returned by File.write
def _create_hivequery_tmpfile(hive_ql, tmpfile, opt={})
  script = opt.key?(:udf) ? _add_udfs(hive_ql, opt[:udf]) : hive_ql
  if opt.key?(:props)
    settings = _set_hive_property_prefix(opt[:props])
    script = "#{settings}#{script}"
  end
  File.write(tmpfile, script)
end
49
+
50
+
51
# Renders property settings as a run of "set <key>=<value>;" statements.
#
# @param props [Hash, nil] property names mapped to their values
# @return [String] the concatenated statements; "" when props is nil
def _set_hive_property_prefix(props={})
  if props.nil?
    ""
  else
    props.map { |name, value| "set #{name}=#{value};" }.join
  end
end
55
+
56
+
57
# Prepends UDF registration statements to a query.
#
# For every UDF entry an "add jar <jar>;" statement is emitted, followed by
# one "create temporary function <name> as "<class>";" statement per
# function of that entry.
#
# @param hive_ql [String] the query text
# @param udfs [Hash, Array<Hash>, nil] one UDF entry or a list of entries;
#   each entry is read through the keys 'jar' and 'functions', and each
#   function through the keys 'name' and 'class'
# @return [String] the registration statements followed by hive_ql, or
#   hive_ql itself when udfs is nil
def _add_udfs(hive_ql, udfs)
  return hive_ql if udfs.nil?

  # Kernel#Array would split a single Hash entry into key/value pairs, so
  # the scalar-to-list wrapping is done by hand.
  udfs = [udfs] unless udfs.is_a?(Array)

  statements = udfs.flat_map do |udf|
    functions = udf['functions']
    functions = [functions] unless functions.is_a?(Array)
    # compact: an entry without 'functions' only registers its jar instead
    # of failing on nil.
    creations = functions.compact.map do |function|
      "create temporary function #{function['name']} as \"#{function['class']}\";"
    end
    ["add jar #{udf['jar']};"] + creations
  end

  "#{statements.join}#{hive_ql}"
end
71
+
72
+ end
73
+ end
74
+ end
@@ -0,0 +1 @@
1
+ require "patriot_hadoop/ext/hive"
@@ -0,0 +1,49 @@
1
+ module PatriotHadoop
2
+ module Ext
3
+ module Hive
4
+
5
+ HIVE_MAX_ERROR_MSG_SIZE = 512
6
+
7
+ include Patriot::Util::Logger
8
+ include Patriot::Util::DBClient
9
+ include Patriot::Util::System
10
+
11
# Hook invoked when this module is included: also mixes
# Patriot::Util::System into the including class.
#
# @param cls [Module] the class or module including this module
def self.included(cls)
  cls.class_eval { include Patriot::Util::System }
end
14
+
15
# Error raised by this module when a user name is rejected or when the
# failure handler of a hive command runs.
# NOTE(review): this derives from Exception, so a bare `rescue` (which only
# catches StandardError) will not catch it -- confirm that this is intended.
class HiveException < Exception
end
16
+
17
+
18
# Runs a HiveQL script file with the hive CLI ("hive -f"), optionally
# through "sudo -u <user>".
#
# NOTE(review): hql_file is placed between double quotes in the command
# string without escaping; if the command is run through a shell, a path
# containing shell metacharacters would be interpreted -- confirm callers
# only pass trusted paths.
#
# @param hql_file [String] path of the script to run
# @param output_file [String, nil] where the command output should be moved
# @param user [String, nil] account to run the command as
# @return whatever _execute_hivequery_internal returns
# @raise [HiveException] when user is not an acceptable account name
def execute_hivequery(hql_file, output_file=nil, user=nil)
  command = "hive -f \"#{hql_file}\""
  unless user.nil?
    # \A and \z anchor the whole string. ^ and $ anchor each line, which
    # would accept a multi-line value such as "ok\n<anything>" and splice
    # it into the command.
    if user !~ /\A[a-z_][a-z0-9_]{0,30}\z/
      raise HiveException, "Invalid username"
    end
    command = "sudo -u #{user} #{command}"
  end
  _execute_hivequery_internal(command, output_file)
end
28
+
29
+
30
# Runs the command through execute_command and moves the file it returns
# to output_file.
#
# The block given to execute_command raises HiveException carrying the
# command and the tail of the error file (at most HIVE_MAX_ERROR_MSG_SIZE
# bytes, with a note on how many bytes were dropped).
# NOTE(review): presumably execute_command calls the block only when the
# command fails, and returns the path of the captured standard output --
# confirm against Patriot::Util::System.
# NOTE(review): File.rename cannot move a file across filesystems --
# confirm output_file always shares a filesystem with the captured output.
#
# @param command [String] the command line to run
# @param output_file [String, nil] destination of the output; nil leaves
#   the file returned by execute_command in place
# @raise [HiveException] from the failure block
def _execute_hivequery_internal(command, output_file)
  stdout_path = execute_command(command) do |_status, _stdout, stderr_path|
    limit = HIVE_MAX_ERROR_MSG_SIZE
    total = File.stat(stderr_path).size
    message = File.open(stderr_path) do |io|
      if total > limit
        # Keep only the last `limit` bytes of the error output.
        io.seek(-limit, IO::SEEK_END)
        "#{io.read}\n(#{total - limit} bytes are truncated)"
      else
        "#{io.read}"
      end
    end
    raise HiveException, "#{command}\n#{message}"
  end
  File.rename(stdout_path, output_file) unless output_file.nil?
end
46
+
47
+ end
48
+ end
49
+ end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: patriot-hadoop
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Hitoshi Tsuda
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-11-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: patriot-workflow-scheduler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '0.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '0.7'
27
+ description: a plugin for Patriot Workflow Scheduler, which deal with Hadoop-related
28
+ softwares.
29
+ email:
30
+ - tsuda_hitoshi@cyberagent.co.jp
31
+ executables: []
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - lib/patriot_hadoop/command/hive.rb
36
+ - lib/patriot_hadoop/command.rb
37
+ - lib/patriot_hadoop/ext/hive.rb
38
+ - lib/patriot_hadoop/ext.rb
39
+ - lib/patriot_hadoop.rb
40
+ - init.rb
41
+ homepage: https://github.com/CyberAgent/patriot-workflow-scheduler
42
+ licenses:
43
+ - Apache License, Version 2.0
44
+ metadata: {}
45
+ post_install_message:
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubyforge_project: patriot-hadoop
61
+ rubygems_version: 2.0.14
62
+ signing_key:
63
+ specification_version: 4
64
+ summary: Hadoop plugin for Patriot Workflow Scheduler
65
+ test_files: []
66
+ has_rdoc: