patriot-hadoop 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 21e992800beb77008e49114125a7509e42820106
4
+ data.tar.gz: fc571743a6fdb51e62e1b02f1ee2337e330b0f30
5
+ SHA512:
6
+ metadata.gz: 993f0ad53ac7b18c264fa11d52bd0e0ab2c7fa15b9a9b805d99bfd05985e7c49d80dc5459058ec75bfc2339ca79edf9e55180a081c2785868e800b78391f4d23
7
+ data.tar.gz: 537a5e6a57c39727a57bb8f909006fde86d6f3ad420eb78818b430e6a1ac0adc0e4d850ed4b5ee3c3b132bc1f6d468e00bc0fee212e71117cdd720ac80ae76d9
data/init.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'patriot'
2
+ require 'patriot_hadoop'
3
+
@@ -0,0 +1,4 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'patriot_hadoop/ext'
3
+ require 'patriot_hadoop/command'
4
+
@@ -0,0 +1 @@
1
+ require "patriot_hadoop/command/hive"
@@ -0,0 +1,74 @@
1
module PatriotHadoop
  module Command
    # Patriot workflow command that runs a Hive query.
    # The query text — with optional UDF registration statements and hive
    # property "set" statements prepended — is written to
    # "<output_prefix>.hql" and executed via the hive CLI; the result is
    # written to "<output_prefix>.tsv".
    class HiveCommand < Patriot::Command::Base
      declare_command_name :hive
      include PatriotHadoop::Ext::Hive

      # NOTE: :udf added to the attribute list — #execute reads @udf, but
      # :udf was missing from command_attr, so the attribute could never be
      # populated and the UDF option was silently dead.
      command_attr :hive_ql, :output_prefix, :exec_user, :props, :name_suffix, :udf

      # @return [String] job identifier: the command name, with
      #   "_<name_suffix>" appended when a suffix is configured.
      def job_id
        id = command_name.to_s
        id = "#{id}_#{@name_suffix}" unless @name_suffix.nil?
        id
      end

      # Execute the configured Hive query.
      # Creates the output directory if needed, writes the query to a
      # temporary .hql file, runs it (optionally as @exec_user), and logs a
      # warning when the result file is empty.
      def execute
        @logger.info "start hive"

        opt = {}
        opt[:udf]   = @udf   unless @udf.nil?
        opt[:props] = @props unless @props.nil?

        output_prefix = @output_prefix.nil? ? File.join('/tmp', job_id()) : @output_prefix
        output_directory = File.dirname(output_prefix)
        FileUtils.mkdir_p(output_directory) unless Dir.exist?(output_directory)

        tmpfile = output_prefix + '.hql'
        _create_hivequery_tmpfile(@hive_ql, tmpfile, opt)

        output_file = output_prefix + '.tsv'
        execute_hivequery(tmpfile, output_file, @exec_user)

        if File.zero?(output_file)
          @logger.warn "#{@hive_ql} generated empty result"
          return
        end

        @logger.info "end hive"
      end

      # Write the final query text (UDF registration + property "set"
      # statements + the query itself) to +tmpfile+.
      #
      # @param hive_ql [String] the base Hive query
      # @param tmpfile [String] path the composed query is written to
      # @param opt     [Hash]   :udf and/or :props options
      def _create_hivequery_tmpfile(hive_ql, tmpfile, opt = {})
        hive_ql = _add_udfs(hive_ql, opt[:udf]) if opt.has_key?(:udf)
        hive_ql = "#{_set_hive_property_prefix(opt[:props])}#{hive_ql}" if opt.has_key?(:props)
        File.write(tmpfile, hive_ql)
      end

      # Build "set k=v;" statements from a property hash.
      #
      # @param props [Hash, nil] hive property name => value
      # @return [String] concatenated "set" statements; "" when props is nil
      def _set_hive_property_prefix(props = {})
        return "" if props.nil?
        props.map { |k, v| "set #{k}=#{v};" }.join
      end

      # Prepend "add jar ...;" and "create temporary function ...;"
      # registration statements to the query.
      #
      # @param hive_ql [String] the base Hive query
      # @param udfs [Hash, Array<Hash>, nil] one or more UDF descriptors of
      #   the form {'jar' => path, 'functions' => {'name' => n, 'class' => c}}
      #   where 'functions' may also be an array of such hashes
      # @return [String] registration statements followed by the query
      def _add_udfs(hive_ql, udfs)
        return hive_ql if udfs.nil?
        register = ""
        udfs = [udfs] unless udfs.is_a?(Array)
        udfs.each do |udf|
          # << mutates in place instead of allocating a new string per +=
          register << "add jar #{udf['jar']};"
          functions = udf['functions']
          functions = [functions] unless functions.is_a?(Array)
          functions.each do |f|
            register << "create temporary function #{f['name']} as \"#{f['class']}\";"
          end
        end
        "#{register}#{hive_ql}"
      end

    end
  end
end
@@ -0,0 +1 @@
1
+ require "patriot_hadoop/ext/hive"
@@ -0,0 +1,49 @@
1
module PatriotHadoop
  module Ext
    # Helpers for executing Hive queries through the `hive` CLI.
    module Hive

      # Maximum number of stderr bytes embedded in a raised error message;
      # longer stderr output is truncated from the front.
      HIVE_MAX_ERROR_MSG_SIZE = 512

      include Patriot::Util::Logger
      include Patriot::Util::DBClient
      include Patriot::Util::System

      # Ensure including classes also gain Patriot::Util::System helpers
      # (e.g. execute_command used below).
      def self.included(cls)
        cls.send(:include, Patriot::Util::System)
      end

      # Raised when a hive invocation fails or a username is rejected.
      # Subclasses StandardError (previously Exception): custom errors must
      # derive from StandardError so a default `rescue` catches them and
      # signal/exit exceptions are not conflated with application errors.
      class HiveException < StandardError; end

      # Run the hive CLI against +hql_file+, optionally as +user+ via sudo.
      #
      # @param hql_file    [String]      path to the .hql file to execute
      # @param output_file [String, nil] where to move hive's stdout
      # @param user        [String, nil] account to run as (via sudo)
      # @raise [HiveException] when the username is invalid or hive fails
      def execute_hivequery(hql_file, output_file=nil, user=nil)
        command = "hive -f \"#{hql_file}\""
        unless user.nil?
          # SECURITY FIX: \A/\z anchor the WHOLE string. The previous ^/$
          # anchors match per line in Ruby, so a value such as
          # "user\n; rm -rf /" passed validation and was interpolated into
          # the shell command below — a command-injection hole.
          if user !~ /\A[a-z_][a-z0-9_]{0,30}\z/
            raise HiveException, "Invalid username"
          end
          command = "sudo -u #{user} #{command}"
        end
        return _execute_hivequery_internal(command, output_file)
      end

      # Run +command+. On failure, raise HiveException carrying at most the
      # last HIVE_MAX_ERROR_MSG_SIZE bytes of stderr. On success, move the
      # stdout capture file to +output_file+ when one is given.
      def _execute_hivequery_internal(command, output_file)
        so = execute_command(command) do |status, so, se|
          err_size = File.stat(se).size
          err_msg = ""
          max_err_size = HIVE_MAX_ERROR_MSG_SIZE
          File.open(se) do |f|
            if err_size > max_err_size
              # keep only the tail of stderr; note how much was dropped
              f.seek(-1 * max_err_size, IO::SEEK_END)
              err_msg = "\n(#{err_size - max_err_size} bytes are truncated)"
            end
            err_msg = "#{f.read}#{err_msg}"
          end
          raise HiveException, "#{command}\n#{err_msg}"
        end
        File.rename(so, output_file) unless output_file.nil?
      end

    end
  end
end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: patriot-hadoop
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Hitoshi Tsuda
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-11-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: patriot-workflow-scheduler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '0.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '0.7'
27
+ description: a plugin for Patriot Workflow Scheduler, which deals with Hadoop-related
28
+ software.
29
+ email:
30
+ - tsuda_hitoshi@cyberagent.co.jp
31
+ executables: []
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - lib/patriot_hadoop/command/hive.rb
36
+ - lib/patriot_hadoop/command.rb
37
+ - lib/patriot_hadoop/ext/hive.rb
38
+ - lib/patriot_hadoop/ext.rb
39
+ - lib/patriot_hadoop.rb
40
+ - init.rb
41
+ homepage: https://github.com/CyberAgent/patriot-workflow-scheduler
42
+ licenses:
43
+ - Apache License, Version 2.0
44
+ metadata: {}
45
+ post_install_message:
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubyforge_project: patriot-hadoop
61
+ rubygems_version: 2.0.14
62
+ signing_key:
63
+ specification_version: 4
64
+ summary: Hadoop plugin for Patriot Workflow Scheduler
65
+ test_files: []
66
+ has_rdoc: