katte_hive_autodep 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Hikaru Ojima
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ # KatteHiveAutodep
2
+
3
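+ A Katte plugin that wraps hive-dependency-parser: after Katte loads all nodes, it parses each Hive SQL node and adds a requirement on every node that writes a table the node reads.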
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'katte_hive_autodep'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install katte_hive_autodep
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it ( http://github.com/<my-github-username>/katte_hive_autodep/fork )
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
@@ -0,0 +1,70 @@
1
#!/usr/bin/env bash
#
# Builds and exports the Java CLASSPATH used to run Hive-based tooling.
# The resulting CLASSPATH contains, in order: the Hive conf dir, every jar
# under ${HIVE_HOME}/lib, the auxiliary jars, the output of `hadoop classpath`
# and hive-parse-test-1.0.jar from the current working directory.
# Calls `exit` with a non-zero status when a required Hive jar is missing.

export HADOOP_HOME=${HADOOP_HOME:-/usr/lib/hadoop}

export HIVE_HOME=/usr/lib/hive

bin=/usr/lib/hive/bin

. "$bin"/hive-config.sh

if [ -f "${HIVE_CONF_DIR}/hive-env.sh" ]; then
  . "${HIVE_CONF_DIR}/hive-env.sh"
fi

CLASSPATH="${HIVE_CONF_DIR}"

HIVE_LIB=${HIVE_HOME}/lib

# Succeeds when at least one regular file matches "${HIVE_LIB}/<prefix>-*.jar".
# Iterating over the glob keeps the check valid when several versions of a jar
# are installed; a bare `[ -f <glob> ]` fails with a syntax error in that case.
#   $1 - jar name prefix, e.g. "hive-exec"
hive_jar_present() {
  local candidate
  for candidate in "${HIVE_LIB}/$1"-*.jar; do
    if [ -f "$candidate" ]; then
      return 0
    fi
  done
  return 1
}

# needed for execution
if ! hive_jar_present hive-exec; then
  echo "Missing Hive Execution Jar: ${HIVE_LIB}/hive-exec-*.jar"
  exit 1;
fi

if ! hive_jar_present hive-builtins; then
  echo "Missing Hive Builtins Jar: ${HIVE_LIB}/hive-builtins-*.jar"
  exit 1;
fi

if ! hive_jar_present hive-metastore; then
  echo "Missing Hive MetaStore Jar"
  exit 2;
fi

# cli specific code
if ! hive_jar_present hive-cli; then
  echo "Missing Hive CLI Jar"
  exit 3;
fi

# The helper is only needed for the checks above; do not leak it into the
# shell that sources this script.
unset -f hive_jar_present

for f in "${HIVE_LIB}"/*.jar; do
  CLASSPATH=${CLASSPATH}:$f;
done

# add the auxiliary jars such as serdes
if [ -d "${HIVE_AUX_JARS_PATH}" ]; then
  for f in "${HIVE_AUX_JARS_PATH}"/*.jar; do
    # an unmatched glob stays literal; skip anything that is not a real file
    if [[ ! -f $f ]]; then
      continue;
    fi
    AUX_CLASSPATH=${AUX_CLASSPATH}:$f
  done
elif [ "${HIVE_AUX_JARS_PATH}" != "" ]; then
  # not a directory: use the value as a ready-made classpath fragment
  AUX_CLASSPATH=${HIVE_AUX_JARS_PATH}
fi

# adding jars from auxlib directory
for f in "${HIVE_HOME}"/auxlib/*.jar; do
  if [[ ! -f $f ]]; then
    continue;
  fi
  AUX_CLASSPATH=${AUX_CLASSPATH}:$f
done
CLASSPATH=${CLASSPATH}:${AUX_CLASSPATH}

CLASSPATH=${CLASSPATH}:$(hadoop classpath)

CLASSPATH=${CLASSPATH}:$(pwd)/hive-parse-test-1.0.jar

export CLASSPATH
@@ -0,0 +1,8 @@
1
+ require "katte_hive_autodep/version"
2
+ require "katte_hive_autodep/resolver"
3
+
4
+ require "katte"
5
+
6
module KatteHiveAutodep
  # Register Resolver.call as a callback on Katte's :load_all hook.
  resolver_hook = Resolver.method(:call)
  Katte::Runner.after(:load_all, &resolver_hook)
end
@@ -0,0 +1,33 @@
1
require 'json'
require 'shellwords'

module KatteHiveAutodep
  # Wrapper class for hive-dependency-parser.
  #
  # Runs the bundled hive-dependency-parser jar against a file and returns the
  # JSON document printed on its standard output, parsed into Ruby objects.
  class HDP
    # Directory holding the bundled jar and the environment scripts.
    EXT_LIB_ROOT = File.expand_path("../../ext/", __FILE__)
    # Shell script sourced before java is started; a separate script is used
    # when KATTE_MODE is "test".
    ENV_SCRIPT = File.join(EXT_LIB_ROOT, (ENV["KATTE_MODE"] == 'test' ? "env-debug.sh" : "env.sh"))
    HDP_JAR = File.join(EXT_LIB_ROOT, "hive-dependency-parser.jar")
    HDP_MAIN_CLASS = "org.mixi.analysis.hive.dependency.parser.Driver"

    # Runs the dependency parser on +file+.
    #
    # @param file [String] path of the file to analyse
    # @return [Object, nil] the parsed JSON output, or nil when +file+ is not
    #   a regular file or the parser command does not finish successfully
    # @raise [JSON::ParserError] when the parser output is not valid JSON
    def run(file)
      return unless File.file? file

      output = capture(parser_command(file))
      return if output.nil?

      JSON.parse(output)
    end

    private

    # Builds the shell script that sources the environment script and starts
    # the parser. Paths are shell-escaped so that spaces or shell
    # metacharacters in a file name cannot alter the command.
    def parser_command(file)
      [
        "source #{Shellwords.escape(ENV_SCRIPT)}",
        "CLASSPATH=$CLASSPATH:#{Shellwords.escape(HDP_JAR)} " \
          "java -Xmx256m #{HDP_MAIN_CLASS} #{Shellwords.escape(file)}"
      ].join("\n")
    end

    # Executes +script+ with bash (the script relies on `source`, which plain
    # sh does not guarantee) and returns its standard output; stderr is
    # discarded. Output is read while the child is running, so large outputs
    # cannot fill the pipe and stall the child. Returns nil when the command
    # fails or bash cannot be started.
    def capture(script)
      output = IO.popen(["bash", "-c", script], "r", :err => "/dev/null") { |io| io.read }
      $?.success? ? output : nil
    rescue Errno::ENOENT
      nil
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require "katte_hive_autodep/hdp"
2
+
3
module KatteHiveAutodep
  # Derives requirements between Hive SQL nodes from the tables they touch:
  # each node is parsed with HDP, and a node reading a table is made to
  # require every node that writes that table.
  class Resolver
    # Convenience entry point so the class itself can be used as a callback.
    #
    # @param nodes [Enumerable] nodes to inspect
    def self.call(nodes)
      new.call(nodes)
    end

    def initialize
      # hive table name => nodes that write to it
      @table_node_mapping = {}
      # node => hive table names it reads from
      @dependency_mapping = {}
    end

    # Parses all nodes whose file type extname is 'sql' and appends the
    # names of the nodes they depend on to their +requires+.
    #
    # @param nodes [Enumerable] nodes to inspect
    def call(nodes)
      load_all(find_hive_nodes(nodes))
      resolve
    end

    private

    # Records +node+ as a writer of +hive_table+.
    def add_table(node, hive_table)
      (@table_node_mapping[hive_table] ||= []) << node
    end

    # Records that +node+ reads from +hive_table+.
    def add_dependency(node, hive_table)
      (@dependency_mapping[node] ||= []) << hive_table
    end

    # Parses a single node and records its source and destination tables.
    def load(node)
      dependency = HDP.new.run(node.path)
      # HDP#run returns nil when the path is not a file or the parser fails;
      # such a node simply contributes no table information.
      return unless dependency

      # Array() tolerates a missing "sources"/"destinations" key.
      Array(dependency["sources"]).each { |src| add_dependency(node, src) }
      Array(dependency["destinations"]).each { |dst| add_table(node, dst) }
    end

    def load_all(nodes)
      nodes.each { |node| load node }
    end

    # Selects the nodes whose file type reports the extname 'sql'.
    def find_hive_nodes(nodes)
      nodes.select { |node| node.file_type.extname == 'sql' }
    end

    # Turns the collected table information into node requirements.
    def resolve
      @dependency_mapping.each do |node, hive_tables|
        hive_tables.each do |hive_table|
          writers = @table_node_mapping[hive_table]
          next unless writers

          writers.each do |depending_node|
            next unless depending_node.is_a? Katte::Node::Base
            # A node that reads and writes the same table must not wait for itself.
            next if depending_node.equal?(node)

            required_name = depending_node.name
            # Several shared tables would otherwise add the same name repeatedly.
            node.requires << required_name unless node.requires.include?(required_name)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module KatteHiveAutodep
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,118 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: katte_hive_autodep
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Hikaru Ojima
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-02-28 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.5'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.5'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: katte
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ description: hive dependency parser wrapper for katte
79
+ email:
80
+ - hikaru.ojima@mixi.co.jp
81
+ executables: []
82
+ extensions: []
83
+ extra_rdoc_files: []
84
+ files:
85
+ - lib/katte_hive_autodep.rb
86
+ - lib/katte_hive_autodep/hdp.rb
87
+ - lib/katte_hive_autodep/resolver.rb
88
+ - lib/katte_hive_autodep/version.rb
89
+ - Readme.md
90
+ - LICENSE.txt
91
+ - lib/ext/hive-dependency-parser.jar
92
+ - lib/ext/env.sh
93
+ homepage: ''
94
+ licenses:
95
+ - MIT
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ! '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ! '>='
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ requirements: []
113
+ rubyforge_project:
114
+ rubygems_version: 1.8.23
115
+ signing_key:
116
+ specification_version: 3
117
+ summary: hive dependency parser wrapper for katte
118
+ test_files: []