hivemeta 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -0
- data/README +8 -3
- data/examples/hivemeta_query.rb +9 -0
- data/examples/sample-mapper.rb +1 -1
- data/lib/hivemeta/connection.rb +19 -3
- data/lib/hivemeta.rb +1 -1
- metadata +3 -3
data/CHANGELOG
CHANGED
data/README
CHANGED
@@ -1,16 +1,22 @@
|
|
1
1
|
hivemeta
|
2
2
|
|
3
3
|
A ruby API for access to the Hive metastore. Useful for querying columns
|
4
|
-
in map/reduce applications. Includes a demo application to spit out
|
4
|
+
in Hadoop map/reduce applications. Includes a demo application to spit out
|
5
5
|
table information from the command-line via table name search or
|
6
6
|
by the table's location in HDFS.
|
7
7
|
|
8
|
+
Installation
|
9
|
+
|
10
|
+
gem install hivemeta
|
11
|
+
|
12
|
+
Usage
|
13
|
+
|
8
14
|
streaming map/reduce code snippet:
|
9
15
|
|
10
16
|
require 'hivemeta'
|
11
17
|
|
12
18
|
h = HiveMeta::Connection.new(...) # see sample-mapper.rb for detail
|
13
|
-
|
19
|
+
inv_table = h.table 'sample_inventory'
|
14
20
|
|
15
21
|
STDIN.each_line do |line|
|
16
22
|
begin
|
@@ -24,7 +30,6 @@ STDIN.each_line do |line|
|
|
24
30
|
puts "#{item_id}\t#{count}" if count >= 1000
|
25
31
|
end
|
26
32
|
|
27
|
-
|
28
33
|
sample usage for the demo app:
|
29
34
|
|
30
35
|
# query by table names
|
data/examples/hivemeta_query.rb
CHANGED
@@ -100,6 +100,15 @@ tables.each do |table|
|
|
100
100
|
next if list_tables
|
101
101
|
puts table.path
|
102
102
|
next if list_paths
|
103
|
+
|
104
|
+
delim = table.delimiter
|
105
|
+
a = table.delimiter.ord
|
106
|
+
delim = '\\t' if a == 9
|
107
|
+
if (1..7).include? a
|
108
|
+
delim = "^#{('A'.ord + a - 1).chr}"
|
109
|
+
end
|
110
|
+
puts "delimiter: \"#{delim}\" (ASCII #{a})"
|
111
|
+
|
103
112
|
tput_cols = `tput cols`.chomp.to_i rescue tput_cols = 0
|
104
113
|
|
105
114
|
table.each_with_index do |col_name, i|
|
data/examples/sample-mapper.rb
CHANGED
@@ -13,7 +13,7 @@ db_name = 'hivemeta'
|
|
13
13
|
dbi_string = "DBI:Mysql:#{db_name}:#{db_host}"
|
14
14
|
h = HiveMeta::Connection.new(dbi_string, db_user, db_pass)
|
15
15
|
|
16
|
-
|
16
|
+
inv_table = h.table 'sample_inventory'
|
17
17
|
|
18
18
|
STDIN.each_line do |line|
|
19
19
|
begin
|
data/lib/hivemeta/connection.rb
CHANGED
@@ -64,9 +64,11 @@ module HiveMeta
|
|
64
64
|
#p name
|
65
65
|
table = Table.new(name)
|
66
66
|
|
67
|
-
sql = "select c.INTEGER_IDX, c.column_name, c.COMMENT, s.LOCATION
|
67
|
+
sql = "select c.INTEGER_IDX, c.column_name, c.COMMENT,
|
68
|
+
s.LOCATION, s.SD_ID
|
68
69
|
from TBLS t, COLUMNS c, SDS s
|
69
|
-
where t.SD_ID = c.SD_ID and t.SD_ID = s.SD_ID and t.TBL_NAME = ?"
|
70
|
+
where t.SD_ID = c.SD_ID and t.SD_ID = s.SD_ID
|
71
|
+
and t.TBL_NAME = ?"
|
70
72
|
query sql, name do |rec|
|
71
73
|
#puts "REC:"
|
72
74
|
#p rec
|
@@ -74,10 +76,24 @@ module HiveMeta
|
|
74
76
|
col_name = rec[1]
|
75
77
|
col_cmt = rec[2]
|
76
78
|
tbl_loc = rec[3]
|
79
|
+
sd_id = rec[4]
|
77
80
|
table.columns[col_idx] = col_name
|
78
81
|
table.comments[col_idx] = col_cmt
|
79
|
-
table.path
|
82
|
+
table.path = tbl_loc
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
sql = "select sp.PARAM_VALUE
|
87
|
+
from SERDE_PARAMS sp, TBLS t
|
88
|
+
where t.SD_ID = sp.SERDE_ID
|
89
|
+
and PARAM_KEY = 'field.delim'
|
90
|
+
and t.TBL_NAME = ?"
|
91
|
+
results = query sql, name
|
92
|
+
if results and results[0] and results[0][0]
|
93
|
+
table.delimiter = results[0][0]
|
80
94
|
end
|
95
|
+
#puts "#{name}: found delim '#{table.delimiter}'" if results[0]
|
96
|
+
#puts "#{name}: no delim" if not results[0]
|
81
97
|
|
82
98
|
tables << table
|
83
99
|
end
|
data/lib/hivemeta.rb
CHANGED
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 1
|
9
|
-
version: 0.0.1
|
8
|
+
- 2
|
9
|
+
version: 0.0.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Frank Fejes
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-05-
|
17
|
+
date: 2011-05-04 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|