hivemeta 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -0
- data/README +8 -3
- data/examples/hivemeta_query.rb +9 -0
- data/examples/sample-mapper.rb +1 -1
- data/lib/hivemeta/connection.rb +19 -3
- data/lib/hivemeta.rb +1 -1
- metadata +3 -3
data/CHANGELOG
CHANGED
data/README
CHANGED
|
@@ -1,16 +1,22 @@
|
|
|
1
1
|
hivemeta
|
|
2
2
|
|
|
3
3
|
A ruby API for access to the Hive metastore. Useful for querying columns
|
|
4
|
-
in map/reduce applications. Includes a demo application to spit out
|
|
4
|
+
in Hadoop map/reduce applications. Includes a demo application to spit out
|
|
5
5
|
table information from the command-line via table name search or
|
|
6
6
|
by the table's location in HDFS.
|
|
7
7
|
|
|
8
|
+
Installation
|
|
9
|
+
|
|
10
|
+
gem install hivemeta
|
|
11
|
+
|
|
12
|
+
Usage
|
|
13
|
+
|
|
8
14
|
streaming map/reduce code snippet:
|
|
9
15
|
|
|
10
16
|
require 'hivemeta'
|
|
11
17
|
|
|
12
18
|
h = HiveMeta::Connection.new(...) # see sample-mapper.rb for detail
|
|
13
|
-
|
|
19
|
+
inv_table = h.table 'sample_inventory'
|
|
14
20
|
|
|
15
21
|
STDIN.each_line do |line|
|
|
16
22
|
begin
|
|
@@ -24,7 +30,6 @@ STDIN.each_line do |line|
|
|
|
24
30
|
puts "#{item_id}\t#{count}" if count >= 1000
|
|
25
31
|
end
|
|
26
32
|
|
|
27
|
-
|
|
28
33
|
sample usage for the demo app:
|
|
29
34
|
|
|
30
35
|
# query by table names
|
data/examples/hivemeta_query.rb
CHANGED
|
@@ -100,6 +100,15 @@ tables.each do |table|
|
|
|
100
100
|
next if list_tables
|
|
101
101
|
puts table.path
|
|
102
102
|
next if list_paths
|
|
103
|
+
|
|
104
|
+
delim = table.delimiter
|
|
105
|
+
a = table.delimiter.ord
|
|
106
|
+
delim = '\\t' if a == 9
|
|
107
|
+
if (1..7).include? a
|
|
108
|
+
delim = "^#{('A'.ord + a - 1).chr}"
|
|
109
|
+
end
|
|
110
|
+
puts "delimiter: \"#{delim}\" (ASCII #{a})"
|
|
111
|
+
|
|
103
112
|
tput_cols = `tput cols`.chomp.to_i rescue tput_cols = 0
|
|
104
113
|
|
|
105
114
|
table.each_with_index do |col_name, i|
|
data/examples/sample-mapper.rb
CHANGED
|
@@ -13,7 +13,7 @@ db_name = 'hivemeta'
|
|
|
13
13
|
dbi_string = "DBI:Mysql:#{db_name}:#{db_host}"
|
|
14
14
|
h = HiveMeta::Connection.new(dbi_string, db_user, db_pass)
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
inv_table = h.table 'sample_inventory'
|
|
17
17
|
|
|
18
18
|
STDIN.each_line do |line|
|
|
19
19
|
begin
|
data/lib/hivemeta/connection.rb
CHANGED
|
@@ -64,9 +64,11 @@ module HiveMeta
|
|
|
64
64
|
#p name
|
|
65
65
|
table = Table.new(name)
|
|
66
66
|
|
|
67
|
-
sql = "select c.INTEGER_IDX, c.column_name, c.COMMENT,
|
|
67
|
+
sql = "select c.INTEGER_IDX, c.column_name, c.COMMENT,
|
|
68
|
+
s.LOCATION, s.SD_ID
|
|
68
69
|
from TBLS t, COLUMNS c, SDS s
|
|
69
|
-
where t.SD_ID = c.SD_ID and t.SD_ID = s.SD_ID
|
|
70
|
+
where t.SD_ID = c.SD_ID and t.SD_ID = s.SD_ID
|
|
71
|
+
and t.TBL_NAME = ?"
|
|
70
72
|
query sql, name do |rec|
|
|
71
73
|
#puts "REC:"
|
|
72
74
|
#p rec
|
|
@@ -74,10 +76,24 @@ module HiveMeta
|
|
|
74
76
|
col_name = rec[1]
|
|
75
77
|
col_cmt = rec[2]
|
|
76
78
|
tbl_loc = rec[3]
|
|
79
|
+
sd_id = rec[4]
|
|
77
80
|
table.columns[col_idx] = col_name
|
|
78
81
|
table.comments[col_idx] = col_cmt
|
|
79
|
-
table.path
|
|
82
|
+
table.path = tbl_loc
|
|
83
|
+
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
sql = "select sp.PARAM_VALUE
|
|
87
|
+
from SERDE_PARAMS sp, TBLS t
|
|
88
|
+
where t.SD_ID = sp.SERDE_ID
|
|
89
|
+
and PARAM_KEY = 'field.delim'
|
|
90
|
+
and t.TBL_NAME = ?"
|
|
91
|
+
results = query sql, name
|
|
92
|
+
if results and results[0] and results[0][0]
|
|
93
|
+
table.delimiter = results[0][0]
|
|
80
94
|
end
|
|
95
|
+
#puts "#{name}: found delim '#{table.delimiter}'" if results[0]
|
|
96
|
+
#puts "#{name}: no delim" if not results[0]
|
|
81
97
|
|
|
82
98
|
tables << table
|
|
83
99
|
end
|
data/lib/hivemeta.rb
CHANGED
metadata
CHANGED
|
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
|
5
5
|
segments:
|
|
6
6
|
- 0
|
|
7
7
|
- 0
|
|
8
|
-
- 1
|
|
9
|
-
version: 0.0.1
|
|
8
|
+
- 2
|
|
9
|
+
version: 0.0.2
|
|
10
10
|
platform: ruby
|
|
11
11
|
authors:
|
|
12
12
|
- Frank Fejes
|
|
@@ -14,7 +14,7 @@ autorequire:
|
|
|
14
14
|
bindir: bin
|
|
15
15
|
cert_chain: []
|
|
16
16
|
|
|
17
|
-
date: 2011-05-
|
|
17
|
+
date: 2011-05-04 00:00:00 -05:00
|
|
18
18
|
default_executable:
|
|
19
19
|
dependencies: []
|
|
20
20
|
|