hivemeta 0.1.1 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +8 -0
- data/README +5 -2
- data/examples/hivemeta_query.rb +1 -2
- data/examples/hivemeta_testrec.rb +1 -2
- data/examples/sample-mapper.rb +1 -2
- data/lib/hivemeta/connection.rb +103 -52
- metadata +3 -3
data/CHANGELOG
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
* 2011-07-06 - fsf
|
2
|
+
- new: added introductory jruby/jdbc support
|
3
|
+
- change: HiveMeta::Connection.initialize takes 4 optional arguments now
|
4
|
+
and builds the dbi/jdbc string itself
|
5
|
+
|
6
|
+
* 2011-05-30 - fsf
|
7
|
+
- clean: switched dbi connection loop to an exception retry block
|
8
|
+
|
1
9
|
* 2011-05-29 - fsf
|
2
10
|
- new: added retry logic to the db connection to account for mysql servers
|
3
11
|
with a relatively low max_connections setting
|
data/README
CHANGED
@@ -39,6 +39,10 @@ Installation
|
|
39
39
|
|
40
40
|
gem install hivemeta
|
41
41
|
|
42
|
+
or (for jruby)
|
43
|
+
|
44
|
+
jgem install hivemeta
|
45
|
+
|
42
46
|
---
|
43
47
|
|
44
48
|
API Usage
|
@@ -81,8 +85,7 @@ db_pass = 'hivepasshere'
|
|
81
85
|
db_host = 'localhost'
|
82
86
|
db_name = 'hivemeta'
|
83
87
|
|
84
|
-
|
85
|
-
h = HiveMeta::Connection.new(dbi_string, db_user, db_pass)
|
88
|
+
h = HiveMeta::Connection.new(db_name, db_host, db_user, db_pass)
|
86
89
|
|
87
90
|
establishing a connection (environment variables):
|
88
91
|
|
data/examples/hivemeta_query.rb
CHANGED
@@ -72,8 +72,7 @@ opts.each do |opt, arg|
|
|
72
72
|
exit
|
73
73
|
end end
|
74
74
|
|
75
|
-
|
76
|
-
h = HiveMeta::Connection.new(dbi_string, db_user, db_pass)
|
75
|
+
h = HiveMeta::Connection.new(db_name, db_host, db_user, db_pass)
|
77
76
|
|
78
77
|
tables = []
|
79
78
|
max_col_width = 8
|
data/examples/hivemeta_testrec.rb
CHANGED
@@ -44,8 +44,7 @@ opts.each do |opt, arg|
|
|
44
44
|
exit
|
45
45
|
end end
|
46
46
|
|
47
|
-
|
48
|
-
h = HiveMeta::Connection.new(dbi_string, db_user, db_pass)
|
47
|
+
h = HiveMeta::Connection.new(db_name, db_host, db_user, db_pass)
|
49
48
|
|
50
49
|
# test table has the following schema
|
51
50
|
# i col_name
|
data/examples/sample-mapper.rb
CHANGED
@@ -10,8 +10,7 @@ db_pass = 'hivepasshere'
|
|
10
10
|
db_host = 'localhost'
|
11
11
|
db_name = 'hivemeta'
|
12
12
|
|
13
|
-
|
14
|
-
h = HiveMeta::Connection.new(dbi_string, db_user, db_pass)
|
13
|
+
h = HiveMeta::Connection.new(db_name, db_host, db_user, db_pass)
|
15
14
|
|
16
15
|
inv_table = h.table 'sample_inventory'
|
17
16
|
|
data/lib/hivemeta/connection.rb
CHANGED
@@ -1,48 +1,58 @@
|
|
1
|
-
require 'dbi'
|
2
1
|
require 'hivemeta/table'
|
3
2
|
require 'hivemeta/record'
|
4
3
|
|
4
|
+
if RUBY_PLATFORM == 'java'
|
5
|
+
require 'java'
|
6
|
+
else
|
7
|
+
require 'dbi'
|
8
|
+
# fix for broken row dup in 1.9
|
9
|
+
# http://goo.gl/fx6kW
|
10
|
+
module DBI
|
11
|
+
class Row
|
12
|
+
if RUBY_VERSION =~ /^1\.9/
|
13
|
+
def dup
|
14
|
+
row = super
|
15
|
+
row.instance_variable_set :@arr, @arr.dup
|
16
|
+
row
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
5
23
|
module HiveMeta
|
6
24
|
|
7
25
|
class Connection
|
8
|
-
def initialize(
|
9
|
-
db_name = ENV['hivemeta_db_name']
|
10
|
-
db_host = ENV['hivemeta_db_host']
|
11
|
-
|
12
|
-
|
13
|
-
db_pass ||= ENV['hivemeta_db_pass']
|
14
|
-
|
15
|
-
@dbi_string = dbi_string
|
16
|
-
@db_user = db_user
|
17
|
-
@db_pass = db_pass
|
26
|
+
def initialize(db_name = nil, db_host = nil, db_user = nil, db_pass = nil)
|
27
|
+
@db_name = db_name || ENV['hivemeta_db_name']
|
28
|
+
@db_host = db_host || ENV['hivemeta_db_host']
|
29
|
+
@db_user = db_user || ENV['hivemeta_db_user']
|
30
|
+
@db_pass = db_pass || ENV['hivemeta_db_pass']
|
18
31
|
end
|
19
32
|
|
20
|
-
def
|
33
|
+
def query_dbi(sql, *args)
|
21
34
|
dbh = results = nil
|
35
|
+
dbi_string = "DBI:Mysql:#{@db_name}:#{@db_host}"
|
22
36
|
|
23
37
|
# make a few attempts in the event that mysql has not been
|
24
38
|
# configured with enough connections to handle many mappers
|
25
|
-
max_attempts = 3
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
raise
|
40
|
-
end
|
39
|
+
attempts, max_attempts = 0, 3
|
40
|
+
begin
|
41
|
+
dbh = DBI.connect(dbi_string, @db_user, @db_pass)
|
42
|
+
rescue DBI::DatabaseError => e
|
43
|
+
attempts += 1
|
44
|
+
if attempts < max_attempts
|
45
|
+
s = rand + 0.50
|
46
|
+
STDERR.puts "retrying hivemeta connection after %f seconds..." % s
|
47
|
+
sleep s
|
48
|
+
retry
|
49
|
+
else
|
50
|
+
warn "cannot connect to metastore on %s:\n error %s\n %s" %
|
51
|
+
[@db_host, e.err, e.errstr]
|
52
|
+
raise
|
41
53
|
end
|
42
54
|
end
|
43
55
|
|
44
|
-
#puts "sql: #{sql}"
|
45
|
-
#puts "args: #{args}"
|
46
56
|
sth = dbh.prepare(sql)
|
47
57
|
sth.execute(*args)
|
48
58
|
if block_given?
|
@@ -58,6 +68,63 @@ module HiveMeta
|
|
58
68
|
results # returns nil if a block is given
|
59
69
|
end
|
60
70
|
|
71
|
+
def table_info_jdbc result
|
72
|
+
meta = result.meta_data
|
73
|
+
cols = meta.column_count
|
74
|
+
colnames = []
|
75
|
+
cols.times do |i|
|
76
|
+
colnames[i] = meta.column_name i+1
|
77
|
+
end
|
78
|
+
[cols, colnames]
|
79
|
+
end
|
80
|
+
|
81
|
+
def query_jdbc(sql, *args)
|
82
|
+
results = []
|
83
|
+
db_url = "jdbc:mysql://#{@db_host}/#{@db_name}"
|
84
|
+
|
85
|
+
# make a few attempts in the event that mysql has not been
|
86
|
+
# configured with enough connections to handle many mappers
|
87
|
+
attempts, max_attempts = 0, 3
|
88
|
+
begin
|
89
|
+
c = java.sql.DriverManager.get_connection(db_url, @db_user, @db_pass)
|
90
|
+
rescue => e
|
91
|
+
attempts += 1
|
92
|
+
if attempts < max_attempts
|
93
|
+
s = rand + 0.50
|
94
|
+
STDERR.puts "retrying hivemeta connection after %f seconds..." % s
|
95
|
+
sleep s
|
96
|
+
retry
|
97
|
+
else
|
98
|
+
warn "cannot connect to metastore on %s:\n error %s" %
|
99
|
+
[@db_host, e]
|
100
|
+
raise
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
stmt = c.create_statement
|
105
|
+
|
106
|
+
args.each do |arg|
|
107
|
+
# poor man's prepare
|
108
|
+
sql = sql.sub /\?/, "'#{arg}'"
|
109
|
+
res = stmt.execute_query sql
|
110
|
+
|
111
|
+
cols,names = table_info_jdbc res
|
112
|
+
|
113
|
+
while res.next do
|
114
|
+
row = []
|
115
|
+
1.upto(cols) do |i|
|
116
|
+
row << res.get_string(i)
|
117
|
+
end
|
118
|
+
yield row if block_given?
|
119
|
+
results << row
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
c.close
|
124
|
+
|
125
|
+
results
|
126
|
+
end
|
127
|
+
|
61
128
|
def tables(opts = {})
|
62
129
|
args = nil
|
63
130
|
if opts[:filter_path]
|
@@ -76,13 +143,8 @@ module HiveMeta
|
|
76
143
|
results = query sql, *args
|
77
144
|
table_names = results.map {|result| result[0]}
|
78
145
|
|
79
|
-
#puts "TABLE_NAMES:"
|
80
|
-
#p table_names
|
81
|
-
|
82
146
|
tables = []
|
83
147
|
table_names.each do |name|
|
84
|
-
#puts "NAME: "
|
85
|
-
#p name
|
86
148
|
table = Table.new(name)
|
87
149
|
|
88
150
|
sql = "select c.INTEGER_IDX, c.column_name, c.COMMENT,
|
@@ -91,8 +153,6 @@ module HiveMeta
|
|
91
153
|
where t.SD_ID = c.SD_ID and t.SD_ID = s.SD_ID
|
92
154
|
and t.TBL_NAME = ?"
|
93
155
|
query sql, name do |rec|
|
94
|
-
#puts "REC:"
|
95
|
-
#p rec
|
96
156
|
col_idx = rec[0].to_i
|
97
157
|
col_name = rec[1]
|
98
158
|
col_cmt = rec[2]
|
@@ -113,8 +173,6 @@ module HiveMeta
|
|
113
173
|
if results and results[0] and results[0][0]
|
114
174
|
table.delimiter = results[0][0]
|
115
175
|
end
|
116
|
-
#puts "#{name}: found delim '#{table.delimiter}'" if results[0]
|
117
|
-
#puts "#{name}: no delim" if not results[0]
|
118
176
|
|
119
177
|
tables << table
|
120
178
|
end
|
@@ -125,20 +183,13 @@ module HiveMeta
|
|
125
183
|
t = tables(:filter_name => name) # appeasing the old skool 1.8 users
|
126
184
|
t[0] # if it comes back with multiple tables, return the first
|
127
185
|
end
|
128
|
-
end
|
129
186
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
class Row
|
136
|
-
if RUBY_VERSION =~ /^1\.9/
|
137
|
-
def dup
|
138
|
-
row = super
|
139
|
-
row.instance_variable_set :@arr, @arr.dup
|
140
|
-
row
|
141
|
-
end
|
187
|
+
if RUBY_PLATFORM == 'java'
|
188
|
+
alias :query :query_jdbc
|
189
|
+
Java::com.mysql.jdbc.Driver
|
190
|
+
else
|
191
|
+
alias :query :query_dbi
|
142
192
|
end
|
143
193
|
end
|
194
|
+
|
144
195
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 1
|
9
|
-
version: 0.1.1
|
8
|
+
- 9
|
9
|
+
version: 0.1.9
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Frank Fejes
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-
|
17
|
+
date: 2011-07-06 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|