hivemeta 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +4 -1
- data/README +43 -2
- data/lib/hivemeta/connection.rb +6 -0
- data/lib/hivemeta/table.rb +18 -0
- metadata +2 -2
data/CHANGELOG
CHANGED
@@ -3,7 +3,10 @@
|
|
3
3
|
- perf: 4x+ faster ... now basically on par with manual split into array
|
4
4
|
- perf: create extra hash for column index by name
|
5
5
|
- perf: remove unnecessary string indexed assignment
|
6
|
-
- clean:
|
6
|
+
- clean: Table#each does each inside rather than each_with_index
|
7
|
+
- new: Table#process works on file input, by default STDIN
|
8
|
+
- new: can now use environment variables in order to minimize code
|
9
|
+
all prefixed by hivemeta_ : db_user, db_pass, db_host, db_name
|
7
10
|
|
8
11
|
* 2011-05-17 - fsf
|
9
12
|
- bugfix: default unspecified delimiter is ^A rather than TAB
|
data/README
CHANGED
@@ -43,11 +43,23 @@ gem install hivemeta
|
|
43
43
|
|
44
44
|
API Usage
|
45
45
|
|
46
|
-
streaming map/reduce code snippet:
|
46
|
+
streaming map/reduce code snippet (abstracted processing loop):
|
47
47
|
|
48
48
|
require 'hivemeta'
|
49
49
|
|
50
|
-
h = HiveMeta::Connection.new
|
50
|
+
h = HiveMeta::Connection.new # see below for detail
|
51
|
+
|
52
|
+
h.table('sample_inventory').process do |row|
|
53
|
+
item_id = row.item_id # can access by method or [:sym] or ['str']
|
54
|
+
count = row.inv_cnt.to_i
|
55
|
+
puts "#{item_id}\t#{count}" if count >= 1000
|
56
|
+
end
|
57
|
+
|
58
|
+
streaming map/reduce code snippet (normal STDIN processing loop):
|
59
|
+
|
60
|
+
require 'hivemeta'
|
61
|
+
|
62
|
+
h = HiveMeta::Connection.new # see below for detail
|
51
63
|
inv_table = h.table 'sample_inventory'
|
52
64
|
|
53
65
|
STDIN.each_line do |line|
|
@@ -62,6 +74,35 @@ STDIN.each_line do |line|
|
|
62
74
|
puts "#{item_id}\t#{count}" if count >= 1000
|
63
75
|
end
|
64
76
|
|
77
|
+
establishing a connection (in ruby code):
|
78
|
+
|
79
|
+
db_user = 'hive'
|
80
|
+
db_pass = 'hivepasshere'
|
81
|
+
db_host = 'localhost'
|
82
|
+
db_name = 'hivemeta'
|
83
|
+
|
84
|
+
dbi_string = "DBI:Mysql:#{db_name}:#{db_host}"
|
85
|
+
h = HiveMeta::Connection.new(dbi_string, db_user, db_pass)
|
86
|
+
|
87
|
+
establishing a connection (environment variables):
|
88
|
+
|
89
|
+
# when no arguments are passed, the following env variables will be used:
|
90
|
+
#
|
91
|
+
# hivemeta_db_host
|
92
|
+
# hivemeta_db_name
|
93
|
+
# hivemeta_db_user
|
94
|
+
# hivemeta_db_pass
|
95
|
+
#
|
96
|
+
# to set these in a streaming map/reduce job, use -D arguments like so
# (Hadoop Streaming exports job settings as env variables, replacing '.' with '_'):
|
97
|
+
#
|
98
|
+
# -D hivemeta.db_host=mydbhost \
|
99
|
+
# -D hivemeta.db_name=hivemeta \
|
100
|
+
# -D hivemeta.db_user=hive \
|
101
|
+
# -D hivemeta.db_pass=mydbpass \
|
102
|
+
|
103
|
+
# the connection will be made with those env variables without any other code
|
104
|
+
h = HiveMeta::Connection.new
|
105
|
+
|
65
106
|
---
|
66
107
|
|
67
108
|
hivemeta_query.rb Usage
|
data/lib/hivemeta/connection.rb
CHANGED
@@ -6,6 +6,12 @@ module HiveMeta
|
|
6
6
|
|
7
7
|
class Connection
|
8
8
|
def initialize(dbi_string = nil, db_user = nil, db_pass = nil)
|
9
|
+
db_name = ENV['hivemeta_db_name']
|
10
|
+
db_host = ENV['hivemeta_db_host']
|
11
|
+
dbi_string ||= "DBI:Mysql:#{db_name}:#{db_host}"
|
12
|
+
db_user ||= ENV['hivemeta_db_user']
|
13
|
+
db_pass ||= ENV['hivemeta_db_pass']
|
14
|
+
|
9
15
|
@dbi_string = dbi_string
|
10
16
|
@db_user = db_user
|
11
17
|
@db_pass = db_pass
|
data/lib/hivemeta/table.rb
CHANGED
@@ -49,6 +49,24 @@ module HiveMeta
|
|
49
49
|
return Record.new(line, self)
|
50
50
|
end
|
51
51
|
end
|
52
|
+
|
53
|
+
# process all input (default to STDIN for Hadoop Streaming)
|
54
|
+
# via a provided block
|
55
|
+
def process(f = STDIN, warning = nil)
|
56
|
+
if not block_given?
|
57
|
+
return process_row f.readline
|
58
|
+
end
|
59
|
+
|
60
|
+
f.each_line do |line|
|
61
|
+
begin
|
62
|
+
process_row(line) {|row| yield row}
|
63
|
+
rescue HiveMeta::FieldCountError
|
64
|
+
warning ||= "reporter:counter:bad_data,row_size,1"
|
65
|
+
STDERR.puts warning
|
66
|
+
next
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
52
70
|
end
|
53
71
|
|
54
72
|
end
|