bigrecord-driver 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README +0 -0
- data/bin/cassandra-driver +5 -0
- data/bin/hbase-driver +5 -0
- data/bin/launcher +155 -0
- data/conf/log4j.properties +48 -0
- data/lib/big_record_driver/bigrecord_server.rb +119 -0
- data/lib/big_record_driver/cassandra_driver/server.rb +135 -0
- data/lib/big_record_driver/client.rb +36 -0
- data/lib/big_record_driver/column_descriptor.rb +23 -0
- data/lib/big_record_driver/driver_manager.rb +34 -0
- data/lib/big_record_driver/exceptions.rb +12 -0
- data/lib/big_record_driver/hbase_driver/server.rb +396 -0
- data/lib/big_record_driver.rb +6 -0
- data/lib/bigrecord_driver.rb +1 -0
- data/test/abstract_test_client.rb +316 -0
- data/test/test_client_cassandra.rb +63 -0
- data/test/test_client_hbase.rb +26 -0
- data/test/test_driver_manager.rb +46 -0
- data/vendor/java/cassandra/cassandra-0.3.0-dev.jar +0 -0
- data/vendor/java/cassandra/libthrift.jar +0 -0
- data/vendor/java/cassandra/log4j-1.2.15.jar +0 -0
- data/vendor/java/hbase/commons-logging-1.0.4.jar +0 -0
- data/vendor/java/hbase/commons-logging-api-1.0.4.jar +0 -0
- data/vendor/java/hbase/hadoop-0.20.0-core.jar +0 -0
- data/vendor/java/hbase/hbase-0.20.0.jar +0 -0
- data/vendor/java/hbase/log4j-1.2.13.jar +0 -0
- data/vendor/java/hbase/zookeeper-r785019-hbase-1329.jar +0 -0
- metadata +83 -0
@@ -0,0 +1,396 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../column_descriptor'
|
2
|
+
require File.dirname(__FILE__) + '/../exceptions'
|
3
|
+
require File.dirname(__FILE__) + '/../bigrecord_server'
|
4
|
+
|
5
|
+
module BigRecordDriver

  # DRb-exposed server that bridges Ruby clients to HBase through the
  # HBase Java client API. Runs under JRuby: the org.apache.hadoop.*
  # classes are loaded from jars already on the classpath.
  #
  # Connection/admin state lives in instance variables set by
  # #init_connection: @conf (HBaseConfiguration), @admin (HBaseAdmin)
  # and @tables (cache of open HTable handles keyed by table name).
  # Helpers such as safe_exec and to_ruby_string come from the parent
  # BigRecordServer class (defined in bigrecord_server.rb).
  class HbaseServer < BigRecordServer
    include_class "java.util.TreeMap"

    include_class "org.apache.hadoop.hbase.client.HTable"
    include_class "org.apache.hadoop.hbase.client.HBaseAdmin"
    include_class "org.apache.hadoop.hbase.io.BatchUpdate"
    include_class "org.apache.hadoop.hbase.io.hfile.Compression"
    include_class "org.apache.hadoop.hbase.HBaseConfiguration"
    include_class "org.apache.hadoop.hbase.HConstants"
    include_class "org.apache.hadoop.hbase.HStoreKey"
    include_class "org.apache.hadoop.hbase.HTableDescriptor"
    include_class "org.apache.hadoop.hbase.HColumnDescriptor"

    include_class "org.apache.hadoop.io.Writable"

    # Establish the connection with HBase with the given configuration
    # parameters. Missing options default to a local ZooKeeper quorum on
    # the standard client port.
    #
    # config - Hash with optional :zookeeper_quorum and
    #          :zookeeper_client_port entries.
    def configure(config = {})
      config[:zookeeper_quorum]      ||= 'localhost'
      config[:zookeeper_client_port] ||= '2181'

      @config = config

      init_connection
    end

    # Atomic row insertion/update. Returns the row key, or nil when no
    # row key was given. Example:
    #   update('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8',
    #          {'attribute:name' => "--- Oahu\n",
    #           'attribute:travel_rank' => "--- 0.90124565\n"})
    #   => 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8'
    #
    # timestamp - optional explicit cell timestamp; when nil HBase
    #             assigns the current time.
    def update(table_name, row, values, timestamp = nil)
      safe_exec do
        return nil unless row
        table = connect_table(table_name)

        # All cells go into one BatchUpdate so the row mutation is atomic.
        batch = timestamp ? BatchUpdate.new(row, timestamp) : BatchUpdate.new(row)

        values.each do |column, value|
          batch.put(column, value.to_bytes)
        end

        table.commit(batch)
        row
      end
    end

    # Returns a column of a row. Example:
    #   get('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 'attribute:travel_rank')
    #   => "--- 0.90124565\n"
    #
    # valid options:
    #   :timestamp => integer corresponding to the time when the record was saved in hbase
    #   :versions  => number of versions to retrieve, starting at the specified
    #                 timestamp (or the latest); :num_versions is accepted as an alias
    #
    # Returns a single string when one version is requested, an array of
    # strings otherwise; nil (or []) when the cell does not exist.
    def get(table_name, row, column, options = {})
      safe_exec do
        return nil unless row
        table = connect_table(table_name)

        # Retrieve only the last version by default.
        options[:versions] ||= options[:num_versions]
        options[:versions] ||= 1

        # validate the arguments
        raise ArgumentError, "versions must be >= 1" unless options[:versions] >= 1

        # get the raw data from hbase, picking the narrowest HTable#get
        # overload that matches the requested timestamp/version count
        unless options[:timestamp]
          if options[:versions] == 1
            raw_data = table.get(row, column)
          else
            raw_data = table.get(row,
                                 column,
                                 options[:versions])
          end
        else
          raw_data = table.get(row,
                               column,
                               options[:timestamp],
                               options[:versions])
        end

        # Return either a single value or an array, depending on the number
        # of versions that have been requested.
        if options[:versions] == 1
          return nil unless raw_data
          # The timestamped overload always returns an array, even for one version.
          raw_data = raw_data[0] if options[:timestamp]
          to_ruby_string(raw_data)
        else
          return [] unless raw_data
          raw_data.collect do |raw_data_version|
            to_ruby_string(raw_data_version)
          end
        end
      end
    end

    # Returns the last version of the given columns of the given row. The
    # columns work with prefixes (e.g. 'attribute:' matches all columns of
    # that family). Example:
    #   get_columns('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', ['attribute:'])
    #   => {"attribute:name" => "--- Oahu\n", "attribute:travel_rank" => "--- 0.90124565\n", ...}
    #
    # The returned hash also carries the row key under "id". Returns nil
    # when the row does not exist or matches no columns.
    def get_columns(table_name, row, columns, options = {})
      safe_exec do
        return nil unless row
        table_name = table_name.to_s
        table = connect_table(table_name)

        # HTable#getRow wants a java String[] rather than a Ruby array.
        java_cols = Java::String[columns.size].new
        columns.each_with_index do |col, i|
          java_cols[i] = Java::String.new(col)
        end

        result =
          if options[:timestamp]
            table.getRow(row, java_cols, options[:timestamp])
          else
            table.getRow(row, java_cols)
          end

        unless !result or result.isEmpty
          values = {}
          result.entrySet.each do |entry|
            column_name = Java::String.new(entry.getKey).to_s
            values[column_name] = to_ruby_string(entry.getValue)
          end
          values["id"] = row
          values
        else
          nil
        end
      end
    end

    # Get consecutive rows. Example to get 100 records starting with the
    # one specified and get all the columns in the column family 'attribute:':
    #   get_consecutive_rows('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 100, ['attribute:'])
    #
    # start_row - first row key of the scan; nil/empty scans from the start.
    # limit     - maximum number of rows to return; nil means unbounded.
    # stop_row  - optional exclusive end key for the scan.
    def get_consecutive_rows(table_name, start_row, limit, columns, stop_row = nil)
      safe_exec do
        table_name = table_name.to_s
        table = connect_table(table_name)

        java_cols = Java::String[columns.size].new
        columns.each_with_index do |col, i|
          java_cols[i] = Java::String.new(col)
        end

        start_row ||= ""
        start_row = start_row.to_s

        # We cannot default stop_row like start_row because a default stop
        # row would have to be the biggest value possible; use the
        # open-ended scanner overload instead.
        if stop_row
          scanner = table.getScanner(java_cols, start_row, stop_row, HConstants::LATEST_TIMESTAMP)
        else
          scanner = table.getScanner(java_cols, start_row)
        end

        row_count = 0 if limit
        result = []
        while (row_result = scanner.next) != nil
          if limit
            break if row_count == limit
            row_count += 1
          end
          values = {}
          row_result.entrySet.each do |entry|
            column_name = Java::String.new(entry.getKey).to_s
            data = to_ruby_string(entry.getValue)
            values[column_name] = data
          end
          unless values.empty?
            # TODO: is this really supposed to be hard coded?
            values['id'] = Java::String.new(row_result.getRow).to_s
            result << values
          end
        end
        scanner.close
        result
      end
    end

    # Delete a whole row (all columns, all versions up to the given
    # timestamp when one is provided).
    def delete(table_name, row, timestamp = nil)
      safe_exec do
        table = connect_table(table_name)
        timestamp ? table.deleteAll(row.to_bytes, timestamp) : table.deleteAll(row.to_bytes)
      end
    end

    # Create a table with the given BigRecordDriver::ColumnDescriptor list.
    #
    # Raises BigRecordDriver::TableAlreadyExists when the table is present.
    def create_table(table_name, column_descriptors)
      safe_exec do
        table_name = table_name.to_s
        unless table_exists?(table_name)
          tdesc = HTableDescriptor.new(table_name)

          column_descriptors.each do |cd|
            cdesc = generate_column_descriptor(cd)

            tdesc.addFamily(cdesc)
          end
          @admin.createTable(tdesc)
        else
          raise BigRecordDriver::TableAlreadyExists, table_name
        end
      end
    end

    # Delete a table and evict its cached HTable handle.
    #
    # Raises BigRecordDriver::TableNotFound when the table is missing.
    def drop_table(table_name)
      safe_exec do
        table_name = table_name.to_s

        if @admin.tableExists(table_name)
          # HBase requires a table to be disabled before it can be deleted.
          @admin.disableTable(table_name)
          @admin.deleteTable(table_name)

          # Remove the table connection from the cache
          @tables.delete(table_name) if @tables.has_key?(table_name)
        else
          raise BigRecordDriver::TableNotFound, table_name
        end
      end
    end

    # Add a column family to an existing table. The table is disabled for
    # the duration of the schema change and re-enabled afterwards.
    #
    # Raises BigRecordDriver::TableNotFound when the table is missing.
    def add_column(table_name, column_descriptor)
      safe_exec do
        table_name = table_name.to_s

        if @admin.tableExists(table_name)
          @admin.disableTable(table_name)

          cdesc = generate_column_descriptor(column_descriptor)
          @admin.addColumn(table_name, cdesc)

          @admin.enableTable(table_name)
        else
          raise BigRecordDriver::TableNotFound, table_name
        end
      end
    end

    # Remove a column family from an existing table. The trailing ':' is
    # appended to the family name when missing, as HBase expects it.
    #
    # Raises BigRecordDriver::TableNotFound when the table is missing.
    def remove_column(table_name, column_name)
      safe_exec do
        table_name = table_name.to_s
        column_name = column_name.to_s

        if @admin.tableExists(table_name)
          @admin.disableTable(table_name)

          column_name << ":" unless column_name =~ /:$/
          @admin.deleteColumn(table_name, column_name)

          @admin.enableTable(table_name)
        else
          raise BigRecordDriver::TableNotFound, table_name
        end
      end
    end

    # Modify an existing column family in place using the given
    # column descriptor (its #name selects the family to modify).
    #
    # Raises BigRecordDriver::TableNotFound when the table is missing.
    def modify_column(table_name, column_descriptor)
      safe_exec do
        table_name = table_name.to_s
        # NOTE: a stray "column_name = column_name.to_s" line (copy-pasted
        # from #remove_column) was removed here: column_name is not a
        # parameter of this method and the assignment was dead code.

        if @admin.tableExists(table_name)
          @admin.disableTable(table_name)

          cdesc = generate_column_descriptor(column_descriptor)
          @admin.modifyColumn(table_name, column_descriptor.name, cdesc)

          @admin.enableTable(table_name)
        else
          raise BigRecordDriver::TableNotFound, table_name
        end
      end
    end

    # Empty a table by dropping it and re-creating it with the same
    # table descriptor (schema is preserved, data is discarded).
    def truncate_table(table_name)
      safe_exec do
        table_name = table_name.to_s
        table = connect_table(table_name)
        # Capture the schema before the drop so it can be restored.
        tableDescriptor = table.getTableDescriptor
        drop_table(table_name)
        @admin.createTable(tableDescriptor)
      end
    end

    # Liveness check: returns whether the HBase master is running.
    def ping
      safe_exec do
        @admin.isMasterRunning
      end
    end

    # True when the named table exists in HBase.
    def table_exists?(table_name)
      safe_exec do
        @admin.tableExists(table_name.to_s)
      end
    end

    # Names of all tables known to HBase, as Ruby strings.
    def table_names
      safe_exec do
        @admin.listTables.collect { |td| Java::String.new(td.getName).to_s }
      end
    end

    # def const_missing(const)
    #   super
    # rescue NameError => ex
    #   raise NameError, "uninitialized constant #{const}"
    # end

    private

    # Create a connection to an HBase table and keep it in memory.
    # Handles are cached in @tables so repeated calls reuse the same
    # HTable instance.
    #
    # Raises BigRecordDriver::TableNotFound for a missing table and
    # ArgumentError when no table name is given.
    def connect_table(table_name)
      safe_exec do
        table_name = table_name.to_s
        return @tables[table_name] if @tables.has_key?(table_name)

        if table_exists?(table_name)
          @tables[table_name] = HTable.new(@conf, table_name)
        else
          if table_name and !table_name.empty?
            raise BigRecordDriver::TableNotFound, table_name
          else
            raise ArgumentError, "Table name not specified"
          end
        end
        @tables[table_name]
      end
    end

    # Build the HBase configuration from @config, create the admin
    # handle and reset the table-connection cache.
    def init_connection
      safe_exec do
        @conf = HBaseConfiguration.new
        @conf.set('hbase.zookeeper.quorum', "#{@config[:zookeeper_quorum]}")
        @conf.set('hbase.zookeeper.property.clientPort', "#{@config[:zookeeper_client_port]}")
        @admin = HBaseAdmin.new(@conf)
        @tables = {}
      end
    end

    # Translate a BigRecordDriver::ColumnDescriptor into an HBase
    # HColumnDescriptor, filling every unspecified option with the
    # HColumnDescriptor defaults.
    #
    # Raises ArgumentError for a missing name or invalid compression
    # type; the bloom_filter option is not supported yet.
    def generate_column_descriptor(column_descriptor)
      raise ArgumentError, "a column descriptor is missing a name" unless column_descriptor.name
      raise "bloom_filter option not supported yet" if column_descriptor.bloom_filter

      if column_descriptor.compression
        compression =
          case column_descriptor.compression.to_s
          when 'none'; Compression::Algorithm::NONE.getName()
          when 'gz';   Compression::Algorithm::GZ.getName()
          when 'lzo';  Compression::Algorithm::LZO.getName()
          else
            raise ArgumentError, "Invalid compression type: #{column_descriptor.compression} for the column_family #{column_descriptor.name}"
          end
      end

      n_versions = column_descriptor.versions
      in_memory  = column_descriptor.in_memory

      # set the default values of the missing parameters
      n_versions  ||= HColumnDescriptor::DEFAULT_VERSIONS
      compression ||= HColumnDescriptor::DEFAULT_COMPRESSION
      in_memory   ||= HColumnDescriptor::DEFAULT_IN_MEMORY
      # These four are not configurable through ColumnDescriptor yet, so
      # they are always the HBase defaults (the original relied on `||=`
      # against never-assigned locals; made explicit here).
      block_cache = HColumnDescriptor::DEFAULT_BLOCKCACHE
      block_size  = HColumnDescriptor::DEFAULT_BLOCKSIZE
      bloomfilter = HColumnDescriptor::DEFAULT_BLOOMFILTER
      ttl         = HColumnDescriptor::DEFAULT_TTL

      # add the ':' at the end if the user didn't specify it
      column_descriptor.name << ":" unless column_descriptor.name =~ /:$/

      cdesc = HColumnDescriptor.new(column_descriptor.name.to_bytes,
                                    n_versions,
                                    compression,
                                    in_memory,
                                    block_cache,
                                    block_size,
                                    ttl,
                                    bloomfilter)

      return cdesc
    end

  end

end
|
391
|
+
|
392
|
+
# Launcher: start the HBase driver as a DRb service on the port given as
# the first command-line argument (default: 40000), then block on the
# DRb server thread until the process is stopped.
port = ARGV[0] || 40000
DRb.start_service("druby://:#{port}", BigRecordDriver::HbaseServer.new)
puts "Started drb server on port #{port}."
DRb.thread.join
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'big_record_driver'
|