bigrecord-driver 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +0 -0
- data/bin/cassandra-driver +5 -0
- data/bin/hbase-driver +5 -0
- data/bin/launcher +155 -0
- data/conf/log4j.properties +48 -0
- data/lib/big_record_driver/bigrecord_server.rb +119 -0
- data/lib/big_record_driver/cassandra_driver/server.rb +135 -0
- data/lib/big_record_driver/client.rb +36 -0
- data/lib/big_record_driver/column_descriptor.rb +23 -0
- data/lib/big_record_driver/driver_manager.rb +34 -0
- data/lib/big_record_driver/exceptions.rb +12 -0
- data/lib/big_record_driver/hbase_driver/server.rb +396 -0
- data/lib/big_record_driver.rb +6 -0
- data/lib/bigrecord_driver.rb +1 -0
- data/test/abstract_test_client.rb +316 -0
- data/test/test_client_cassandra.rb +63 -0
- data/test/test_client_hbase.rb +26 -0
- data/test/test_driver_manager.rb +46 -0
- data/vendor/java/cassandra/cassandra-0.3.0-dev.jar +0 -0
- data/vendor/java/cassandra/libthrift.jar +0 -0
- data/vendor/java/cassandra/log4j-1.2.15.jar +0 -0
- data/vendor/java/hbase/commons-logging-1.0.4.jar +0 -0
- data/vendor/java/hbase/commons-logging-api-1.0.4.jar +0 -0
- data/vendor/java/hbase/hadoop-0.20.0-core.jar +0 -0
- data/vendor/java/hbase/hbase-0.20.0.jar +0 -0
- data/vendor/java/hbase/log4j-1.2.13.jar +0 -0
- data/vendor/java/hbase/zookeeper-r785019-hbase-1329.jar +0 -0
- metadata +83 -0
@@ -0,0 +1,396 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../column_descriptor'
|
2
|
+
require File.dirname(__FILE__) + '/../exceptions'
|
3
|
+
require File.dirname(__FILE__) + '/../bigrecord_server'
|
4
|
+
|
5
|
+
module BigRecordDriver

  # DRb-exposed server that backs BigRecord with HBase.
  #
  # Runs under JRuby: the include_class calls below import the HBase 0.20
  # Java client API into this class. All public methods are wrapped in
  # safe_exec (inherited from BigRecordServer) which translates Java
  # exceptions into Ruby-side driver exceptions.
  class HbaseServer < BigRecordServer
    include_class "java.util.TreeMap"

    include_class "org.apache.hadoop.hbase.client.HTable"
    include_class "org.apache.hadoop.hbase.client.HBaseAdmin"
    include_class "org.apache.hadoop.hbase.io.BatchUpdate"
    include_class "org.apache.hadoop.hbase.io.hfile.Compression"
    include_class "org.apache.hadoop.hbase.HBaseConfiguration"
    include_class "org.apache.hadoop.hbase.HConstants"
    include_class "org.apache.hadoop.hbase.HStoreKey"
    include_class "org.apache.hadoop.hbase.HTableDescriptor"
    include_class "org.apache.hadoop.hbase.HColumnDescriptor"

    include_class "org.apache.hadoop.io.Writable"

    # Establish the connection with HBase with the given configuration
    # parameters. Recognized keys:
    #   :zookeeper_quorum      => comma-separated quorum hosts (default 'localhost')
    #   :zookeeper_client_port => zookeeper client port (default '2181')
    def configure(config = {})
      config[:zookeeper_quorum]      ||= 'localhost'
      config[:zookeeper_client_port] ||= '2181'

      @config = config

      init_connection
    end

    # Atomic row insertion/update. Returns the row id on success. Example:
    #   update('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8',
    #          {'attribute:name' => "--- Oahu\n",
    #           'attribute:travel_rank' => "--- 0.90124565\n"})
    #   => 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8'
    def update(table_name, row, values, timestamp = nil)
      safe_exec do
        return nil unless row
        table = connect_table(table_name)

        batch = timestamp ? BatchUpdate.new(row, timestamp) : BatchUpdate.new(row)

        values.each do |column, value|
          batch.put(column, value.to_bytes)
        end

        table.commit(batch)
        row
      end
    end

    # Returns a column of a row. Example:
    #   get('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 'attribute:travel_rank')
    #   => "--- 0.90124565\n"
    #
    # valid options:
    #   :timestamp => integer corresponding to the time when the record was saved in hbase
    #   :versions  => number of versions to retrieve, starting at the specified
    #                 timestamp (or the latest)
    def get(table_name, row, column, options = {})
      safe_exec do
        return nil unless row
        table = connect_table(table_name)

        # Retrieve only the last version by default (:num_versions kept as a
        # legacy alias for :versions).
        options[:versions] ||= options[:num_versions]
        options[:versions] ||= 1

        # validate the arguments
        raise ArgumentError, "versions must be >= 1" unless options[:versions] >= 1

        # get the raw data from hbase
        if options[:timestamp]
          raw_data = table.get(row, column, options[:timestamp], options[:versions])
        elsif options[:versions] == 1
          raw_data = table.get(row, column)
        else
          raw_data = table.get(row, column, options[:versions])
        end

        # Return either a single value or an array, depending on the number of
        # versions that have been requested
        if options[:versions] == 1
          return nil unless raw_data
          # the timestamped call always returns a list, even for one version
          raw_data = raw_data[0] if options[:timestamp]
          to_ruby_string(raw_data)
        else
          return [] unless raw_data
          raw_data.collect do |raw_data_version|
            to_ruby_string(raw_data_version)
          end
        end
      end
    end

    # Returns the last version of the given columns of the given row. The
    # columns work with prefixes (e.g. 'attribute:' matches all attribute
    # columns). Example:
    #   get_columns('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', ['attribute:'])
    #   => {"attribute:name" => "--- Oahu\n", "attribute:travel_rank" => "--- 0.90124565\n", ...}
    #
    # Returns nil when the row does not exist. The row id is merged into the
    # result under the "id" key.
    def get_columns(table_name, row, columns, options = {})
      safe_exec do
        return nil unless row
        table_name = table_name.to_s
        table = connect_table(table_name)

        # HTable#getRow wants a java.lang.String[] of column specs
        java_cols = Java::String[columns.size].new
        columns.each_with_index do |col, i|
          java_cols[i] = Java::String.new(col)
        end

        result =
          if options[:timestamp]
            table.getRow(row, java_cols, options[:timestamp])
          else
            table.getRow(row, java_cols)
          end

        if result and !result.isEmpty
          values = {}
          result.entrySet.each do |entry|
            column_name = Java::String.new(entry.getKey).to_s
            values[column_name] = to_ruby_string(entry.getValue)
          end
          values["id"] = row
          values
        else
          nil
        end
      end
    end

    # Get consecutive rows. Example to get 100 records starting with the one
    # specified and get all the columns in the column family 'attribute:' :
    #   get_consecutive_rows('entities', 'b9cef848-a4e0-11dc-a7ba-0018f3137ea8', 100, ['attribute:'])
    #
    # +limit+ may be nil (scan until +stop_row+ / end of table). Returns an
    # array of hashes, each with the column values plus the row id under 'id'.
    def get_consecutive_rows(table_name, start_row, limit, columns, stop_row = nil)
      safe_exec do
        table_name = table_name.to_s
        table = connect_table(table_name)

        java_cols = Java::String[columns.size].new
        columns.each_with_index do |col, i|
          java_cols[i] = Java::String.new(col)
        end

        start_row ||= ""
        start_row = start_row.to_s

        # We cannot default stop_row like start_row because a default stop row
        # would have to be the biggest value possible
        if stop_row
          scanner = table.getScanner(java_cols, start_row, stop_row, HConstants::LATEST_TIMESTAMP)
        else
          scanner = table.getScanner(java_cols, start_row)
        end

        row_count = 0 if limit
        result = []
        while (row_result = scanner.next) != nil
          if limit
            break if row_count == limit
            row_count += 1
          end
          values = {}
          row_result.entrySet.each do |entry|
            column_name = Java::String.new(entry.getKey).to_s
            values[column_name] = to_ruby_string(entry.getValue)
          end
          unless values.empty?
            # TODO: is this really supposed to be hard coded?
            values['id'] = Java::String.new(row_result.getRow).to_s
            result << values
          end
        end
        scanner.close
        result
      end
    end

    # Delete a whole row (all versions up to +timestamp+ when given).
    def delete(table_name, row, timestamp = nil)
      safe_exec do
        table = connect_table(table_name)
        timestamp ? table.deleteAll(row.to_bytes, timestamp) : table.deleteAll(row.to_bytes)
      end
    end

    # Create a table with the given BigRecordDriver::ColumnDescriptor list.
    # Raises BigRecordDriver::TableAlreadyExists when the table is present.
    def create_table(table_name, column_descriptors)
      safe_exec do
        table_name = table_name.to_s
        unless table_exists?(table_name)
          tdesc = HTableDescriptor.new(table_name)

          column_descriptors.each do |cd|
            cdesc = generate_column_descriptor(cd)

            tdesc.addFamily(cdesc)
          end
          @admin.createTable(tdesc)
        else
          raise BigRecordDriver::TableAlreadyExists, table_name
        end
      end
    end

    # Delete a table. Raises BigRecordDriver::TableNotFound when absent.
    def drop_table(table_name)
      safe_exec do
        table_name = table_name.to_s

        if @admin.tableExists(table_name)
          @admin.disableTable(table_name)
          @admin.deleteTable(table_name)

          # Remove the table connection from the cache
          @tables.delete(table_name) if @tables.has_key?(table_name)
        else
          raise BigRecordDriver::TableNotFound, table_name
        end
      end
    end

    # Add a column family to an existing table. The table is disabled for the
    # duration of the schema change and re-enabled afterwards.
    def add_column(table_name, column_descriptor)
      safe_exec do
        table_name = table_name.to_s

        if @admin.tableExists(table_name)
          @admin.disableTable(table_name)

          cdesc = generate_column_descriptor(column_descriptor)
          @admin.addColumn(table_name, cdesc)

          @admin.enableTable(table_name)
        else
          raise BigRecordDriver::TableNotFound, table_name
        end
      end
    end

    # Remove a column family from an existing table. The trailing ':' is
    # appended to the family name if missing, as required by HBase.
    def remove_column(table_name, column_name)
      safe_exec do
        table_name = table_name.to_s
        column_name = column_name.to_s

        if @admin.tableExists(table_name)
          @admin.disableTable(table_name)

          column_name << ":" unless column_name =~ /:$/
          @admin.deleteColumn(table_name, column_name)

          @admin.enableTable(table_name)
        else
          raise BigRecordDriver::TableNotFound, table_name
        end
      end
    end

    # Modify an existing column family in place using the settings of the
    # given column descriptor.
    def modify_column(table_name, column_descriptor)
      safe_exec do
        table_name = table_name.to_s
        # NOTE: a stray `column_name = column_name.to_s` line (copied from
        # remove_column) was removed here — column_name is not a parameter of
        # this method and the value was never used.

        if @admin.tableExists(table_name)
          @admin.disableTable(table_name)

          cdesc = generate_column_descriptor(column_descriptor)
          @admin.modifyColumn(table_name, column_descriptor.name, cdesc)

          @admin.enableTable(table_name)
        else
          raise BigRecordDriver::TableNotFound, table_name
        end
      end
    end

    # Empty a table by dropping it and recreating it with the same schema.
    def truncate_table(table_name)
      safe_exec do
        table_name = table_name.to_s
        table = connect_table(table_name)
        tableDescriptor = table.getTableDescriptor
        drop_table(table_name)
        @admin.createTable(tableDescriptor)
      end
    end

    # Liveness probe: true when the HBase master is reachable and running.
    def ping
      safe_exec do
        @admin.isMasterRunning
      end
    end

    # True when the named table exists in HBase.
    def table_exists?(table_name)
      safe_exec do
        @admin.tableExists(table_name.to_s)
      end
    end

    # List the names of all tables known to the HBase master.
    def table_names
      safe_exec do
        @admin.listTables.collect{|td| Java::String.new(td.getName).to_s}
      end
    end

#    def const_missing(const)
#      super
#    rescue NameError => ex
#      raise NameError, "uninitialized constant #{const}"
#    end

  private
    # Create a connection to an HBase table and keep it in memory (@tables
    # caches one HTable handle per table name).
    def connect_table(table_name)
      safe_exec do
        table_name = table_name.to_s
        return @tables[table_name] if @tables.has_key?(table_name)

        if table_exists?(table_name)
          @tables[table_name] = HTable.new(@conf, table_name)
        elsif table_name and !table_name.empty?
          raise BigRecordDriver::TableNotFound, table_name
        else
          raise ArgumentError, "Table name not specified"
        end
        @tables[table_name]
      end
    end

    # Build the HBase configuration from @config, open the admin connection
    # and reset the table-handle cache.
    def init_connection
      safe_exec do
        @conf = HBaseConfiguration.new
        @conf.set('hbase.zookeeper.quorum', "#{@config[:zookeeper_quorum]}")
        @conf.set('hbase.zookeeper.property.clientPort', "#{@config[:zookeeper_client_port]}")
        @admin = HBaseAdmin.new(@conf)
        @tables = {}
      end
    end

    # Translate a BigRecordDriver column descriptor into an HBase
    # HColumnDescriptor, filling in HBase defaults for unset options.
    def generate_column_descriptor(column_descriptor)
      raise ArgumentError, "a column descriptor is missing a name" unless column_descriptor.name
      raise "bloom_filter option not supported yet" if column_descriptor.bloom_filter

      if column_descriptor.compression
        compression =
          case column_descriptor.compression.to_s
          when 'none' then Compression::Algorithm::NONE.getName()
          when 'gz'   then Compression::Algorithm::GZ.getName()
          when 'lzo'  then Compression::Algorithm::LZO.getName()
          else
            raise ArgumentError, "Invalid compression type: #{column_descriptor.compression} for the column_family #{column_descriptor.name}"
          end
      end

      # set the default values of the missing parameters
      n_versions  = column_descriptor.versions || HColumnDescriptor::DEFAULT_VERSIONS
      in_memory   = column_descriptor.in_memory || HColumnDescriptor::DEFAULT_IN_MEMORY
      compression ||= HColumnDescriptor::DEFAULT_COMPRESSION
      block_cache = HColumnDescriptor::DEFAULT_BLOCKCACHE
      block_size  = HColumnDescriptor::DEFAULT_BLOCKSIZE
      bloomfilter = HColumnDescriptor::DEFAULT_BLOOMFILTER
      ttl         = HColumnDescriptor::DEFAULT_TTL

      # add the ':' at the end if the user didn't specify it
      column_descriptor.name << ":" unless column_descriptor.name =~ /:$/

      HColumnDescriptor.new(column_descriptor.name.to_bytes,
                            n_versions,
                            compression,
                            in_memory,
                            block_cache,
                            block_size,
                            ttl,
                            bloomfilter)
    end

  end

end
|
391
|
+
|
392
|
+
# Stand-alone entry point: serve HbaseServer over DRb.
# The port comes from the first command-line argument, falling back to 40000.
port = ARGV[0] || 40000
DRb.start_service("druby://:#{port}", BigRecordDriver::HbaseServer.new)
puts "Started drb server on port #{port}."
DRb.thread.join
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'big_record_driver'
|