hbase-jruby 0.1.1-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +623 -0
- data/Rakefile +7 -0
- data/hbase-jruby.gemspec +23 -0
- data/lib/hbase-jruby.rb +16 -0
- data/lib/hbase-jruby/admin.rb +29 -0
- data/lib/hbase-jruby/byte_array.rb +39 -0
- data/lib/hbase-jruby/cell.rb +122 -0
- data/lib/hbase-jruby/column_key.rb +63 -0
- data/lib/hbase-jruby/dependency.rb +69 -0
- data/lib/hbase-jruby/hbase.rb +77 -0
- data/lib/hbase-jruby/pom/cdh3u5.xml +40 -0
- data/lib/hbase-jruby/pom/cdh4.1.2.xml +47 -0
- data/lib/hbase-jruby/result.rb +382 -0
- data/lib/hbase-jruby/scoped.rb +489 -0
- data/lib/hbase-jruby/table.rb +486 -0
- data/lib/hbase-jruby/util.rb +171 -0
- data/lib/hbase-jruby/version.rb +5 -0
- data/test/helper.rb +53 -0
- data/test/test_byte_array.rb +40 -0
- data/test/test_cell.rb +51 -0
- data/test/test_column_key.rb +49 -0
- data/test/test_hbase.rb +36 -0
- data/test/test_scoped.rb +318 -0
- data/test/test_table.rb +211 -0
- data/test/test_table_admin.rb +148 -0
- data/test/test_util.rb +80 -0
- metadata +116 -0
@@ -0,0 +1,489 @@
|
|
1
|
+
class HBase
|
2
|
+
# Scope of table scan
|
3
|
+
# @author Junegunn Choi <junegunn.c@gmail.com>
|
4
|
+
class Scoped
|
5
|
+
include Enumerable
|
6
|
+
|
7
|
+
# Builds a fresh scope over the same table; nothing configured on the
# receiver (range, filters, projection, options) is carried over.
# @return [HBase::Scoped] A clean HBase::Scoped object for the same table
def unscope
  # NOTE(review): the constructor is invoked reflectively, presumably because
  # Scoped.new is not public -- confirm against the rest of the gem.
  Scoped.__send__(:new, @table)
end
|
12
|
+
|
13
|
+
# Number of rows in the scope.
# Iterates over a scanner opened with the scan from filtered_scan_minimum
# and counts the results it yields.
# @return [Integer] The number of rows in the scope
def count
  scanner = htable.getScanner(filtered_scan_minimum)
  cnt = 0
  scanner.each { cnt += 1 }
  cnt
ensure
  # Always release the scanner, even when iteration raises (same policy as #each).
  # `scanner` is nil here when getScanner itself failed.
  scanner.close if scanner
end
|
22
|
+
|
23
|
+
# @overload get(rowkey)
#   Single GET.
#   Fetches the record for the given rowkey; nil is returned when the result is empty.
#   @param [Object] rowkey Rowkey
#   @return [HBase::Result, nil]
# @overload get(rowkeys)
#   Batch GET. Fetches the records for the given array of rowkeys.
#   Nonexistent records are returned as nils.
#   @param [Array<Object>] rowkeys Rowkeys
#   @return [Array<HBase::Result, nil>]
def get rowkeys
  # Empty raw results become nil, everything else is wrapped in a Result.
  wrap = lambda { |raw| raw.isEmpty ? nil : Result.new(raw) }

  if rowkeys.is_a?(Array)
    gets = rowkeys.map { |rk| getify rk }
    htable.get(gets).map { |raw| wrap.call(raw) }
  else
    wrap.call(htable.get(getify(rowkeys)))
  end
end
|
44
|
+
|
45
|
+
# Iterate through the scope.
# Without a block the scope itself is returned.
# @yield [HBase::Result] Yields each row in the scope
def each
  return self unless block_given?

  scanner = nil
  begin
    scanner = htable.getScanner(filtered_scan)
    scanner.each { |row| yield Result.send(:new, row) }
  ensure
    # The scanner is closed whether iteration finishes or raises.
    scanner.close if scanner
  end
end
|
61
|
+
|
62
|
+
# Sets the number of rows for caching that will be passed to scanners.
# @param [Integer] rows The number of rows to cache
# @return [HBase::Scoped] HBase::Scoped object with the caching option
# @raise [ArgumentError] if rows is not a non-negative integer
def caching rows
  # Integer instead of Fixnum: Fixnum is deprecated since Ruby 2.4 and was
  # removed in Ruby 3.2, while Integer is available on every Ruby version.
  unless rows.is_a?(Integer) && rows >= 0
    raise ArgumentError, "Invalid caching size. Must be a non-negative integer."
  end
  spawn :@caching, rows
end
|
69
|
+
|
70
|
+
# @overload range(start_key, opts = {})
|
71
|
+
# Returns an HBase::Scoped object with the specified rowkey range
|
72
|
+
# Overrides current range.
|
73
|
+
# @param [Object] start_key Start rowkey
|
74
|
+
# @param [Hash] opts Prefix filter
|
75
|
+
# @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
|
76
|
+
# @return [HBase::Scoped] HBase::Scoped object with the range
|
77
|
+
# @overload range(start_key, stop_key, opts = {})
|
78
|
+
# Returns an HBase::Scoped object with the specified rowkey range
|
79
|
+
# Overrides current range.
|
80
|
+
# @param [Object, nil] start_key Start rowkey. Can be nil.
|
81
|
+
# @param [Object] stop_key Stop rowkey (exclusive)
|
82
|
+
# @param [Hash] opts Prefix filter
|
83
|
+
# @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
|
84
|
+
# @return [HBase::Scoped] HBase::Scoped object with the range
|
85
|
+
# @overload range(start_stop_range, opts = {})
|
86
|
+
# Returns an HBase::Scoped object with the specified rowkey range
|
87
|
+
# Overrides current range.
|
88
|
+
# @param [Range] start_stop_range Rowkey scan range
|
89
|
+
# @param [Hash] opts Prefix filter
|
90
|
+
# @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
|
91
|
+
# @return [HBase::Scoped] HBase::Scoped object with the range
|
92
|
+
# @overload range(opts)
|
93
|
+
# Returns an HBase::Scoped object with the specified rowkey range
|
94
|
+
# Overrides current range.
|
95
|
+
# @param [Hash] opts Prefix filter
|
96
|
+
# @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
|
97
|
+
# @return [HBase::Scoped] HBase::Scoped object with the range
|
98
|
+
# @example
|
99
|
+
# table.range(:prefix => '2012')
|
100
|
+
# table.range(:prefix => ['2010', '2012'])
|
101
|
+
def range *key_range
  # A trailing Hash carries the options; :prefix is the only key accepted.
  opts = key_range.last
  prefixes = nil
  if opts.is_a?(Hash)
    prefixes = [*opts[:prefix]]
    unknown = opts.keys - [:prefix]
    raise ArgumentError,
      "Invalid range. Unknown option(s) specified." unless unknown.empty?
    key_range = key_range[0...-1]
  end

  # With an options Hash the keys may be omitted entirely; otherwise one or two keys are required.
  allowed_lengths = prefixes ? [0, 1, 2] : [1, 2]
  raise ArgumentError, "Invalid range" unless allowed_lengths.include?(key_range.length)

  new_range =
    if key_range[0].is_a?(Range)
      key_range[0]
    elsif key_range.empty?
      nil
    else
      key_range
    end

  spawn :@range, new_range, :@prefixes, prefixes || []
end
|
122
|
+
|
123
|
+
# Returns an HBase::Scoped object with the given filters appended to the current ones.
# A Hash maps column names to conditions (an Array value means "any of these");
# FilterBase and FilterList instances are used as they are.
# @param [Array<Hash, FilterBase, FilterList>] filters
# @return [HBase::Scoped] HBase::Scoped object also with the specified filters
# @raise [ArgumentError] if an argument is none of the supported types
def filter *filters
  additions = filters.map do |spec|
    case spec
    when Hash
      spec.map do |column, value|
        family, qualifier = Util.parse_column_name column

        if value.is_a?(Array)
          # Several candidate values for one column: passing any one is enough.
          alternatives = value.map { |v| filter_for family, qualifier, v }
          FilterList.new(FilterList::Operator::MUST_PASS_ONE, alternatives)
        else
          filter_for family, qualifier, value
        end
      end.flatten
    when FilterBase, FilterList
      spec
    else
      raise ArgumentError, "Unknown filter type"
    end
  end

  spawn :@filters, @filters + additions.flatten
end
|
148
|
+
|
149
|
+
# Returns an HBase::Scoped object with the specified row number limit
# @param [Integer] rows Sets the maximum number of rows to return from scan
# @return [HBase::Scoped] HBase::Scoped object with the specified row number limit
# @raise [ArgumentError] if rows is not a non-negative integer
def limit rows
  # Integer instead of Fixnum: Fixnum is deprecated since Ruby 2.4 and was
  # removed in Ruby 3.2, while Integer is available on every Ruby version.
  unless rows.is_a?(Integer) && rows >= 0
    raise ArgumentError, "Invalid limit. Must be a non-negative integer."
  end
  spawn :@limit, rows
end
|
156
|
+
|
157
|
+
# Returns an HBase::Scoped object with the specified projection
# @param [Array<String, Hash>] columns Array of column expressions. A Hash may
#   carry the :prefix, :range, :limit and :offset options.
# @return [HBase::Scoped] HBase::Scoped object with the specified projection
# @raise [ArgumentError] if a Hash has an unknown key, or :limit / :offset is
#   not a non-negative integer
def project *columns
  # Validate every Hash argument, not only a leading one, so that malformed
  # options are rejected here instead of surfacing later when the scan is built.
  columns.each do |col|
    next unless col.is_a?(Hash)

    unless (col.keys - [:prefix, :range, :limit, :offset]).empty?
      raise ArgumentError, "Invalid projection"
    end

    # Integer instead of Fixnum: Fixnum is deprecated since Ruby 2.4 and was
    # removed in Ruby 3.2, while Integer is available on every Ruby version.
    limit = col[:limit]
    if limit && !(limit.is_a?(Integer) && limit >= 0)
      raise ArgumentError, ":limit must be a non-negative integer"
    end

    offset = col[:offset]
    if offset && !(offset.is_a?(Integer) && offset >= 0)
      raise ArgumentError, ":offset must be a non-negative integer"
    end
  end

  spawn :@project, @project + columns
end
|
181
|
+
|
182
|
+
# Returns an HBase::Scoped object with the specified version number limit.
# If not set, all versions of each value are fetched by default.
# @param [Integer] vs Sets the maximum number of versions
# @return [HBase::Scoped] HBase::Scoped object with the version number limit
# @raise [ArgumentError] if vs is not a positive integer
def versions vs
  # Integer instead of Fixnum: Fixnum is deprecated since Ruby 2.4 and was
  # removed in Ruby 3.2, while Integer is available on every Ruby version.
  unless vs.is_a?(Integer) && vs > 0
    raise ArgumentError, "Invalid versions. Must be a positive integer."
  end
  spawn :@versions, vs
end
|
190
|
+
|
191
|
+
# Returns an HBase::Scoped object with the specified batch limit
# @param [Integer] b Sets the maximum number of values to fetch each time
# @return [HBase::Scoped] HBase::Scoped object with the specified batch limit
# @raise [ArgumentError] if b is not a positive integer
def batch b
  # Integer instead of Fixnum: Fixnum is deprecated since Ruby 2.4 and was
  # removed in Ruby 3.2, while Integer is available on every Ruby version.
  unless b.is_a?(Integer) && b > 0
    raise ArgumentError, "Invalid batch size. Must be a positive integer."
  end
  spawn :@batch, b
end
|
198
|
+
|
199
|
+
private
|
200
|
+
# Sets up an unrestricted scope for the table: no filters, projection or
# prefixes, and every optional setting left at nil.
# @param [HBase::Table] table
def initialize table
  @table    = table
  @filters  = []
  @project  = []
  @prefixes = []

  # Optional settings; they stay nil until a value is assigned to them.
  [:@range, :@versions, :@batch, :@caching, :@limit].each do |ivar|
    instance_variable_set ivar, nil
  end
end
|
212
|
+
|
213
|
+
# Returns a shallow copy (dup) of this scope with the given instance
# variables overwritten; the receiver itself is not modified.
# @param [Array] args Flat list of alternating instance variable names and values
# @return [HBase::Scoped] The modified copy
def spawn *args
  copy = dup
  args.each_slice(2) do |name, value|
    copy.instance_variable_set name, value
  end
  copy
end
|
221
|
+
|
222
|
+
# Shortcut to the `htable` object exposed by the wrapped table.
# @return [Object] Whatever @table.htable returns
def htable
  @table.htable
end
|
225
|
+
|
226
|
+
# Applies the projection stored in @project to a Get/Scan-like object.
# Plain column expressions are added to obj via addColumn / addFamily;
# Hash entries (:prefix, :range, :limit, :offset) are turned into filters.
# @param [Object] obj Object responding to addColumn and addFamily
# @return [Array] Filters derived from the Hash entries (possibly empty)
# @raise [ArgumentError] on an unknown option, a non-Range :range value, or
#   when only one of :limit / :offset is given
def process_projection! obj
  limit    = nil
  offset   = nil
  prefixes = []
  ranges   = []

  @project.each do |col|
    unless col.is_a?(Hash)
      cf, cq = Util.parse_column_name col
      cq ? obj.addColumn(cf, cq) : obj.addFamily(cf)
      next
    end

    col.each do |prop, val|
      case prop
      when :prefix
        prefixes.concat [*val]
      when :range
        # Not splatted: only a real Array is treated as a list of ranges.
        ranges.concat(val.is_a?(Array) ? val : [val])
      when :limit
        limit = val
      when :offset
        offset = val
      else
        # Shouldn't happen
        raise ArgumentError, "Invalid projection: #{prop}"
      end
    end
  end

  # Exactly one of the two being set is an error.
  if !limit != !offset
    raise ArgumentError, "Both `limit` and `offset` must be specified"
  end

  filters = []

  # Column prefix filter (disjunctive)
  unless prefixes.empty?
    byte_prefixes = prefixes.map { |pref| Util.to_bytes(pref).to_a }
    filters << MultipleColumnPrefixFilter.new(byte_prefixes.to_java(Java::byte[]))
  end

  # Column range filter (disjunctive)
  unless ranges.empty?
    range_filters = ranges.map do |range|
      raise ArgumentError, "Invalid range type" unless range.is_a? Range

      ColumnRangeFilter.new(
        Util.to_bytes(range.begin), true,
        Util.to_bytes(range.end), !range.exclude_end?)
    end
    filters << FilterList.new(FilterList::Operator::MUST_PASS_ONE, range_filters)
  end

  # Column pagination filter (last)
  filters << ColumnPaginationFilter.new(limit, offset) if limit && offset

  filters
end
|
291
|
+
|
292
|
+
# Builds the Get for a rowkey, carrying over the scope's version limit,
# projection, row range (or the range derived from prefixes), prefix
# filters and user filters.
# @param [Object] rowkey Rowkey, converted with Util.to_bytes
# @return [Get] The configured Get object
# @raise [ArgumentError] if the effective range is neither a Range nor an Array
def getify rowkey
  get = Get.new(Util.to_bytes rowkey)

  # Without an explicit limit, setMaxVersions is called with no argument.
  @versions ? get.setMaxVersions(@versions) : get.setMaxVersions

  filters = [].concat(process_projection!(get))

  compare = CompareFilter::CompareOp
  row_filter = lambda { |op, key|
    RowFilter.new(op, BinaryComparator.new(Util.to_bytes key))
  }

  row_range = @range || range_for_prefix
  if row_range
    case row_range
    when Range
      upper_op = row_range.exclude_end? ? compare::LESS : compare::LESS_OR_EQUAL
      filters << row_filter.call(compare::GREATER_OR_EQUAL, row_range.begin)
      filters << row_filter.call(upper_op, row_range.end)
    when Array
      # Either bound may be missing; the stop key is always exclusive.
      start_key, stop_key = row_range[0], row_range[1]
      filters << row_filter.call(compare::GREATER_OR_EQUAL, start_key) if start_key
      filters << row_filter.call(compare::LESS, stop_key) if stop_key
    else
      raise ArgumentError, "Invalid range"
    end
  end

  # Prefix filters
  filters.concat [*build_prefix_filter]

  # RowFilter must precede the others
  filters.concat @filters

  get.setFilter FilterList.new(filters) unless filters.empty?
  get
end
|
340
|
+
|
341
|
+
# Builds the value filter for one column from a condition:
# a Range becomes a lower/upper bound pair, a Hash maps comparison
# operators to operands, and anything else is an equality test.
# @param [Object] cf Column family
# @param [Object] cq Column qualifier
# @param [Range, Hash, Object] val Condition on the column value
# @return [FilterList, SingleColumnValueFilter]
# @raise [ArgumentError] on an unknown operator or a Hash operand
def filter_for cf, cq, val
  compare = CompareFilter::CompareOp
  column_filter = lambda { |op, bytes|
    SingleColumnValueFilter.new(cf, cq, op, bytes)
  }

  if val.is_a?(Range)
    lower = Util.to_bytes val.begin
    upper = Util.to_bytes val.end
    upper_op = val.exclude_end? ? compare::LESS : compare::LESS_OR_EQUAL
    return FilterList.new(FilterList::Operator::MUST_PASS_ALL, [
      column_filter.call(compare::GREATER_OR_EQUAL, lower),
      column_filter.call(upper_op, upper)
    ])
  end

  return column_filter.call(compare::EQUAL, Util.to_bytes(val)) unless val.is_a?(Hash)

  # Word and symbolic spellings of each operator are interchangeable.
  operators = {
    :gt  => compare::GREATER,          :>  => compare::GREATER,
    :gte => compare::GREATER_OR_EQUAL, :>= => compare::GREATER_OR_EQUAL,
    :lt  => compare::LESS,             :<  => compare::LESS,
    :lte => compare::LESS_OR_EQUAL,    :<= => compare::LESS_OR_EQUAL,
    :eq  => compare::EQUAL,            :== => compare::EQUAL,
    :ne  => compare::NOT_EQUAL,        :!= => compare::NOT_EQUAL
  }

  predicates = val.map do |op, operand|
    operator = operators.fetch(op) { raise ArgumentError, "Unknown operator: #{op}" }

    case operand
    when Array
      # XXX TODO Undocumented feature
      # "not equal" must hold for every listed value; any other operator
      # only needs to hold for one of them.
      mode =
        if [:ne, :!=].include?(op)
          FilterList::Operator::MUST_PASS_ALL
        else
          FilterList::Operator::MUST_PASS_ONE
        end
      FilterList.new(mode, operand.map { |v| column_filter.call(operator, Util.to_bytes(v)) })
    when Hash
      raise ArgumentError, "Hash predicate not supported"
    else
      column_filter.call(operator, Util.to_bytes(operand))
    end
  end

  FilterList.new(FilterList::Operator::MUST_PASS_ALL, predicates)
end
|
402
|
+
|
403
|
+
# Builds the Scan for this scope: row range (or the range derived from
# prefixes), caching, filters, limit, version limit and batch size.
# @return [Scan] The configured Scan object
# @raise [ArgumentError] if the effective range is neither a Range nor an Array
# @raise [NotImplementedError] if a limit is set but Scan lacks setMaxResultSize
def filtered_scan
  scan = Scan.new

  row_range = @range || range_for_prefix
  if row_range
    case row_range
    when Range
      scan.setStartRow Util.to_bytes(row_range.begin)
      stop = Util.to_bytes(row_range.end)
      # For an inclusive range the stop key is passed through Util.append_0.
      scan.setStopRow(row_range.exclude_end? ? stop : Util.append_0(stop))
    when Array
      scan.setStartRow Util.to_bytes(row_range[0]) if row_range[0]
      scan.setStopRow Util.to_bytes(row_range[1]) if row_range[1]
    else
      # This shouldn't happen though.
      raise ArgumentError, "Invalid range"
    end
  end

  scan.caching = @caching if @caching

  # Filters: prefix filter first, then user filters, then projection filters.
  filters = [*build_prefix_filter] + @filters + process_projection!(scan)
  scan.setFilter FilterList.new(filters) unless filters.empty?

  if @limit
    # setMaxResultSize not implemented in 0.92
    unless scan.respond_to?(:setMaxResultSize)
      raise NotImplementedError, 'Scan.setMaxResultSize not implemented'
    end
    # NOTE(review): the HBase client API documents setMaxResultSize as a cap
    # in bytes, not in rows -- confirm this actually enforces a row limit.
    scan.setMaxResultSize(@limit)
  end

  # Without an explicit limit, setMaxVersions is called with no argument.
  @versions ? scan.setMaxVersions(@versions) : scan.setMaxVersions

  # Batch
  scan.setBatch @batch if @batch

  scan
end
|
451
|
+
|
452
|
+
# Variant of filtered_scan with block caching switched off and a
# KeyOnlyFilter added to whatever filter the scan already has.
# @return [Scan] The adjusted Scan object
def filtered_scan_minimum
  scan = filtered_scan
  scan.cache_blocks = false

  existing = scan.getFilter
  if existing
    existing.addFilter KeyOnlyFilter.new
  else
    scan.setFilter FilterList.new(KeyOnlyFilter.new)
  end

  scan
end
|
463
|
+
|
464
|
+
# Builds the rowkey prefix filter for @prefixes.
# @return [PrefixFilter, FilterList, nil] nil without prefixes, a single
#   PrefixFilter for one prefix, otherwise a MUST_PASS_ONE FilterList
def build_prefix_filter
  return nil if @prefixes.empty?

  prefix_filters = @prefixes.map do |prefix|
    PrefixFilter.new(Util.to_bytes prefix)
  end

  # A lone filter is returned bare rather than wrapped in a list.
  return prefix_filters.first if prefix_filters.size == 1

  FilterList.new FilterList::Operator::MUST_PASS_ONE, prefix_filters
end
|
477
|
+
|
478
|
+
# Derives the rowkey range covered by @prefixes: from the smallest prefix
# up to the stop key computed for the largest one.
# @return [Array, nil] Two-element [start, stop] array, or nil without prefixes
def range_for_prefix
  return nil if @prefixes.empty?

  sorted = @prefixes.map { |pref| ByteArray.new(pref) }.sort
  lowest  = sorted.first
  highest = sorted.last

  [lowest.java, highest.stopkey_bytes_for_prefix]
end
|
487
|
+
end#Scoped
|
488
|
+
end#HBase
|
489
|
+
|