hbase-jruby 0.1.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +623 -0
- data/Rakefile +7 -0
- data/hbase-jruby.gemspec +23 -0
- data/lib/hbase-jruby.rb +16 -0
- data/lib/hbase-jruby/admin.rb +29 -0
- data/lib/hbase-jruby/byte_array.rb +39 -0
- data/lib/hbase-jruby/cell.rb +122 -0
- data/lib/hbase-jruby/column_key.rb +63 -0
- data/lib/hbase-jruby/dependency.rb +69 -0
- data/lib/hbase-jruby/hbase.rb +77 -0
- data/lib/hbase-jruby/pom/cdh3u5.xml +40 -0
- data/lib/hbase-jruby/pom/cdh4.1.2.xml +47 -0
- data/lib/hbase-jruby/result.rb +382 -0
- data/lib/hbase-jruby/scoped.rb +489 -0
- data/lib/hbase-jruby/table.rb +486 -0
- data/lib/hbase-jruby/util.rb +171 -0
- data/lib/hbase-jruby/version.rb +5 -0
- data/test/helper.rb +53 -0
- data/test/test_byte_array.rb +40 -0
- data/test/test_cell.rb +51 -0
- data/test/test_column_key.rb +49 -0
- data/test/test_hbase.rb +36 -0
- data/test/test_scoped.rb +318 -0
- data/test/test_table.rb +211 -0
- data/test/test_table_admin.rb +148 -0
- data/test/test_util.rb +80 -0
- metadata +116 -0
@@ -0,0 +1,489 @@
|
|
1
|
+
class HBase
|
2
|
+
# Scope of table scan
|
3
|
+
# @author Junegunn Choi <junegunn.c@gmail.com>
|
4
|
+
class Scoped
|
5
|
+
include Enumerable
|
6
|
+
|
7
|
+
# Builds a brand-new scope on the same table; nothing configured on this
# scope is carried over except the table itself.
# @return [HBase::Scoped] A clean HBase::Scoped object for the same table
def unscope
  Scoped.__send__(:new, @table)
end
|
12
|
+
|
13
|
+
# Number of rows in the scope.
# Opens a scanner over the scan returned by +filtered_scan_minimum+ and
# counts the rows it yields. The scanner is always closed afterwards, even
# when iteration raises, mirroring what +each+ does.
# @return [Integer] The number of rows in the scope
def count
  scanner = htable.getScanner(filtered_scan_minimum)
  cnt = 0
  scanner.each { cnt += 1 }
  cnt
ensure
  # scanner is nil when getScanner itself failed
  scanner.close if scanner
end
|
22
|
+
|
23
|
+
# @overload get(rowkey)
#   Single GET.
#   Fetches the record stored under the given rowkey; nil when there is none.
#   @param [Object] rowkey Rowkey
#   @return [HBase::Result, nil]
# @overload get(rowkeys)
#   Batch GET. Fetches one record per given rowkey, in a single call.
#   Rowkeys without a record yield nil at the corresponding position.
#   @param [Array<Object>] rowkeys Rowkeys
#   @return [Array<HBase::Result, nil>]
def get rowkeys
  # An empty raw result means "no such row" and is reported as nil
  wrap = lambda { |raw| raw.isEmpty ? nil : Result.new(raw) }

  if rowkeys.is_a?(Array)
    gets = rowkeys.map { |rk| getify rk }
    htable.get(gets).map { |raw| wrap.call(raw) }
  else
    wrap.call htable.get(getify rowkeys)
  end
end
|
44
|
+
|
45
|
+
# Iterates over every row in the scope.
# Without a block the scope itself is returned. With a block, a scanner is
# opened on the filtered scan and closed again once iteration finishes or fails.
# @yield [HBase::Result] Yields each row in the scope
def each
  return self unless block_given?

  scanner = htable.getScanner(filtered_scan)
  scanner.each { |row| yield Result.send(:new, row) }
ensure
  scanner.close if scanner
end
|
61
|
+
|
62
|
+
# Sets the number of rows for caching that will be passed to scanners.
# @param [Integer] rows The number of rows to cache
# @return [HBase::Scoped] HBase::Scoped object with the caching option
# @raise [ArgumentError] If rows is not a non-negative integer
def caching rows
  # Checked against Integer rather than Fixnum: Fixnum was deprecated in
  # Ruby 2.4 and removed in 3.2, where referencing it raises NameError.
  unless rows.is_a?(Integer) && rows >= 0
    raise ArgumentError, "Invalid caching size. Must be a non-negative integer."
  end
  spawn :@caching, rows
end
|
69
|
+
|
70
|
+
# Returns an HBase::Scoped object restricted to a rowkey range and/or a set
# of rowkey prefixes. Any range and prefixes previously set are replaced.
# @overload range(start_key, opts = {})
#   @param [Object] start_key Start rowkey
#   @param [Hash] opts Prefix filter
#   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
#   @return [HBase::Scoped] HBase::Scoped object with the range
# @overload range(start_key, stop_key, opts = {})
#   @param [Object, nil] start_key Start rowkey. Can be nil.
#   @param [Object] stop_key Stop rowkey (exclusive)
#   @param [Hash] opts Prefix filter
#   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
#   @return [HBase::Scoped] HBase::Scoped object with the range
# @overload range(start_stop_range, opts = {})
#   @param [Range] start_stop_range Rowkey scan range
#   @param [Hash] opts Prefix filter
#   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
#   @return [HBase::Scoped] HBase::Scoped object with the range
# @overload range(opts)
#   @param [Hash] opts Prefix filter
#   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
#   @return [HBase::Scoped] HBase::Scoped object with the range
# @raise [ArgumentError] On unknown options or a wrong number of range keys
# @example
#   table.range(:prefix => '2012')
#   table.range(:prefix => ['2010', '2012'])
def range *key_range
  opts = key_range.last
  if opts.is_a?(Hash)
    prefixes = [*opts[:prefix]]
    unknown = opts.keys - [:prefix]
    raise ArgumentError, "Invalid range. Unknown option(s) specified." unless unknown.empty?
    key_range = key_range[0...-1]
  end

  # With an options hash the keys may be omitted entirely; otherwise at
  # least one key is required.
  allowed_lengths = prefixes ? [0, 1, 2] : [1, 2]
  raise ArgumentError, "Invalid range" unless allowed_lengths.include?(key_range.length)

  bounds =
    if key_range[0].is_a?(Range)
      key_range[0]
    elsif key_range.empty?
      nil
    else
      key_range
    end

  spawn :@range, bounds, :@prefixes, prefixes || []
end
|
122
|
+
|
123
|
+
# Returns an HBase::Scoped object with the given filters appended to the
# ones already present.
# A Hash maps column names to expected values: each pair becomes one filter
# built by +filter_for+, and an Array value becomes a MUST_PASS_ONE list of
# such filters. FilterBase and FilterList instances are taken as they are.
# @param [Array<Hash, FilterBase, FilterList>] filters
# @return [HBase::Scoped] HBase::Scoped object also with the specified filters
# @raise [ArgumentError] If an argument is none of the supported types
def filter *filters
  additions = filters.map do |spec|
    case spec
    when Hash
      spec.map { |column, expected|
        cf, cq = Util.parse_column_name column

        if expected.is_a?(Array)
          FilterList.new(FilterList::Operator::MUST_PASS_ONE,
                         expected.map { |candidate| filter_for cf, cq, candidate })
        else
          filter_for cf, cq, expected
        end
      }.flatten
    when FilterBase, FilterList
      spec
    else
      raise ArgumentError, "Unknown filter type"
    end
  end

  spawn :@filters, @filters + additions.flatten
end
|
148
|
+
|
149
|
+
# Returns an HBase::Scoped object with the specified row number limit
# @param [Integer] rows Sets the maximum number of rows to return from scan
# @return [HBase::Scoped] HBase::Scoped object with the specified row number limit
# @raise [ArgumentError] If rows is not a non-negative integer
def limit rows
  # Checked against Integer rather than Fixnum: Fixnum was deprecated in
  # Ruby 2.4 and removed in 3.2, where referencing it raises NameError.
  unless rows.is_a?(Integer) && rows >= 0
    raise ArgumentError, "Invalid limit. Must be a non-negative integer."
  end
  spawn :@limit, rows
end
|
156
|
+
|
157
|
+
# Returns an HBase::Scoped object with the specified projection added to
# the existing one.
# When the first argument is a Hash it is validated up front: only the keys
# :prefix, :range, :limit and :offset are allowed, and :limit / :offset
# (when given) must be non-negative integers.
# @param [Array<String, Hash>] columns Array of column expressions
# @return [HBase::Scoped] HBase::Scoped object with the specified projection
# @raise [ArgumentError] If the leading Hash has unknown keys or an invalid
#   :limit / :offset
def project *columns
  if columns.first.is_a?(Hash)
    hash = columns.first
    unless (hash.keys - [:prefix, :range, :limit, :offset]).empty?
      raise ArgumentError, "Invalid projection"
    end

    # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and
    # removed in 3.2, where referencing it raises NameError.
    if (l = hash[:limit]) && !(l.is_a?(Integer) && l >= 0)
      raise ArgumentError, ":limit must be a non-negative integer"
    end

    if (o = hash[:offset]) && !(o.is_a?(Integer) && o >= 0)
      raise ArgumentError, ":offset must be a non-negative integer"
    end
  end
  spawn :@project, @project + columns
end
|
181
|
+
|
182
|
+
# Returns an HBase::Scoped object with the specified version number limit.
# If not set, all versions of each value are fetched by default.
# @param [Integer] vs Sets the maximum number of versions
# @return [HBase::Scoped] HBase::Scoped object with the version number limit
# @raise [ArgumentError] If vs is not a positive integer
def versions vs
  # Checked against Integer rather than Fixnum: Fixnum was deprecated in
  # Ruby 2.4 and removed in 3.2, where referencing it raises NameError.
  unless vs.is_a?(Integer) && vs > 0
    raise ArgumentError, "Invalid versions. Must be a positive integer."
  end
  spawn :@versions, vs
end
|
190
|
+
|
191
|
+
# Returns an HBase::Scoped object with the specified batch limit
# @param [Integer] b Sets the maximum number of values to fetch each time
# @return [HBase::Scoped] HBase::Scoped object with the specified batch limit
# @raise [ArgumentError] If b is not a positive integer
def batch b
  # Checked against Integer rather than Fixnum: Fixnum was deprecated in
  # Ruby 2.4 and removed in 3.2, where referencing it raises NameError.
  unless b.is_a?(Integer) && b > 0
    raise ArgumentError, "Invalid batch size. Must be a positive integer."
  end
  spawn :@batch, b
end
|
198
|
+
|
199
|
+
private
|
200
|
+
# Sets up an unrestricted scope: no filters, projection or prefixes, and
# every scalar option (range, versions, batch, caching, limit) unset.
# @param [HBase::Table] table
def initialize table
  @table = table

  # Collection-valued settings each start as their own empty array
  @filters, @project, @prefixes = [], [], []

  # Scalar settings stay nil until the matching builder method sets them
  [:@range, :@versions, :@batch, :@caching, :@limit].each do |ivar|
    instance_variable_set ivar, nil
  end
end
|
212
|
+
|
213
|
+
# Returns a shallow copy (+dup+) of this object with the given instance
# variables overwritten; the receiver itself is left unchanged.
# @param [Array] args Flat list of alternating instance variable names
#   (e.g. :@limit) and the values to assign to them
# @return [Object] The modified copy
def spawn *args
  copy = dup
  args.each_slice(2) { |ivar, value| copy.instance_variable_set ivar, value }
  copy
end
|
221
|
+
|
222
|
+
# Delegates to the +htable+ of the table this scope was created for.
# @return [Object] Whatever +@table.htable+ returns
def htable
  owner = @table
  owner.htable
end
|
225
|
+
|
226
|
+
# Applies the accumulated projection (@project) to the given object.
# Plain column expressions are registered on +obj+ directly through
# addColumn / addFamily. Hash entries are collected and converted into
# column filters, which are returned to the caller instead of being set.
# @param [Object] obj Object responding to addColumn and addFamily
# @return [Array] Filters in this order: column prefix filter, column range
#   filter list, column pagination filter (each only when applicable)
# @raise [ArgumentError] On an unknown projection key, a non-Range :range
#   entry, or when only one of :limit / :offset is given
def process_projection! obj
  limit = nil
  offset = nil
  prefixes = []
  ranges = []

  @project.each do |col|
    unless col.is_a?(Hash)
      cf, cq = Util.parse_column_name col
      cq ? obj.addColumn(cf, cq) : obj.addFamily(cf)
      next
    end

    col.each do |prop, val|
      case prop
      when :prefix then prefixes += [*val]
      when :range  then ranges += val.is_a?(Array) ? val : [val]
      when :limit  then limit = val
      when :offset then offset = val
      else
        # Shouldn't happen
        raise ArgumentError, "Invalid projection: #{prop}"
      end
    end
  end

  # Pagination needs both values: reject the case where exactly one is set
  if !limit != !offset
    raise ArgumentError, "Both `limit` and `offset` must be specified"
  end

  filters = []

  # Column prefix filter (disjunctive)
  unless prefixes.empty?
    prefix_bytes = prefixes.map { |pref| Util.to_bytes(pref).to_a }
    filters << MultipleColumnPrefixFilter.new(prefix_bytes.to_java(Java::byte[]))
  end

  # Column range filter (disjunctive)
  unless ranges.empty?
    range_filters = ranges.map do |range|
      raise ArgumentError, "Invalid range type" unless range.is_a? Range

      ColumnRangeFilter.new(
        Util.to_bytes(range.begin), true,
        Util.to_bytes(range.end), !range.exclude_end?)
    end
    filters << FilterList.new(FilterList::Operator::MUST_PASS_ONE, range_filters)
  end

  # Column pagination filter (last)
  filters << ColumnPaginationFilter.new(limit, offset) if limit && offset

  filters
end
|
291
|
+
|
292
|
+
# Builds the Get request for a single rowkey, applying this scope's version
# limit, projection, rowkey range, prefixes and filters.
# The filter chain is assembled in a fixed order: projection filters, row
# filters derived from the range, prefix filters, then the user filters.
# @param [Object] rowkey Rowkey, converted with Util.to_bytes
# @return [Get] The configured Get object
# @raise [ArgumentError] If the effective range is neither a Range nor an Array
def getify rowkey
  get = Get.new(Util.to_bytes rowkey)

  # Without an explicit limit, setMaxVersions is called with no argument
  @versions ? get.setMaxVersions(@versions) : get.setMaxVersions

  chain = [] + process_projection!(get)

  bounds = @range || range_for_prefix
  if bounds
    case bounds
    when Range
      chain << RowFilter.new(
        CompareFilter::CompareOp::GREATER_OR_EQUAL,
        BinaryComparator.new(Util.to_bytes bounds.begin))

      upper_op =
        if bounds.exclude_end?
          CompareFilter::CompareOp::LESS
        else
          CompareFilter::CompareOp::LESS_OR_EQUAL
        end
      chain << RowFilter.new(
        upper_op,
        BinaryComparator.new(Util.to_bytes bounds.end))
    when Array
      # Either end may be nil, in which case that side is left open
      if bounds[0]
        chain << RowFilter.new(
          CompareFilter::CompareOp::GREATER_OR_EQUAL,
          BinaryComparator.new(Util.to_bytes bounds[0]))
      end

      if bounds[1]
        chain << RowFilter.new(
          CompareFilter::CompareOp::LESS,
          BinaryComparator.new(Util.to_bytes bounds[1]))
      end
    else
      raise ArgumentError, "Invalid range"
    end
  end

  # Prefix filters
  chain += [*build_prefix_filter]

  # RowFilter must precede the others
  chain += @filters

  get.setFilter FilterList.new(chain) unless chain.empty?
  get
end
|
340
|
+
|
341
|
+
# Builds the value filter for one column from a condition.
# * Range: value must lie between its begin and end (end excluded for a
#   three-dot range).
# * Hash: each operator => operand pair becomes a comparison, and all pairs
#   must pass. Supported operators are :gt/:>, :gte/:>=, :lt/:<, :lte/:<=,
#   :eq/:== and :ne/:!=. An Array operand compares against each element;
#   for :ne/:!= all must pass, otherwise any one suffices.
# * Anything else: value must equal it.
# @param [Object] cf Column family, passed through to SingleColumnValueFilter
# @param [Object] cq Column qualifier, passed through likewise
# @param [Object] val Condition as described above
# @return [SingleColumnValueFilter, FilterList]
# @raise [ArgumentError] On an unknown operator or a Hash operand
def filter_for cf, cq, val
  case val
  when Range
    lower, upper = [val.begin, val.end].map { |k| Util.to_bytes k }
    upper_op =
      if val.exclude_end?
        CompareFilter::CompareOp::LESS
      else
        CompareFilter::CompareOp::LESS_OR_EQUAL
      end

    FilterList.new(FilterList::Operator::MUST_PASS_ALL, [
      SingleColumnValueFilter.new(
        cf, cq, CompareFilter::CompareOp::GREATER_OR_EQUAL, lower),
      SingleColumnValueFilter.new(cf, cq, upper_op, upper)
    ])
  when Hash
    conditions = val.map do |op, operand|
      # Resolved first so an unknown operator is reported before the operand
      # is inspected
      operator =
        case op
        when :gt, :>   then CompareFilter::CompareOp::GREATER
        when :gte, :>= then CompareFilter::CompareOp::GREATER_OR_EQUAL
        when :lt, :<   then CompareFilter::CompareOp::LESS
        when :lte, :<= then CompareFilter::CompareOp::LESS_OR_EQUAL
        when :eq, :==  then CompareFilter::CompareOp::EQUAL
        when :ne, :!=  then CompareFilter::CompareOp::NOT_EQUAL
        else
          raise ArgumentError, "Unknown operator: #{op}"
        end

      case operand
      when Array
        # XXX TODO Undocumented feature
        combine =
          if [:ne, :!=].include?(op)
            FilterList::Operator::MUST_PASS_ALL
          else
            FilterList::Operator::MUST_PASS_ONE
          end
        FilterList.new(
          combine,
          operand.map { |each_val|
            SingleColumnValueFilter.new(cf, cq, operator, Util.to_bytes(each_val))
          })
      when Hash
        raise ArgumentError, "Hash predicate not supported"
      else
        SingleColumnValueFilter.new(cf, cq, operator, Util.to_bytes(operand))
      end
    end

    FilterList.new(FilterList::Operator::MUST_PASS_ALL, conditions)
  else
    SingleColumnValueFilter.new(
      cf, cq, CompareFilter::CompareOp::EQUAL, Util.to_bytes(val))
  end
end
|
402
|
+
|
403
|
+
# Builds the Scan for this scope: rowkey range (explicit, or derived from
# the prefixes), caching, filters, projection, row limit, version limit and
# batch size.
# @return [Scan] The configured Scan object
# @raise [ArgumentError] If the effective range is neither a Range nor an Array
# @raise [NotImplementedError] If a limit is set but the Scan object does
#   not respond to setMaxResultSize
def filtered_scan
  scan = Scan.new

  bounds = @range || range_for_prefix
  if bounds
    case bounds
    when Range
      scan.setStartRow Util.to_bytes(bounds.begin)

      stop_key = Util.to_bytes(bounds.end)
      # Inclusive end: the stop row is passed through Util.append_0 first
      # (presumably appends a zero byte so the end key itself is still
      # scanned - confirm against Util)
      scan.setStopRow(bounds.exclude_end? ? stop_key : Util.append_0(stop_key))
    when Array
      # Either end may be nil, in which case that side is left open
      scan.setStartRow Util.to_bytes(bounds[0]) if bounds[0]
      scan.setStopRow Util.to_bytes(bounds[1]) if bounds[1]
    else
      # This shouldn't happen though.
      raise ArgumentError, "Invalid range"
    end
  end

  scan.caching = @caching if @caching

  # Filters: prefix filter first, then user filters, then projection filters
  filters = [*build_prefix_filter] + @filters
  filters += process_projection!(scan)

  scan.setFilter FilterList.new(filters) unless filters.empty?

  if @limit
    # setMaxResultSize not implemented in 0.92
    unless scan.respond_to?(:setMaxResultSize)
      raise NotImplementedError, 'Scan.setMaxResultSize not implemented'
    end
    # NOTE(review): upstream HBase documents Scan#setMaxResultSize as a size
    # limit in bytes rather than a row count - confirm this enforces the
    # intended row limit.
    scan.setMaxResultSize(@limit)
  end

  # Without an explicit limit, setMaxVersions is called with no argument
  @versions ? scan.setMaxVersions(@versions) : scan.setMaxVersions

  # Batch
  scan.setBatch @batch if @batch

  scan
end
|
451
|
+
|
452
|
+
# Variant of +filtered_scan+ used by +count+: block caching is switched
# off and a KeyOnlyFilter is added, either to the filter already on the scan
# or as the sole member of a new FilterList.
# @return [Scan] The adjusted scan
def filtered_scan_minimum
  scan = filtered_scan
  scan.cache_blocks = false

  existing = scan.getFilter
  if existing
    existing.addFilter KeyOnlyFilter.new
  else
    scan.setFilter FilterList.new(KeyOnlyFilter.new)
  end

  scan
end
|
463
|
+
|
464
|
+
# Builds the rowkey prefix filter for @prefixes.
# @return [nil, PrefixFilter, FilterList] nil without prefixes, a single
#   PrefixFilter for one prefix, otherwise a MUST_PASS_ONE FilterList with
#   one PrefixFilter per prefix
def build_prefix_filter
  return nil if @prefixes.empty?

  per_prefix = @prefixes.map { |prefix| PrefixFilter.new(Util.to_bytes prefix) }
  return per_prefix.first if per_prefix.length == 1

  FilterList.new FilterList::Operator::MUST_PASS_ONE, per_prefix
end
|
477
|
+
|
478
|
+
# Derives a rowkey range covering all of @prefixes: the prefixes are wrapped
# in ByteArray and sorted, then the smallest supplies the start key and the
# largest the stop key (via stopkey_bytes_for_prefix).
# @return [Array, nil] Two-element [start, stop] array, or nil without prefixes
def range_for_prefix
  return nil if @prefixes.empty?

  ordered = @prefixes.map { |pref| ByteArray.new(pref) }.sort
  [ordered.first.java, ordered.last.stopkey_bytes_for_prefix]
end
|
487
|
+
end#Scoped
|
488
|
+
end#HBase
|
489
|
+
|