hbase-jruby 0.1.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,489 @@
1
+ class HBase
2
+ # Scope of table scan
3
+ # @author Junegunn Choi <junegunn.c@gmail.com>
4
+ class Scoped
5
+ include Enumerable
6
+
# Builds a pristine scope for the table this scope is bound to.
# None of the settings accumulated on the receiver are carried over.
# @return [HBase::Scoped] A clean HBase::Scoped object for the same table
def unscope
  # The constructor is reached through the send family rather than a
  # direct `Scoped.new` call, exactly as the original code does.
  Scoped.__send__(:new, @table)
end
12
+
# Number of rows in the scope.
# Runs a scan built by `filtered_scan_minimum` and counts the results.
# @return [Integer] The number of rows in the scope
def count
  scanner = htable.getScanner(filtered_scan_minimum)
  cnt = 0
  scanner.each { cnt += 1 }
  cnt
ensure
  # Always release the scanner, even when iteration raises; `each` closes
  # its scanner the same way.
  scanner.close if scanner
end
22
+
# @overload get(rowkey)
#   Single GET.
#   Gets a record with the given rowkey. If the record is not found, nil is returned.
#   @param [Object] rowkey Rowkey
#   @return [HBase::Result, nil]
# @overload get(rowkeys)
#   Batch GET. Gets an array of records with the given rowkeys.
#   Nonexistent records will be returned as nils.
#   @param [Array<Object>] rowkeys Rowkeys
#   @return [Array<HBase::Result, nil>]
def get rowkeys
  # Shared conversion: an empty result becomes nil, anything else is wrapped.
  wrap = lambda { |raw| raw.isEmpty ? nil : Result.new(raw) }

  if rowkeys.is_a?(Array)
    htable.get(rowkeys.map { |key| getify key }).map { |raw| wrap.call(raw) }
  else
    wrap.call htable.get(getify(rowkeys))
  end
end
44
+
# Iterate through the scope.
# Without a block the scope itself is returned.
# @yield [HBase::Result] Yields each row in the scope
def each
  return self unless block_given?

  scanner = htable.getScanner(filtered_scan)
  scanner.each { |row| yield Result.send(:new, row) }
ensure
  # `scanner` is still nil here when no block was given or when opening
  # the scanner raised, hence the guard.
  scanner.close if scanner
end
61
+
# Sets the number of rows for caching that will be passed to scanners.
# @param [Integer] rows The number of rows to cache
# @return [HBase::Scoped] HBase::Scoped object with the caching option
# @raise [ArgumentError] If rows is not a non-negative integer
def caching rows
  # Integer is used instead of Fixnum: Fixnum is deprecated since Ruby 2.4
  # and removed in 3.2, while Integer exists on every Ruby version.
  unless rows.is_a?(Integer) && rows >= 0
    raise ArgumentError, "Invalid caching size. Must be a non-negative integer."
  end
  spawn :@caching, rows
end
69
+
# @overload range(start_key, opts = {})
#   Returns an HBase::Scoped object with the specified rowkey range
#   Overrides current range.
#   @param [Object] start_key Start rowkey
#   @param [Hash] opts Prefix filter
#   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
#   @return [HBase::Scoped] HBase::Scoped object with the range
# @overload range(start_key, stop_key, opts = {})
#   Returns an HBase::Scoped object with the specified rowkey range
#   Overrides current range.
#   @param [Object, nil] start_key Start rowkey. Can be nil.
#   @param [Object] stop_key Stop rowkey (exclusive)
#   @param [Hash] opts Prefix filter
#   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
#   @return [HBase::Scoped] HBase::Scoped object with the range
# @overload range(start_stop_range, opts = {})
#   Returns an HBase::Scoped object with the specified rowkey range
#   Overrides current range.
#   @param [Range] start_stop_range Rowkey scan range
#   @param [Hash] opts Prefix filter
#   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
#   @return [HBase::Scoped] HBase::Scoped object with the range
# @overload range(opts)
#   Returns an HBase::Scoped object with the specified rowkey range
#   Overrides current range.
#   @param [Hash] opts Prefix filter
#   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
#   @return [HBase::Scoped] HBase::Scoped object with the range
# @raise [ArgumentError] On unknown options or a wrong number of keys
# @example
#   table.range(:prefix => '2012')
#   table.range(:prefix => ['2010', '2012'])
def range *key_range
  opts = key_range.last
  if opts.is_a?(Hash)
    # A trailing Hash carries the options; strip it from the key list.
    prefixes = [*opts[:prefix]]
    unknown = opts.keys - [:prefix]
    raise ArgumentError,
      "Invalid range. Unknown option(s) specified." unless unknown.empty?
    key_range = key_range[0...-1]
  end

  # With an options Hash the keys may be omitted entirely; otherwise at
  # least one key is required. `prefixes` stays nil when no Hash was given.
  allowed_lengths = prefixes ? [0, 1, 2] : [1, 2]
  raise ArgumentError, "Invalid range" unless allowed_lengths.include?(key_range.length)

  head = key_range[0]
  new_range =
    if head.is_a?(Range)
      head
    elsif key_range.empty?
      nil
    else
      key_range
    end

  spawn :@range, new_range, :@prefixes, prefixes || []
end
122
+
# Returns an HBase::Scoped object with the filters added
# @param [Array<Hash, FilterBase, FilterList>] filters
#   A Hash maps a column name to a value handled by `filter_for`; an Array
#   value yields one filter per element, combined with MUST_PASS_ONE.
# @return [HBase::Scoped] HBase::Scoped object also with the specified filters
# @raise [ArgumentError] If an argument is neither a Hash nor a filter object
def filter *filters
  added = filters.map { |f|
    if f.is_a?(Hash)
      f.map { |col, val|
        cf, cq = Util.parse_column_name col

        if val.is_a?(Array)
          FilterList.new(FilterList::Operator::MUST_PASS_ONE,
                         val.map { |v| filter_for cf, cq, v })
        else
          filter_for cf, cq, val
        end
      }.flatten
    elsif f.is_a?(FilterBase) || f.is_a?(FilterList)
      # Ready-made filter objects are passed through untouched.
      f
    else
      raise ArgumentError, "Unknown filter type"
    end
  }.flatten

  # A new array is built, so the receiver's own filter list is not modified.
  spawn :@filters, @filters + added
end
148
+
# Returns an HBase::Scoped object with the specified row number limit
# @param [Integer] rows Sets the maximum number of rows to return from scan
# @return [HBase::Scoped] HBase::Scoped object with the specified row number limit
# @raise [ArgumentError] If rows is not a non-negative integer
def limit rows
  # Integer is used instead of Fixnum: Fixnum is deprecated since Ruby 2.4
  # and removed in 3.2, while Integer exists on every Ruby version.
  unless rows.is_a?(Integer) && rows >= 0
    raise ArgumentError, "Invalid limit. Must be a non-negative integer."
  end
  spawn :@limit, rows
end
156
+
# Returns an HBase::Scoped object with the specified projection
# @param [Array<String, Hash>] columns Array of column expressions.
#   A Hash may carry the :prefix, :range, :limit and :offset options.
# @return [HBase::Scoped] HBase::Scoped object with the specified projection
# @raise [ArgumentError] If a Hash has an unknown key, or :limit / :offset
#   is not a non-negative integer
def project *columns
  # Every Hash argument is validated, not only the first one, so a bad
  # option fails here instead of later when the scan or get is built.
  columns.each do |col|
    next unless col.is_a?(Hash)

    unless (col.keys - [:prefix, :range, :limit, :offset]).empty?
      raise ArgumentError, "Invalid projection"
    end

    # Integer is used instead of Fixnum (deprecated in Ruby 2.4, removed in 3.2).
    if (l = col[:limit]) && !(l.is_a?(Integer) && l >= 0)
      raise ArgumentError, ":limit must be a non-negative integer"
    end

    if (o = col[:offset]) && !(o.is_a?(Integer) && o >= 0)
      raise ArgumentError, ":offset must be a non-negative integer"
    end
  end

  spawn :@project, @project + columns
end
181
+
# Returns an HBase::Scoped object with the specified version number limit.
# If not set, all versions of each value are fetched by default.
# @param [Integer] vs Sets the maximum number of versions
# @return [HBase::Scoped] HBase::Scoped object with the version number limit
# @raise [ArgumentError] If vs is not a positive integer
def versions vs
  # Integer is used instead of Fixnum: Fixnum is deprecated since Ruby 2.4
  # and removed in 3.2, while Integer exists on every Ruby version.
  unless vs.is_a?(Integer) && vs > 0
    raise ArgumentError, "Invalid versions. Must be a positive integer."
  end
  spawn :@versions, vs
end
190
+
# Returns an HBase::Scoped object with the specified batch limit
# @param [Integer] b Sets the maximum number of values to fetch each time
# @return [HBase::Scoped] HBase::Scoped object with the specified batch limit
# @raise [ArgumentError] If b is not a positive integer
def batch b
  # Integer is used instead of Fixnum: Fixnum is deprecated since Ruby 2.4
  # and removed in 3.2, while Integer exists on every Ruby version.
  unless b.is_a?(Integer) && b > 0
    raise ArgumentError, "Invalid batch size. Must be a positive integer."
  end
  spawn :@batch, b
end
198
+
199
+ private
# Sets up an empty scope bound to the given table.
# @param [HBase::Table] table
def initialize table
  @table = table

  # Collection-valued settings start out empty.
  @filters, @project, @prefixes = [], [], []

  # Single-valued settings start out unset.
  @range = @versions = @batch = @caching = @limit = nil
end
212
+
# Returns a copy of this scope with the given instance variables replaced.
# The receiver itself is left untouched.
# @param [Array] args Alternating instance variable names (e.g. :@limit)
#   and the values to assign on the copy
# @return [HBase::Scoped] The modified copy
def spawn *args
  copy = dup
  args.each_slice(2) do |ivar, value|
    copy.instance_variable_set ivar, value
  end
  copy
end
221
+
# Shortcut for the `htable` of the table this scope is bound to.
# The value is looked up on every call and not cached here.
# @return [Object] Whatever `@table.htable` returns; the methods in this
#   class call `get` and `getScanner` on it
def htable
  @table.htable
end
225
+
# Applies the projection stored in @project to a Get or Scan object.
# Plain column expressions are added to obj right away through
# addColumn / addFamily; Hash options are turned into column filters.
# @param [Object] obj Object responding to addColumn and addFamily
# @return [Array] Column filters in this order: prefix filter, range
#   filter list, pagination filter (each present only when requested)
# @raise [ArgumentError] On an unknown option, a non-Range range, or when
#   only one of :limit / :offset is given
def process_projection! obj
  limit = nil
  offset = nil
  prefixes = []
  ranges = []

  @project.each do |col|
    unless col.is_a?(Hash)
      cf, cq = Util.parse_column_name col
      cq ? obj.addColumn(cf, cq) : obj.addFamily(cf)
      next
    end

    col.each do |prop, val|
      case prop
      when :prefix then prefixes += [*val]
      when :range  then ranges += (val.is_a?(Array) ? val : [val])
      when :limit  then limit = val
      when :offset then offset = val
      else
        # Shouldn't happen
        raise ArgumentError, "Invalid projection: #{prop}"
      end
    end
  end

  # Pagination needs both values or neither.
  has_limit = limit ? true : false
  has_offset = offset ? true : false
  if has_limit != has_offset
    raise ArgumentError, "Both `limit` and `offset` must be specified"
  end

  filters = []

  # Column prefix filter (disjunctive)
  unless prefixes.empty?
    byte_prefixes = prefixes.map { |pref| Util.to_bytes(pref).to_a }
    filters << MultipleColumnPrefixFilter.new(byte_prefixes.to_java(Java::byte[]))
  end

  # Column range filter (disjunctive)
  unless ranges.empty?
    range_filters = ranges.map { |range|
      raise ArgumentError, "Invalid range type" unless range.is_a? Range

      ColumnRangeFilter.new(
        Util.to_bytes(range.begin), true,
        Util.to_bytes(range.end), !range.exclude_end?)
    }
    filters << FilterList.new(FilterList::Operator::MUST_PASS_ONE, range_filters)
  end

  # Column pagination filter (last)
  filters << ColumnPaginationFilter.new(limit, offset) if limit && offset

  filters
end
291
+
# Builds a Get for the given rowkey carrying this scope's settings:
# version limit, projection, rowkey range, prefix filters and user filters.
# @param [Object] rowkey Rowkey, converted with Util.to_bytes
# @return [Get] The configured Get object
# @raise [ArgumentError] If the effective range is neither a Range nor an Array
def getify rowkey
  get = Get.new(Util.to_bytes rowkey)

  if @versions
    get.setMaxVersions @versions
  else
    get.setMaxVersions
  end

  # Column filters from the projection come first.
  filters = [] + process_projection!(get)

  # The rowkey range is expressed as row filters on the Get. An explicit
  # range wins over one derived from the prefixes.
  range = @range || range_for_prefix
  if range.is_a?(Range)
    upper_op =
      if range.exclude_end?
        CompareFilter::CompareOp::LESS
      else
        CompareFilter::CompareOp::LESS_OR_EQUAL
      end

    filters << RowFilter.new(
      CompareFilter::CompareOp::GREATER_OR_EQUAL,
      BinaryComparator.new(Util.to_bytes range.begin))
    filters << RowFilter.new(
      upper_op,
      BinaryComparator.new(Util.to_bytes range.end))
  elsif range.is_a?(Array)
    # Either bound may be nil, in which case that side is left open.
    if range[0]
      filters << RowFilter.new(
        CompareFilter::CompareOp::GREATER_OR_EQUAL,
        BinaryComparator.new(Util.to_bytes range[0]))
    end

    if range[1]
      filters << RowFilter.new(
        CompareFilter::CompareOp::LESS,
        BinaryComparator.new(Util.to_bytes range[1]))
    end
  elsif range
    raise ArgumentError, "Invalid range"
  end

  # Prefix filters
  filters += [*build_prefix_filter]

  # User-supplied filters go last, after the row filters.
  filters += @filters

  get.setFilter FilterList.new(filters) unless filters.empty?
  get
end
340
+
# Builds the value filter for a single column.
# @param [Object] cf Column family, passed through to the filter
# @param [Object] cq Column qualifier, passed through to the filter
# @param [Range, Hash, Object] val
#   - Range: lower bound inclusive, upper bound inclusive unless the Range
#     excludes its end
#   - Hash: operator => operand pairs (:gt/:>, :gte/:>=, :lt/:<, :lte/:<=,
#     :eq/:==, :ne/:!=), all of which must hold
#   - anything else: equality with the given value
# @return [FilterList, SingleColumnValueFilter]
# @raise [ArgumentError] On an unknown operator or a Hash operand
def filter_for cf, cq, val
  if val.is_a?(Range)
    lower = Util.to_bytes val.begin
    upper = Util.to_bytes val.end
    upper_op =
      if val.exclude_end?
        CompareFilter::CompareOp::LESS
      else
        CompareFilter::CompareOp::LESS_OR_EQUAL
      end

    FilterList.new(FilterList::Operator::MUST_PASS_ALL, [
      SingleColumnValueFilter.new(
        cf, cq, CompareFilter::CompareOp::GREATER_OR_EQUAL, lower),
      SingleColumnValueFilter.new(cf, cq, upper_op, upper)
    ])
  elsif val.is_a?(Hash)
    predicates = val.map { |op, v|
      operator =
        case op
        when :gt, :>   then CompareFilter::CompareOp::GREATER
        when :gte, :>= then CompareFilter::CompareOp::GREATER_OR_EQUAL
        when :lt, :<   then CompareFilter::CompareOp::LESS
        when :lte, :<= then CompareFilter::CompareOp::LESS_OR_EQUAL
        when :eq, :==  then CompareFilter::CompareOp::EQUAL
        when :ne, :!=  then CompareFilter::CompareOp::NOT_EQUAL
        else
          raise ArgumentError, "Unknown operator: #{op}"
        end

      if v.is_a?(Array)
        # XXX TODO Undocumented feature
        # "not equal" must hold for every listed value; every other
        # operator only needs to hold for one of them.
        combinator =
          if [:ne, :!=].include?(op)
            FilterList::Operator::MUST_PASS_ALL
          else
            FilterList::Operator::MUST_PASS_ONE
          end

        FilterList.new(
          combinator,
          v.map { |vv|
            SingleColumnValueFilter.new(cf, cq, operator, Util.to_bytes(vv))
          }
        )
      elsif v.is_a?(Hash)
        raise ArgumentError, "Hash predicate not supported"
      else
        SingleColumnValueFilter.new(cf, cq, operator, Util.to_bytes(v))
      end
    }

    FilterList.new(FilterList::Operator::MUST_PASS_ALL, predicates)
  else
    SingleColumnValueFilter.new(
      cf, cq,
      CompareFilter::CompareOp::EQUAL,
      Util.to_bytes(val))
  end
end
402
+
# Builds a Scan carrying this scope's settings: rowkey range, caching,
# filters, projection, limit, version limit and batch size.
# @return [Scan] The configured Scan object
# @raise [ArgumentError] If the effective range is neither a Range nor an Array
# @raise [NotImplementedError] If a limit is set but the Scan object does
#   not respond to setMaxResultSize
def filtered_scan
  scan = Scan.new

  # An explicit range wins over one derived from the prefixes.
  range = @range || range_for_prefix
  if range.is_a?(Range)
    scan.setStartRow Util.to_bytes range.begin

    # The stop row is passed through Util.append_0 for an inclusive Range
    # (presumably so the end key itself is still scanned - confirm in Util).
    stop_row = Util.to_bytes range.end
    stop_row = Util.append_0(stop_row) unless range.exclude_end?
    scan.setStopRow stop_row
  elsif range.is_a?(Array)
    # Either bound may be nil, in which case that side is left open.
    scan.setStartRow Util.to_bytes range[0] if range[0]
    scan.setStopRow Util.to_bytes range[1] if range[1]
  elsif range
    # This shouldn't happen though.
    raise ArgumentError, "Invalid range"
  end

  scan.caching = @caching if @caching

  # Filters: prefix filter, then user filters, then projection filters
  filters = [*build_prefix_filter] + @filters
  filters += process_projection!(scan)
  scan.setFilter FilterList.new(filters) unless filters.empty?

  if @limit
    # setMaxResultSize not implemented in 0.92
    unless scan.respond_to?(:setMaxResultSize)
      raise NotImplementedError, 'Scan.setMaxResultSize not implemented'
    end
    # NOTE(review): the HBase API documents setMaxResultSize as a size
    # budget in bytes, while `limit` is documented as a row count -
    # confirm this is the intended mapping.
    scan.setMaxResultSize(@limit)
  end

  if @versions
    scan.setMaxVersions @versions
  else
    scan.setMaxVersions
  end

  # Batch
  scan.setBatch @batch if @batch

  scan
end
451
+
# Variant of `filtered_scan` used for counting rows: block caching is
# switched off and a KeyOnlyFilter is added to the scan's filters.
# @return [Scan] The configured Scan object
def filtered_scan_minimum
  scan = filtered_scan
  scan.cache_blocks = false

  existing = scan.getFilter
  if existing
    # Append to the filter already attached by `filtered_scan`.
    existing.addFilter KeyOnlyFilter.new
  else
    scan.setFilter FilterList.new(KeyOnlyFilter.new)
  end

  scan
end
463
+
# Builds the rowkey prefix filter for @prefixes.
# @return [PrefixFilter, FilterList, nil] nil when there are no prefixes,
#   a bare PrefixFilter for a single prefix, otherwise a MUST_PASS_ONE
#   FilterList holding one PrefixFilter per prefix
def build_prefix_filter
  return nil if @prefixes.empty?

  prefix_filters = @prefixes.map { |prefix|
    PrefixFilter.new(Util.to_bytes prefix)
  }

  # A single prefix needs no wrapping list.
  return prefix_filters.first if prefix_filters.length == 1

  FilterList.new FilterList::Operator::MUST_PASS_ONE, prefix_filters
end
477
+
# Derives a rowkey range that covers every row matching any of @prefixes,
# so a prefix-only scope does not have to scan the whole table.
# @return [Array, nil] nil when there are no prefixes, otherwise
#   [start key bytes, stop key bytes]; the stop key is nil (open-ended)
#   when one of the prefixes is empty
def range_for_prefix
  return nil if @prefixes.empty?

  sorted = @prefixes.map { |pref| ByteArray.new(pref) }.sort
  start = sorted.first

  # The stop key must come from the last prefix that is not an extension
  # of a shorter one. Taking it from the largest prefix is wrong when
  # prefixes overlap: for 'a' and 'ab' the scan would stop at 'ac' and
  # drop rows such as 'az'.
  # Assumes ByteArray sorts lexicographically by byte content and that
  # ByteArray#java returns the bytes as an array - TODO confirm.
  widest = start
  widest_bytes = widest.java.to_a
  sorted.each do |candidate|
    bytes = candidate.java.to_a
    # Already covered by the current widest prefix.
    next if bytes[0, widest_bytes.length] == widest_bytes
    widest = candidate
    widest_bytes = bytes
  end

  # An empty prefix matches every row, so no upper bound applies.
  stop = widest_bytes.empty? ? nil : widest.stopkey_bytes_for_prefix

  [start.java, stop]
end
487
+ end#Scoped
488
+ end#HBase
489
+