hbase-jruby 0.1.1-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,489 @@
1
class HBase
  # Scope of table scan.
  #
  # A Scoped object collects scan/get options (row range, row-key prefixes,
  # filters, projection, versions, batch, caching and limit). Every refining
  # method returns a modified copy (see #spawn); the receiver is left untouched.
  #
  # @author Junegunn Choi <junegunn.c@gmail.com>
  class Scoped
    include Enumerable

    # A clean HBase::Scoped object for the same table
    # @return [HBase::Scoped] A clean HBase::Scoped object for the same table
    def unscope
      Scoped.send(:new, @table)
    end

    # Number of rows in the scope.
    # Scans with a key-only filter and releases the scanner when done,
    # even if the iteration raises.
    # @return [Integer] The number of rows in the scope
    def count
      cnt = 0
      begin
        scanner = htable.getScanner(filtered_scan_minimum)
        scanner.each { cnt += 1 }
      ensure
        scanner.close if scanner
      end
      cnt
    end

    # @overload get(rowkey)
    #   Single GET.
    #   Gets a record with the given rowkey. If the record is not found, nil is returned.
    #   @param [Object] rowkey Rowkey
    #   @return [HBase::Result, nil]
    # @overload get(rowkeys)
    #   Batch GET. Gets an array of records with the given rowkeys.
    #   Nonexistent records will be returned as nils.
    #   @param [Array<Object>] rowkeys Rowkeys
    #   @return [Array<HBase::Result, nil>]
    def get(rowkeys)
      case rowkeys
      when Array
        htable.get(rowkeys.map { |rk| getify rk }).map { |result| wrap_result result }
      else
        wrap_result htable.get(getify(rowkeys))
      end
    end

    # Iterate through the scope.
    # The scanner is always closed once the iteration finishes or raises.
    # @yield [HBase::Result] Yields each row in the scope
    # @return [HBase::Scoped] self when no block is given
    def each
      if block_given?
        begin
          scanner = htable.getScanner(filtered_scan)
          scanner.each do |result|
            yield Result.send(:new, result)
          end
        ensure
          scanner.close if scanner
        end
      else
        self
      end
    end

    # Sets the number of rows for caching that will be passed to scanners.
    # @param [Integer] rows The number of rows to cache
    # @return [HBase::Scoped] HBase::Scoped object with the caching option
    # @raise [ArgumentError] if rows is not a non-negative integer
    def caching(rows)
      validate_integer! rows, 0, "Invalid caching size. Must be a non-negative integer."
      spawn :@caching, rows
    end

    # @overload range(start_key, opts = {})
    #   Returns an HBase::Scoped object with the specified rowkey range
    #   Overrides current range.
    #   @param [Object] start_key Start rowkey
    #   @param [Hash] opts Prefix filter
    #   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
    #   @return [HBase::Scoped] HBase::Scoped object with the range
    # @overload range(start_key, stop_key, opts = {})
    #   Returns an HBase::Scoped object with the specified rowkey range
    #   Overrides current range.
    #   @param [Object, nil] start_key Start rowkey. Can be nil.
    #   @param [Object] stop_key Stop rowkey (exclusive)
    #   @param [Hash] opts Prefix filter
    #   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
    #   @return [HBase::Scoped] HBase::Scoped object with the range
    # @overload range(start_stop_range, opts = {})
    #   Returns an HBase::Scoped object with the specified rowkey range
    #   Overrides current range.
    #   @param [Range] start_stop_range Rowkey scan range
    #   @param [Hash] opts Prefix filter
    #   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
    #   @return [HBase::Scoped] HBase::Scoped object with the range
    # @overload range(opts)
    #   Returns an HBase::Scoped object with the specified rowkey range
    #   Overrides current range.
    #   @param [Hash] opts Prefix filter
    #   @option opts [Object, Array<Object>] :prefix Only rows matching any of the given prefixes are returned
    #   @return [HBase::Scoped] HBase::Scoped object with the range
    # @example
    #   table.range(:prefix => '2012')
    #   table.range(:prefix => ['2010', '2012'])
    # @raise [ArgumentError] on unknown options or a wrong number of keys
    def range(*key_range)
      prefixes = nil

      # A trailing Hash is the options hash; strip it from the key list.
      if key_range.last.is_a?(Hash)
        opts = key_range.last
        unless (opts.keys - [:prefix]).empty?
          raise ArgumentError, "Invalid range. Unknown option(s) specified."
        end
        prefixes  = [*opts[:prefix]]
        key_range = key_range[0...-1]
      end

      # With an options hash the keys may be omitted altogether.
      allowed_lengths = prefixes ? [0, 1, 2] : [1, 2]
      raise ArgumentError, "Invalid range" unless allowed_lengths.include?(key_range.length)

      new_range =
        if key_range[0].is_a?(Range)
          key_range[0]
        elsif key_range.empty?
          nil
        else
          key_range
        end

      spawn :@range, new_range, :@prefixes, prefixes || []
    end

    # Returns an HBase::Scoped object with the filters added
    # @param [Array<Hash, FilterBase, FilterList>] filters
    # @return [HBase::Scoped] HBase::Scoped object also with the specified filters
    # @raise [ArgumentError] if a filter is of an unsupported type
    def filter(*filters)
      spawn :@filters, @filters + filters.map { |f|
        case f
        when Hash
          f.map { |col, val|
            cf, cq = Util.parse_column_name col

            case val
            when Array
              # Any one of the listed values may match
              FilterList.new(FilterList::Operator::MUST_PASS_ONE,
                             val.map { |v| filter_for cf, cq, v })
            else
              filter_for cf, cq, val
            end
          }.flatten
        when FilterBase, FilterList
          f
        else
          raise ArgumentError, "Unknown filter type"
        end
      }.flatten
    end

    # Returns an HBase::Scoped object with the specified row number limit
    # @param [Integer] rows Sets the maximum number of rows to return from scan
    # @return [HBase::Scoped] HBase::Scoped object with the specified row number limit
    # @raise [ArgumentError] if rows is not a non-negative integer
    def limit(rows)
      validate_integer! rows, 0, "Invalid limit. Must be a non-negative integer."
      spawn :@limit, rows
    end

    # Returns an HBase::Scoped object with the specified projection
    # @param [Array<String, Hash>] columns Array of column expressions. A Hash
    #   may carry the options :prefix, :range, :limit and :offset.
    # @return [HBase::Scoped] HBase::Scoped object with the specified projection
    # @raise [ArgumentError] on unknown options or invalid :limit / :offset
    def project(*columns)
      # Validate every option hash, wherever it appears in the argument list.
      columns.each do |col|
        validate_projection_options! col if col.is_a?(Hash)
      end
      spawn :@project, @project + columns
    end

    # Returns an HBase::Scoped object with the specified version number limit.
    # If not set, all versions of each value are fetched by default.
    # @param [Integer] vs Sets the maximum number of versions
    # @return [HBase::Scoped] HBase::Scoped object with the version number limit
    # @raise [ArgumentError] if vs is not a positive integer
    def versions(vs)
      validate_integer! vs, 1, "Invalid versions. Must be a positive integer."
      spawn :@versions, vs
    end

    # Returns an HBase::Scoped object with the specified batch limit
    # @param [Integer] b Sets the maximum number of values to fetch each time
    # @return [HBase::Scoped] HBase::Scoped object with the specified batch limit
    # @raise [ArgumentError] if b is not a positive integer
    def batch(b)
      validate_integer! b, 1, "Invalid batch size. Must be a positive integer."
      spawn :@batch, b
    end

    private

    # @param [HBase::Table] table
    def initialize(table)
      @table    = table
      @filters  = []
      @project  = []
      @prefixes = []
      @range    = nil
      @versions = nil
      @batch    = nil
      @caching  = nil
      @limit    = nil
    end

    # Returns a shallow copy of this scope with the given instance variables
    # overridden.
    # @param [Array] args Flat list of (instance variable name, value) pairs
    # @return [HBase::Scoped]
    def spawn(*args)
      dup.tap do |obj|
        args.each_slice(2) do |attr, val|
          obj.instance_variable_set attr, val
        end
      end
    end

    # Underlying table handle, as exposed by the table object.
    def htable
      @table.htable
    end

    # Raises ArgumentError with the given message unless value is an Integer
    # that is greater than or equal to minimum.
    def validate_integer!(value, minimum, message)
      raise ArgumentError, message unless value.is_a?(Integer) && value >= minimum
    end

    # Checks the keys and the :limit / :offset values of a projection option hash.
    def validate_projection_options!(hash)
      unless (hash.keys - [:prefix, :range, :limit, :offset]).empty?
        raise ArgumentError, "Invalid projection"
      end

      if l = hash[:limit]
        validate_integer! l, 0, ":limit must be a non-negative integer"
      end

      if o = hash[:offset]
        validate_integer! o, 0, ":offset must be a non-negative integer"
      end
    end

    # Wraps a raw result into an HBase::Result; empty results become nil.
    def wrap_result(result)
      result.isEmpty ? nil : Result.send(:new, result)
    end

    # Applies the projection to the given Get or Scan object: plain column
    # expressions are added via addColumn / addFamily, option hashes are
    # turned into column filters.
    # @param [Object] obj Get or Scan object to add columns and families to
    # @return [Array] Column filters derived from the projection options
    def process_projection!(obj)
      limit = offset = nil
      prefixes = []
      ranges   = []
      filters  = []

      @project.each do |col|
        case col
        when Hash
          col.each do |prop, val|
            case prop
            when :prefix
              prefixes += [*val]
            when :range
              ranges += val.is_a?(Array) ? val : [val]
            when :limit
              limit = val
            when :offset
              offset = val
            else
              # Shouldn't happen
              raise ArgumentError, "Invalid projection: #{prop}"
            end
          end
        else
          cf, cq = Util.parse_column_name col
          if cq
            obj.addColumn cf, cq
          else
            obj.addFamily cf
          end
        end
      end

      if (limit && !offset) || (!limit && offset)
        raise ArgumentError, "Both `limit` and `offset` must be specified"
      end

      # Column prefix filter
      unless prefixes.empty?
        # disjunctive
        filters <<
          MultipleColumnPrefixFilter.new(
            prefixes.map { |pref| Util.to_bytes(pref).to_a }.to_java(Java::byte[]))
      end

      # Column range filter
      unless ranges.empty?
        # disjunctive
        filters <<
          FilterList.new(FilterList::Operator::MUST_PASS_ONE,
            ranges.map { |range|
              raise ArgumentError, "Invalid range type" unless range.is_a? Range

              ColumnRangeFilter.new(
                Util.to_bytes(range.begin), true,
                Util.to_bytes(range.end), !range.exclude_end?) })
      end

      # Column pagination filter (last)
      if limit && offset
        filters << ColumnPaginationFilter.new(limit, offset)
      end

      filters
    end

    # Builds the row filters that restrict a Get to the given row range.
    # @param [Range, Array, nil] range Row range or [start, stop] pair
    # @return [Array] Row filters (empty when there is no range)
    # @raise [ArgumentError] if range is neither a Range nor an Array
    def row_filters_for(range)
      return [] unless range

      case range
      when Range
        upper_op = range.exclude_end? ?
          CompareFilter::CompareOp::LESS :
          CompareFilter::CompareOp::LESS_OR_EQUAL

        [
          RowFilter.new(
            CompareFilter::CompareOp::GREATER_OR_EQUAL,
            BinaryComparator.new(Util.to_bytes(range.begin))),
          RowFilter.new(
            upper_op,
            BinaryComparator.new(Util.to_bytes(range.end)))
        ]
      when Array
        # Either bound may be missing; the stop key is exclusive.
        row_filters = []
        if range[0]
          row_filters <<
            RowFilter.new(
              CompareFilter::CompareOp::GREATER_OR_EQUAL,
              BinaryComparator.new(Util.to_bytes(range[0])))
        end
        if range[1]
          row_filters <<
            RowFilter.new(
              CompareFilter::CompareOp::LESS,
              BinaryComparator.new(Util.to_bytes(range[1])))
        end
        row_filters
      else
        raise ArgumentError, "Invalid range"
      end
    end

    # Builds a Get for the rowkey that honours the versions, projection,
    # range, prefix and filter settings of this scope.
    # @param [Object] rowkey
    # @return [Get]
    def getify(rowkey)
      Get.new(Util.to_bytes(rowkey)).tap { |get|
        if @versions
          get.setMaxVersions @versions
        else
          get.setMaxVersions
        end

        filters = []
        filters += process_projection!(get)

        # Row range (explicit range wins over the range implied by the prefixes)
        filters += row_filters_for(@range || range_for_prefix)

        # Prefix filters
        filters += [*build_prefix_filter]

        # RowFilter must precede the others
        filters += @filters

        get.setFilter FilterList.new(filters) unless filters.empty?
      }
    end

    # Builds the column value filter for a single condition.
    # @param [Object] cf Column family
    # @param [Object] cq Column qualifier
    # @param [Range, Hash, Object] val A Range (between), a Hash of
    #   operator => operand pairs, or a plain value (equality)
    # @return [FilterList, SingleColumnValueFilter]
    # @raise [ArgumentError] on unknown operators or Hash operands
    def filter_for(cf, cq, val)
      case val
      when Range
        min, max = [val.begin, val.end].map { |k| Util.to_bytes k }
        FilterList.new(FilterList::Operator::MUST_PASS_ALL, [
          SingleColumnValueFilter.new(
            cf, cq,
            CompareFilter::CompareOp::GREATER_OR_EQUAL, min),
          SingleColumnValueFilter.new(
            cf, cq,
            (val.exclude_end? ? CompareFilter::CompareOp::LESS :
                                CompareFilter::CompareOp::LESS_OR_EQUAL), max)
        ])
      when Hash
        FilterList.new(FilterList::Operator::MUST_PASS_ALL,
          val.map { |op, v|
            operator =
              case op
              when :gt, :>
                CompareFilter::CompareOp::GREATER
              when :gte, :>=
                CompareFilter::CompareOp::GREATER_OR_EQUAL
              when :lt, :<
                CompareFilter::CompareOp::LESS
              when :lte, :<=
                CompareFilter::CompareOp::LESS_OR_EQUAL
              when :eq, :==
                CompareFilter::CompareOp::EQUAL
              when :ne, :!=
                CompareFilter::CompareOp::NOT_EQUAL
              else
                raise ArgumentError, "Unknown operator: #{op}"
              end
            case v
            when Array
              # XXX TODO Undocumented feature
              # "not equal" must hold for every operand; the other operators
              # pass when any single operand matches.
              FilterList.new(
                case op
                when :ne, :!=
                  FilterList::Operator::MUST_PASS_ALL
                else
                  FilterList::Operator::MUST_PASS_ONE
                end,
                v.map { |vv|
                  SingleColumnValueFilter.new(cf, cq, operator, Util.to_bytes(vv))
                }
              )
            when Hash
              raise ArgumentError, "Hash predicate not supported"
            else
              SingleColumnValueFilter.new(cf, cq, operator, Util.to_bytes(v))
            end
          }
        )
      else
        SingleColumnValueFilter.new(
          cf, cq,
          CompareFilter::CompareOp::EQUAL,
          Util.to_bytes(val))
      end
    end

    # Builds the Scan object reflecting every option of this scope.
    # @return [Scan]
    # @raise [NotImplementedError] if a limit is set but the Scan object
    #   does not respond to setMaxResultSize
    def filtered_scan
      Scan.new.tap { |scan|
        range = @range || range_for_prefix
        if range
          case range
          when Range
            scan.setStartRow Util.to_bytes(range.begin)

            if range.exclude_end?
              scan.setStopRow Util.to_bytes(range.end)
            else
              # Stop row is exclusive: extend the key to include range.end itself
              scan.setStopRow Util.append_0(Util.to_bytes(range.end))
            end
          when Array
            scan.setStartRow Util.to_bytes(range[0]) if range[0]
            scan.setStopRow  Util.to_bytes(range[1]) if range[1]
          else
            # This shouldn't happen though.
            raise ArgumentError, "Invalid range"
          end
        end

        scan.caching = @caching if @caching

        # Filters
        prefix_filter = [*build_prefix_filter]
        filters = prefix_filter + @filters
        filters += process_projection!(scan)

        scan.setFilter FilterList.new(filters) unless filters.empty?

        if @limit
          # NOTE(review): HBase documents Scan#setMaxResultSize as a size cap
          # rather than a row count -- confirm this enforces the row limit
          # that #limit promises.
          # setMaxResultSize not implemented in 0.92
          if scan.respond_to?(:setMaxResultSize)
            scan.setMaxResultSize(@limit)
          else
            raise NotImplementedError, 'Scan.setMaxResultSize not implemented'
          end
        end

        if @versions
          scan.setMaxVersions @versions
        else
          scan.setMaxVersions
        end

        # Batch
        scan.setBatch @batch if @batch
      }
    end

    # Same as #filtered_scan, but with block caching turned off and a
    # KeyOnlyFilter appended. Used by #count.
    # @return [Scan]
    def filtered_scan_minimum
      filtered_scan.tap do |scan|
        scan.cache_blocks = false

        if flist = scan.getFilter
          flist.addFilter KeyOnlyFilter.new
        else
          scan.setFilter FilterList.new(KeyOnlyFilter.new)
        end
      end
    end

    # Builds the row-key prefix filter for the scope.
    # @return [PrefixFilter, FilterList, nil] nil when no prefix is set; a
    #   single PrefixFilter for one prefix; otherwise a disjunctive FilterList
    def build_prefix_filter
      return nil if @prefixes.empty?

      filters = @prefixes.map { |prefix|
        PrefixFilter.new(Util.to_bytes(prefix))
      }

      if filters.length == 1
        filters.first
      else
        FilterList.new FilterList::Operator::MUST_PASS_ONE, filters
      end
    end

    # Derives the [start, stop] row range from the smallest and the largest
    # of the configured prefixes.
    # @return [Array, nil] nil when no prefix is set
    def range_for_prefix
      return nil if @prefixes.empty?

      byte_arrays = @prefixes.map { |pref| ByteArray.new(pref) }.sort
      start = byte_arrays.first
      stop  = byte_arrays.last

      [start.java, stop.stopkey_bytes_for_prefix]
    end
  end#Scoped
end#HBase
489
+