hbase-jruby 0.1.3-java → 0.1.4-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.md CHANGED
@@ -1,6 +1,15 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ 0.1.4
5
+ -----
6
+ - Fix: Start/stop row not correctly set when byte array rowkey range specified
7
+ - More efficient count with FirstKeyOnlyFilter
8
+ - Added `HBase::ByteArray` method as a shorthand notation for `HBase::ByteArray.new`
9
+ - Added `HBase::ByteArray#+` method for concatenating two byte arrays
10
+ - Added `HBase::Util::java_bytes?` method
11
+ - Documentation
12
+
4
13
  0.1.3
5
14
  -----
6
15
  - Supports Ruby 1.8 compatibility mode
data/README.md CHANGED
@@ -109,7 +109,9 @@ hbase = HBase.new 'hbase.zookeeper.quorum' => 'remote-server.mydomain.net'
109
109
 
110
110
  # Extra configuration
111
111
  hbase = HBase.new 'hbase.zookeeper.quorum' => 'remote-server.mydomain.net',
112
- 'hbase.client.retries.number' => 3
112
+ 'hbase.client.retries.number' => 3,
113
+ 'hbase.client.scanner.caching' => 1000,
114
+ 'hbase.rpc.timeout' => 120000
113
115
 
114
116
  # Close HBase connection
115
117
  hbase.close
@@ -310,7 +312,7 @@ table.truncate!
310
312
  # Atomically increase cf1:counter by one
311
313
  table.increment('rowkey1', 'cf1:counter', 1)
312
314
 
313
- # Atomically increase two columns by one an two respectively
315
+ # Atomically increase two columns by one and two respectively
314
316
  table.increment('rowkey1', 'cf1:counter' => 1, 'cf1:counter2' => 2)
315
317
  ```
316
318
 
@@ -504,7 +506,7 @@ scoped.project(:prefix => 'alice').
504
506
  scoped.project(:range => 'a'...'c').
505
507
  project(:range => ['i'...'k', 'x'...'z'])
506
508
 
507
- # Column pagination filter (Cannot be chained. Must be called exactly once.):
509
+ # Column pagination filter:
508
510
  # Fetch columns within the specified intra-scan offset and limit
509
511
  scoped.project(:offset => 1000, :limit => 10)
510
512
  ```
@@ -556,6 +558,9 @@ end
556
558
  # instead of just iterating through the scope, as it internally
557
559
  # minimizes amount of data fetched with KeyOnlyFilter
558
560
  scoped.count
561
+
562
+ # This should be even faster as it dramatically reduces the number of RPC calls
563
+ scoped.caching(1000).count
559
564
  ```
560
565
 
561
566
  ## Basic aggregation using coprocessor
@@ -674,7 +679,7 @@ table.remove_coprocessor! cp_class_name1
674
679
  ```
675
680
 
676
681
  You can perform other types of administrative tasks
677
- with Native Java [HBaseAdmin object](http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html),
682
+ with native Java [HBaseAdmin object](http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html),
678
683
  which can be obtained by `HBase#admin` method. Optionally, a block can be given
679
684
  so that the HBaseAdmin object is automatically closed at the end of the given block.
680
685
 
@@ -1,21 +1,49 @@
1
1
  class HBase
2
- # @private
2
+ class << self
3
+ # Shortcut method to HBase::ByteArray.new
4
+ # @param [Object] value
5
+ def ByteArray value
6
+ ByteArray.new value
7
+ end
8
+ end
9
+ # Boxed class for Java byte arrays
10
+ # @!attribute [r] java
11
+ # @return [byte[]] Java byte array
3
12
  class ByteArray
4
13
  attr_reader :java
5
14
 
15
+ # @param [Object] value
6
16
  def initialize value
7
17
  @java = Util.to_bytes value
8
18
  end
9
19
 
20
+ # Checks if the two byte arrays are the same
21
+ # @param [HBase::ByteArray] other
10
22
  def eql? other
11
23
  Arrays.equals(@java, other.java)
12
24
  end
13
25
  alias == eql?
14
26
 
27
+ # Compares two ByteArray objects
28
+ # @param [HBase::ByteArray] other
15
29
  def <=> other
16
30
  Bytes.compareTo(@java, other.java)
17
31
  end
18
32
 
33
+ # Concats two byte arrays
34
+ # @param [HBase::ByteArray] other
35
+ def + other
36
+ ByteArray.new(Bytes.add @java, other.java)
37
+ end
38
+
39
+ # Returns the Java byte array
40
+ # @return [byte[]]
41
+ def to_java_bytes
42
+ @java
43
+ end
44
+
45
+ # Returns the first byte array whose prefix doesn't match this byte array
46
+ # @return [byte[]]
19
47
  def stopkey_bytes_for_prefix
20
48
  arr = @java.to_a
21
49
  csr = arr.length - 1
@@ -31,6 +59,8 @@ class ByteArray
31
59
  end
32
60
  end
33
61
 
62
+ # Returns a hash number for the byte array
63
+ # @return [Fixnum]
34
64
  def hash
35
65
  Arrays.java_send(:hashCode, [Util::JAVA_BYTE_ARRAY_CLASS], @java)
36
66
  end
@@ -105,6 +105,7 @@ class Cell
105
105
  KeyValue.COMPARATOR.compare(@java, other.java)
106
106
  end
107
107
 
108
+ # Returns a printable version of this cell
108
109
  # @return [String]
109
110
  def inspect
110
111
  %[#{cf}:#{cq} = "#{string}"@#{ts}]
@@ -8,6 +8,8 @@ class << self
8
8
  end
9
9
  end
10
10
  # Boxed class for column keys
11
+ # @!attribute [r] cf
12
+ # @return [String] The column family
11
13
  class ColumnKey
12
14
  attr_reader :cf
13
15
  alias family cf
@@ -20,12 +22,14 @@ class ColumnKey
20
22
  @cq = Util.to_bytes(cq)
21
23
  end
22
24
 
25
+ # Returns the column qualifier decoded as the given type
23
26
  # @param [Symbol] type
24
27
  def cq type = :string
25
28
  Util.from_bytes type, @cq
26
29
  end
27
30
  alias qualifier cq
28
31
 
32
+ # Checks whether the two ColumnKeys are equal
29
33
  # @param [Object] other
30
34
  def eql? other
31
35
  other = other_as_ck(other)
@@ -33,6 +37,7 @@ class ColumnKey
33
37
  end
34
38
  alias == eql?
35
39
 
40
+ # Compares two ColumnKeys
36
41
  # @param [Object] other
37
42
  def <=> other
38
43
  other = other_as_ck(other)
@@ -40,10 +45,14 @@ class ColumnKey
40
45
  d != 0 ? d : Bytes.compareTo(@cq, other.cq(:raw))
41
46
  end
42
47
 
48
+ # Returns a hash number for this ColumnKey
49
+ # @return [Fixnum]
43
50
  def hash
44
51
  [@cf, Arrays.java_send(:hashCode, [Util::JAVA_BYTE_ARRAY_CLASS], @cq)].hash
45
52
  end
46
53
 
54
+ # Returns String representation of the column key (Qualifier decoded as a String)
55
+ # @return [String]
47
56
  def to_s
48
57
  [@cf, @cq.empty? ? nil : cq].compact.join(':')
49
58
  end
@@ -66,6 +66,7 @@ class Result
66
66
  }
67
67
  end
68
68
 
69
+ # Returns column values as byte arrays
69
70
  # @overload raw(column)
70
71
  # Returns the latest column value as a byte array
71
72
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -86,6 +87,7 @@ class Result
86
87
  end
87
88
  end
88
89
 
90
+ # Returns all versions of column values as byte arrays in a Hash indexed by their timestamps
89
91
  # @overload raws(column)
90
92
  # Returns all versions of column values as byte arrays in a Hash indexed by their timestamps
91
93
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -106,6 +108,7 @@ class Result
106
108
  end
107
109
  end
108
110
 
111
+ # Returns column values as Strings
109
112
  # @overload string(column)
110
113
  # Returns the latest column value as a String
111
114
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -120,6 +123,7 @@ class Result
120
123
  end
121
124
  alias str string
122
125
 
126
+ # Returns all versions of column values as Strings in a Hash indexed by their timestamps
123
127
  # @overload strings(column)
124
128
  # Returns all versions of column values as Strings in a Hash indexed by their timestamps
125
129
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -134,6 +138,7 @@ class Result
134
138
  end
135
139
  alias strs strings
136
140
 
141
+ # Returns column values as Symbols
137
142
  # @overload symbol(column)
138
143
  # Returns the latest column value as a Symbol
139
144
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -148,6 +153,7 @@ class Result
148
153
  end
149
154
  alias sym symbol
150
155
 
156
+ # Returns all versions of column values as Symbols in a Hash indexed by their timestamps
151
157
  # @overload symbols(column)
152
158
  # Returns all versions of column values as Symbols in a Hash indexed by their timestamps
153
159
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -162,6 +168,7 @@ class Result
162
168
  end
163
169
  alias syms symbols
164
170
 
171
+ # Returns column values as Fixnums
165
172
  # @overload fixnum(column)
166
173
  # Returns the latest column value as a Fixnum
167
174
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -177,6 +184,7 @@ class Result
177
184
  alias integer fixnum
178
185
  alias int fixnum
179
186
 
187
+ # Returns all versions of column values as Fixnums in a Hash indexed by their timestamps
180
188
  # @overload fixnums(column)
181
189
  # Returns all versions of column values as Fixnums in a Hash indexed by their timestamps
182
190
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -192,6 +200,7 @@ class Result
192
200
  alias integers fixnums
193
201
  alias ints fixnums
194
202
 
203
+ # Returns column values as BigDecimals
195
204
  # @overload bigdecimal(column)
196
205
  # Returns the latest column value as a BigDecimal
197
206
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -205,6 +214,7 @@ class Result
205
214
  decode_values :bigdecimal, cols
206
215
  end
207
216
 
217
+ # Returns all versions of column values as BigDecimals in a Hash indexed by their timestamps
208
218
  # @overload bigdecimals(column)
209
219
  # Returns all versions of column values as BigDecimals in a Hash indexed by their timestamps
210
220
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -218,6 +228,7 @@ class Result
218
228
  decode_values :bigdecimal, cols, true
219
229
  end
220
230
 
231
+ # Returns column values as Floats
221
232
  # @overload float(column)
222
233
  # Returns the latest column value as a Float
223
234
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -232,6 +243,7 @@ class Result
232
243
  end
233
244
  alias double float
234
245
 
246
+ # Returns all versions of column values as Floats in a Hash indexed by their timestamps
235
247
  # @overload floats(column)
236
248
  # Returns all versions of column values as Floats in a Hash indexed by their timestamps
237
249
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -246,6 +258,7 @@ class Result
246
258
  end
247
259
  alias doubles floats
248
260
 
261
+ # Returns column values as Booleans
249
262
  # @overload boolean(column)
250
263
  # Returns the latest column value as a boolean value
251
264
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -260,6 +273,7 @@ class Result
260
273
  end
261
274
  alias bool boolean
262
275
 
276
+ # Returns all versions of column values as Booleans in a Hash indexed by their timestamps
263
277
  # @overload booleans(column)
264
278
  # Returns all versions of column values as boolean values in a Hash indexed by their timestamps
265
279
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -274,6 +288,7 @@ class Result
274
288
  end
275
289
  alias bools booleans
276
290
 
291
+ # Compares two Result instances on their row keys
277
292
  def <=> other
278
293
  Bytes.compareTo(rowkey(:raw), other.rowkey(:raw))
279
294
  end
@@ -35,6 +35,7 @@ class Scoped
35
35
  cnt
36
36
  end
37
37
 
38
+ # Performs GET operations
38
39
  # @overload get(rowkey)
39
40
  # Single GET.
40
41
  # Gets a record with the given rowkey. If the record is not found, nil is returned.
@@ -115,7 +116,7 @@ class Scoped
115
116
  # table.range(:prefix => ['2010', '2012'])
116
117
  def range *key_range
117
118
  if key_range.last.is_a?(Hash)
118
- prefixes = [*key_range.last[:prefix]].compact
119
+ prefixes = arrayfy(key_range.last[:prefix]).compact
119
120
  raise ArgumentError,
120
121
  "Invalid range. Unknown option(s) specified." unless (key_range.last.keys - [:prefix]).empty?
121
122
  key_range = key_range[0...-1]
@@ -237,9 +238,9 @@ private
237
238
  col.each do |prop, val|
238
239
  case prop
239
240
  when :prefix
240
- prefixes += [*val]
241
+ prefixes += arrayfy(val)
241
242
  when :range
242
- ranges += val.is_a?(Array) ? val : [val]
243
+ ranges += arrayfy(val)
243
244
  when :limit
244
245
  limit = val
245
246
  when :offset
@@ -462,10 +463,15 @@ private
462
463
  filtered_scan.tap do |scan|
463
464
  scan.cache_blocks = false
464
465
 
466
+ # A filter that will only return the first KV from each row
467
+ # A filter that will only return the key component of each KV
468
+ filters = [FirstKeyOnlyFilter.new, KeyOnlyFilter.new]
465
469
  if flist = scan.getFilter
466
- flist.addFilter KeyOnlyFilter.new
470
+ filters.each do |filter|
471
+ flist.addFilter filter
472
+ end
467
473
  else
468
- scan.setFilter FilterList.new(KeyOnlyFilter.new)
474
+ scan.setFilter FilterList.new(filters)
469
475
  end
470
476
  end
471
477
  end
@@ -516,6 +522,17 @@ private
516
522
  end
517
523
  }.flatten
518
524
  end
525
+
526
+ def arrayfy val
527
+ # No range splat
528
+ if Util.java_bytes?(val)
529
+ [val]
530
+ elsif val.is_a?(Array)
531
+ val
532
+ else
533
+ [val]
534
+ end
535
+ end
519
536
  end#Scoped
520
537
  end#HBase
521
538
 
@@ -47,6 +47,7 @@ class Table
47
47
  !enabled?
48
48
  end
49
49
 
50
+ # Creates the table
50
51
  # @overload create!(column_family_name, props = {})
51
52
  # Create the table with one column family of the given name
52
53
  # @param [#to_s] column_family_name The name of the column family
@@ -257,6 +258,7 @@ class Table
257
258
  end
258
259
  end
259
260
 
261
+ # Performs PUT operations
260
262
  # @overload put(rowkey, data)
261
263
  # Put operation on a rowkey
262
264
  # @param [Object] rowkey Rowkey
@@ -274,6 +276,7 @@ class Table
274
276
  puts.length
275
277
  end
276
278
 
279
+ # Deletes data
277
280
  # @overload delete(rowkey)
278
281
  # Deletes a row with the given rowkey
279
282
  # @param [Object] rowkey
@@ -334,6 +337,7 @@ class Table
334
337
  }
335
338
  end
336
339
 
340
+ # Atomically increase numeric values
337
341
  # @overload increment(rowkey, column, by)
338
342
  # Atomically increase column value by the specified amount
339
343
  # @param [Object] rowkey Rowkey
@@ -383,7 +387,7 @@ class Table
383
387
  (local_htables[@name] = @pool.get_table(@name))
384
388
  end
385
389
 
386
- # Returns table description
390
+ # Returns a printable version of the table description
387
391
  # @return [String] Table description
388
392
  def inspect
389
393
  if exists?
@@ -6,12 +6,16 @@ module Util
6
6
  JAVA_BYTE_ARRAY_CLASS = JAVA_BYTE_ARRAY_EMPTY.java_class
7
7
 
8
8
  class << self
9
+ def java_bytes? v
10
+ v.respond_to?(:java_class) && v.java_class == JAVA_BYTE_ARRAY_CLASS
11
+ end
12
+
9
13
  # Returns byte array representation of the Ruby object
10
14
  # @param [Object] v
11
15
  # @return [byte[]]
12
16
  def to_bytes v
13
17
  case v
14
- when String
18
+ when String, ByteArray
15
19
  v.to_java_bytes
16
20
  when Fixnum
17
21
  Bytes.java_send :toBytes, [Java::long], v
@@ -30,7 +34,7 @@ module Util
30
34
  when java.math.BigDecimal
31
35
  Bytes.java_send :toBytes, [java.math.BigDecimal], v
32
36
  else
33
- if v.respond_to?(:java_class) && v.java_class == JAVA_BYTE_ARRAY_CLASS
37
+ if java_bytes?(v)
34
38
  v
35
39
  else
36
40
  raise ArgumentError.new("Don't know how to convert #{v.class} into Java bytes")
@@ -94,6 +98,7 @@ module Util
94
98
  end
95
99
  end
96
100
 
101
+ # Import Java classes (Prerequisite for classes in hbase-jruby)
97
102
  # @return [nil]
98
103
  def import_java_classes!
99
104
  imp = lambda { |base, classes|
@@ -161,6 +166,7 @@ module Util
161
166
  org.apache.hadoop.hbase.filter.FilterBase
162
167
  org.apache.hadoop.hbase.filter.FilterList
163
168
  org.apache.hadoop.hbase.filter.KeyOnlyFilter
169
+ org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter
164
170
  org.apache.hadoop.hbase.filter.MultipleColumnPrefixFilter
165
171
  org.apache.hadoop.hbase.filter.PrefixFilter
166
172
  org.apache.hadoop.hbase.filter.RegexStringComparator
@@ -1,5 +1,5 @@
1
1
  class HBase
2
2
  module JRuby
3
- VERSION = "0.1.3"
3
+ VERSION = "0.1.4"
4
4
  end
5
5
  end
@@ -37,4 +37,13 @@ class TestByteArray < Test::Unit::TestCase
37
37
  assert_equal 1, hash[ HBase::ByteArray.new("Hello") ]
38
38
  assert_equal 2, hash[ HBase::ByteArray.new("World".to_java_bytes) ]
39
39
  end
40
+
41
+ def test_concat
42
+ concat = HBase::ByteArray(100) + HBase::ByteArray(200)
43
+ assert_instance_of HBase::ByteArray, concat
44
+ assert_equal 16, concat.to_java_bytes.to_a.length
45
+
46
+ assert_equal 100, HBase::Util.from_bytes( :fixnum, concat.to_java_bytes.to_a[0, 8].to_java(Java::byte) )
47
+ assert_equal 200, HBase::Util.from_bytes( :fixnum, concat.java.to_a[8, 8].to_java(Java::byte) )
48
+ end
40
49
  end
data/test/test_scoped.rb CHANGED
@@ -358,5 +358,25 @@ class TestScoped < TestHBaseJRubyBase
358
358
  assert_equal 28, @table.filter('cf1:a' => ['aa', 'cc', /^g/]).count
359
359
  assert_equal 54, @table.filter('cf1:a' => ['aa', 'cc', /^g/, { :gte => 'xa', :lt => 'y'}]).count
360
360
  end
361
+
362
+ def test_java_bytes_prefix
363
+ (1..100).each do |i|
364
+ (1..100).each do |j|
365
+ @table.put((HBase::ByteArray(i) + HBase::ByteArray(j)).to_java_bytes, 'cf1:a' => i * j)
366
+ end
367
+ end
368
+
369
+ assert_equal 100, @table.range(:prefix => HBase::ByteArray(50)).count
370
+ assert_equal 100, @table.range(:prefix => HBase::ByteArray(50).to_java_bytes).count
371
+ assert_equal 200, @table.range(HBase::ByteArray(50), HBase::ByteArray(52)).count
372
+ assert_equal 1, @table.range(:prefix => (HBase::ByteArray(50) + HBase::ByteArray(50))).count
373
+
374
+ assert_equal 2, @table.range(:prefix => [
375
+ (HBase::ByteArray(50) + HBase::ByteArray(50)).java,
376
+ (HBase::ByteArray(50) + HBase::ByteArray(51)).java ]).count
377
+
378
+ # Fails on 0.1.3
379
+ assert_equal 1, @table.range(:prefix => (HBase::ByteArray(50) + HBase::ByteArray(50)).java).count
380
+ end
361
381
  end
362
382
 
data/test/test_util.rb CHANGED
@@ -14,12 +14,14 @@ class TestUtil < Test::Unit::TestCase
14
14
 
15
15
  [:fixnum, :int, :integer].each do |type|
16
16
  assert_equal 100, Util.from_bytes( type, Util.to_bytes(100) )
17
+ assert_equal 100, Util.from_bytes( type, Util.to_bytes(HBase::ByteArray(100)) )
17
18
  end
18
19
  [:float, :double].each do |type|
19
20
  assert_equal 3.14, Util.from_bytes( type, Util.to_bytes(3.14) )
20
21
  end
21
22
  [:string, :str].each do |type|
22
23
  assert_equal "Hello", Util.from_bytes( type, Util.to_bytes("Hello") )
24
+ assert_equal "Hello", Util.from_bytes( type, Util.to_bytes(HBase::ByteArray("Hello")) )
23
25
  end
24
26
  [:bool, :boolean].each do |type|
25
27
  assert_equal true, Util.from_bytes( type, Util.to_bytes(true) )
@@ -46,8 +48,8 @@ class TestUtil < Test::Unit::TestCase
46
48
  end
47
49
 
48
50
  def test_parse_column_name
49
- assert_equal ['abc', 'def'], parse_to_str('abc:def')
50
- assert_equal ['abc', 'def:'], parse_to_str('abc:def:')
51
+ assert_equal ['abc', 'def'], parse_to_str('abc:def')
52
+ assert_equal ['abc', 'def:'], parse_to_str('abc:def:')
51
53
  assert_equal ['abc', ''], parse_to_str('abc:')
52
54
  assert_equal ['abc', nil], parse_to_str('abc')
53
55
  assert_equal ['abc', ':::'], parse_to_str('abc::::')
@@ -72,6 +74,16 @@ class TestUtil < Test::Unit::TestCase
72
74
  assert_equal [97, 97, 97, 0], Util.append_0("aaa".to_java_bytes).to_a
73
75
  end
74
76
 
77
+ def test_java_bytes
78
+ ["Hello", 1234, :symbol].each do |v|
79
+ assert_false Util.java_bytes?(v)
80
+ end
81
+
82
+ ["Hello".to_java_bytes, Util.to_bytes(1234), Util.to_bytes(:symbol)].each do |v|
83
+ assert Util.java_bytes?(v)
84
+ end
85
+ end
86
+
75
87
  private
76
88
  def parse_to_str v, type = :string
77
89
  Util.parse_column_name(v).map { |e| Util.from_bytes type, e }
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: hbase-jruby
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.1.3
5
+ version: 0.1.4
6
6
  platform: java
7
7
  authors:
8
8
  - Junegunn Choi