hbase-jruby 0.1.3-java → 0.1.4-java

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md CHANGED
@@ -1,6 +1,15 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ 0.1.4
5
+ -----
6
+ - Fix: Start/stop row not correctly set when byte array rowkey range specified
7
+ - More efficient count with FirstKeyOnlyFilter
8
+ - Added `HBase::ByteArray` method as a shorthand notation for `HBase::ByteArray.new`
9
+ - Added `HBase::ByteArray#+` method for concatenating two byte arrays
10
+ - Added `HBase::Util::java_bytes?` method
11
+ - Documentation
12
+
4
13
  0.1.3
5
14
  -----
6
15
  - Supports Ruby 1.8 compatibility mode
data/README.md CHANGED
@@ -109,7 +109,9 @@ hbase = HBase.new 'hbase.zookeeper.quorum' => 'remote-server.mydomain.net'
109
109
 
110
110
  # Extra configuration
111
111
  hbase = HBase.new 'hbase.zookeeper.quorum' => 'remote-server.mydomain.net',
112
- 'hbase.client.retries.number' => 3
112
+ 'hbase.client.retries.number' => 3,
113
+ 'hbase.client.scanner.caching' => 1000,
114
+ 'hbase.rpc.timeout' => 120000
113
115
 
114
116
  # Close HBase connection
115
117
  hbase.close
@@ -310,7 +312,7 @@ table.truncate!
310
312
  # Atomically increase cf1:counter by one
311
313
  table.increment('rowkey1', 'cf1:counter', 1)
312
314
 
313
- # Atomically increase two columns by one an two respectively
315
+ # Atomically increase two columns by one and two respectively
314
316
  table.increment('rowkey1', 'cf1:counter' => 1, 'cf1:counter2' => 2)
315
317
  ```
316
318
 
@@ -504,7 +506,7 @@ scoped.project(:prefix => 'alice').
504
506
  scoped.project(:range => 'a'...'c').
505
507
  project(:range => ['i'...'k', 'x'...'z'])
506
508
 
507
- # Column pagination filter (Cannot be chained. Must be called exactly once.):
509
+ # Column pagination filter:
508
510
  # Fetch columns within the specified intra-scan offset and limit
509
511
  scoped.project(:offset => 1000, :limit => 10)
510
512
  ```
@@ -556,6 +558,9 @@ end
556
558
  # instead of just iterating through the scope, as it internally
557
559
  # minimizes amount of data fetched with KeyOnlyFilter
558
560
  scoped.count
561
+
562
+ # This should be even faster as it dramatically reduces the number of RPC calls
563
+ scoped.caching(1000).count
559
564
  ```
560
565
 
561
566
  ## Basic aggregation using coprocessor
@@ -674,7 +679,7 @@ table.remove_coprocessor! cp_class_name1
674
679
  ```
675
680
 
676
681
  You can perform other types of administrative tasks
677
- with Native Java [HBaseAdmin object](http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html),
682
+ with native Java [HBaseAdmin object](http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/HBaseAdmin.html),
678
683
  which can be obtained by `HBase#admin` method. Optionally, a block can be given
679
684
  so that the HBaseAdmin object is automatically closed at the end of the given block.
680
685
 
@@ -1,21 +1,49 @@
1
1
  class HBase
2
- # @private
2
+ class << self
3
+ # Shortcut method to HBase::ByteArray.new
4
+ # @param [Object] value
5
+ def ByteArray value
6
+ ByteArray.new value
7
+ end
8
+ end
9
+ # Boxed class for Java byte arrays
10
+ # @!attribute [r] java
11
+ # @return [byte[]] Java byte array
3
12
  class ByteArray
4
13
  attr_reader :java
5
14
 
15
+ # @param [Object] value
6
16
  def initialize value
7
17
  @java = Util.to_bytes value
8
18
  end
9
19
 
20
+ # Checks if the two byte arrays are the same
21
+ # @param [HBase::ByteArray] other
10
22
  def eql? other
11
23
  Arrays.equals(@java, other.java)
12
24
  end
13
25
  alias == eql?
14
26
 
27
+ # Compares two ByteArray objects
28
+ # @param [HBase::ByteArray] other
15
29
  def <=> other
16
30
  Bytes.compareTo(@java, other.java)
17
31
  end
18
32
 
33
+ # Concatenates two byte arrays
34
+ # @param [HBase::ByteArray] other
35
+ def + other
36
+ ByteArray.new(Bytes.add @java, other.java)
37
+ end
38
+
39
+ # Returns the Java byte array
40
+ # @return [byte[]]
41
+ def to_java_bytes
42
+ @java
43
+ end
44
+
45
+ # Returns the first byte array whose prefix doesn't match this byte array
46
+ # @return [byte[]]
19
47
  def stopkey_bytes_for_prefix
20
48
  arr = @java.to_a
21
49
  csr = arr.length - 1
@@ -31,6 +59,8 @@ class ByteArray
31
59
  end
32
60
  end
33
61
 
62
+ # Returns a hash number for the byte array
63
+ # @return [Fixnum]
34
64
  def hash
35
65
  Arrays.java_send(:hashCode, [Util::JAVA_BYTE_ARRAY_CLASS], @java)
36
66
  end
@@ -105,6 +105,7 @@ class Cell
105
105
  KeyValue.COMPARATOR.compare(@java, other.java)
106
106
  end
107
107
 
108
+ # Returns a printable version of this cell
108
109
  # @return [String]
109
110
  def inspect
110
111
  %[#{cf}:#{cq} = "#{string}"@#{ts}]
@@ -8,6 +8,8 @@ class << self
8
8
  end
9
9
  end
10
10
  # Boxed class for column keys
11
+ # @!attribute [r] cf
12
+ # @return [String] The column family
11
13
  class ColumnKey
12
14
  attr_reader :cf
13
15
  alias family cf
@@ -20,12 +22,14 @@ class ColumnKey
20
22
  @cq = Util.to_bytes(cq)
21
23
  end
22
24
 
25
+ # Returns the column qualifier decoded as the given type
23
26
  # @param [Symbol] type
24
27
  def cq type = :string
25
28
  Util.from_bytes type, @cq
26
29
  end
27
30
  alias qualifier cq
28
31
 
32
+ # Checks whether the two ColumnKeys are equal
29
33
  # @param [Object] other
30
34
  def eql? other
31
35
  other = other_as_ck(other)
@@ -33,6 +37,7 @@ class ColumnKey
33
37
  end
34
38
  alias == eql?
35
39
 
40
+ # Compares two ColumnKeys
36
41
  # @param [Object] other
37
42
  def <=> other
38
43
  other = other_as_ck(other)
@@ -40,10 +45,14 @@ class ColumnKey
40
45
  d != 0 ? d : Bytes.compareTo(@cq, other.cq(:raw))
41
46
  end
42
47
 
48
+ # Returns a hash number for this ColumnKey
49
+ # @return [Fixnum]
43
50
  def hash
44
51
  [@cf, Arrays.java_send(:hashCode, [Util::JAVA_BYTE_ARRAY_CLASS], @cq)].hash
45
52
  end
46
53
 
54
+ # Returns String representation of the column key (Qualifier decoded as a String)
55
+ # @return [String]
47
56
  def to_s
48
57
  [@cf, @cq.empty? ? nil : cq].compact.join(':')
49
58
  end
@@ -66,6 +66,7 @@ class Result
66
66
  }
67
67
  end
68
68
 
69
+ # Returns column values as byte arrays
69
70
  # @overload raw(column)
70
71
  # Returns the latest column value as a byte array
71
72
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -86,6 +87,7 @@ class Result
86
87
  end
87
88
  end
88
89
 
90
+ # Returns all versions of column values as byte arrays in a Hash indexed by their timestamps
89
91
  # @overload raws(column)
90
92
  # Returns all versions of column values as byte arrays in a Hash indexed by their timestamps
91
93
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -106,6 +108,7 @@ class Result
106
108
  end
107
109
  end
108
110
 
111
+ # Returns column values as Strings
109
112
  # @overload string(column)
110
113
  # Returns the latest column value as a String
111
114
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -120,6 +123,7 @@ class Result
120
123
  end
121
124
  alias str string
122
125
 
126
+ # Returns all versions of column values as Strings in a Hash indexed by their timestamps
123
127
  # @overload strings(column)
124
128
  # Returns all versions of column values as Strings in a Hash indexed by their timestamps
125
129
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -134,6 +138,7 @@ class Result
134
138
  end
135
139
  alias strs strings
136
140
 
141
+ # Returns column values as Symbols
137
142
  # @overload symbol(column)
138
143
  # Returns the latest column value as a Symbol
139
144
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -148,6 +153,7 @@ class Result
148
153
  end
149
154
  alias sym symbol
150
155
 
156
+ # Returns all versions of column values as Symbols in a Hash indexed by their timestamps
151
157
  # @overload symbols(column)
152
158
  # Returns all versions of column values as Symbols in a Hash indexed by their timestamps
153
159
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -162,6 +168,7 @@ class Result
162
168
  end
163
169
  alias syms symbols
164
170
 
171
+ # Returns column values as Fixnums
165
172
  # @overload fixnum(column)
166
173
  # Returns the latest column value as a Fixnum
167
174
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -177,6 +184,7 @@ class Result
177
184
  alias integer fixnum
178
185
  alias int fixnum
179
186
 
187
+ # Returns all versions of column values as Fixnums in a Hash indexed by their timestamps
180
188
  # @overload fixnums(column)
181
189
  # Returns all versions of column values as Fixnums in a Hash indexed by their timestamps
182
190
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -192,6 +200,7 @@ class Result
192
200
  alias integers fixnums
193
201
  alias ints fixnums
194
202
 
203
+ # Returns column values as BigDecimals
195
204
  # @overload bigdecimal(column)
196
205
  # Returns the latest column value as a BigDecimal
197
206
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -205,6 +214,7 @@ class Result
205
214
  decode_values :bigdecimal, cols
206
215
  end
207
216
 
217
+ # Returns all versions of column values as BigDecimals in a Hash indexed by their timestamps
208
218
  # @overload bigdecimals(column)
209
219
  # Returns all versions of column values as BigDecimals in a Hash indexed by their timestamps
210
220
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -218,6 +228,7 @@ class Result
218
228
  decode_values :bigdecimal, cols, true
219
229
  end
220
230
 
231
+ # Returns column values as Floats
221
232
  # @overload float(column)
222
233
  # Returns the latest column value as a Float
223
234
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -232,6 +243,7 @@ class Result
232
243
  end
233
244
  alias double float
234
245
 
246
+ # Returns all versions of column values as Floats in a Hash indexed by their timestamps
235
247
  # @overload floats(column)
236
248
  # Returns all versions of column values as Floats in a Hash indexed by their timestamps
237
249
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -246,6 +258,7 @@ class Result
246
258
  end
247
259
  alias doubles floats
248
260
 
261
+ # Returns column values as Booleans
249
262
  # @overload boolean(column)
250
263
  # Returns the latest column value as a boolean value
251
264
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -260,6 +273,7 @@ class Result
260
273
  end
261
274
  alias bool boolean
262
275
 
276
+ # Returns all versions of column values as Booleans in a Hash indexed by their timestamps
263
277
  # @overload booleans(column)
264
278
  # Returns all versions of column values as boolean values in a Hash indexed by their timestamps
265
279
  # @param [String, HBase::ColumnKey] column "FAMILY:QUALIFIER" expression or ColumnKey object.
@@ -274,6 +288,7 @@ class Result
274
288
  end
275
289
  alias bools booleans
276
290
 
291
+ # Compares two Result instances on their row keys
277
292
  def <=> other
278
293
  Bytes.compareTo(rowkey(:raw), other.rowkey(:raw))
279
294
  end
@@ -35,6 +35,7 @@ class Scoped
35
35
  cnt
36
36
  end
37
37
 
38
+ # Performs GET operations
38
39
  # @overload get(rowkey)
39
40
  # Single GET.
40
41
  # Gets a record with the given rowkey. If the record is not found, nil is returned.
@@ -115,7 +116,7 @@ class Scoped
115
116
  # table.range(:prefix => ['2010', '2012'])
116
117
  def range *key_range
117
118
  if key_range.last.is_a?(Hash)
118
- prefixes = [*key_range.last[:prefix]].compact
119
+ prefixes = arrayfy(key_range.last[:prefix]).compact
119
120
  raise ArgumentError,
120
121
  "Invalid range. Unknown option(s) specified." unless (key_range.last.keys - [:prefix]).empty?
121
122
  key_range = key_range[0...-1]
@@ -237,9 +238,9 @@ private
237
238
  col.each do |prop, val|
238
239
  case prop
239
240
  when :prefix
240
- prefixes += [*val]
241
+ prefixes += arrayfy(val)
241
242
  when :range
242
- ranges += val.is_a?(Array) ? val : [val]
243
+ ranges += arrayfy(val)
243
244
  when :limit
244
245
  limit = val
245
246
  when :offset
@@ -462,10 +463,15 @@ private
462
463
  filtered_scan.tap do |scan|
463
464
  scan.cache_blocks = false
464
465
 
466
+ # A filter that will only return the first KV from each row
467
+ # A filter that will only return the key component of each KV
468
+ filters = [FirstKeyOnlyFilter.new, KeyOnlyFilter.new]
465
469
  if flist = scan.getFilter
466
- flist.addFilter KeyOnlyFilter.new
470
+ filters.each do |filter|
471
+ flist.addFilter filter
472
+ end
467
473
  else
468
- scan.setFilter FilterList.new(KeyOnlyFilter.new)
474
+ scan.setFilter FilterList.new(filters)
469
475
  end
470
476
  end
471
477
  end
@@ -516,6 +522,17 @@ private
516
522
  end
517
523
  }.flatten
518
524
  end
525
+
526
+ def arrayfy val
527
+ # No range splat
528
+ if Util.java_bytes?(val)
529
+ [val]
530
+ elsif val.is_a?(Array)
531
+ val
532
+ else
533
+ [val]
534
+ end
535
+ end
519
536
  end#Scoped
520
537
  end#HBase
521
538
 
@@ -47,6 +47,7 @@ class Table
47
47
  !enabled?
48
48
  end
49
49
 
50
+ # Creates the table
50
51
  # @overload create!(column_family_name, props = {})
51
52
  # Create the table with one column family of the given name
52
53
  # @param [#to_s] The name of the column family
@@ -257,6 +258,7 @@ class Table
257
258
  end
258
259
  end
259
260
 
261
+ # Performs PUT operations
260
262
  # @overload put(rowkey, data)
261
263
  # Put operation on a rowkey
262
264
  # @param [Object] rowkey Rowkey
@@ -274,6 +276,7 @@ class Table
274
276
  puts.length
275
277
  end
276
278
 
279
+ # Deletes data
277
280
  # @overload delete(rowkey)
278
281
  # Deletes a row with the given rowkey
279
282
  # @param [Object] rowkey
@@ -334,6 +337,7 @@ class Table
334
337
  }
335
338
  end
336
339
 
340
+ # Atomically increase numeric values
337
341
  # @overload increment(rowkey, column, by)
338
342
  # Atomically increase column value by the specified amount
339
343
  # @param [Object] rowkey Rowkey
@@ -383,7 +387,7 @@ class Table
383
387
  (local_htables[@name] = @pool.get_table(@name))
384
388
  end
385
389
 
386
- # Returns table description
390
+ # Returns a printable version of the table description
387
391
  # @return [String] Table description
388
392
  def inspect
389
393
  if exists?
@@ -6,12 +6,16 @@ module Util
6
6
  JAVA_BYTE_ARRAY_CLASS = JAVA_BYTE_ARRAY_EMPTY.java_class
7
7
 
8
8
  class << self
9
+ def java_bytes? v
10
+ v.respond_to?(:java_class) && v.java_class == JAVA_BYTE_ARRAY_CLASS
11
+ end
12
+
9
13
  # Returns byte array representation of the Ruby object
10
14
  # @param [byte[]] v
11
15
  # @return [byte[]]
12
16
  def to_bytes v
13
17
  case v
14
- when String
18
+ when String, ByteArray
15
19
  v.to_java_bytes
16
20
  when Fixnum
17
21
  Bytes.java_send :toBytes, [Java::long], v
@@ -30,7 +34,7 @@ module Util
30
34
  when java.math.BigDecimal
31
35
  Bytes.java_send :toBytes, [java.math.BigDecimal], v
32
36
  else
33
- if v.respond_to?(:java_class) && v.java_class == JAVA_BYTE_ARRAY_CLASS
37
+ if java_bytes?(v)
34
38
  v
35
39
  else
36
40
  raise ArgumentError.new("Don't know how to convert #{v.class} into Java bytes")
@@ -94,6 +98,7 @@ module Util
94
98
  end
95
99
  end
96
100
 
101
+ # Import Java classes (Prerequisite for classes in hbase-jruby)
97
102
  # @return [nil]
98
103
  def import_java_classes!
99
104
  imp = lambda { |base, classes|
@@ -161,6 +166,7 @@ module Util
161
166
  org.apache.hadoop.hbase.filter.FilterBase
162
167
  org.apache.hadoop.hbase.filter.FilterList
163
168
  org.apache.hadoop.hbase.filter.KeyOnlyFilter
169
+ org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter
164
170
  org.apache.hadoop.hbase.filter.MultipleColumnPrefixFilter
165
171
  org.apache.hadoop.hbase.filter.PrefixFilter
166
172
  org.apache.hadoop.hbase.filter.RegexStringComparator
@@ -1,5 +1,5 @@
1
1
  class HBase
2
2
  module JRuby
3
- VERSION = "0.1.3"
3
+ VERSION = "0.1.4"
4
4
  end
5
5
  end
@@ -37,4 +37,13 @@ class TestByteArray < Test::Unit::TestCase
37
37
  assert_equal 1, hash[ HBase::ByteArray.new("Hello") ]
38
38
  assert_equal 2, hash[ HBase::ByteArray.new("World".to_java_bytes) ]
39
39
  end
40
+
41
+ def test_concat
42
+ concat = HBase::ByteArray(100) + HBase::ByteArray(200)
43
+ assert_instance_of HBase::ByteArray, concat
44
+ assert_equal 16, concat.to_java_bytes.to_a.length
45
+
46
+ assert_equal 100, HBase::Util.from_bytes( :fixnum, concat.to_java_bytes.to_a[0, 8].to_java(Java::byte) )
47
+ assert_equal 200, HBase::Util.from_bytes( :fixnum, concat.java.to_a[8, 8].to_java(Java::byte) )
48
+ end
40
49
  end
data/test/test_scoped.rb CHANGED
@@ -358,5 +358,25 @@ class TestScoped < TestHBaseJRubyBase
358
358
  assert_equal 28, @table.filter('cf1:a' => ['aa', 'cc', /^g/]).count
359
359
  assert_equal 54, @table.filter('cf1:a' => ['aa', 'cc', /^g/, { :gte => 'xa', :lt => 'y'}]).count
360
360
  end
361
+
362
+ def test_java_bytes_prefix
363
+ (1..100).each do |i|
364
+ (1..100).each do |j|
365
+ @table.put((HBase::ByteArray(i) + HBase::ByteArray(j)).to_java_bytes, 'cf1:a' => i * j)
366
+ end
367
+ end
368
+
369
+ assert_equal 100, @table.range(:prefix => HBase::ByteArray(50)).count
370
+ assert_equal 100, @table.range(:prefix => HBase::ByteArray(50).to_java_bytes).count
371
+ assert_equal 200, @table.range(HBase::ByteArray(50), HBase::ByteArray(52)).count
372
+ assert_equal 1, @table.range(:prefix => (HBase::ByteArray(50) + HBase::ByteArray(50))).count
373
+
374
+ assert_equal 2, @table.range(:prefix => [
375
+ (HBase::ByteArray(50) + HBase::ByteArray(50)).java,
376
+ (HBase::ByteArray(50) + HBase::ByteArray(51)).java ]).count
377
+
378
+ # Fails on 0.1.3
379
+ assert_equal 1, @table.range(:prefix => (HBase::ByteArray(50) + HBase::ByteArray(50)).java).count
380
+ end
361
381
  end
362
382
 
data/test/test_util.rb CHANGED
@@ -14,12 +14,14 @@ class TestUtil < Test::Unit::TestCase
14
14
 
15
15
  [:fixnum, :int, :integer].each do |type|
16
16
  assert_equal 100, Util.from_bytes( type, Util.to_bytes(100) )
17
+ assert_equal 100, Util.from_bytes( type, Util.to_bytes(HBase::ByteArray(100)) )
17
18
  end
18
19
  [:float, :double].each do |type|
19
20
  assert_equal 3.14, Util.from_bytes( type, Util.to_bytes(3.14) )
20
21
  end
21
22
  [:string, :str].each do |type|
22
23
  assert_equal "Hello", Util.from_bytes( type, Util.to_bytes("Hello") )
24
+ assert_equal "Hello", Util.from_bytes( type, Util.to_bytes(HBase::ByteArray("Hello")) )
23
25
  end
24
26
  [:bool, :boolean].each do |type|
25
27
  assert_equal true, Util.from_bytes( type, Util.to_bytes(true) )
@@ -46,8 +48,8 @@ class TestUtil < Test::Unit::TestCase
46
48
  end
47
49
 
48
50
  def test_parse_column_name
49
- assert_equal ['abc', 'def'], parse_to_str('abc:def')
50
- assert_equal ['abc', 'def:'], parse_to_str('abc:def:')
51
+ assert_equal ['abc', 'def'], parse_to_str('abc:def')
52
+ assert_equal ['abc', 'def:'], parse_to_str('abc:def:')
51
53
  assert_equal ['abc', ''], parse_to_str('abc:')
52
54
  assert_equal ['abc', nil], parse_to_str('abc')
53
55
  assert_equal ['abc', ':::'], parse_to_str('abc::::')
@@ -72,6 +74,16 @@ class TestUtil < Test::Unit::TestCase
72
74
  assert_equal [97, 97, 97, 0], Util.append_0("aaa".to_java_bytes).to_a
73
75
  end
74
76
 
77
+ def test_java_bytes
78
+ ["Hello", 1234, :symbol].each do |v|
79
+ assert_false Util.java_bytes?(v)
80
+ end
81
+
82
+ ["Hello".to_java_bytes, Util.to_bytes(1234), Util.to_bytes(:symbol)].each do |v|
83
+ assert Util.java_bytes?(v)
84
+ end
85
+ end
86
+
75
87
  private
76
88
  def parse_to_str v, type = :string
77
89
  Util.parse_column_name(v).map { |e| Util.from_bytes type, e }
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: hbase-jruby
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.1.3
5
+ version: 0.1.4
6
6
  platform: java
7
7
  authors:
8
8
  - Junegunn Choi