hbase-jruby 0.3.4-java → 0.3.5-java

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.md CHANGED
@@ -1,6 +1,15 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ 0.3.5
5
+ -----
6
+ - Improved `Scoped#count` method
7
+ - KeyOnlyFilter turned out to be compatible with SingleColumnValueFilter
8
+ - Now takes an optional Hash: `scoped.count(cache_blocks: false, caching: 100)`
9
+ - No longer disables server-side block caching by default
10
+ - Supports `Scoped#limit` even when `Scan.setMaxResultSize` is not implemented
11
+ - `Scoped#limit(nil)` will remove the previously set value
12
+
4
13
  0.3.4
5
14
  -----
6
15
  - Cleanup all thread-local (fiber-local) HTable references when connection is closed
data/README.md CHANGED
@@ -710,11 +710,17 @@ end
710
710
  # Scoped COUNT
711
711
  # When counting the number of rows, use `HTable::Scoped#count`
712
712
  # instead of just iterating through the scope, as it internally
713
- # minimizes amount of data fetched with KeyOnlyFilter
713
+ # minimizes the amount of data transferred by using KeyOnlyFilter
714
+ # (and FirstKeyOnlyFilter when no filter is set)
714
715
  scoped.count
715
716
 
716
717
  # This should be even faster as it dramatically reduces the number of RPC calls
717
- scoped.caching(5000).count
718
+ scoped.caching(1000).count
719
+
720
+ # count method takes an options Hash:
721
+ # - :caching (default: nil)
722
+ # - :cache_blocks (default: true)
723
+ scoped.count(caching: 5000, cache_blocks: false)
718
724
  ```
719
725
 
720
726
  ## Basic aggregation using coprocessor
@@ -16,22 +16,29 @@ class Scoped
16
16
  Scoped.send(:new, @table)
17
17
  end
18
18
 
19
- # Number of rows in the scope
19
+ # Count the number of rows in the scope
20
20
  # @return [Fixnum, Bignum] The number of rows in the scope
21
- def count
21
+ # @param [Hash] options Counting options
22
+ # @option options [Fixnum|nil] :caching The number of rows for caching that will be passed to scanners.
23
+ # Use higher values for faster scan.
24
+ # @option options [Boolean] :cache_blocks Whether blocks should be cached for this scan
25
+ def count options = {}
26
+ options = { :caching => nil,
27
+ :cache_blocks => true }.merge(options)
28
+
29
+ scan = block_given? ? filtered_scan : filtered_scan_minimum
30
+ scan.cache_blocks = options[:cache_blocks]
31
+ if options[:caching] && (@mlimit.nil? || options[:caching] < @mlimit)
32
+ scan.caching = options[:caching]
33
+ end
34
+
22
35
  cnt = 0
23
- begin
24
- if block_given?
25
- scanner = htable.getScanner(filtered_scan)
26
- scanner.each do |result|
27
- cnt += 1 if yield(Row.send(:new, @table, result))
28
- end
29
- else
30
- scanner = htable.getScanner(filtered_scan_minimum)
31
- scanner.each { cnt += 1 }
36
+ if block_given?
37
+ iterate(scan) do |result|
38
+ cnt += 1 if yield(Row.send(:new, @table, result))
32
39
  end
33
- ensure
34
- scanner.close if scanner
40
+ else
41
+ iterate(scan) { |r| cnt += 1 }
35
42
  end
36
43
  cnt
37
44
  end
@@ -65,13 +72,8 @@ class Scoped
65
72
  def each
66
73
  return enum_for(:each) unless block_given?
67
74
 
68
- begin
69
- scanner = htable.getScanner(filtered_scan)
70
- scanner.each do |result|
71
- yield Row.send(:new, @table, result)
72
- end
73
- ensure
74
- scanner.close if scanner
75
+ iterate(filtered_scan) do |result|
76
+ yield Row.send(:new, @table, result)
75
77
  end
76
78
  end
77
79
 
@@ -158,11 +160,13 @@ class Scoped
158
160
  end
159
161
 
160
162
  # Returns an HBase::Scoped object with the specified row number limit
161
- # @param [Fixnum] rows Sets the maximum number of rows to return from scan
163
+ # @param [Fixnum|nil] rows Sets the maximum number of rows to return from scan
162
164
  # @return [HBase::Scoped] HBase::Scoped object with the specified row number limit
163
165
  def limit rows
164
- raise ArgumentError, "Invalid limit. Must be a non-negative integer." unless rows.is_a?(Fixnum) && rows >= 0
165
- spawn :@limit, rows
166
+ unless (rows.is_a?(Fixnum) && rows >= 0) || rows.nil?
167
+ raise ArgumentError, "Invalid limit. Must be a non-negative integer or nil."
168
+ end
169
+ spawn :@limit, rows, :@mlimit, nil
166
170
  end
167
171
 
168
172
  # Returns an HBase::Scoped object with the specified time range
@@ -259,6 +263,7 @@ private
259
263
  @batch = nil
260
264
  @caching = nil
261
265
  @limit = nil
266
+ @mlimit = nil
262
267
  @trange = nil
263
268
  @scan_cbs = []
264
269
  @get_cbs = []
@@ -523,11 +528,12 @@ private
523
528
 
524
529
  # Limit
525
530
  if @limit
526
- # setMaxResultSize not implemented in 0.92
531
+ # setMaxResultSize not yet implemented in 0.94
527
532
  if scan.respond_to?(:setMaxResultSize)
528
533
  scan.setMaxResultSize(@limit)
529
534
  else
530
- raise NotImplementedError, 'Scan.setMaxResultSize not implemented'
535
+ @mlimit = @limit
536
+ scan.caching = [@mlimit, @caching].compact.min
531
537
  end
532
538
  end
533
539
 
@@ -560,15 +566,23 @@ private
560
566
  # @private
561
567
  def filtered_scan_minimum
562
568
  filtered_scan.tap do |scan|
563
- scan.cache_blocks = false
564
569
  scan.setMaxVersions 1
565
570
 
566
- # A filter that will only return the first KV from each row
567
- # A filter that will only return the key component of each KV
568
- unless scan.getFilter
569
- filters = [FirstKeyOnlyFilter.new, KeyOnlyFilter.new]
570
- scan.setFilter FilterList.new(filters)
571
+ # FirstKeyOnlyFilter: A filter that will only return the first KV from each row
572
+ # - Not compatible with SingleColumnValueFilter
573
+ # KeyOnlyFilter: A filter that will only return the key component of each KV
574
+ # - Compatible with SingleColumnValueFilter
575
+ ko = KeyOnlyFilter.new
576
+ if flist = scan.getFilter
577
+ if flist.is_a?(FilterList)
578
+ flist.addFilter ko
579
+ else
580
+ flist = FilterList.new([flist, ko])
581
+ end
582
+ else
583
+ flist = FilterList.new([ko, FirstKeyOnlyFilter.new])
571
584
  end
585
+ scan.setFilter flist
572
586
  end
573
587
  end
574
588
 
@@ -633,6 +647,22 @@ private
633
647
  def check_closed
634
648
  raise RuntimeError, "HBase connection is already closed" if @table.closed?
635
649
  end
650
+
651
+ def iterate scan
652
+ scanner = htable.getScanner(scan)
653
+ if @mlimit
654
+ scanner.each_with_index do |result, idx|
655
+ yield result
656
+ break if idx == @mlimit - 1
657
+ end
658
+ else
659
+ scanner.each do |result|
660
+ yield result
661
+ end
662
+ end
663
+ ensure
664
+ scanner.close if scanner
665
+ end
636
666
  end#Scoped
637
667
  end#HBase
638
668
 
@@ -1,5 +1,5 @@
1
1
  class HBase
2
2
  module JRuby
3
- VERSION = '0.3.4'
3
+ VERSION = '0.3.5'
4
4
  end
5
5
  end
data/test/test_scoped.rb CHANGED
@@ -81,14 +81,34 @@ class TestScoped < TestHBaseJRubyBase
81
81
  assert_equal 10, @table.range(111..120).count
82
82
 
83
83
  # Start key ~ Stop key (inclusive) + limit
84
- begin
85
- assert_equal 5, @table.range(111..120).limit(5).count
86
- rescue NotImplementedError
87
- end
84
+ assert_equal 5, @table.range(111..120).limit(5).count
85
+ assert_equal 5, @table.range(111..120).limit(5).to_a.count
86
+ assert_equal 10, @table.range(111..120).limit(5).limit(nil).to_a.count
87
+ scoped = @table.range(111..120).limit(5)
88
+ assert_equal 5, scoped.count
89
+
90
+ # Scan.setCaching should be called when setMaxResultSize is not implemented
91
+ done = false
92
+ scoped.with_java_scan { |scan|
93
+ assert scan.respond_to?(:setMaxResultSize) || scan.caching == 5
94
+ done = true
95
+ }.count
96
+ assert done
97
+
98
+ assert_equal 10, scoped.limit(nil).count
99
+
100
+ # Filters
101
+ assert_equal 1, @table.filter('cf1:a' => 135).count
102
+ assert_equal 1, @table.filter('cf2:b' => 135).count
103
+ assert_equal 10, @table.filter('cf1:a' => 131..140).count
104
+ assert_equal 10, @table.filter('cf2:b' => 131..140).count
105
+ assert_equal 10, @table.project(:prefix => 'a').filter('cf2:b' => 131..140).count
88
106
 
89
107
  # Start key ~ Stop key (inclusive) + filters
90
108
  assert_equal 10, @table.range(111..150).filter('cf1:a' => 131..140).count
109
+ assert_equal 10, @table.range(111..150).filter('cf2:b' => 131..140).count
91
110
  assert_equal 9, @table.range(111..150).filter('cf1:a' => 131...140).count
111
+ assert_equal 9, @table.range(111..150).filter('cf2:b' => 131...140).count
92
112
  assert_equal 2, @table.range(111..150).filter('cf1:a' => 131...140, 'cf2:b' => 132..133).count
93
113
 
94
114
  # Count with block
@@ -522,5 +542,22 @@ class TestScoped < TestHBaseJRubyBase
522
542
  get.setTimeRange(t2.to_i * 1000, t4.to_i * 1000)
523
543
  }.get(rks).compact.count
524
544
  end
545
+
546
+ def test_count_options
547
+ # TODO how to confirm?
548
+
549
+ (101..150).each do |i|
550
+ @table.put(i, 'cf1:a' => i, 'cf2:b' => i, 'cf3:c' => i * 3)
551
+ end
552
+
553
+ assert_equal 50, @table.count(:cache_blocks => false)
554
+ assert_equal 50, @table.count(:cache_blocks => true)
555
+
556
+ assert_equal 50, @table.count(:caching => nil)
557
+ assert_equal 50, @table.count(:caching => 5)
558
+ assert_equal 50, @table.count(:caching => 500)
559
+ assert_equal 15, @table.limit(15).count(:caching => 500)
560
+ assert_equal 15, @table.limit(15).count(:caching => 500, :cache_blocks => false)
561
+ end
525
562
  end
526
563
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hbase-jruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  prerelease:
6
6
  platform: java
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-05 00:00:00.000000000 Z
12
+ date: 2013-07-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: test-unit