hbase-jruby 0.3.4-java → 0.3.5-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.md CHANGED
@@ -1,6 +1,15 @@
1
1
  Changelog
2
2
  =========
3
3
 
4
+ 0.3.5
5
+ -----
6
+ - Improved `Scoped#count` method
7
+ - KeyOnlyFilter turned out to be compatible with SingleColumnValueFilter
8
+ - Now takes an optional Hash: `scoped.count(cache_blocks: false, caching: 100)`
9
+ - Changed not to disable server-side block caching by default
10
+ - Supports `Scoped#limit` even when `Scan.setMaxResultSize` is not implemented
11
+ - `Scoped#limit(nil)` will remove the previously set value
12
+
4
13
  0.3.4
5
14
  -----
6
15
  - Cleanup all thread-local (fiber-local) HTable references when connection is closed
data/README.md CHANGED
@@ -710,11 +710,17 @@ end
710
710
  # Scoped COUNT
711
711
  # When counting the number of rows, use `HTable::Scoped#count`
712
712
  # instead of just iterating through the scope, as it internally
713
- # minimizes amount of data fetched with KeyOnlyFilter
713
+ # minimizes the amount of data transferred using KeyOnlyFilter
714
+ # (and FirstKeyOnlyFilter when no filter is set)
714
715
  scoped.count
715
716
 
716
717
  # This should be even faster as it dramatically reduces the number of RPC calls
717
- scoped.caching(5000).count
718
+ scoped.caching(1000).count
719
+
720
+ # count method takes an options Hash:
721
+ # - :caching (default: nil)
722
+ # - :cache_blocks (default: true)
723
+ scoped.count(caching: 5000, cache_blocks: false)
718
724
  ```
719
725
 
720
726
  ## Basic aggregation using coprocessor
@@ -16,22 +16,29 @@ class Scoped
16
16
  Scoped.send(:new, @table)
17
17
  end
18
18
 
19
- # Number of rows in the scope
19
+ # Count the number of rows in the scope
20
20
  # @return [Fixnum, Bignum] The number of rows in the scope
21
- def count
21
+ # @param [Hash] options Counting options
22
+ # @option options [Fixnum|nil] :caching The number of rows for caching that will be passed to scanners.
23
+ # Use higher values for faster scan.
24
+ # @option options [Boolean] :cache_blocks Whether blocks should be cached for this scan
25
+ def count options = {}
26
+ options = { :caching => nil,
27
+ :cache_blocks => true }.merge(options)
28
+
29
+ scan = block_given? ? filtered_scan : filtered_scan_minimum
30
+ scan.cache_blocks = options[:cache_blocks]
31
+ if options[:caching] && (@mlimit.nil? || options[:caching] < @mlimit)
32
+ scan.caching = options[:caching]
33
+ end
34
+
22
35
  cnt = 0
23
- begin
24
- if block_given?
25
- scanner = htable.getScanner(filtered_scan)
26
- scanner.each do |result|
27
- cnt += 1 if yield(Row.send(:new, @table, result))
28
- end
29
- else
30
- scanner = htable.getScanner(filtered_scan_minimum)
31
- scanner.each { cnt += 1 }
36
+ if block_given?
37
+ iterate(scan) do |result|
38
+ cnt += 1 if yield(Row.send(:new, @table, result))
32
39
  end
33
- ensure
34
- scanner.close if scanner
40
+ else
41
+ iterate(scan) { |r| cnt += 1 }
35
42
  end
36
43
  cnt
37
44
  end
@@ -65,13 +72,8 @@ class Scoped
65
72
  def each
66
73
  return enum_for(:each) unless block_given?
67
74
 
68
- begin
69
- scanner = htable.getScanner(filtered_scan)
70
- scanner.each do |result|
71
- yield Row.send(:new, @table, result)
72
- end
73
- ensure
74
- scanner.close if scanner
75
+ iterate(filtered_scan) do |result|
76
+ yield Row.send(:new, @table, result)
75
77
  end
76
78
  end
77
79
 
@@ -158,11 +160,13 @@ class Scoped
158
160
  end
159
161
 
160
162
  # Returns an HBase::Scoped object with the specified row number limit
161
- # @param [Fixnum] rows Sets the maximum number of rows to return from scan
163
+ # @param [Fixnum|nil] rows Sets the maximum number of rows to return from scan
162
164
  # @return [HBase::Scoped] HBase::Scoped object with the specified row number limit
163
165
  def limit rows
164
- raise ArgumentError, "Invalid limit. Must be a non-negative integer." unless rows.is_a?(Fixnum) && rows >= 0
165
- spawn :@limit, rows
166
+ unless (rows.is_a?(Fixnum) && rows >= 0) || rows.nil?
167
+ raise ArgumentError, "Invalid limit. Must be a non-negative integer or nil."
168
+ end
169
+ spawn :@limit, rows, :@mlimit, nil
166
170
  end
167
171
 
168
172
  # Returns an HBase::Scoped object with the specified time range
@@ -259,6 +263,7 @@ private
259
263
  @batch = nil
260
264
  @caching = nil
261
265
  @limit = nil
266
+ @mlimit = nil
262
267
  @trange = nil
263
268
  @scan_cbs = []
264
269
  @get_cbs = []
@@ -523,11 +528,12 @@ private
523
528
 
524
529
  # Limit
525
530
  if @limit
526
- # setMaxResultSize not implemented in 0.92
531
+ # setMaxResultSize not yet implemented in 0.94
527
532
  if scan.respond_to?(:setMaxResultSize)
528
533
  scan.setMaxResultSize(@limit)
529
534
  else
530
- raise NotImplementedError, 'Scan.setMaxResultSize not implemented'
535
+ @mlimit = @limit
536
+ scan.caching = [@mlimit, @caching].compact.min
531
537
  end
532
538
  end
533
539
 
@@ -560,15 +566,23 @@ private
560
566
  # @private
561
567
  def filtered_scan_minimum
562
568
  filtered_scan.tap do |scan|
563
- scan.cache_blocks = false
564
569
  scan.setMaxVersions 1
565
570
 
566
- # A filter that will only return the first KV from each row
567
- # A filter that will only return the key component of each KV
568
- unless scan.getFilter
569
- filters = [FirstKeyOnlyFilter.new, KeyOnlyFilter.new]
570
- scan.setFilter FilterList.new(filters)
571
+ # FirstKeyOnlyFilter: A filter that will only return the first KV from each row
572
+ # - Not compatible with SingleColumnValueFilter
573
+ # KeyOnlyFilter: A filter that will only return the key component of each KV
574
+ # - Compatible with SingleColumnValueFilter
575
+ ko = KeyOnlyFilter.new
576
+ if flist = scan.getFilter
577
+ if flist.is_a?(FilterList)
578
+ flist.addFilter ko
579
+ else
580
+ flist = FilterList.new([flist, ko])
581
+ end
582
+ else
583
+ flist = FilterList.new([ko, FirstKeyOnlyFilter.new])
571
584
  end
585
+ scan.setFilter flist
572
586
  end
573
587
  end
574
588
 
@@ -633,6 +647,22 @@ private
633
647
  def check_closed
634
648
  raise RuntimeError, "HBase connection is already closed" if @table.closed?
635
649
  end
650
+
651
+ def iterate scan
652
+ scanner = htable.getScanner(scan)
653
+ if @mlimit
654
+ scanner.each_with_index do |result, idx|
655
+ yield result
656
+ break if idx == @mlimit - 1
657
+ end
658
+ else
659
+ scanner.each do |result|
660
+ yield result
661
+ end
662
+ end
663
+ ensure
664
+ scanner.close if scanner
665
+ end
636
666
  end#Scoped
637
667
  end#HBase
638
668
 
@@ -1,5 +1,5 @@
1
1
  class HBase
2
2
  module JRuby
3
- VERSION = '0.3.4'
3
+ VERSION = '0.3.5'
4
4
  end
5
5
  end
data/test/test_scoped.rb CHANGED
@@ -81,14 +81,34 @@ class TestScoped < TestHBaseJRubyBase
81
81
  assert_equal 10, @table.range(111..120).count
82
82
 
83
83
  # Start key ~ Stop key (inclusive) + limit
84
- begin
85
- assert_equal 5, @table.range(111..120).limit(5).count
86
- rescue NotImplementedError
87
- end
84
+ assert_equal 5, @table.range(111..120).limit(5).count
85
+ assert_equal 5, @table.range(111..120).limit(5).to_a.count
86
+ assert_equal 10, @table.range(111..120).limit(5).limit(nil).to_a.count
87
+ scoped = @table.range(111..120).limit(5)
88
+ assert_equal 5, scoped.count
89
+
90
+ # Scan.setCaching should be called when setMaxResultSize is not implemented
91
+ done = false
92
+ scoped.with_java_scan { |scan|
93
+ assert scan.respond_to?(:setMaxResultSize) || scan.caching == 5
94
+ done = true
95
+ }.count
96
+ assert done
97
+
98
+ assert_equal 10, scoped.limit(nil).count
99
+
100
+ # Filters
101
+ assert_equal 1, @table.filter('cf1:a' => 135).count
102
+ assert_equal 1, @table.filter('cf2:b' => 135).count
103
+ assert_equal 10, @table.filter('cf1:a' => 131..140).count
104
+ assert_equal 10, @table.filter('cf2:b' => 131..140).count
105
+ assert_equal 10, @table.project(:prefix => 'a').filter('cf2:b' => 131..140).count
88
106
 
89
107
  # Start key ~ Stop key (inclusive) + filters
90
108
  assert_equal 10, @table.range(111..150).filter('cf1:a' => 131..140).count
109
+ assert_equal 10, @table.range(111..150).filter('cf2:b' => 131..140).count
91
110
  assert_equal 9, @table.range(111..150).filter('cf1:a' => 131...140).count
111
+ assert_equal 9, @table.range(111..150).filter('cf2:b' => 131...140).count
92
112
  assert_equal 2, @table.range(111..150).filter('cf1:a' => 131...140, 'cf2:b' => 132..133).count
93
113
 
94
114
  # Count with block
@@ -522,5 +542,22 @@ class TestScoped < TestHBaseJRubyBase
522
542
  get.setTimeRange(t2.to_i * 1000, t4.to_i * 1000)
523
543
  }.get(rks).compact.count
524
544
  end
545
+
546
+ def test_count_options
547
+ # TODO how to confirm?
548
+
549
+ (101..150).each do |i|
550
+ @table.put(i, 'cf1:a' => i, 'cf2:b' => i, 'cf3:c' => i * 3)
551
+ end
552
+
553
+ assert_equal 50, @table.count(:cache_blocks => false)
554
+ assert_equal 50, @table.count(:cache_blocks => true)
555
+
556
+ assert_equal 50, @table.count(:caching => nil)
557
+ assert_equal 50, @table.count(:caching => 5)
558
+ assert_equal 50, @table.count(:caching => 500)
559
+ assert_equal 15, @table.limit(15).count(:caching => 500)
560
+ assert_equal 15, @table.limit(15).count(:caching => 500, :cache_blocks => false)
561
+ end
525
562
  end
526
563
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hbase-jruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.5
5
5
  prerelease:
6
6
  platform: java
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-05 00:00:00.000000000 Z
12
+ date: 2013-07-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: test-unit