hbase-jruby 0.3.4-java → 0.3.5-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +9 -0
- data/README.md +8 -2
- data/lib/hbase-jruby/scoped.rb +61 -31
- data/lib/hbase-jruby/version.rb +1 -1
- data/test/test_scoped.rb +41 -4
- metadata +2 -2
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,15 @@
|
|
1
1
|
Changelog
|
2
2
|
=========
|
3
3
|
|
4
|
+
0.3.5
|
5
|
+
-----
|
6
|
+
- Improved `Scoped#count` method
|
7
|
+
- KeyOnlyFilter turned out to be compatible with SingleColumnValueFilter
|
8
|
+
- Now takes an optional Hash: `scoped.count(cache_blocks: false, caching: 100)`
|
9
|
+
- Changed not to disable server-side block caching by default
|
10
|
+
- Supports `Scoped#limit` even when `Scan.setMaxResultSize` is not implemented
|
11
|
+
- `Scoped#limit(nil)` will remove the previously set value
|
12
|
+
|
4
13
|
0.3.4
|
5
14
|
-----
|
6
15
|
- Cleanup all thread-local (fiber-local) HTable references when connection is closed
|
data/README.md
CHANGED
@@ -710,11 +710,17 @@ end
|
|
710
710
|
# Scoped COUNT
|
711
711
|
# When counting the number of rows, use `HTable::Scoped#count`
|
712
712
|
# instead of just iterating through the scope, as it internally
|
713
|
-
# minimizes amount of data
|
713
|
+
# minimizes the amount of data transfer using KeyOnlyFilter
|
714
|
+
# (and FirstKeyOnlyFilter when no filter is set)
|
714
715
|
scoped.count
|
715
716
|
|
716
717
|
# This should be even faster as it dramatically reduces the number of RPC calls
|
717
|
-
scoped.caching(
|
718
|
+
scoped.caching(1000).count
|
719
|
+
|
720
|
+
# count method takes an options Hash:
|
721
|
+
# - :caching (default: nil)
|
722
|
+
# - :cache_blocks (default: true)
|
723
|
+
scoped.count(caching: 5000, cache_blocks: false)
|
718
724
|
```
|
719
725
|
|
720
726
|
## Basic aggregation using coprocessor
|
data/lib/hbase-jruby/scoped.rb
CHANGED
@@ -16,22 +16,29 @@ class Scoped
|
|
16
16
|
Scoped.send(:new, @table)
|
17
17
|
end
|
18
18
|
|
19
|
-
#
|
19
|
+
# Count the number of rows in the scope
|
20
20
|
# @return [Fixnum, Bignum] The number of rows in the scope
|
21
|
-
|
21
|
+
# @param [Hash] options Counting options
|
22
|
+
# @option options [Fixnum|nil] :caching The number of rows for caching that will be passed to scanners.
|
23
|
+
# Use higher values for faster scan.
|
24
|
+
# @option options [Boolean] :cache_blocks Whether blocks should be cached for this scan
|
25
|
+
def count options = {}
|
26
|
+
options = { :caching => nil,
|
27
|
+
:cache_blocks => true }.merge(options)
|
28
|
+
|
29
|
+
scan = block_given? ? filtered_scan : filtered_scan_minimum
|
30
|
+
scan.cache_blocks = options[:cache_blocks]
|
31
|
+
if options[:caching] && (@mlimit.nil? || options[:caching] < @mlimit)
|
32
|
+
scan.caching = options[:caching]
|
33
|
+
end
|
34
|
+
|
22
35
|
cnt = 0
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
scanner.each do |result|
|
27
|
-
cnt += 1 if yield(Row.send(:new, @table, result))
|
28
|
-
end
|
29
|
-
else
|
30
|
-
scanner = htable.getScanner(filtered_scan_minimum)
|
31
|
-
scanner.each { cnt += 1 }
|
36
|
+
if block_given?
|
37
|
+
iterate(scan) do |result|
|
38
|
+
cnt += 1 if yield(Row.send(:new, @table, result))
|
32
39
|
end
|
33
|
-
|
34
|
-
|
40
|
+
else
|
41
|
+
iterate(scan) { |r| cnt += 1 }
|
35
42
|
end
|
36
43
|
cnt
|
37
44
|
end
|
@@ -65,13 +72,8 @@ class Scoped
|
|
65
72
|
def each
|
66
73
|
return enum_for(:each) unless block_given?
|
67
74
|
|
68
|
-
|
69
|
-
|
70
|
-
scanner.each do |result|
|
71
|
-
yield Row.send(:new, @table, result)
|
72
|
-
end
|
73
|
-
ensure
|
74
|
-
scanner.close if scanner
|
75
|
+
iterate(filtered_scan) do |result|
|
76
|
+
yield Row.send(:new, @table, result)
|
75
77
|
end
|
76
78
|
end
|
77
79
|
|
@@ -158,11 +160,13 @@ class Scoped
|
|
158
160
|
end
|
159
161
|
|
160
162
|
# Returns an HBase::Scoped object with the specified row number limit
|
161
|
-
# @param [Fixnum] rows Sets the maximum number of rows to return from scan
|
163
|
+
# @param [Fixnum|nil] rows Sets the maximum number of rows to return from scan
|
162
164
|
# @return [HBase::Scoped] HBase::Scoped object with the specified row number limit
|
163
165
|
def limit rows
|
164
|
-
|
165
|
-
|
166
|
+
unless (rows.is_a?(Fixnum) && rows >= 0) || rows.nil?
|
167
|
+
raise ArgumentError, "Invalid limit. Must be a non-negative integer or nil."
|
168
|
+
end
|
169
|
+
spawn :@limit, rows, :@mlimit, nil
|
166
170
|
end
|
167
171
|
|
168
172
|
# Returns an HBase::Scoped object with the specified time range
|
@@ -259,6 +263,7 @@ private
|
|
259
263
|
@batch = nil
|
260
264
|
@caching = nil
|
261
265
|
@limit = nil
|
266
|
+
@mlimit = nil
|
262
267
|
@trange = nil
|
263
268
|
@scan_cbs = []
|
264
269
|
@get_cbs = []
|
@@ -523,11 +528,12 @@ private
|
|
523
528
|
|
524
529
|
# Limit
|
525
530
|
if @limit
|
526
|
-
# setMaxResultSize not implemented in 0.
|
531
|
+
# setMaxResultSize not yet implemented in 0.94
|
527
532
|
if scan.respond_to?(:setMaxResultSize)
|
528
533
|
scan.setMaxResultSize(@limit)
|
529
534
|
else
|
530
|
-
|
535
|
+
@mlimit = @limit
|
536
|
+
scan.caching = [@mlimit, @caching].compact.min
|
531
537
|
end
|
532
538
|
end
|
533
539
|
|
@@ -560,15 +566,23 @@ private
|
|
560
566
|
# @private
|
561
567
|
def filtered_scan_minimum
|
562
568
|
filtered_scan.tap do |scan|
|
563
|
-
scan.cache_blocks = false
|
564
569
|
scan.setMaxVersions 1
|
565
570
|
|
566
|
-
# A filter that will only return the first KV from each row
|
567
|
-
#
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
+
# FirstKeyOnlyFilter: A filter that will only return the first KV from each row
|
572
|
+
# - Not compatible with SingleColumnValueFilter
|
573
|
+
# KeyOnlyFilter: A filter that will only return the key component of each KV
|
574
|
+
# - Compatible with SingleColumnValueFilter
|
575
|
+
ko = KeyOnlyFilter.new
|
576
|
+
if flist = scan.getFilter
|
577
|
+
if flist.is_a?(FilterList)
|
578
|
+
flist.addFilter ko
|
579
|
+
else
|
580
|
+
flist = FilterList.new([flist, ko])
|
581
|
+
end
|
582
|
+
else
|
583
|
+
flist = FilterList.new([ko, FirstKeyOnlyFilter.new])
|
571
584
|
end
|
585
|
+
scan.setFilter flist
|
572
586
|
end
|
573
587
|
end
|
574
588
|
|
@@ -633,6 +647,22 @@ private
|
|
633
647
|
def check_closed
|
634
648
|
raise RuntimeError, "HBase connection is already closed" if @table.closed?
|
635
649
|
end
|
650
|
+
|
651
|
+
def iterate scan
|
652
|
+
scanner = htable.getScanner(scan)
|
653
|
+
if @mlimit
|
654
|
+
scanner.each_with_index do |result, idx|
|
655
|
+
yield result
|
656
|
+
break if idx == @mlimit - 1
|
657
|
+
end
|
658
|
+
else
|
659
|
+
scanner.each do |result|
|
660
|
+
yield result
|
661
|
+
end
|
662
|
+
end
|
663
|
+
ensure
|
664
|
+
scanner.close if scanner
|
665
|
+
end
|
636
666
|
end#Scoped
|
637
667
|
end#HBase
|
638
668
|
|
data/lib/hbase-jruby/version.rb
CHANGED
data/test/test_scoped.rb
CHANGED
@@ -81,14 +81,34 @@ class TestScoped < TestHBaseJRubyBase
|
|
81
81
|
assert_equal 10, @table.range(111..120).count
|
82
82
|
|
83
83
|
# Start key ~ Stop key (inclusive) + limit
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
84
|
+
assert_equal 5, @table.range(111..120).limit(5).count
|
85
|
+
assert_equal 5, @table.range(111..120).limit(5).to_a.count
|
86
|
+
assert_equal 10, @table.range(111..120).limit(5).limit(nil).to_a.count
|
87
|
+
scoped = @table.range(111..120).limit(5)
|
88
|
+
assert_equal 5, scoped.count
|
89
|
+
|
90
|
+
# Scan.setCaching should be called when setMaxResultSize is not implemented
|
91
|
+
done = false
|
92
|
+
scoped.with_java_scan { |scan|
|
93
|
+
assert scan.respond_to?(:setMaxResultSize) || scan.caching == 5
|
94
|
+
done = true
|
95
|
+
}.count
|
96
|
+
assert done
|
97
|
+
|
98
|
+
assert_equal 10, scoped.limit(nil).count
|
99
|
+
|
100
|
+
# Filters
|
101
|
+
assert_equal 1, @table.filter('cf1:a' => 135).count
|
102
|
+
assert_equal 1, @table.filter('cf2:b' => 135).count
|
103
|
+
assert_equal 10, @table.filter('cf1:a' => 131..140).count
|
104
|
+
assert_equal 10, @table.filter('cf2:b' => 131..140).count
|
105
|
+
assert_equal 10, @table.project(:prefix => 'a').filter('cf2:b' => 131..140).count
|
88
106
|
|
89
107
|
# Start key ~ Stop key (inclusive) + filters
|
90
108
|
assert_equal 10, @table.range(111..150).filter('cf1:a' => 131..140).count
|
109
|
+
assert_equal 10, @table.range(111..150).filter('cf2:b' => 131..140).count
|
91
110
|
assert_equal 9, @table.range(111..150).filter('cf1:a' => 131...140).count
|
111
|
+
assert_equal 9, @table.range(111..150).filter('cf2:b' => 131...140).count
|
92
112
|
assert_equal 2, @table.range(111..150).filter('cf1:a' => 131...140, 'cf2:b' => 132..133).count
|
93
113
|
|
94
114
|
# Count with block
|
@@ -522,5 +542,22 @@ class TestScoped < TestHBaseJRubyBase
|
|
522
542
|
get.setTimeRange(t2.to_i * 1000, t4.to_i * 1000)
|
523
543
|
}.get(rks).compact.count
|
524
544
|
end
|
545
|
+
|
546
|
+
def test_count_options
|
547
|
+
# TODO how to confirm?
|
548
|
+
|
549
|
+
(101..150).each do |i|
|
550
|
+
@table.put(i, 'cf1:a' => i, 'cf2:b' => i, 'cf3:c' => i * 3)
|
551
|
+
end
|
552
|
+
|
553
|
+
assert_equal 50, @table.count(:cache_blocks => false)
|
554
|
+
assert_equal 50, @table.count(:cache_blocks => true)
|
555
|
+
|
556
|
+
assert_equal 50, @table.count(:caching => nil)
|
557
|
+
assert_equal 50, @table.count(:caching => 5)
|
558
|
+
assert_equal 50, @table.count(:caching => 500)
|
559
|
+
assert_equal 15, @table.limit(15).count(:caching => 500)
|
560
|
+
assert_equal 15, @table.limit(15).count(:caching => 500, :cache_blocks => false)
|
561
|
+
end
|
525
562
|
end
|
526
563
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hbase-jruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.3.5
|
5
5
|
prerelease:
|
6
6
|
platform: java
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-07-
|
12
|
+
date: 2013-07-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: test-unit
|