hbase-jruby 0.3.4-java → 0.3.5-java
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +9 -0
- data/README.md +8 -2
- data/lib/hbase-jruby/scoped.rb +61 -31
- data/lib/hbase-jruby/version.rb +1 -1
- data/test/test_scoped.rb +41 -4
- metadata +2 -2
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,15 @@
|
|
1
1
|
Changelog
|
2
2
|
=========
|
3
3
|
|
4
|
+
0.3.5
|
5
|
+
-----
|
6
|
+
- Improved `Scoped#count` method
|
7
|
+
- KeyOnlyFilter turned out to be compatible with SingleColumnValueFilter
|
8
|
+
- Now takes an optional Hash: `scoped.count(cache_blocks: false, caching: 100)`
|
9
|
+
- No longer disables server-side block caching by default
|
10
|
+
- Supports `Scoped#limit` even when `Scan.setMaxResultSize` is not implemented
|
11
|
+
- `Scoped#limit(nil)` will remove the previously set value
|
12
|
+
|
4
13
|
0.3.4
|
5
14
|
-----
|
6
15
|
- Cleanup all thread-local (fiber-local) HTable references when connection is closed
|
data/README.md
CHANGED
@@ -710,11 +710,17 @@ end
|
|
710
710
|
# Scoped COUNT
|
711
711
|
# When counting the number of rows, use `HTable::Scoped#count`
|
712
712
|
# instead of just iterating through the scope, as it internally
|
713
|
-
# minimizes amount of data
|
713
|
+
# minimizes the amount of data transfer using KeyOnlyFilter
|
714
|
+
# (and FirstKeyOnlyFilter when no filter is set)
|
714
715
|
scoped.count
|
715
716
|
|
716
717
|
# This should be even faster as it dramatically reduces the number of RPC calls
|
717
|
-
scoped.caching(
|
718
|
+
scoped.caching(1000).count
|
719
|
+
|
720
|
+
# count method takes an options Hash:
|
721
|
+
# - :caching (default: nil)
|
722
|
+
# - :cache_blocks (default: true)
|
723
|
+
scoped.count(caching: 5000, cache_blocks: false)
|
718
724
|
```
|
719
725
|
|
720
726
|
## Basic aggregation using coprocessor
|
data/lib/hbase-jruby/scoped.rb
CHANGED
@@ -16,22 +16,29 @@ class Scoped
|
|
16
16
|
Scoped.send(:new, @table)
|
17
17
|
end
|
18
18
|
|
19
|
-
#
|
19
|
+
# Count the number of rows in the scope
|
20
20
|
# @return [Fixnum, Bignum] The number of rows in the scope
|
21
|
-
|
21
|
+
# @param [Hash] options Counting options
|
22
|
+
# @option options [Fixnum|nil] :caching The number of rows for caching that will be passed to scanners.
|
23
|
+
# Use higher values for faster scan.
|
24
|
+
# @option options [Boolean] :cache_blocks Whether blocks should be cached for this scan
|
25
|
+
def count options = {}
|
26
|
+
options = { :caching => nil,
|
27
|
+
:cache_blocks => true }.merge(options)
|
28
|
+
|
29
|
+
scan = block_given? ? filtered_scan : filtered_scan_minimum
|
30
|
+
scan.cache_blocks = options[:cache_blocks]
|
31
|
+
if options[:caching] && (@mlimit.nil? || options[:caching] < @mlimit)
|
32
|
+
scan.caching = options[:caching]
|
33
|
+
end
|
34
|
+
|
22
35
|
cnt = 0
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
scanner.each do |result|
|
27
|
-
cnt += 1 if yield(Row.send(:new, @table, result))
|
28
|
-
end
|
29
|
-
else
|
30
|
-
scanner = htable.getScanner(filtered_scan_minimum)
|
31
|
-
scanner.each { cnt += 1 }
|
36
|
+
if block_given?
|
37
|
+
iterate(scan) do |result|
|
38
|
+
cnt += 1 if yield(Row.send(:new, @table, result))
|
32
39
|
end
|
33
|
-
|
34
|
-
|
40
|
+
else
|
41
|
+
iterate(scan) { |r| cnt += 1 }
|
35
42
|
end
|
36
43
|
cnt
|
37
44
|
end
|
@@ -65,13 +72,8 @@ class Scoped
|
|
65
72
|
def each
|
66
73
|
return enum_for(:each) unless block_given?
|
67
74
|
|
68
|
-
|
69
|
-
|
70
|
-
scanner.each do |result|
|
71
|
-
yield Row.send(:new, @table, result)
|
72
|
-
end
|
73
|
-
ensure
|
74
|
-
scanner.close if scanner
|
75
|
+
iterate(filtered_scan) do |result|
|
76
|
+
yield Row.send(:new, @table, result)
|
75
77
|
end
|
76
78
|
end
|
77
79
|
|
@@ -158,11 +160,13 @@ class Scoped
|
|
158
160
|
end
|
159
161
|
|
160
162
|
# Returns an HBase::Scoped object with the specified row number limit
|
161
|
-
# @param [Fixnum] rows Sets the maximum number of rows to return from scan
|
163
|
+
# @param [Fixnum|nil] rows Sets the maximum number of rows to return from scan
|
162
164
|
# @return [HBase::Scoped] HBase::Scoped object with the specified row number limit
|
163
165
|
def limit rows
|
164
|
-
|
165
|
-
|
166
|
+
unless (rows.is_a?(Fixnum) && rows >= 0) || rows.nil?
|
167
|
+
raise ArgumentError, "Invalid limit. Must be a non-negative integer or nil."
|
168
|
+
end
|
169
|
+
spawn :@limit, rows, :@mlimit, nil
|
166
170
|
end
|
167
171
|
|
168
172
|
# Returns an HBase::Scoped object with the specified time range
|
@@ -259,6 +263,7 @@ private
|
|
259
263
|
@batch = nil
|
260
264
|
@caching = nil
|
261
265
|
@limit = nil
|
266
|
+
@mlimit = nil
|
262
267
|
@trange = nil
|
263
268
|
@scan_cbs = []
|
264
269
|
@get_cbs = []
|
@@ -523,11 +528,12 @@ private
|
|
523
528
|
|
524
529
|
# Limit
|
525
530
|
if @limit
|
526
|
-
# setMaxResultSize not implemented in 0.
|
531
|
+
# setMaxResultSize not yet implemented in 0.94
|
527
532
|
if scan.respond_to?(:setMaxResultSize)
|
528
533
|
scan.setMaxResultSize(@limit)
|
529
534
|
else
|
530
|
-
|
535
|
+
@mlimit = @limit
|
536
|
+
scan.caching = [@mlimit, @caching].compact.min
|
531
537
|
end
|
532
538
|
end
|
533
539
|
|
@@ -560,15 +566,23 @@ private
|
|
560
566
|
# @private
|
561
567
|
def filtered_scan_minimum
|
562
568
|
filtered_scan.tap do |scan|
|
563
|
-
scan.cache_blocks = false
|
564
569
|
scan.setMaxVersions 1
|
565
570
|
|
566
|
-
# A filter that will only return the first KV from each row
|
567
|
-
#
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
+
# FirstKeyOnlyFilter: A filter that will only return the first KV from each row
|
572
|
+
# - Not compatible with SingleColumnValueFilter
|
573
|
+
# KeyOnlyFilter: A filter that will only return the key component of each KV
|
574
|
+
# - Compatible with SingleColumnValueFilter
|
575
|
+
ko = KeyOnlyFilter.new
|
576
|
+
if flist = scan.getFilter
|
577
|
+
if flist.is_a?(FilterList)
|
578
|
+
flist.addFilter ko
|
579
|
+
else
|
580
|
+
flist = FilterList.new([flist, ko])
|
581
|
+
end
|
582
|
+
else
|
583
|
+
flist = FilterList.new([ko, FirstKeyOnlyFilter.new])
|
571
584
|
end
|
585
|
+
scan.setFilter flist
|
572
586
|
end
|
573
587
|
end
|
574
588
|
|
@@ -633,6 +647,22 @@ private
|
|
633
647
|
def check_closed
|
634
648
|
raise RuntimeError, "HBase connection is already closed" if @table.closed?
|
635
649
|
end
|
650
|
+
|
651
|
+
def iterate scan
|
652
|
+
scanner = htable.getScanner(scan)
|
653
|
+
if @mlimit
|
654
|
+
scanner.each_with_index do |result, idx|
|
655
|
+
yield result
|
656
|
+
break if idx == @mlimit - 1
|
657
|
+
end
|
658
|
+
else
|
659
|
+
scanner.each do |result|
|
660
|
+
yield result
|
661
|
+
end
|
662
|
+
end
|
663
|
+
ensure
|
664
|
+
scanner.close if scanner
|
665
|
+
end
|
636
666
|
end#Scoped
|
637
667
|
end#HBase
|
638
668
|
|
data/lib/hbase-jruby/version.rb
CHANGED
data/test/test_scoped.rb
CHANGED
@@ -81,14 +81,34 @@ class TestScoped < TestHBaseJRubyBase
|
|
81
81
|
assert_equal 10, @table.range(111..120).count
|
82
82
|
|
83
83
|
# Start key ~ Stop key (inclusive) + limit
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
84
|
+
assert_equal 5, @table.range(111..120).limit(5).count
|
85
|
+
assert_equal 5, @table.range(111..120).limit(5).to_a.count
|
86
|
+
assert_equal 10, @table.range(111..120).limit(5).limit(nil).to_a.count
|
87
|
+
scoped = @table.range(111..120).limit(5)
|
88
|
+
assert_equal 5, scoped.count
|
89
|
+
|
90
|
+
# Scan.setCaching should be called when setMaxResultSize is not implemented
|
91
|
+
done = false
|
92
|
+
scoped.with_java_scan { |scan|
|
93
|
+
assert scan.respond_to?(:setMaxResultSize) || scan.caching == 5
|
94
|
+
done = true
|
95
|
+
}.count
|
96
|
+
assert done
|
97
|
+
|
98
|
+
assert_equal 10, scoped.limit(nil).count
|
99
|
+
|
100
|
+
# Filters
|
101
|
+
assert_equal 1, @table.filter('cf1:a' => 135).count
|
102
|
+
assert_equal 1, @table.filter('cf2:b' => 135).count
|
103
|
+
assert_equal 10, @table.filter('cf1:a' => 131..140).count
|
104
|
+
assert_equal 10, @table.filter('cf2:b' => 131..140).count
|
105
|
+
assert_equal 10, @table.project(:prefix => 'a').filter('cf2:b' => 131..140).count
|
88
106
|
|
89
107
|
# Start key ~ Stop key (inclusive) + filters
|
90
108
|
assert_equal 10, @table.range(111..150).filter('cf1:a' => 131..140).count
|
109
|
+
assert_equal 10, @table.range(111..150).filter('cf2:b' => 131..140).count
|
91
110
|
assert_equal 9, @table.range(111..150).filter('cf1:a' => 131...140).count
|
111
|
+
assert_equal 9, @table.range(111..150).filter('cf2:b' => 131...140).count
|
92
112
|
assert_equal 2, @table.range(111..150).filter('cf1:a' => 131...140, 'cf2:b' => 132..133).count
|
93
113
|
|
94
114
|
# Count with block
|
@@ -522,5 +542,22 @@ class TestScoped < TestHBaseJRubyBase
|
|
522
542
|
get.setTimeRange(t2.to_i * 1000, t4.to_i * 1000)
|
523
543
|
}.get(rks).compact.count
|
524
544
|
end
|
545
|
+
|
546
|
+
def test_count_options
|
547
|
+
# TODO how to confirm?
|
548
|
+
|
549
|
+
(101..150).each do |i|
|
550
|
+
@table.put(i, 'cf1:a' => i, 'cf2:b' => i, 'cf3:c' => i * 3)
|
551
|
+
end
|
552
|
+
|
553
|
+
assert_equal 50, @table.count(:cache_blocks => false)
|
554
|
+
assert_equal 50, @table.count(:cache_blocks => true)
|
555
|
+
|
556
|
+
assert_equal 50, @table.count(:caching => nil)
|
557
|
+
assert_equal 50, @table.count(:caching => 5)
|
558
|
+
assert_equal 50, @table.count(:caching => 500)
|
559
|
+
assert_equal 15, @table.limit(15).count(:caching => 500)
|
560
|
+
assert_equal 15, @table.limit(15).count(:caching => 500, :cache_blocks => false)
|
561
|
+
end
|
525
562
|
end
|
526
563
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hbase-jruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.4
|
4
|
+
version: 0.3.5
|
5
5
|
prerelease:
|
6
6
|
platform: java
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-07-
|
12
|
+
date: 2013-07-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: test-unit
|