hyperll 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +43 -0
- data/Rakefile +5 -0
- data/hyperll.gemspec +24 -0
- data/lib/hyperll.rb +2 -0
- data/lib/hyperll/hyper_log_log.rb +86 -0
- data/lib/hyperll/murmur_hash.rb +94 -0
- data/lib/hyperll/register_set.rb +86 -0
- data/lib/hyperll/version.rb +3 -0
- data/spec/hyperll/hyper_log_log_spec.rb +70 -0
- data/spec/hyperll/murmur_hash_spec.rb +25 -0
- data/spec/hyperll/register_set_spec.rb +71 -0
- data/spec/spec_helper.rb +17 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 08b6c537f72013125c31cab21a5fc92592d5e3fc
|
4
|
+
data.tar.gz: 4ceb4deff0578637df2c0330347fc530b676aa81
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 61bb663e66ccf133d5db20518961867efb350f2a372227de0f49c1c28ac32484c7fd609024274d6fbea9995a39a8ce9e47e8977ae5b91c2c05242dbc1365849d
|
7
|
+
data.tar.gz: 3ce7d45798faf5c98a779767760fd325edbe015fe7e8537a063aa25b00d85e123006f3d1be6158ebd023904e0365b18a4a4b195cae32b2cbc22d3a5026700f69
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Andy Lindeman
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# Hyperll
|
2
|
+
|
3
|
+
HyperLogLog implementation in pure Ruby
|
4
|
+
|
5
|
+
## Usage
|
6
|
+
|
7
|
+
HyperLogLog stores an estimation of the cardinality of a set. It can be merged
|
8
|
+
with other HyperLogLog instances.
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
hll = Hyperll::HyperLogLog.new(10)
|
12
|
+
hll.offer(1)
|
13
|
+
hll.offer(2)
|
14
|
+
hll.offer(3)
|
15
|
+
hll.cardinality # => 3
|
16
|
+
|
17
|
+
hll2 = Hyperll::HyperLogLog.new(10)
|
18
|
+
hll2.offer(3)
|
19
|
+
hll2.offer(4)
|
20
|
+
hll2.offer(5)
|
21
|
+
hll2.cardinality # => 3
|
22
|
+
|
23
|
+
merged = Hyperll::HyperLogLog.new(10)
|
24
|
+
merged.merge(hll, hll2)
|
25
|
+
merged.cardinality # => 5
|
26
|
+
```
|
27
|
+
|
28
|
+
### Serialization
|
29
|
+
|
30
|
+
HyperLogLog can be serialized to a binary string. It is compatible with the
|
31
|
+
binary format from the Java [stream-lib](https://github.com/addthis/stream-lib)
|
32
|
+
library.
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
hll = Hyperll::HyperLogLog.new(4)
|
36
|
+
hll.offer(1)
|
37
|
+
hll.offer(2)
|
38
|
+
hll.offer(3)
|
39
|
+
hll.serialize # => "\x00\x00\x00\x04\x00\x00\x00\f\x02\x00\x00\x00\x00\x00\x88\x00\x00\x00\x00\x00"
|
40
|
+
|
41
|
+
hll2 = Hyperll::HyperLogLog.unserialize("\x00\x00\x00\x04\x00\x00\x00\f\x02\x00\x00\x00\x00\x00\x88\x00\x00\x00\x00\x00")
|
42
|
+
hll2.cardinality # => 3
|
43
|
+
```
|
data/Rakefile
ADDED
data/hyperll.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'hyperll/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "hyperll"
|
8
|
+
spec.version = Hyperll::VERSION
|
9
|
+
spec.authors = ["Andy Lindeman"]
|
10
|
+
spec.email = ["andy@andylindeman.com"]
|
11
|
+
spec.description = %q{HyperLogLog implementation in pure Ruby}
|
12
|
+
spec.summary = %q{HyperLogLog implementation in pure Ruby}
|
13
|
+
spec.homepage = "https://github.com/alindeman/hyperll"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "rspec", "~> 2.14"
|
24
|
+
end
|
data/lib/hyperll.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
require_relative 'register_set'
|
2
|
+
require_relative 'murmur_hash'
|
3
|
+
|
4
|
+
module Hyperll
  # Cardinality estimator that keeps 2 ** log2m small registers and derives an
  # estimate of the number of distinct values offered from them.
  #
  # Values are hashed with MurmurHash and the registers are stored in a
  # RegisterSet; both are defined in sibling files of this library.
  class HyperLogLog
    INT_SIZE = 32
    INT_HASH = 0xFFFFFFFF

    attr_reader :log2m

    # Rebuilds an instance from the binary string produced by #serialize.
    #
    # The string is read as big-endian 32-bit words: log2m, the byte length
    # of the register data (not needed here, hence ignored), then the
    # register words themselves.
    def self.unserialize(serialized)
      log2m, _register_bytes, *register_words = serialized.unpack("N*")
      new(log2m, RegisterSet.new(2 ** log2m, register_words))
    end

    # Constructs a new HyperLogLog instance
    #
    # log2m        - accuracy of the counter; larger values are more accurate
    # register_set - optional pre-populated register storage; a fresh
    #                RegisterSet with 2 ** log2m registers is created when
    #                omitted
    def initialize(log2m, register_set = nil)
      @log2m = log2m
      @count = 2 ** log2m
      @register_set = register_set || RegisterSet.new(@count)

      # Bias-correction constant, pre-multiplied by count squared.
      alpha =
        case log2m
        when 4 then 0.673
        when 5 then 0.697
        when 6 then 0.709
        else 0.7213 / (1 + 1.079 / @count)
        end
      @alphaMM = alpha * @count * @count
    end

    # Hashes obj with MurmurHash and records the hash.
    def offer(obj)
      offer_hashed(MurmurHash.hash(obj))
    end

    # Records an already-hashed 32-bit value.
    #
    # The top log2m bits select the register; the register receives the
    # leading-zero count (plus one) of the remaining bits if that is larger
    # than what it already holds. Returns whatever the register set's
    # update_if_greater returns.
    def offer_hashed(value)
      index = value >> (INT_SIZE - @log2m)
      # OR-ing in a low marker keeps the word non-zero, so the leading-zero
      # count is always defined and bounded.
      remainder = ((value << @log2m) & INT_HASH) | ((1 << (@log2m - 1)) + 1)
      @register_set.update_if_greater(index, number_of_leading_zeros(remainder) + 1)
    end

    # Returns the estimated number of distinct values offered, as an Integer.
    def cardinality
      register_sum = 0.0
      zeros = 0
      @register_set.each do |value|
        register_sum += 1.0 / (1 << value)
        zeros += 1 if value == 0
      end

      estimate = @alphaMM * (1 / register_sum)

      # The small-range (linear counting) correction is only defined while at
      # least one register is still empty. Without the zeros check,
      # log(count / 0) is Infinity and rounding it raises FloatDomainError.
      if estimate <= (5.0 / 2.0) * @count && zeros > 0
        (@count * Math.log(@count.to_f / zeros)).round
      else
        estimate.round
      end
    end

    # Folds the registers of every given instance into this one and returns
    # self. Raises if any of them was built with a different log2m.
    def merge(*others)
      raise "Cannot merge hyperloglogs of different sizes" unless others.all? { |o| o.log2m == log2m }

      others.each do |other|
        @register_set.merge(other.register_set)
      end

      self
    end

    # Returns a binary string: log2m and the register byte length as
    # big-endian 32-bit integers, followed by the serialized register set.
    def serialize
      [@log2m, @register_set.size * 4].pack("N*") + @register_set.serialize
    end

    protected

    # Number of leading zero bits of int when viewed as a 32-bit word.
    # Uses exact integer arithmetic rather than a floating-point logarithm.
    def number_of_leading_zeros(int)
      INT_SIZE - int.bit_length
    end

    def register_set
      @register_set
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module Hyperll
  # Adapted from <https://github.com/addthis/stream-lib/blob/master/src/main/java/com/clearspring/analytics/hash/MurmurHash.java>
  #
  # Produces unsigned 32-bit hashes. All intermediate state is masked to
  # 32 bits because Ruby integers do not wrap on overflow.
  class MurmurHash
    INT_MASK = 0xFFFFFFFF
    MIX_MULTIPLIER = 0x5bd1e995
    MIX_SHIFT = 24

    # Hashes obj and returns an Integer in 0..0xFFFFFFFF.
    #
    # Integers are hashed numerically; every other object is hashed through
    # the bytes of its #to_s representation.
    def self.hash(obj)
      if Integer === obj
        hash_int(obj)
      else
        hash_string(obj.to_s)
      end
    end

    # Hashes an integer as two 32-bit halves: the low 32 bits first, then
    # the bits above them.
    def self.hash_int(data)
      m = MIX_MULTIPLIER

      low = (data * m) & INT_MASK
      low ^= low >> MIX_SHIFT
      h = (low * m) & INT_MASK

      high = ((data >> 32) * m) & INT_MASK
      high ^= high >> MIX_SHIFT
      h = ((h * m) ^ (high * m)) & INT_MASK

      avalanche(h)
    end
    private_class_method :hash_int

    # Hashes the bytes of str, starting from seed.
    def self.hash_string(str, seed = -1)
      data = str.bytes
      length = data.length
      m = MIX_MULTIPLIER

      # Mask the initial state: the default seed is negative, and an unmasked
      # negative value would make the right shifts in the final mix behave as
      # arithmetic shifts. This only changes the result for the empty string,
      # where no later step masks the state.
      h = (seed ^ length) & INT_MASK

      # Consume the input four bytes at a time as little-endian words.
      (length >> 2).times do |word_index|
        base = word_index << 2
        k = data[base] | (data[base + 1] << 8) | (data[base + 2] << 16) | (data[base + 3] << 24)
        k = (k * m) & INT_MASK
        k ^= k >> MIX_SHIFT
        h = ((h * m) ^ (k * m)) & INT_MASK
      end

      # Fold in the one to three bytes left after the last full word.
      left = length & 3
      unless left.zero?
        h ^= data[length - 3] << 16 if left >= 3
        h ^= data[length - 2] << 8 if left >= 2
        h ^= data[length - 1]
        h = (h * m) & INT_MASK
      end

      avalanche(h)
    end
    private_class_method :hash_string

    # Final mixing step shared by both hash variants. Expects h to already be
    # within 0..0xFFFFFFFF and returns a value in the same range.
    def self.avalanche(h)
      h ^= h >> 13
      h = (h * MIX_MULTIPLIER) & INT_MASK
      h ^ (h >> 15)
    end
    private_class_method :avalanche
  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
module Hyperll
  # Packed storage for `count` registers of REGISTER_SIZE bits each.
  #
  # Registers are packed LOG2_BITS_PER_WORD (6) to a word, and the words are
  # serialized as big-endian 32-bit integers.
  class RegisterSet
    include Enumerable

    LOG2_BITS_PER_WORD = 6
    REGISTER_SIZE = 5
    INTEGER_SIZE = 32
    REGISTER_MASK = 0x1f

    attr_reader :count, :size

    # count  - number of registers
    # values - optional array of pre-populated words; it must contain exactly
    #          `size` entries and is copied, so later changes to the caller's
    #          array do not affect this set
    #
    # Raises ArgumentError when values has the wrong number of words.
    def initialize(count, values = nil)
      @count = count

      @bits = count / LOG2_BITS_PER_WORD
      @size =
        if @bits.zero?
          1
        elsif (@bits % INTEGER_SIZE).zero?
          @bits
        else
          @bits + 1
        end

      # A short array would otherwise only fail later, with NoMethodError on
      # nil, the first time a missing word is read.
      if values && values.length != @size
        raise ArgumentError, "expected #{@size} words for #{count} registers, got #{values.length}"
      end

      @values = values ? values.dup : Array.new(@size, 0)
    end

    # Stores value in the register at position.
    def []=(position, value)
      bucket, shift = locate(position)

      @values[bucket] = (@values[bucket] & ~(REGISTER_MASK << shift)) | (value << shift)
    end

    # Returns the value of the register at position.
    def [](position)
      bucket, shift = locate(position)

      (@values[bucket] & (REGISTER_MASK << shift)) >> shift
    end

    # Yields every register value in position order; returns an Enumerator
    # when called without a block.
    def each
      return enum_for(:each) unless block_given?

      @count.times do |i|
        yield self[i]
      end
    end

    # Stores value at position only if it is greater than the current value.
    # Returns true when the register was updated, false otherwise.
    def update_if_greater(position, value)
      bucket, shift = locate(position)
      mask = REGISTER_MASK << shift

      current_value = @values[bucket] & mask
      new_value = value << shift
      if current_value < new_value
        @values[bucket] = (@values[bucket] & ~mask) | new_value
        true
      else
        false
      end
    end

    # Replaces each register with the larger of its own value and the value
    # of the same register in other.
    #
    # Raises ArgumentError when other holds a different number of words.
    def merge(other)
      unless other.size == @size
        raise ArgumentError, "cannot merge register sets of different sizes (#{@size} vs #{other.size})"
      end

      their_values = other.values
      @size.times do |bucket|
        word = 0
        LOG2_BITS_PER_WORD.times do |j|
          mask = REGISTER_MASK << (REGISTER_SIZE * j)

          # Both candidates are masked in place, so comparing them compares
          # the register values and the winner can be OR-ed in directly.
          word |= [@values[bucket] & mask, their_values[bucket] & mask].max
        end

        @values[bucket] = word
      end
    end

    # Returns the words as a binary string of big-endian 32-bit integers.
    def serialize
      @values.pack("N*")
    end

    protected

    def values
      @values
    end

    private

    # Returns [word index, bit offset within that word] for a register.
    def locate(position)
      bucket = position / LOG2_BITS_PER_WORD
      [bucket, REGISTER_SIZE * (position - (bucket * LOG2_BITS_PER_WORD))]
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'hyperll/hyper_log_log'
|
2
|
+
|
3
|
+
module Hyperll
|
4
|
+
describe HyperLogLog do
|
5
|
+
it 'computes cardinality' do
|
6
|
+
hll = HyperLogLog.new(16)
|
7
|
+
hll.offer(0)
|
8
|
+
hll.offer(1)
|
9
|
+
hll.offer(2)
|
10
|
+
hll.offer(3)
|
11
|
+
hll.offer(16)
|
12
|
+
hll.offer(17)
|
13
|
+
hll.offer(18)
|
14
|
+
hll.offer(19)
|
15
|
+
hll.offer(19)
|
16
|
+
|
17
|
+
expect(hll.cardinality).to eq(8)
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'is accurate within an expected amount for high cardinalities' do
|
21
|
+
hll = HyperLogLog.new(10)
|
22
|
+
|
23
|
+
size = 1_000_000
|
24
|
+
size.times do
|
25
|
+
hll.offer(rand(2**63))
|
26
|
+
end
|
27
|
+
|
28
|
+
expect(hll.cardinality).to be_within(10).percent_of(size)
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'merges with other hyperloglog instances' do
|
32
|
+
size = 100_000
|
33
|
+
hlls = Array.new(5) do
|
34
|
+
HyperLogLog.new(16).tap { |hll|
|
35
|
+
size.times { hll.offer(rand(2**63)) }
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
merged = HyperLogLog.new(16)
|
40
|
+
merged.merge(*hlls)
|
41
|
+
|
42
|
+
expect(merged.cardinality).to be_within(10).percent_of(size * hlls.length)
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'serializes to a string' do
|
46
|
+
hll = HyperLogLog.new(4)
|
47
|
+
hll.offer(1)
|
48
|
+
hll.offer(2)
|
49
|
+
|
50
|
+
# h = Java::com::clearspring::analytics::stream::cardinality::HyperLogLog.new(4)
|
51
|
+
# h.offer(1)
|
52
|
+
# h.offer(2)
|
53
|
+
# h.getBytes()
|
54
|
+
expect(hll.serialize.unpack("C*")).to eq(
|
55
|
+
[0, 0, 0, 4, 0, 0, 0, 12, 2, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0]
|
56
|
+
)
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'unserializes from a string' do
|
60
|
+
serialized = [0, 0, 0, 4, 0, 0, 0, 12, 2, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0].pack("C*")
|
61
|
+
hll = HyperLogLog.unserialize(serialized)
|
62
|
+
|
63
|
+
expect(hll.cardinality).to eq(2)
|
64
|
+
hll.offer(1)
|
65
|
+
hll.offer(2)
|
66
|
+
hll.offer(3)
|
67
|
+
expect(hll.cardinality).to eq(3)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'hyperll/murmur_hash'
|
2
|
+
|
3
|
+
module Hyperll
|
4
|
+
describe MurmurHash do
|
5
|
+
it 'hashes integers' do
|
6
|
+
# java.lang.Integer.toHexString(Java::com::clearspring::analytics::hash::MurmurHash.hash(1))
|
7
|
+
expect(MurmurHash.hash(1)).to eq("5b04c018".to_i(16))
|
8
|
+
expect(MurmurHash.hash(2)).to eq("86e25492".to_i(16))
|
9
|
+
expect(MurmurHash.hash(1000)).to eq("a373b8db".to_i(16))
|
10
|
+
expect(MurmurHash.hash(5000)).to eq("5e1abaac".to_i(16))
|
11
|
+
expect(MurmurHash.hash(18_000_000)).to eq("347b61c7".to_i(16))
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'hashes integers larger than 32 bits' do
|
15
|
+
expect(MurmurHash.hash(2 ** 33)).to eq("ab332279".to_i(16))
|
16
|
+
expect(MurmurHash.hash((2 ** 36) - 1)).to eq("db264be3".to_i(16))
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'hashes strings' do
|
20
|
+
# java.lang.Integer.toHexString(Java::com::clearspring::analytics::hash::MurmurHash.hash("abc123"))
|
21
|
+
expect(MurmurHash.hash("abc123")).to eq("375301eb".to_i(16))
|
22
|
+
expect(MurmurHash.hash("The quick brown fox jumped over the lazy dog")).to eq("fe639b68".to_i(16))
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'hyperll/register_set'
|
2
|
+
|
3
|
+
module Hyperll
|
4
|
+
describe RegisterSet do
|
5
|
+
it "retrieves previously set values" do
|
6
|
+
rs = RegisterSet.new(2 ** 4)
|
7
|
+
|
8
|
+
rs[0] = 11
|
9
|
+
expect(rs[0]).to eq(11)
|
10
|
+
end
|
11
|
+
|
12
|
+
it "retrieves previously set values for small bits" do
|
13
|
+
rs = RegisterSet.new(6)
|
14
|
+
|
15
|
+
rs[0] = 11
|
16
|
+
expect(rs[0]).to eq(11)
|
17
|
+
end
|
18
|
+
|
19
|
+
it "merges with other register sets" do
|
20
|
+
rand = Random.new(2)
|
21
|
+
count = 32
|
22
|
+
rs = RegisterSet.new(count)
|
23
|
+
|
24
|
+
rss = []
|
25
|
+
5.times do |i|
|
26
|
+
rss[i] = RegisterSet.new(count)
|
27
|
+
count.times do |pos|
|
28
|
+
val = rand.rand(10)
|
29
|
+
rs.update_if_greater(pos, val)
|
30
|
+
rss[i][pos] = val
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
merged = RegisterSet.new(count)
|
35
|
+
rss.each do |set|
|
36
|
+
merged.merge(set)
|
37
|
+
end
|
38
|
+
|
39
|
+
rs.each_with_index do |value, index|
|
40
|
+
expect(value).to eq(merged[index])
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
it "merges with other register sets using update_if_greater" do
|
45
|
+
rand = Random.new(2)
|
46
|
+
count = 32
|
47
|
+
rs = RegisterSet.new(count)
|
48
|
+
|
49
|
+
rss = []
|
50
|
+
5.times do |i|
|
51
|
+
rss[i] = RegisterSet.new(count)
|
52
|
+
count.times do |pos|
|
53
|
+
val = rand.rand(10)
|
54
|
+
rs.update_if_greater(pos, val)
|
55
|
+
rss[i][pos] = val
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
merged = RegisterSet.new(count)
|
60
|
+
rss.each do |set|
|
61
|
+
set.each_with_index do |value, index|
|
62
|
+
merged.update_if_greater(index, value)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
rs.each_with_index do |value, index|
|
67
|
+
expect(value).to eq(merged[index])
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# Require this file using `require "spec_helper"` to ensure that it is only
|
4
|
+
# loaded once.
|
5
|
+
#
|
6
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
+
RSpec.configure do |config|
|
8
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true
|
9
|
+
config.run_all_when_everything_filtered = true
|
10
|
+
config.filter_run :focus
|
11
|
+
|
12
|
+
# Run specs in random order to surface order dependencies. If you find an
|
13
|
+
# order dependency and want to debug it, you can fix the order by providing
|
14
|
+
# the seed, which is printed after each run.
|
15
|
+
# --seed 1234
|
16
|
+
config.order = 'random'
|
17
|
+
end
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hyperll
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andy Lindeman
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-10-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.14'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.14'
|
55
|
+
description: HyperLogLog implementation in pure Ruby
|
56
|
+
email:
|
57
|
+
- andy@andylindeman.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- .gitignore
|
63
|
+
- .rspec
|
64
|
+
- Gemfile
|
65
|
+
- LICENSE.txt
|
66
|
+
- README.md
|
67
|
+
- Rakefile
|
68
|
+
- hyperll.gemspec
|
69
|
+
- lib/hyperll.rb
|
70
|
+
- lib/hyperll/hyper_log_log.rb
|
71
|
+
- lib/hyperll/murmur_hash.rb
|
72
|
+
- lib/hyperll/register_set.rb
|
73
|
+
- lib/hyperll/version.rb
|
74
|
+
- spec/hyperll/hyper_log_log_spec.rb
|
75
|
+
- spec/hyperll/murmur_hash_spec.rb
|
76
|
+
- spec/hyperll/register_set_spec.rb
|
77
|
+
- spec/spec_helper.rb
|
78
|
+
homepage: https://github.com/alindeman/hyperll
|
79
|
+
licenses:
|
80
|
+
- MIT
|
81
|
+
metadata: {}
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - '>='
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - '>='
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
requirements: []
|
97
|
+
rubyforge_project:
|
98
|
+
rubygems_version: 2.0.3
|
99
|
+
signing_key:
|
100
|
+
specification_version: 4
|
101
|
+
summary: HyperLogLog implementation in pure Ruby
|
102
|
+
test_files:
|
103
|
+
- spec/hyperll/hyper_log_log_spec.rb
|
104
|
+
- spec/hyperll/murmur_hash_spec.rb
|
105
|
+
- spec/hyperll/register_set_spec.rb
|
106
|
+
- spec/spec_helper.rb
|