hyperll 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +43 -0
- data/Rakefile +5 -0
- data/hyperll.gemspec +24 -0
- data/lib/hyperll.rb +2 -0
- data/lib/hyperll/hyper_log_log.rb +86 -0
- data/lib/hyperll/murmur_hash.rb +94 -0
- data/lib/hyperll/register_set.rb +86 -0
- data/lib/hyperll/version.rb +3 -0
- data/spec/hyperll/hyper_log_log_spec.rb +70 -0
- data/spec/hyperll/murmur_hash_spec.rb +25 -0
- data/spec/hyperll/register_set_spec.rb +71 -0
- data/spec/spec_helper.rb +17 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 08b6c537f72013125c31cab21a5fc92592d5e3fc
|
4
|
+
data.tar.gz: 4ceb4deff0578637df2c0330347fc530b676aa81
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 61bb663e66ccf133d5db20518961867efb350f2a372227de0f49c1c28ac32484c7fd609024274d6fbea9995a39a8ce9e47e8977ae5b91c2c05242dbc1365849d
|
7
|
+
data.tar.gz: 3ce7d45798faf5c98a779767760fd325edbe015fe7e8537a063aa25b00d85e123006f3d1be6158ebd023904e0365b18a4a4b195cae32b2cbc22d3a5026700f69
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Andy Lindeman
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# Hyperll
|
2
|
+
|
3
|
+
HyperLogLog implementation in pure Ruby
|
4
|
+
|
5
|
+
## Usage
|
6
|
+
|
7
|
+
HyperLogLog stores an estimation of the cardinality of a set. It can be merged
|
8
|
+
with other HyperLogLog instances.
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
hll = Hyperll::HyperLogLog.new(10)
|
12
|
+
hll.offer(1)
|
13
|
+
hll.offer(2)
|
14
|
+
hll.offer(3)
|
15
|
+
hll.cardinality # => 3
|
16
|
+
|
17
|
+
hll2 = Hyperll::HyperLogLog.new(10)
|
18
|
+
hll2.offer(3)
|
19
|
+
hll2.offer(4)
|
20
|
+
hll2.offer(5)
|
21
|
+
hll2.cardinality # => 3
|
22
|
+
|
23
|
+
merged = Hyperll::HyperLogLog.new(10)
|
24
|
+
merged.merge(hll, hll2)
|
25
|
+
merged.cardinality # => 5
|
26
|
+
```
|
27
|
+
|
28
|
+
### Serialization
|
29
|
+
|
30
|
+
HyperLogLog can be serialized to a binary string. It is compatible with the
|
31
|
+
binary format from the Java [stream-lib](https://github.com/addthis/stream-lib)
|
32
|
+
library.
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
hll = Hyperll::HyperLogLog.new(4)
|
36
|
+
hll.offer(1)
|
37
|
+
hll.offer(2)
|
38
|
+
hll.offer(3)
|
39
|
+
hll.serialize # => "\x00\x00\x00\x04\x00\x00\x00\f\x02\x00\x00\x00\x00\x00\x88\x00\x00\x00\x00\x00"
|
40
|
+
|
41
|
+
hll2 = Hyperll::HyperLogLog.unserialize("\x00\x00\x00\x04\x00\x00\x00\f\x02\x00\x00\x00\x00\x00\x88\x00\x00\x00\x00\x00")
|
42
|
+
hll2.cardinality # => 3
|
43
|
+
```
|
data/Rakefile
ADDED
data/hyperll.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'hyperll/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "hyperll"
|
8
|
+
spec.version = Hyperll::VERSION
|
9
|
+
spec.authors = ["Andy Lindeman"]
|
10
|
+
spec.email = ["andy@andylindeman.com"]
|
11
|
+
spec.description = %q{HyperLogLog implementation in pure Ruby}
|
12
|
+
spec.summary = %q{HyperLogLog implementation in pure Ruby}
|
13
|
+
spec.homepage = "https://github.com/alindeman/hyperll"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "rspec", "~> 2.14"
|
24
|
+
end
|
data/lib/hyperll.rb
ADDED
data/lib/hyperll/hyper_log_log.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
require_relative 'register_set'
|
2
|
+
require_relative 'murmur_hash'
|
3
|
+
|
4
|
+
module Hyperll
|
5
|
+
# Cardinality estimator backed by a RegisterSet of 2**log2m registers.
#
# Values are hashed to 32 bits; the top log2m bits of the hash select a
# register and the remaining bits contribute a leading-zero rank.
class HyperLogLog
  # Width, in bits, of the hashes handled by #offer_hashed.
  INT_SIZE = 32
  # Mask that truncates an Integer to its low 32 bits.
  INT_HASH = 0xFFFFFFFF

  attr_reader :log2m

  # Rebuilds an instance from the string produced by #serialize.
  #
  # serialized - binary string of big-endian 32-bit words: log2m, the byte
  #              length of the register data, then the register words.
  #
  # Returns a new HyperLogLog.
  def self.unserialize(serialized)
    # The byte-length word is implied by log2m, so it is read and discarded.
    log2m, _rs_size, *rs_values = serialized.unpack("N*")
    new(log2m, RegisterSet.new(2 ** log2m, rs_values))
  end

  # Constructs a new HyperLogLog instance
  #
  # log2m        - accuracy of the counter; larger values are more accurate
  # register_set - optional pre-populated register storage; a fresh
  #                RegisterSet of 2**log2m registers is created when omitted
  def initialize(log2m, register_set = nil)
    @log2m = log2m
    @count = 2 ** log2m
    @register_set = register_set || RegisterSet.new(@count)

    # alpha * m * m normalisation term of the raw estimate; small register
    # counts use fixed alpha constants, larger ones the closed-form value.
    case log2m
    when 4
      @alphaMM = 0.673 * @count * @count
    when 5
      @alphaMM = 0.697 * @count * @count
    when 6
      @alphaMM = 0.709 * @count * @count
    else
      @alphaMM = (0.7213 / (1 + 1.079 / @count)) * @count * @count
    end
  end

  # Hashes obj with MurmurHash and records it.
  #
  # Returns true when a register was raised, false otherwise.
  def offer(obj)
    offer_hashed(MurmurHash.hash(obj))
  end

  # Records an already-hashed value.
  #
  # value - non-negative Integer hash that fits in 32 bits
  #
  # Returns true when a register was raised, false otherwise.
  def offer_hashed(value)
    # Top log2m bits pick the register.
    j = value >> (INT_SIZE - @log2m)
    # Rank of the remaining bits. `+` binds tighter than `|`, so the OR-ed
    # term is (1 << (log2m - 1)) + 1, which keeps the argument non-zero.
    r = number_of_leading_zeros(((value << @log2m) & INT_HASH) | (1 << (@log2m - 1)) + 1) + 1
    @register_set.update_if_greater(j, r)
  end

  # Estimates the number of distinct values offered so far.
  #
  # Returns an Integer.
  def cardinality
    register_sum = 0.0
    zeros = 0.0
    @register_set.each do |value|
      register_sum += 1.0 / (1 << value)
      zeros += 1 if value == 0
    end

    estimate = @alphaMM * (1 / register_sum)
    # The small-range (linear counting) correction is only defined while at
    # least one register is still zero. Without the zeros check the
    # logarithm becomes Infinity and #round raises FloatDomainError.
    if estimate <= (5.0 / 2.0) * @count && zeros > 0
      (@count * Math.log(@count / zeros)).round
    else
      estimate.round
    end
  end

  # Folds the registers of the given instances into this one.
  #
  # others - HyperLogLog instances built with the same log2m
  #
  # Returns self.
  # Raises RuntimeError when any instance has a different log2m.
  def merge(*others)
    raise "Cannot merge hyperloglogs of different sizes" unless others.all? { |o| o.log2m == log2m }

    others.each do |other|
      @register_set.merge(other.register_set)
    end

    self
  end

  # Returns a binary string of big-endian 32-bit words: log2m, the register
  # data length in bytes, then the register words.
  def serialize
    [@log2m, @register_set.size * 4].pack("N*") + @register_set.serialize
  end

  protected

  # Leading zero bits of int viewed as a 32-bit word; int must be positive
  # and below 2**32.
  def number_of_leading_zeros(int)
    -(Math.log2(int).to_i - 31)
  end

  # Exposed to other instances so #merge can reach their registers.
  def register_set
    @register_set
  end
end
|
86
|
+
end
|
data/lib/hyperll/murmur_hash.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
module Hyperll
|
2
|
+
# Adapted from <https://github.com/addthis/stream-lib/blob/master/src/main/java/com/clearspring/analytics/hash/MurmurHash.java>
|
3
|
+
# 32-bit MurmurHash for Integers and Strings.
#
# Every intermediate value is truncated to 32 bits with INT_MASK, so the
# results are non-negative Integers below 2**32.
class MurmurHash
  INT_MASK = 0xFFFFFFFF

  # Hashes obj.
  #
  # obj - an Integer is hashed numerically; anything else is hashed through
  #       the bytes of its #to_s representation
  #
  # Returns a non-negative Integer below 2**32.
  def self.hash(obj)
    if Integer === obj
      hash_int(obj)
    else
      hash_string(obj.to_s)
    end
  end

  # Hashes an Integer by mixing its low 32 bits, then the bits above bit 31.
  def self.hash_int(data)
    m = 0x5bd1e995
    r = 24

    h = 0

    # Low word.
    k = (data * m) & INT_MASK
    k ^= k >> r
    h ^= (k * m) & INT_MASK

    # High word.
    k = ((data >> 32) * m) & INT_MASK
    k ^= k >> r
    h *= m
    h ^= k * m
    h &= INT_MASK

    # Final avalanche.
    h ^= h >> 13
    h *= m
    h &= INT_MASK
    h ^= h >> 15

    h & INT_MASK
  end
  private_class_method :hash_int

  # Hashes the bytes of str.
  #
  # str  - String to hash
  # seed - Integer seed
  def self.hash_string(str, seed = -1)
    data = str.bytes
    length = data.length

    m = 0x5bd1e995
    r = 24

    # Truncate to 32 bits immediately. With the default seed the XOR is
    # negative, and for an empty string nothing else masks h before the
    # right shifts below, which must behave as unsigned 32-bit shifts.
    h = (seed ^ length) & INT_MASK
    len_4 = length >> 2

    # Consume the input four bytes at a time, little-endian.
    0.upto(len_4 - 1) do |i|
      i_4 = i << 2
      k = data[i_4 + 3]
      k = k << 8
      k = k | (data[i_4 + 2] & 0xff)
      k = k << 8
      k = k | (data[i_4 + 1] & 0xff)
      k = k << 8
      k = k | (data[i_4 + 0] & 0xff)
      k *= m
      k &= INT_MASK
      k ^= k >> r
      k *= m
      h *= m
      h ^= k
      h &= INT_MASK
    end

    len_m = len_4 << 2
    left = length - len_m

    # Fold in the 1-3 trailing bytes, counted back from the end of the input.
    if left != 0
      if left >= 3
        h ^= data[length - 3] << 16
      end
      if left >= 2
        h ^= data[length - 2] << 8
      end
      if left >= 1
        h ^= data[length - 1]
      end

      h *= m
      h &= INT_MASK
    end

    # Final avalanche.
    h ^= h >> 13
    h *= m
    h &= INT_MASK
    h ^= h >> 15

    h
  end
  private_class_method :hash_string
end
|
94
|
+
end
|
data/lib/hyperll/register_set.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
module Hyperll
|
2
|
+
# Packed array of 5-bit registers stored in Integer words.
#
# Each word holds LOG2_BITS_PER_WORD (6) registers of REGISTER_SIZE (5) bits.
# Enumerating yields the value of every register in position order.
class RegisterSet
  include Enumerable

  # Number of registers packed into one word.
  LOG2_BITS_PER_WORD = 6
  # Width of one register in bits.
  REGISTER_SIZE = 5
  INTEGER_SIZE = 32

  attr_reader :count, :size

  # count  - number of registers
  # values - optional Array of pre-populated words; it is copied, and must
  #          contain at least `size` entries
  #
  # Raises ArgumentError when values is too short to back every register.
  def initialize(count, values = nil)
    @count = count

    @bits = count / LOG2_BITS_PER_WORD
    @size =
      if @bits.zero?
        1
      elsif (@bits % INTEGER_SIZE).zero?
        @bits
      else
        @bits + 1
      end

    # A short array would otherwise surface later as NoMethodError on nil.
    if values && values.length < @size
      raise ArgumentError, "expected at least #{@size} register words, got #{values.length}"
    end

    # Copy so that register updates never mutate the caller's array.
    @values = values ? values.dup : Array.new(@size, 0)
  end

  # Stores value in the register at position.
  def []=(position, value)
    bucket, shift = locate(position)

    @values[bucket] = (@values[bucket] & ~(0x1f << shift)) | (value << shift)
  end

  # Returns the value of the register at position.
  def [](position)
    bucket, shift = locate(position)

    (@values[bucket] & (0x1f << shift)) >> shift
  end

  # Yields each register value in position order; returns an Enumerator
  # when no block is given.
  def each
    return enum_for(:each) unless block_given?

    @count.times { |i| yield self[i] }
  end

  # Raises the register at position to value when value is larger.
  #
  # Returns true when the register changed, false otherwise.
  def update_if_greater(position, value)
    bucket, shift = locate(position)
    mask = 0x1f << shift

    # Both sides are compared in their shifted form.
    current_value = @values[bucket] & mask
    new_value = value << shift
    return false unless current_value < new_value

    @values[bucket] = (@values[bucket] & ~mask) | new_value
    true
  end

  # Replaces every register with the larger of its own value and the value
  # at the same position in other.
  #
  # Raises ArgumentError when other has a different number of words.
  def merge(other)
    unless other.size == @size
      raise ArgumentError, "cannot merge register sets of different sizes (#{@size} vs #{other.size})"
    end

    @size.times do |bucket|
      word = 0
      LOG2_BITS_PER_WORD.times do |j|
        mask = 0x1f << (REGISTER_SIZE * j)
        word |= [@values[bucket] & mask, other.values[bucket] & mask].max
      end

      @values[bucket] = word
    end
  end

  # Returns the words packed as big-endian 32-bit unsigned integers.
  def serialize
    @values.pack("N*")
  end

  protected

  # Exposed to other instances so #merge can read their words.
  def values
    @values
  end

  private

  # Maps a register position to [word index, bit offset inside that word].
  def locate(position)
    bucket = position / LOG2_BITS_PER_WORD
    [bucket, REGISTER_SIZE * (position - (bucket * LOG2_BITS_PER_WORD))]
  end
end
|
86
|
+
end
|
data/lib/hyperll/version.rb
ADDED
data/spec/hyperll/hyper_log_log_spec.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'hyperll/hyper_log_log'
|
2
|
+
|
3
|
+
module Hyperll
|
4
|
+
describe HyperLogLog do
|
5
|
+
it 'computes cardinality' do
|
6
|
+
hll = HyperLogLog.new(16)
|
7
|
+
hll.offer(0)
|
8
|
+
hll.offer(1)
|
9
|
+
hll.offer(2)
|
10
|
+
hll.offer(3)
|
11
|
+
hll.offer(16)
|
12
|
+
hll.offer(17)
|
13
|
+
hll.offer(18)
|
14
|
+
hll.offer(19)
|
15
|
+
hll.offer(19)
|
16
|
+
|
17
|
+
expect(hll.cardinality).to eq(8)
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'is accurate within an expected amount for high cardinalities' do
|
21
|
+
hll = HyperLogLog.new(10)
|
22
|
+
|
23
|
+
size = 1_000_000
|
24
|
+
size.times do
|
25
|
+
hll.offer(rand(2**63))
|
26
|
+
end
|
27
|
+
|
28
|
+
expect(hll.cardinality).to be_within(10).percent_of(size)
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'merges with other hyperloglog instances' do
|
32
|
+
size = 100_000
|
33
|
+
hlls = Array.new(5) do
|
34
|
+
HyperLogLog.new(16).tap { |hll|
|
35
|
+
size.times { hll.offer(rand(2**63)) }
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
merged = HyperLogLog.new(16)
|
40
|
+
merged.merge(*hlls)
|
41
|
+
|
42
|
+
expect(merged.cardinality).to be_within(10).percent_of(size * hlls.length)
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'serializes to a string' do
|
46
|
+
hll = HyperLogLog.new(4)
|
47
|
+
hll.offer(1)
|
48
|
+
hll.offer(2)
|
49
|
+
|
50
|
+
# h = Java::com::clearspring::analytics::stream::cardinality::HyperLogLog.new(4)
|
51
|
+
# h.offer(1)
|
52
|
+
# h.offer(2)
|
53
|
+
# h.getBytes()
|
54
|
+
expect(hll.serialize.unpack("C*")).to eq(
|
55
|
+
[0, 0, 0, 4, 0, 0, 0, 12, 2, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0]
|
56
|
+
)
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'unserializes from a string' do
|
60
|
+
serialized = [0, 0, 0, 4, 0, 0, 0, 12, 2, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0].pack("C*")
|
61
|
+
hll = HyperLogLog.unserialize(serialized)
|
62
|
+
|
63
|
+
expect(hll.cardinality).to eq(2)
|
64
|
+
hll.offer(1)
|
65
|
+
hll.offer(2)
|
66
|
+
hll.offer(3)
|
67
|
+
expect(hll.cardinality).to eq(3)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
data/spec/hyperll/murmur_hash_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'hyperll/murmur_hash'
|
2
|
+
|
3
|
+
module Hyperll
|
4
|
+
describe MurmurHash do
|
5
|
+
it 'hashes integers' do
|
6
|
+
# java.lang.Integer.toHexString(Java::com::clearspring::analytics::hash::MurmurHash.hash(1))
|
7
|
+
expect(MurmurHash.hash(1)).to eq("5b04c018".to_i(16))
|
8
|
+
expect(MurmurHash.hash(2)).to eq("86e25492".to_i(16))
|
9
|
+
expect(MurmurHash.hash(1000)).to eq("a373b8db".to_i(16))
|
10
|
+
expect(MurmurHash.hash(5000)).to eq("5e1abaac".to_i(16))
|
11
|
+
expect(MurmurHash.hash(18_000_000)).to eq("347b61c7".to_i(16))
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'hashes integers larger than 32 bits' do
|
15
|
+
expect(MurmurHash.hash(2 ** 33)).to eq("ab332279".to_i(16))
|
16
|
+
expect(MurmurHash.hash((2 ** 36) - 1)).to eq("db264be3".to_i(16))
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'hashes strings' do
|
20
|
+
# java.lang.Integer.toHexString(Java::com::clearspring::analytics::hash::MurmurHash.hash("abc123"))
|
21
|
+
expect(MurmurHash.hash("abc123")).to eq("375301eb".to_i(16))
|
22
|
+
expect(MurmurHash.hash("The quick brown fox jumped over the lazy dog")).to eq("fe639b68".to_i(16))
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/spec/hyperll/register_set_spec.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'hyperll/register_set'
|
2
|
+
|
3
|
+
module Hyperll
|
4
|
+
describe RegisterSet do
|
5
|
+
it "retrieves previously set values" do
|
6
|
+
rs = RegisterSet.new(2 ** 4)
|
7
|
+
|
8
|
+
rs[0] = 11
|
9
|
+
expect(rs[0]).to eq(11)
|
10
|
+
end
|
11
|
+
|
12
|
+
it "retrieves previously set values for small bits" do
|
13
|
+
rs = RegisterSet.new(6)
|
14
|
+
|
15
|
+
rs[0] = 11
|
16
|
+
expect(rs[0]).to eq(11)
|
17
|
+
end
|
18
|
+
|
19
|
+
it "merges with other register sets" do
|
20
|
+
rand = Random.new(2)
|
21
|
+
count = 32
|
22
|
+
rs = RegisterSet.new(count)
|
23
|
+
|
24
|
+
rss = []
|
25
|
+
5.times do |i|
|
26
|
+
rss[i] = RegisterSet.new(count)
|
27
|
+
count.times do |pos|
|
28
|
+
val = rand.rand(10)
|
29
|
+
rs.update_if_greater(pos, val)
|
30
|
+
rss[i][pos] = val
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
merged = RegisterSet.new(count)
|
35
|
+
rss.each do |set|
|
36
|
+
merged.merge(set)
|
37
|
+
end
|
38
|
+
|
39
|
+
rs.each_with_index do |value, index|
|
40
|
+
expect(value).to eq(merged[index])
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
it "merges with other register sets using update_if_greater" do
|
45
|
+
rand = Random.new(2)
|
46
|
+
count = 32
|
47
|
+
rs = RegisterSet.new(count)
|
48
|
+
|
49
|
+
rss = []
|
50
|
+
5.times do |i|
|
51
|
+
rss[i] = RegisterSet.new(count)
|
52
|
+
count.times do |pos|
|
53
|
+
val = rand.rand(10)
|
54
|
+
rs.update_if_greater(pos, val)
|
55
|
+
rss[i][pos] = val
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
merged = RegisterSet.new(count)
|
60
|
+
rss.each do |set|
|
61
|
+
set.each_with_index do |value, index|
|
62
|
+
merged.update_if_greater(index, value)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
rs.each_with_index do |value, index|
|
67
|
+
expect(value).to eq(merged[index])
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# Require this file using `require "spec_helper"` to ensure that it is only
|
4
|
+
# loaded once.
|
5
|
+
#
|
6
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
+
RSpec.configure do |config|
|
8
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true
|
9
|
+
config.run_all_when_everything_filtered = true
|
10
|
+
config.filter_run :focus
|
11
|
+
|
12
|
+
# Run specs in random order to surface order dependencies. If you find an
|
13
|
+
# order dependency and want to debug it, you can fix the order by providing
|
14
|
+
# the seed, which is printed after each run.
|
15
|
+
# --seed 1234
|
16
|
+
config.order = 'random'
|
17
|
+
end
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hyperll
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andy Lindeman
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-10-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.14'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.14'
|
55
|
+
description: HyperLogLog implementation in pure Ruby
|
56
|
+
email:
|
57
|
+
- andy@andylindeman.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- .gitignore
|
63
|
+
- .rspec
|
64
|
+
- Gemfile
|
65
|
+
- LICENSE.txt
|
66
|
+
- README.md
|
67
|
+
- Rakefile
|
68
|
+
- hyperll.gemspec
|
69
|
+
- lib/hyperll.rb
|
70
|
+
- lib/hyperll/hyper_log_log.rb
|
71
|
+
- lib/hyperll/murmur_hash.rb
|
72
|
+
- lib/hyperll/register_set.rb
|
73
|
+
- lib/hyperll/version.rb
|
74
|
+
- spec/hyperll/hyper_log_log_spec.rb
|
75
|
+
- spec/hyperll/murmur_hash_spec.rb
|
76
|
+
- spec/hyperll/register_set_spec.rb
|
77
|
+
- spec/spec_helper.rb
|
78
|
+
homepage: https://github.com/alindeman/hyperll
|
79
|
+
licenses:
|
80
|
+
- MIT
|
81
|
+
metadata: {}
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
require_paths:
|
85
|
+
- lib
|
86
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - '>='
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '0'
|
91
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - '>='
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
requirements: []
|
97
|
+
rubyforge_project:
|
98
|
+
rubygems_version: 2.0.3
|
99
|
+
signing_key:
|
100
|
+
specification_version: 4
|
101
|
+
summary: HyperLogLog implementation in pure Ruby
|
102
|
+
test_files:
|
103
|
+
- spec/hyperll/hyper_log_log_spec.rb
|
104
|
+
- spec/hyperll/murmur_hash_spec.rb
|
105
|
+
- spec/hyperll/register_set_spec.rb
|
106
|
+
- spec/spec_helper.rb
|