hyperll 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/hyperll/delta_bytes.rb +9 -0
- data/lib/hyperll/hyper_log_log_plus.rb +19 -0
- data/lib/hyperll/varint.rb +11 -0
- data/lib/hyperll/version.rb +1 -1
- data/spec/hyperll/delta_bytes_spec.rb +5 -1
- data/spec/hyperll/hyper_log_log_plus_spec.rb +32 -0
- data/spec/hyperll/varint_spec.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 82af95ec5cf82a356d3b5c1252c089509fa08193
|
4
|
+
data.tar.gz: d46c3121f220ad661fa1c4a6a6abaf3e31800726
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 91892e69eb09d3c41450b32e8f52112597190dd1f0664d762e477d33e7506ecac40b519d2f541d4c965402885839f122fab75eb40f4466bd273002bc7fe84da7
|
7
|
+
data.tar.gz: 0858d530e8520ee1765edccbac24c71d33777a24dacea7a19a1428a7eef54f2e3640e9ffba578874cba619d2bcd2375f9d41e655ea3cefce61c4511684a9b662
|
data/lib/hyperll/delta_bytes.rb
CHANGED
@@ -3,6 +3,15 @@ require_relative 'varint'
|
|
3
3
|
module Hyperll
|
4
4
|
class DeltaBytes
|
5
5
|
def self.compress(bytes)
|
6
|
+
compressed = Varint.write_unsigned_var_int(bytes.length)
|
7
|
+
previous_value = 0
|
8
|
+
|
9
|
+
bytes.each do |b|
|
10
|
+
compressed.concat(Varint.write_unsigned_var_int(b - previous_value))
|
11
|
+
previous_value = b
|
12
|
+
end
|
13
|
+
|
14
|
+
compressed
|
6
15
|
end
|
7
16
|
|
8
17
|
def self.uncompress(bytes)
|
data/lib/hyperll/hyper_log_log_plus.rb
CHANGED
@@ -186,6 +186,25 @@ module Hyperll
|
|
186
186
|
self
|
187
187
|
end
|
188
188
|
|
189
|
+
def serialize
|
190
|
+
str = ""
|
191
|
+
str << [-2].pack("N") # -VERSION
|
192
|
+
str << Varint.write_unsigned_var_int(p).pack("C*")
|
193
|
+
str << Varint.write_unsigned_var_int(sp).pack("C*")
|
194
|
+
|
195
|
+
case format
|
196
|
+
when :normal
|
197
|
+
str << Varint.write_unsigned_var_int(0).pack("C*")
|
198
|
+
str << Varint.write_unsigned_var_int(@register_set.size * 4).pack("C*")
|
199
|
+
str << @register_set.serialize
|
200
|
+
when :sparse
|
201
|
+
str << Varint.write_unsigned_var_int(1).pack("C*")
|
202
|
+
str << DeltaBytes.compress(@sparse_set).pack("C*")
|
203
|
+
end
|
204
|
+
|
205
|
+
str
|
206
|
+
end
|
207
|
+
|
189
208
|
protected
|
190
209
|
def sparse_set
|
191
210
|
@sparse_set
|
data/lib/hyperll/varint.rb
CHANGED
@@ -11,5 +11,16 @@ module Hyperll
|
|
11
11
|
|
12
12
|
value | (b << i)
|
13
13
|
end
|
14
|
+
|
15
|
+
def self.write_unsigned_var_int(value)
|
16
|
+
bytes = []
|
17
|
+
while (value & 0xFFFFFF80) != 0
|
18
|
+
bytes << ((value & 0x7F) | 0x80)
|
19
|
+
value >>= 7
|
20
|
+
end
|
21
|
+
|
22
|
+
bytes << (value & 0x7F)
|
23
|
+
bytes
|
24
|
+
end
|
14
25
|
end
|
15
26
|
end
|
data/lib/hyperll/version.rb
CHANGED
data/spec/hyperll/delta_bytes_spec.rb
CHANGED
@@ -3,8 +3,12 @@ require 'hyperll/delta_bytes'
|
|
3
3
|
|
4
4
|
module Hyperll
|
5
5
|
describe DeltaBytes do
|
6
|
-
it '
|
6
|
+
it 'uncompresses bytes' do
|
7
7
|
expect(DeltaBytes.uncompress([2, -46, 5, -64, 4])).to eq([722, 1298])
|
8
8
|
end
|
9
|
+
|
10
|
+
it 'compresses bytes' do
|
11
|
+
expect(DeltaBytes.compress([722, 1298])).to eq([2, 256 - 46, 5, 256 - 64, 4])
|
12
|
+
end
|
9
13
|
end
|
10
14
|
end
|
data/spec/hyperll/hyper_log_log_plus_spec.rb
CHANGED
@@ -18,6 +18,38 @@ module Hyperll
|
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
+
describe 'serialization' do
|
22
|
+
it 'serializes a sparse format to a string' do
|
23
|
+
# TODO: We can't yet construct a HyperLogLogPlus from scratch, so we
|
24
|
+
# deserialize a HyperLogLogPlus from stream-lib, then serialize it
|
25
|
+
# back and make sure it matches.
|
26
|
+
|
27
|
+
# h = Java::com::clearspring::analytics::stream::cardinality::HyperLogLogPlus.new(4, 10)
|
28
|
+
# h.offer(1)
|
29
|
+
# h.offer(2)
|
30
|
+
# h.getBytes()
|
31
|
+
serialized = [-1, -1, -1, -2, 4, 10, 1, 2, -46, 5, -64, 4].pack("C*")
|
32
|
+
|
33
|
+
hllp = HyperLogLogPlus.unserialize(serialized)
|
34
|
+
expect(hllp.serialize).to eq(serialized)
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'serializes a normal format to a string' do
|
38
|
+
# TODO: We can't yet construct a HyperLogLogPlus from scratch, so we
|
39
|
+
# deserialize a HyperLogLogPlus from stream-lib, then serialize it
|
40
|
+
# back and make sure it matches.
|
41
|
+
|
42
|
+
# h = Java::com::clearspring::analytics::stream::cardinality::HyperLogLogPlus.new(4)
|
43
|
+
# h.offer(1)
|
44
|
+
# h.offer(2)
|
45
|
+
# h.getBytes()
|
46
|
+
serialized = [-1, -1, -1, -2, 4, 0, 0, 12, 2, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0].pack("C*")
|
47
|
+
|
48
|
+
hllp = HyperLogLogPlus.unserialize(serialized)
|
49
|
+
expect(hllp.serialize).to eq(serialized)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
21
53
|
describe 'format' do
|
22
54
|
it 'defaults to normal (non-sparse) format' do
|
23
55
|
hllp = HyperLogLogPlus.new(11)
|
data/spec/hyperll/varint_spec.rb
CHANGED
@@ -8,5 +8,11 @@ module Hyperll
|
|
8
8
|
expect(Varint.read_unsigned_var_int([0x81, 0x01])).to eq(0x81)
|
9
9
|
expect(Varint.read_unsigned_var_int([0x81, 0x81, 0x01])).to eq(0x4081)
|
10
10
|
end
|
11
|
+
|
12
|
+
it 'writes unsigned variable length integers' do
|
13
|
+
expect(Varint.write_unsigned_var_int(0x35)).to eq([0x35])
|
14
|
+
expect(Varint.write_unsigned_var_int(0x81)).to eq([0x81, 0x01])
|
15
|
+
expect(Varint.write_unsigned_var_int(0x4081)).to eq([0x81, 0x81, 0x01])
|
16
|
+
end
|
11
17
|
end
|
12
18
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hyperll
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Lindeman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-10-
|
11
|
+
date: 2013-10-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|