hyperll 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/hyperll/delta_bytes.rb +9 -0
- data/lib/hyperll/hyper_log_log_plus.rb +19 -0
- data/lib/hyperll/varint.rb +11 -0
- data/lib/hyperll/version.rb +1 -1
- data/spec/hyperll/delta_bytes_spec.rb +5 -1
- data/spec/hyperll/hyper_log_log_plus_spec.rb +32 -0
- data/spec/hyperll/varint_spec.rb +6 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 82af95ec5cf82a356d3b5c1252c089509fa08193
|
4
|
+
data.tar.gz: d46c3121f220ad661fa1c4a6a6abaf3e31800726
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 91892e69eb09d3c41450b32e8f52112597190dd1f0664d762e477d33e7506ecac40b519d2f541d4c965402885839f122fab75eb40f4466bd273002bc7fe84da7
|
7
|
+
data.tar.gz: 0858d530e8520ee1765edccbac24c71d33777a24dacea7a19a1428a7eef54f2e3640e9ffba578874cba619d2bcd2375f9d41e655ea3cefce61c4511684a9b662
|
data/lib/hyperll/delta_bytes.rb
CHANGED
@@ -3,6 +3,15 @@ require_relative 'varint'
|
|
3
3
|
module Hyperll
|
4
4
|
class DeltaBytes
|
5
5
|
def self.compress(bytes)
|
6
|
+
compressed = Varint.write_unsigned_var_int(bytes.length)
|
7
|
+
previous_value = 0
|
8
|
+
|
9
|
+
bytes.each do |b|
|
10
|
+
compressed.concat(Varint.write_unsigned_var_int(b - previous_value))
|
11
|
+
previous_value = b
|
12
|
+
end
|
13
|
+
|
14
|
+
compressed
|
6
15
|
end
|
7
16
|
|
8
17
|
def self.uncompress(bytes)
|
data/lib/hyperll/hyper_log_log_plus.rb
CHANGED
@@ -186,6 +186,25 @@ module Hyperll
|
|
186
186
|
self
|
187
187
|
end
|
188
188
|
|
189
|
+
def serialize
|
190
|
+
str = ""
|
191
|
+
str << [-2].pack("N") # -VERSION
|
192
|
+
str << Varint.write_unsigned_var_int(p).pack("C*")
|
193
|
+
str << Varint.write_unsigned_var_int(sp).pack("C*")
|
194
|
+
|
195
|
+
case format
|
196
|
+
when :normal
|
197
|
+
str << Varint.write_unsigned_var_int(0).pack("C*")
|
198
|
+
str << Varint.write_unsigned_var_int(@register_set.size * 4).pack("C*")
|
199
|
+
str << @register_set.serialize
|
200
|
+
when :sparse
|
201
|
+
str << Varint.write_unsigned_var_int(1).pack("C*")
|
202
|
+
str << DeltaBytes.compress(@sparse_set).pack("C*")
|
203
|
+
end
|
204
|
+
|
205
|
+
str
|
206
|
+
end
|
207
|
+
|
189
208
|
protected
|
190
209
|
def sparse_set
|
191
210
|
@sparse_set
|
data/lib/hyperll/varint.rb
CHANGED
@@ -11,5 +11,16 @@ module Hyperll
|
|
11
11
|
|
12
12
|
value | (b << i)
|
13
13
|
end
|
14
|
+
|
15
|
+
def self.write_unsigned_var_int(value)
|
16
|
+
bytes = []
|
17
|
+
while (value & 0xFFFFFF80) != 0
|
18
|
+
bytes << ((value & 0x7F) | 0x80)
|
19
|
+
value >>= 7
|
20
|
+
end
|
21
|
+
|
22
|
+
bytes << (value & 0x7F)
|
23
|
+
bytes
|
24
|
+
end
|
14
25
|
end
|
15
26
|
end
|
data/lib/hyperll/version.rb
CHANGED
data/spec/hyperll/delta_bytes_spec.rb
CHANGED
@@ -3,8 +3,12 @@ require 'hyperll/delta_bytes'
|
|
3
3
|
|
4
4
|
module Hyperll
|
5
5
|
describe DeltaBytes do
|
6
|
-
it '
|
6
|
+
it 'uncompresses bytes' do
|
7
7
|
expect(DeltaBytes.uncompress([2, -46, 5, -64, 4])).to eq([722, 1298])
|
8
8
|
end
|
9
|
+
|
10
|
+
it 'compresses bytes' do
|
11
|
+
expect(DeltaBytes.compress([722, 1298])).to eq([2, 256 - 46, 5, 256 - 64, 4])
|
12
|
+
end
|
9
13
|
end
|
10
14
|
end
|
data/spec/hyperll/hyper_log_log_plus_spec.rb
CHANGED
@@ -18,6 +18,38 @@ module Hyperll
|
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
+
describe 'serialization' do
|
22
|
+
it 'serializes a sparse format to a string' do
|
23
|
+
# TODO: We can't yet construct a HyperLogLogPlus from scratch, so we
|
24
|
+
# deserialize a HyperLogLogPlus from stream-lib, then serialize it
|
25
|
+
# back and make sure it matches.
|
26
|
+
|
27
|
+
# h = Java::com::clearspring::analytics::stream::cardinality::HyperLogLogPlus.new(4, 10)
|
28
|
+
# h.offer(1)
|
29
|
+
# h.offer(2)
|
30
|
+
# h.getBytes()
|
31
|
+
serialized = [-1, -1, -1, -2, 4, 10, 1, 2, -46, 5, -64, 4].pack("C*")
|
32
|
+
|
33
|
+
hllp = HyperLogLogPlus.unserialize(serialized)
|
34
|
+
expect(hllp.serialize).to eq(serialized)
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'serializes a normal format to a string' do
|
38
|
+
# TODO: We can't yet construct a HyperLogLogPlus from scratch, so we
|
39
|
+
# deserialize a HyperLogLogPlus from stream-lib, then serialize it
|
40
|
+
# back and make sure it matches.
|
41
|
+
|
42
|
+
# h = Java::com::clearspring::analytics::stream::cardinality::HyperLogLogPlus.new(4)
|
43
|
+
# h.offer(1)
|
44
|
+
# h.offer(2)
|
45
|
+
# h.getBytes()
|
46
|
+
serialized = [-1, -1, -1, -2, 4, 0, 0, 12, 2, 0, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0].pack("C*")
|
47
|
+
|
48
|
+
hllp = HyperLogLogPlus.unserialize(serialized)
|
49
|
+
expect(hllp.serialize).to eq(serialized)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
21
53
|
describe 'format' do
|
22
54
|
it 'defaults to normal (non-sparse) format' do
|
23
55
|
hllp = HyperLogLogPlus.new(11)
|
data/spec/hyperll/varint_spec.rb
CHANGED
@@ -8,5 +8,11 @@ module Hyperll
|
|
8
8
|
expect(Varint.read_unsigned_var_int([0x81, 0x01])).to eq(0x81)
|
9
9
|
expect(Varint.read_unsigned_var_int([0x81, 0x81, 0x01])).to eq(0x4081)
|
10
10
|
end
|
11
|
+
|
12
|
+
it 'writes unsigned variable length integers' do
|
13
|
+
expect(Varint.write_unsigned_var_int(0x35)).to eq([0x35])
|
14
|
+
expect(Varint.write_unsigned_var_int(0x81)).to eq([0x81, 0x01])
|
15
|
+
expect(Varint.write_unsigned_var_int(0x4081)).to eq([0x81, 0x81, 0x01])
|
16
|
+
end
|
11
17
|
end
|
12
18
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hyperll
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andy Lindeman
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-10-
|
11
|
+
date: 2013-10-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|