bloom_fit 1.1.0 → 1.2.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -3
- data/ext/cbloomfilter/cbloomfilter.c +1 -0
- data/ext/cbloomfilter/salts.h +43 -43
- data/lib/bloom_fit/version.rb +1 -1
- data/lib/bloom_fit.rb +29 -6
- data/lib/cbloomfilter.bundle +0 -0
- data/test/bloom_fit_test.rb +12 -3
- metadata +19 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f7685fe19ab2e07a042fd6a5aefe22994fee6be04406315328110281ccb9f2b8
|
|
4
|
+
data.tar.gz: 635b96b2029b8e3f0d63a0e978824ca1bb79bf5fbca7cdb351ad770b1b72c0ab
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a38d8af866154c03a4b39e3ccdb14f071c6ac22f82f565efed9fc3c0eaa473db2fe6bd1cce6772ceb0dc621c7c0a931e45fd45b1724f397a91cbc46e299d1cb9
|
|
7
|
+
data.tar.gz: e1a1c98656d220db7ec8fd96751d694ba36ab2e532ab7c3ee45259aa613be6e3ac449ddca3dc4ff04337ecd3064e524b9642d29d0e9e619598332e7b46b47c54
|
data/README.md
CHANGED
|
@@ -28,7 +28,7 @@ Compared with bloomfilter-rb, BloomFit:
|
|
|
28
28
|
- small Ruby API with familiar methods like `add`, `include?`, `merge`, `|`, and `&`
|
|
29
29
|
- supports strings, symbols, integers, booleans, and other values that can be converted with `to_s`
|
|
30
30
|
- manual `size` / `hashes` overrides when you want control
|
|
31
|
-
-
|
|
31
|
+
- serialize filters with msgpack via `to_msgpack`, `BloomFit.unpack`, `save`, and `BloomFit.load`
|
|
32
32
|
- inspect filter state with `stats`, `to_hex`, `to_binary`, and `bitmap`
|
|
33
33
|
|
|
34
34
|
## Requirements
|
|
@@ -201,7 +201,21 @@ reloaded.include?("cat") # => true
|
|
|
201
201
|
reloaded.include?("dog") # => true
|
|
202
202
|
```
|
|
203
203
|
|
|
204
|
-
Persistence uses Ruby `Marshal`.
|
|
204
|
+
Persistence uses msgpack, not Ruby `Marshal`.
|
|
205
|
+
|
|
206
|
+
If you want the serialized bytes directly instead of writing a file:
|
|
207
|
+
|
|
208
|
+
```ruby
|
|
209
|
+
filter = BloomFit.new(capacity: 100)
|
|
210
|
+
filter << "cat"
|
|
211
|
+
|
|
212
|
+
payload = filter.to_msgpack
|
|
213
|
+
copy = BloomFit.unpack(payload)
|
|
214
|
+
|
|
215
|
+
copy.include?("cat") # => true
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
The msgpack payload stores the filter `size`, `hashes`, and raw bitmap.
|
|
205
219
|
|
|
206
220
|
### Inspect the bitmap
|
|
207
221
|
|
|
@@ -236,7 +250,8 @@ filter.bitmap # => raw bytes from the native filter
|
|
|
236
250
|
| `set_bits`, `n` | Returns the number of bits currently set. |
|
|
237
251
|
| `stats` | Returns a human-readable summary including predicted false-positive rate. |
|
|
238
252
|
| `to_hex`, `to_binary`, `bitmap` | Returns the filter bitmap in different representations. |
|
|
239
|
-
| `
|
|
253
|
+
| `to_msgpack`, `BloomFit.unpack` | Serializes and restores a filter as msgpack bytes. |
|
|
254
|
+
| `save`, `BloomFit.load` | Persists and restores a filter using the same msgpack format. |
|
|
240
255
|
|
|
241
256
|
## Resources
|
|
242
257
|
|
data/ext/cbloomfilter/salts.h
CHANGED
|
@@ -4,47 +4,47 @@
|
|
|
4
4
|
*
|
|
5
5
|
*/
|
|
6
6
|
static unsigned int salts[] = {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
7
|
+
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U, 0x706af48fU,
|
|
8
|
+
0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U, 0xe0d5e91eU, 0x97d2d988U,
|
|
9
|
+
0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U, 0x90bf1d91U, 0x1db71064U, 0x6ab020f2U,
|
|
10
|
+
0xf3b97148U, 0x84be41deU, 0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U,
|
|
11
|
+
0x136c9856U, 0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
|
|
12
|
+
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U, 0xa2677172U,
|
|
13
|
+
0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU, 0x35b5a8faU, 0x42b2986cU,
|
|
14
|
+
0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U, 0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U,
|
|
15
|
+
0x26d930acU, 0x51de003aU, 0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U,
|
|
16
|
+
0xcfba9599U, 0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
|
|
17
|
+
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U, 0x01db7106U,
|
|
18
|
+
0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU, 0x9fbfe4a5U, 0xe8b8d433U,
|
|
19
|
+
0x7807c9a2U, 0x0f00f934U, 0x9609a88eU, 0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU,
|
|
20
|
+
0x91646c97U, 0xe6635c01U, 0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU,
|
|
21
|
+
0x6c0695edU, 0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
|
|
22
|
+
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U, 0xfbd44c65U,
|
|
23
|
+
0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U, 0x4adfa541U, 0x3dd895d7U,
|
|
24
|
+
0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU, 0x346ed9fcU, 0xad678846U, 0xda60b8d0U,
|
|
25
|
+
0x44042d73U, 0x33031de5U, 0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU,
|
|
26
|
+
0xbe0b1010U, 0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
|
|
27
|
+
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U, 0x2eb40d81U,
|
|
28
|
+
0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U, 0x03b6e20cU, 0x74b1d29aU,
|
|
29
|
+
0xead54739U, 0x9dd277afU, 0x04db2615U, 0x73dc1683U, 0xe3630b12U, 0x94643b84U,
|
|
30
|
+
0x0d6d6a3eU, 0x7a6a5aa8U, 0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U,
|
|
31
|
+
0xf00f9344U, 0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
|
|
32
|
+
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU, 0x67dd4accU,
|
|
33
|
+
0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U, 0xd6d6a3e8U, 0xa1d1937eU,
|
|
34
|
+
0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U, 0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU,
|
|
35
|
+
0xd80d2bdaU, 0xaf0a1b4cU, 0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U,
|
|
36
|
+
0x316e8eefU, 0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
|
|
37
|
+
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU, 0xb2bd0b28U,
|
|
38
|
+
0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U, 0x2cd99e8bU, 0x5bdeae1dU,
|
|
39
|
+
0x9b64c2b0U, 0xec63f226U, 0x756aa39cU, 0x026d930aU, 0x9c0906a9U, 0xeb0e363fU,
|
|
40
|
+
0x72076785U, 0x05005713U, 0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U,
|
|
41
|
+
0x92d28e9bU, 0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
|
|
42
|
+
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U, 0x18b74777U,
|
|
43
|
+
0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU, 0x8f659effU, 0xf862ae69U,
|
|
44
|
+
0x616bffd3U, 0x166ccf45U, 0xa00ae278U, 0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U,
|
|
45
|
+
0xa7672661U, 0xd06016f7U, 0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU,
|
|
46
|
+
0x40df0b66U, 0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
|
|
47
|
+
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U, 0xcdd70693U,
|
|
48
|
+
0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U, 0x5d681b02U, 0x2a6f2b94U,
|
|
49
|
+
0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU, 0x2d02ef8dU
|
|
50
50
|
};
|
data/lib/bloom_fit/version.rb
CHANGED
data/lib/bloom_fit.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
require "forwardable"
|
|
2
|
+
require "msgpack"
|
|
2
3
|
|
|
3
4
|
require "cbloomfilter"
|
|
4
5
|
require "bloom_fit/version"
|
|
@@ -12,7 +13,8 @@ require "bloom_fit/version"
|
|
|
12
13
|
#
|
|
13
14
|
# The class wraps the native +CBloomFilter+ implementation in Ruby-friendly
|
|
14
15
|
# methods such as +add+, +include?+, +merge+, +&+, and +|+. Instances can be
|
|
15
|
-
# serialized with +
|
|
16
|
+
# serialized to msgpack with +to_msgpack+ or persisted to disk with +save+ and
|
|
17
|
+
# later restored with +BloomFit.unpack+ or +BloomFit.load+.
|
|
16
18
|
#
|
|
17
19
|
# Filters can only be combined when they were created with the same +size+ and
|
|
18
20
|
# +hashes+ values; otherwise the native extension raises +ArgumentError+.
|
|
@@ -233,16 +235,37 @@ class BloomFit
|
|
|
233
235
|
[size, hashes, bitmap]
|
|
234
236
|
end
|
|
235
237
|
|
|
238
|
+
# Rebuilds a filter from the serialized data returned by +to_msgpack+.
|
|
239
|
+
#
|
|
240
|
+
# The payload stores +size+, +hashes+, and the raw bitmap in msgpack format,
|
|
241
|
+
# making it suitable for compact transport or persistence outside Ruby's
|
|
242
|
+
# +Marshal+.
|
|
243
|
+
def self.unpack(msg)
|
|
244
|
+
BloomFit.allocate.tap do |bf|
|
|
245
|
+
bf.marshal_load(MessagePack.unpack(msg))
|
|
246
|
+
end
|
|
247
|
+
end
|
|
248
|
+
|
|
249
|
+
# Returns this filter serialized in msgpack format.
|
|
250
|
+
#
|
|
251
|
+
# The encoded payload contains the same three values as +marshal_dump+:
|
|
252
|
+
# +size+, +hashes+, and the raw bitmap.
|
|
253
|
+
def to_msgpack
|
|
254
|
+
MessagePack.pack(marshal_dump)
|
|
255
|
+
end
|
|
256
|
+
|
|
236
257
|
# Loads a filter from a file previously written by +save+.
|
|
237
258
|
#
|
|
238
|
-
# The file
|
|
239
|
-
# with trusted input.
|
|
259
|
+
# The file contents are decoded from msgpack.
|
|
240
260
|
def self.load(filename)
|
|
241
|
-
|
|
261
|
+
unpack(File.binread(filename))
|
|
242
262
|
end
|
|
243
263
|
|
|
244
|
-
# Writes the filter to +filename+ using
|
|
264
|
+
# Writes the filter to +filename+ using msgpack format.
|
|
265
|
+
#
|
|
266
|
+
# This produces a compact binary payload that can be restored with
|
|
267
|
+
# +BloomFit.load+.
|
|
245
268
|
def save(filename)
|
|
246
|
-
File.binwrite(filename,
|
|
269
|
+
File.binwrite(filename, to_msgpack)
|
|
247
270
|
end
|
|
248
271
|
end
|
data/lib/cbloomfilter.bundle
CHANGED
|
Binary file
|
data/test/bloom_fit_test.rb
CHANGED
|
@@ -358,13 +358,22 @@ class BloomFitTest < Minitest::Spec
|
|
|
358
358
|
describe "serialization" do
|
|
359
359
|
after { FileUtils.rm_f("bf.out") }
|
|
360
360
|
|
|
361
|
-
it "
|
|
361
|
+
it "packs and unpacks" do
|
|
362
|
+
bf = BloomFit.new(size: 111, hashes: 5)
|
|
363
|
+
msg = bf.to_msgpack
|
|
364
|
+
bf2 = BloomFit.unpack(msg)
|
|
365
|
+
assert_equal 111, bf2.size
|
|
366
|
+
assert_equal 5, bf2.hashes
|
|
367
|
+
assert_empty bf2
|
|
368
|
+
end
|
|
369
|
+
|
|
370
|
+
it "saves" do
|
|
362
371
|
bf = BloomFit.new
|
|
363
372
|
assert bf.save("bf.out")
|
|
364
373
|
end
|
|
365
374
|
|
|
366
375
|
it "uses binary file io" do
|
|
367
|
-
dumped =
|
|
376
|
+
dumped = subject.to_msgpack
|
|
368
377
|
writer = Minitest::Mock.new
|
|
369
378
|
writer.expect(:call, dumped.bytesize, ["bf.out", dumped])
|
|
370
379
|
|
|
@@ -385,7 +394,7 @@ class BloomFitTest < Minitest::Spec
|
|
|
385
394
|
reader.verify
|
|
386
395
|
end
|
|
387
396
|
|
|
388
|
-
it "loads
|
|
397
|
+
it "loads" do
|
|
389
398
|
subject.add("foo")
|
|
390
399
|
subject.add("bar")
|
|
391
400
|
subject.save("bf.out")
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bloom_fit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ryan McGeary
|
|
@@ -11,10 +11,24 @@ authors:
|
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
13
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
14
|
-
dependencies:
|
|
14
|
+
dependencies:
|
|
15
|
+
- !ruby/object:Gem::Dependency
|
|
16
|
+
name: msgpack
|
|
17
|
+
requirement: !ruby/object:Gem::Requirement
|
|
18
|
+
requirements:
|
|
19
|
+
- - "~>"
|
|
20
|
+
- !ruby/object:Gem::Version
|
|
21
|
+
version: '1.0'
|
|
22
|
+
type: :runtime
|
|
23
|
+
prerelease: false
|
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
+
requirements:
|
|
26
|
+
- - "~>"
|
|
27
|
+
- !ruby/object:Gem::Version
|
|
28
|
+
version: '1.0'
|
|
15
29
|
email:
|
|
16
30
|
- ryan@mcgeary.org
|
|
17
|
-
-
|
|
31
|
+
- btalayeminaei@gmail.com
|
|
18
32
|
- ilya@grigorik.com
|
|
19
33
|
- valdzone@gmail.com
|
|
20
34
|
executables: []
|
|
@@ -57,6 +71,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
57
71
|
requirements: []
|
|
58
72
|
rubygems_version: 4.0.9
|
|
59
73
|
specification_version: 4
|
|
60
|
-
summary:
|
|
61
|
-
|
|
74
|
+
summary: Bloom filters for Ruby with automatic sizing and a fast native in-memory
|
|
75
|
+
core, with a small, Set-like API.
|
|
62
76
|
test_files: []
|