bloom_fit 0.3.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +220 -47
- data/ext/cbloomfilter/cbloomfilter.c +1 -1
- data/lib/bloom_fit/version.rb +1 -1
- data/lib/bloom_fit.rb +13 -1
- data/lib/cbloomfilter.bundle +0 -0
- data/test/bloom_fit_test.rb +22 -0
- data/test/c_bloom_filter_test.rb +158 -0
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 54da887424b56d9c09e4d351125c22873bc24be3e32e96cf3716d044a0864957
|
|
4
|
+
data.tar.gz: 50780ab65355bc42c075586888f4f09ee6ce6849b16c01264d83887dc83f71a3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 53511030706f900e42050938ff80eaaaa5290c609dcd40e6b809bed6c6d491fe63bc57d4d2c1e494c0081642f85e6e29e8c5bc46cbe9cc342d8700990d910043
|
|
7
|
+
data.tar.gz: f5da69e7acebde88b41649f6dfac9925e4f021c5fb7687f442a0b61b78efd1c30423f013851c2982048ef2f3c374b0e583cca7f92352475a31ca5cddfb67fd46
|
data/README.md
CHANGED
|
@@ -1,77 +1,250 @@
|
|
|
1
|
-
# BloomFit
|
|
1
|
+
# BloomFit
|
|
2
2
|
|
|
3
|
-
[](https://rubygems.org/gems/bloom_fit)
|
|
4
4
|
[](https://github.com/rmm5t/bloom_fit/actions/workflows/ci.yml)
|
|
5
5
|
[](https://rubygems.org/gems/bloom_fit)
|
|
6
6
|
|
|
7
|
-
BloomFit
|
|
7
|
+
BloomFit is an in-memory, non-counting Bloom filter for Ruby backed by a small C extension.
|
|
8
|
+
|
|
9
|
+
It gives you a compact, Set-like API for probabilistic membership checks:
|
|
10
|
+
|
|
11
|
+
- false positives are possible
|
|
12
|
+
- false negatives are not, as long as a value was added to the same filter
|
|
13
|
+
- individual values cannot be deleted safely because the filter is non-counting
|
|
14
|
+
|
|
15
|
+
BloomFit is heavily inspired by [bloomfilter-rb]'s native implementation and the original C implementation by Tatsuya Mori. This version uses a DJB2 hash with salts from the CRC table and wraps the native filter in a Ruby-friendly API. The most common way to use it is to pass an expected `capacity` and optional `false_positive_rate`, then let BloomFit calculate `size` and `hashes` for you.
|
|
16
|
+
|
|
17
|
+
Compared with bloomfilter-rb, BloomFit:
|
|
8
18
|
|
|
9
19
|
- uses DJB2 instead of CRC32, yielding better hash distribution
|
|
10
20
|
- improves performance for very large datasets
|
|
11
21
|
- avoids the need to supply a seed
|
|
12
|
-
- automatically calculates the
|
|
22
|
+
- automatically calculates the filter size (`m`) and hash count (`k`) from capacity and false-positive rate
|
|
13
23
|
|
|
14
|
-
|
|
24
|
+
## Features
|
|
15
25
|
|
|
16
|
-
|
|
26
|
+
- native `CBloomFilter` implementation for MRI Ruby
|
|
27
|
+
- automatic sizing from `capacity` and `false_positive_rate`
|
|
28
|
+
- small Ruby API with familiar methods like `add`, `include?`, `merge`, `|`, and `&`
|
|
29
|
+
- supports strings, symbols, integers, booleans, and other values that can be converted with `to_s`
|
|
30
|
+
- manual `size` / `hashes` overrides when you want control
|
|
31
|
+
- save and reload filters with Ruby `Marshal`
|
|
32
|
+
- inspect filter state with `stats`, `to_hex`, `to_binary`, and `bitmap`
|
|
17
33
|
|
|
18
|
-
|
|
19
|
-
- number of hash functions
|
|
34
|
+
## Requirements
|
|
20
35
|
|
|
21
|
-
|
|
36
|
+
- Ruby `>= 3.2.0`
|
|
22
37
|
|
|
23
|
-
|
|
24
|
-
- Determining parameters: [Scalable Datasets: Bloom Filters in Ruby](http://www.igvita.com/2008/12/27/scalable-datasets-bloom-filters-in-ruby/)
|
|
25
|
-
- Applications & reasons behind bloom filter: [Flow analysis: Time based bloom filter](http://www.igvita.com/2010/01/06/flow-analysis-time-based-bloom-filters/)
|
|
38
|
+
## Installation
|
|
26
39
|
|
|
27
|
-
|
|
40
|
+
```bash
|
|
41
|
+
gem install bloom_fit
|
|
42
|
+
```
|
|
28
43
|
|
|
29
|
-
|
|
44
|
+
```ruby
|
|
45
|
+
require "bloom_fit"
|
|
46
|
+
```
|
|
30
47
|
|
|
31
|
-
|
|
48
|
+
## Quick Start
|
|
32
49
|
|
|
33
50
|
```ruby
|
|
34
51
|
require "bloom_fit"
|
|
35
52
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
#
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
#
|
|
49
|
-
#
|
|
50
|
-
|
|
53
|
+
filter = BloomFit.new(capacity: 250, false_positive_rate: 0.001)
|
|
54
|
+
|
|
55
|
+
filter.add("cat")
|
|
56
|
+
filter << :dog
|
|
57
|
+
|
|
58
|
+
filter.include?("cat") # => true
|
|
59
|
+
filter.key?("dog") # => true
|
|
60
|
+
filter["bird"] # => false
|
|
61
|
+
|
|
62
|
+
filter["owl"] = true
|
|
63
|
+
filter["ant"] = false
|
|
64
|
+
|
|
65
|
+
filter["owl"] # => true
|
|
66
|
+
filter["ant"] # => false
|
|
67
|
+
|
|
68
|
+
filter.empty? # => false
|
|
69
|
+
|
|
70
|
+
filter.size # => 3595
|
|
71
|
+
filter.hashes # => 10
|
|
72
|
+
|
|
73
|
+
filter.clear
|
|
74
|
+
filter.empty? # => true
|
|
51
75
|
```
|
|
52
76
|
|
|
53
|
-
|
|
77
|
+
`#include?`, `#key?`, and `#[]` are aliases. `#add` and `#<<` are also aliases.
|
|
78
|
+
|
|
79
|
+
## Automatic Sizing
|
|
80
|
+
|
|
81
|
+
BloomFit now calculates `size` and `hashes` for you when you initialize it with an expected capacity:
|
|
54
82
|
|
|
55
83
|
```ruby
|
|
56
|
-
|
|
84
|
+
filter = BloomFit.new(capacity: 10_000, false_positive_rate: 0.01)
|
|
85
|
+
|
|
86
|
+
filter.size # => 95851
|
|
87
|
+
filter.hashes # => 7
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
The defaults are a good starting point for many small filters:
|
|
91
|
+
|
|
92
|
+
```ruby
|
|
93
|
+
filter = BloomFit.new
|
|
94
|
+
|
|
95
|
+
filter.size # => 1438
|
|
96
|
+
filter.hashes # => 10
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
That is equivalent to:
|
|
100
|
+
|
|
101
|
+
```ruby
|
|
102
|
+
filter = BloomFit.new(capacity: 100, false_positive_rate: 0.001)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Internally BloomFit uses the standard Bloom filter formulas:
|
|
106
|
+
|
|
107
|
+
```text
|
|
108
|
+
m = -(n * ln(p)) / (ln(2)^2)
|
|
109
|
+
k = (m / n) * ln(2)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
- `n`: expected number of inserted values
|
|
113
|
+
- `p`: target false-positive rate
|
|
114
|
+
- `m`: number of filter buckets (`size`)
|
|
115
|
+
- `k`: number of hash functions (`hashes`)
|
|
116
|
+
|
|
117
|
+
For example, if you expect about `10_000` inserts and can tolerate a `1%` false-positive rate, BloomFit will calculate `size: 95_851` and `hashes: 7` for you.
|
|
118
|
+
|
|
119
|
+
If you prefer a calculator, see [Bloom Filter Calculator](https://hur.st/bloomfilter/).
|
|
120
|
+
|
|
121
|
+
## Manual Sizing
|
|
122
|
+
|
|
123
|
+
If you already know the exact filter width and hash count you want, you can still pass them directly:
|
|
124
|
+
|
|
125
|
+
```ruby
|
|
126
|
+
filter = BloomFit.new(size: 95_851, hashes: 7)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
This bypasses automatic sizing.
|
|
130
|
+
|
|
131
|
+
## Common Operations
|
|
57
132
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
133
|
+
### Add and check membership
|
|
134
|
+
|
|
135
|
+
```ruby
|
|
136
|
+
filter = BloomFit.new(capacity: 100)
|
|
137
|
+
|
|
138
|
+
filter << "cat"
|
|
139
|
+
filter << "dog"
|
|
140
|
+
|
|
141
|
+
filter.include?("cat") # => true
|
|
142
|
+
filter.include?("bird") # => false
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### Use hash-like syntax for truthy values
|
|
146
|
+
|
|
147
|
+
```ruby
|
|
148
|
+
filter = BloomFit.new(capacity: 64)
|
|
149
|
+
|
|
150
|
+
filter[:cat] = true
|
|
151
|
+
filter[:dog] = false
|
|
152
|
+
|
|
153
|
+
filter[:cat] # => true
|
|
154
|
+
filter[:dog] # => false
|
|
155
|
+
|
|
156
|
+
filter.merge({ bird: true, ant: nil })
|
|
157
|
+
|
|
158
|
+
filter.include?(:bird) # => true
|
|
159
|
+
filter.include?(:ant) # => false
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
When merging a hash, only keys with truthy values are added.
|
|
163
|
+
|
|
164
|
+
### Merge, union, and intersection
|
|
165
|
+
|
|
166
|
+
```ruby
|
|
167
|
+
pets = BloomFit.new(capacity: 50)
|
|
168
|
+
pets << "cat" << "dog"
|
|
169
|
+
|
|
170
|
+
more_pets = BloomFit.new(capacity: 50)
|
|
171
|
+
more_pets << "dog" << "bird"
|
|
172
|
+
|
|
173
|
+
combined = pets | more_pets
|
|
174
|
+
overlap = pets & more_pets
|
|
175
|
+
|
|
176
|
+
combined.include?("bird") # => true
|
|
177
|
+
overlap.include?("dog") # => true
|
|
178
|
+
overlap.include?("cat") # => false
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
`#merge` also accepts arrays, sets, and other enumerables:
|
|
182
|
+
|
|
183
|
+
```ruby
|
|
184
|
+
filter = BloomFit.new(capacity: 100)
|
|
185
|
+
filter.merge(%w[cat dog bird])
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
Filters can only be combined when they have the same `size` and `hashes`. Otherwise BloomFit raises `BloomFit::ConfigurationMismatch`.
|
|
189
|
+
|
|
190
|
+
When you create filters with automatic sizing, use the same `capacity` and `false_positive_rate` for filters you plan to merge, union, or intersect.
|
|
191
|
+
|
|
192
|
+
### Save and load filters
|
|
193
|
+
|
|
194
|
+
```ruby
|
|
195
|
+
filter = BloomFit.new(capacity: 100)
|
|
196
|
+
filter << "cat" << "dog"
|
|
197
|
+
filter.save("pets.bloom")
|
|
198
|
+
|
|
199
|
+
reloaded = BloomFit.load("pets.bloom")
|
|
200
|
+
reloaded.include?("cat") # => true
|
|
201
|
+
reloaded.include?("dog") # => true
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Persistence uses Ruby `Marshal`. Only load files you trust.
|
|
205
|
+
|
|
206
|
+
### Inspect the bitmap
|
|
207
|
+
|
|
208
|
+
```ruby
|
|
209
|
+
filter = BloomFit.new(size: 16, hashes: 4)
|
|
210
|
+
filter << "cool"
|
|
211
|
+
|
|
212
|
+
filter.to_hex # => "1441"
|
|
213
|
+
filter.to_binary # => "0001010001000001"
|
|
214
|
+
filter.bitmap # => raw bytes from the native filter
|
|
73
215
|
```
|
|
74
216
|
|
|
217
|
+
`#bitmap` returns the native byte representation, which may include padding bytes beyond the configured filter width. `#to_binary` trims the result to exactly `size` bits.
|
|
218
|
+
|
|
219
|
+
## API Overview
|
|
220
|
+
|
|
221
|
+
| Method | Notes |
|
|
222
|
+
| --- | --- |
|
|
223
|
+
| `BloomFit.new` or `BloomFit.new(capacity:, false_positive_rate:)` | Creates a filter and calculates `size` and `hashes` automatically. Defaults to `capacity: 100`, `false_positive_rate: 0.001`. |
|
|
224
|
+
| `BloomFit.new(size:, hashes:)` | Creates a filter with explicit sizing when you want fixed parameters. |
|
|
225
|
+
| `add`, `<<` | Adds a value and returns the filter. |
|
|
226
|
+
| `add?` | Adds the value only when it does not already appear to be present. |
|
|
227
|
+
| `include?`, `key?`, `[]` | Probabilistic membership check. |
|
|
228
|
+
| `[]=` | Adds a key only when the assigned value is truthy. |
|
|
229
|
+
| `merge` | Merges another filter or an enumerable into the receiver. |
|
|
230
|
+
| `\|`, `union` | Returns a new filter containing the union. |
|
|
231
|
+
| `&`, `intersection` | Returns a new filter containing the intersection. |
|
|
232
|
+
| `clear` | Resets all bits to `0`. |
|
|
233
|
+
| `empty?` | Exact check for whether any bits are set. |
|
|
234
|
+
| `size`, `m` | Returns the configured filter width. |
|
|
235
|
+
| `hashes`, `k` | Returns the number of hash functions. |
|
|
236
|
+
| `set_bits`, `n` | Returns the number of bits currently set. |
|
|
237
|
+
| `stats` | Returns a human-readable summary including predicted false-positive rate. |
|
|
238
|
+
| `to_hex`, `to_binary`, `bitmap` | Returns the filter bitmap in different representations. |
|
|
239
|
+
| `save`, `BloomFit.load` | Serializes and restores a filter with Ruby `Marshal`. |
|
|
240
|
+
|
|
241
|
+
## Resources
|
|
242
|
+
|
|
243
|
+
- Background: [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter)
|
|
244
|
+
- Determining parameters: [Scalable Datasets: Bloom Filters in Ruby](http://www.igvita.com/2008/12/27/scalable-datasets-bloom-filters-in-ruby/)
|
|
245
|
+
- Applications and motivation: [Flow analysis: Time based bloom filter](http://www.igvita.com/2010/01/06/flow-analysis-time-based-bloom-filters/)
|
|
246
|
+
- Calculator: [Bloom Filter Calculator](https://hur.st/bloomfilter/)
|
|
247
|
+
|
|
75
248
|
## Credits
|
|
76
249
|
|
|
77
250
|
- Tatsuya Mori <valdzone@gmail.com> (Original C implementation)
|
data/lib/bloom_fit/version.rb
CHANGED
data/lib/bloom_fit.rb
CHANGED
|
@@ -28,6 +28,8 @@ require "bloom_fit/version"
|
|
|
28
28
|
class BloomFit
|
|
29
29
|
extend Forwardable
|
|
30
30
|
|
|
31
|
+
LN2 = Math.log(2.0).freeze
|
|
32
|
+
|
|
31
33
|
# The wrapped native +CBloomFilter+ instance.
|
|
32
34
|
#
|
|
33
35
|
# This is mostly useful for low-level integrations and internal filter
|
|
@@ -40,9 +42,19 @@ class BloomFit
|
|
|
40
42
|
# but the best values depend on how many keys you expect to insert and how
|
|
41
43
|
# many false positives you can tolerate.
|
|
42
44
|
#
|
|
45
|
+
# @param capacity [Integer] expected number of elements to store in the set
|
|
46
|
+
# @param false_positive_rate [Float] target false-positive rate, strictly between 0 and 1
|
|
43
47
|
# @param size [Integer] number of buckets in a bloom filter
|
|
44
48
|
# @param hashes [Integer] number of hash functions
|
|
45
|
-
def initialize(size:
|
|
49
|
+
def initialize(capacity: 100, false_positive_rate: 0.001, size: nil, hashes: 4)
|
|
50
|
+
if size.nil? || hashes.nil?
|
|
51
|
+
raise ArgumentError, "capacity must be > 0" unless capacity.positive?
|
|
52
|
+
raise ArgumentError, "false_positive_rate must be between 0 and 1" if false_positive_rate <= 0.0 || false_positive_rate >= 1.0
|
|
53
|
+
|
|
54
|
+
size = (-capacity.to_f * Math.log(false_positive_rate) / (LN2**2)).ceil
|
|
55
|
+
hashes = (size / capacity * LN2).ceil
|
|
56
|
+
end
|
|
57
|
+
|
|
46
58
|
@bf = CBloomFilter.new(size, hashes)
|
|
47
59
|
end
|
|
48
60
|
|
data/lib/cbloomfilter.bundle
CHANGED
|
Binary file
|
data/test/bloom_fit_test.rb
CHANGED
|
@@ -3,6 +3,28 @@ require "test_helper"
|
|
|
3
3
|
class BloomFitTest < Minitest::Spec
|
|
4
4
|
subject { BloomFit.new(size: 100, hashes: 4) }
|
|
5
5
|
|
|
6
|
+
describe ".new" do
|
|
7
|
+
it "accepts size and hashes override" do
|
|
8
|
+
bf = BloomFit.new(size: 10, hashes: 1)
|
|
9
|
+
assert_equal 10, bf.size
|
|
10
|
+
assert_equal 1, bf.hashes
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it "has default capacity and false positive-rate" do
|
|
14
|
+
bf = BloomFit.new
|
|
15
|
+
# https://hur.st/bloomfilter/?n=100&p=0.001&m=&k=
|
|
16
|
+
assert_equal 1438, bf.size
|
|
17
|
+
assert_equal 10, bf.hashes
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it "calculates size and hashes given a capacity and false postiive rate" do
|
|
21
|
+
bf = BloomFit.new(capacity: 10_000, false_positive_rate: 0.0001)
|
|
22
|
+
# https://hur.st/bloomfilter/?n=10000&p=0.0001&m=&k=
|
|
23
|
+
assert_equal 191_702, bf.size
|
|
24
|
+
assert_equal 14, bf.hashes
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
6
28
|
describe "#empty?" do
|
|
7
29
|
it "returns true when nothing set" do
|
|
8
30
|
assert_equal true, subject.empty? # rubocop:disable Minitest/AssertTruthy
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
require "test_helper"
|
|
2
|
+
|
|
3
|
+
class CBloomFilterTest < Minitest::Spec
|
|
4
|
+
subject { CBloomFilter.new }
|
|
5
|
+
|
|
6
|
+
describe "#m" do
|
|
7
|
+
it "defaults" do
|
|
8
|
+
assert_equal 1000, subject.m
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
it "is set by the 1st arg of the contructor" do
|
|
12
|
+
bf = CBloomFilter.new(10_000)
|
|
13
|
+
assert_equal 10_000, bf.m
|
|
14
|
+
end
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
describe "#k" do
|
|
18
|
+
it "defaults" do
|
|
19
|
+
assert_equal 4, subject.k
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
it "is set by the 2nd arg of the contructor" do
|
|
23
|
+
bf = CBloomFilter.new(10_000, 9)
|
|
24
|
+
assert_equal 9, bf.k
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
describe "#set_bits" do
|
|
29
|
+
it "initializes to zero" do
|
|
30
|
+
assert_equal 0, subject.set_bits
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
it "counts the bits when active" do
|
|
34
|
+
subject.add("foo")
|
|
35
|
+
assert_equal 4, subject.set_bits
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
describe "#add" do
|
|
40
|
+
it "adds keys to the filter set" do
|
|
41
|
+
subject.add("foo")
|
|
42
|
+
subject.add("bar")
|
|
43
|
+
assert_includes subject, "foo"
|
|
44
|
+
assert_includes subject, "bar"
|
|
45
|
+
refute_includes subject, "baz"
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
describe "#include?" do
|
|
50
|
+
it "returns true when a key is in the set" do
|
|
51
|
+
subject.add("foo")
|
|
52
|
+
assert_equal true, subject.include?("foo") # rubocop:disable Minitest/AssertTruthy
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
it "returns false when a key is not in the set" do
|
|
56
|
+
subject.add("foo")
|
|
57
|
+
assert_equal false, subject.include?("bar") # rubocop:disable Minitest/RefuteFalse
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
describe "#clear" do
|
|
62
|
+
it "clears a set" do
|
|
63
|
+
subject.add("foo")
|
|
64
|
+
subject.add("bar")
|
|
65
|
+
subject.add("baz")
|
|
66
|
+
assert subject.set_bits.positive?
|
|
67
|
+
subject.clear
|
|
68
|
+
assert subject.set_bits.zero?
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
describe "#merge" do
|
|
73
|
+
it "adds keys from another set" do
|
|
74
|
+
subject.add("foo")
|
|
75
|
+
|
|
76
|
+
bf = CBloomFilter.new
|
|
77
|
+
bf.add("bar")
|
|
78
|
+
bf.add("baz")
|
|
79
|
+
|
|
80
|
+
subject.merge(bf)
|
|
81
|
+
assert_includes subject, "foo"
|
|
82
|
+
assert_includes subject, "bar"
|
|
83
|
+
assert_includes subject, "baz"
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
describe "#&" do
|
|
88
|
+
it "intersects keys from another set" do
|
|
89
|
+
subject.add("foo")
|
|
90
|
+
subject.add("bar")
|
|
91
|
+
|
|
92
|
+
bf = CBloomFilter.new
|
|
93
|
+
bf.add("bar")
|
|
94
|
+
bf.add("baz")
|
|
95
|
+
|
|
96
|
+
bf2 = subject & bf
|
|
97
|
+
refute_includes bf2, "foo"
|
|
98
|
+
assert_includes bf2, "bar"
|
|
99
|
+
refute_includes bf2, "baz"
|
|
100
|
+
|
|
101
|
+
bf3 = bf & subject
|
|
102
|
+
refute_includes bf3, "foo"
|
|
103
|
+
assert_includes bf3, "bar"
|
|
104
|
+
refute_includes bf3, "baz"
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
describe "#|" do
|
|
109
|
+
it "unions keys from another set" do
|
|
110
|
+
subject.add("foo")
|
|
111
|
+
subject.add("bar")
|
|
112
|
+
|
|
113
|
+
bf = CBloomFilter.new
|
|
114
|
+
bf.add("bar")
|
|
115
|
+
bf.add("baz")
|
|
116
|
+
|
|
117
|
+
bf2 = subject | bf
|
|
118
|
+
assert_includes bf2, "foo"
|
|
119
|
+
assert_includes bf2, "bar"
|
|
120
|
+
assert_includes bf2, "baz"
|
|
121
|
+
|
|
122
|
+
bf3 = bf | subject
|
|
123
|
+
assert_includes bf3, "foo"
|
|
124
|
+
assert_includes bf3, "bar"
|
|
125
|
+
assert_includes bf3, "baz"
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
describe "#bitmap" do
|
|
130
|
+
it "returns a binary bitmap of all zeros when empty (including a terminating byte)" do
|
|
131
|
+
bf = CBloomFilter.new(16)
|
|
132
|
+
assert_equal "\x00\x00\x00".b, bf.bitmap
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
it "returns a binary bitmap representing the set" do
|
|
136
|
+
bf = CBloomFilter.new(16, 4)
|
|
137
|
+
bf.add("something")
|
|
138
|
+
assert_equal "(\x82\x00".b, bf.bitmap
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
it "returns a binary bitmap representing the set even if not a multiple of 8 bits (includes padding)" do
|
|
142
|
+
bf = CBloomFilter.new(20, 4)
|
|
143
|
+
bf.add("wow")
|
|
144
|
+
assert_equal "\x04\x14\x00\x00".b, bf.bitmap
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
describe "#load" do
|
|
149
|
+
it "overwrites the bitmap" do
|
|
150
|
+
bf = CBloomFilter.new(1000, 4)
|
|
151
|
+
bf.add("foo")
|
|
152
|
+
bf.add("bar")
|
|
153
|
+
subject.load(bf.bitmap)
|
|
154
|
+
assert_includes subject, "foo"
|
|
155
|
+
assert_includes subject, "bar"
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bloom_fit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ryan McGeary
|
|
@@ -31,6 +31,7 @@ files:
|
|
|
31
31
|
- lib/bloom_fit/version.rb
|
|
32
32
|
- lib/cbloomfilter.bundle
|
|
33
33
|
- test/bloom_fit_test.rb
|
|
34
|
+
- test/c_bloom_filter_test.rb
|
|
34
35
|
- test/test_helper.rb
|
|
35
36
|
homepage: https://github.com/rmm5t/bloom_fit
|
|
36
37
|
licenses: []
|