bloom_fit 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -13
- data/ext/cbloomfilter/cbloomfilter.c +33 -89
- data/lib/bloom_fit/configuration_mismatch.rb +4 -0
- data/lib/bloom_fit/version.rb +1 -1
- data/lib/bloom_fit.rb +83 -44
- data/lib/cbloomfilter.bundle +0 -0
- data/test/bloom_fit_test.rb +344 -0
- data/test/test_helper.rb +6 -0
- metadata +8 -7
- data/spec/bloom_fit_spec.rb +0 -129
- data/spec/helper.rb +0 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: efa22c92049e3607485a8fcfe471b15cca6e85e6da0c7b19b65f74b9f6ad5fe9
|
|
4
|
+
data.tar.gz: 5e8432456b1258111671d536165217bc3e82e0e430c3bc63112abc4670f91e78
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 72738a57ccb3a1a8989e86993490c3ba6a4f90925c834c1acd70ba104df8ef2bb318d5c66830786ba662e88df09f9ce46d7184810e3d4ec1c6b4cc0b41fcec44
|
|
7
|
+
data.tar.gz: 7472e370d1a66a6034ecb2f0d4720b9edd12f21e181a37cae2869e0e34c70a829366e7ee6caf880f4c4d8c789bc18bdbe2f83e6699617ccc77460320f2a2a1af
|
data/README.md
CHANGED
|
@@ -4,7 +4,12 @@
|
|
|
4
4
|
[](https://github.com/rmm5t/bloom_fit/actions/workflows/ci.yml)
|
|
5
5
|
[](https://rubygems.org/gems/bloom_fit)
|
|
6
6
|
|
|
7
|
-
BloomFit provides a MRI/C-based non-counting bloom filter for use in your Ruby projects. It is heavily based on [bloomfilter-rb]'s native implementation, but
|
|
7
|
+
BloomFit provides a MRI/C-based non-counting bloom filter for use in your Ruby projects. It is heavily based on [bloomfilter-rb]'s native implementation, but differs in the following ways:
|
|
8
|
+
|
|
9
|
+
- uses DJB2 over CRC32 yielding better hash distribution
|
|
10
|
+
- improves performance for very large datasets
|
|
11
|
+
- avoids the need to supply a seed
|
|
12
|
+
- automatically calculates the bit size (m) and the number of hashes (k) when given a capacity and false-positive-rate
|
|
8
13
|
|
|
9
14
|
A [Bloom filter](http://en.wikipedia.org/wiki/Bloom_filter) is a space-efficient probabilistic data structure that is used to test whether an element is a member of a set. False positives are possible, but false negatives are not. Instead of using k different hash functions, this implementation uses a DJB2 hash with k seeds from the CRC table.
|
|
10
15
|
|
|
@@ -13,8 +18,6 @@ Performance of the Bloom filter depends on the following:
|
|
|
13
18
|
- size of the bit array
|
|
14
19
|
- number of hash functions
|
|
15
20
|
|
|
16
|
-
BloomFit is a fork of [bloomfilter-rb].
|
|
17
|
-
|
|
18
21
|
## Resources
|
|
19
22
|
|
|
20
23
|
- Background: [Bloom filter](http://en.wikipedia.org/wiki/Bloom_filter)
|
|
@@ -40,11 +43,11 @@ bf["bird"] = "bar"
|
|
|
40
43
|
bf["bird"] # => true
|
|
41
44
|
bf["mouse"] # => false
|
|
42
45
|
|
|
43
|
-
bf.stats
|
|
44
|
-
#
|
|
45
|
-
#
|
|
46
|
-
#
|
|
47
|
-
#
|
|
46
|
+
puts bf.stats
|
|
47
|
+
# Number of filter buckets (m): 3600
|
|
48
|
+
# Number of set bits (n): 20
|
|
49
|
+
# Number of filter hashes (k): 10
|
|
50
|
+
# Predicted false positive rate: 0.00%
|
|
48
51
|
```
|
|
49
52
|
|
|
50
53
|
If you'd like more control over the traditional inputs like bit size and the number of hashes:
|
|
@@ -62,11 +65,11 @@ bf["bird"] = "bar"
|
|
|
62
65
|
bf["bird"] # => true
|
|
63
66
|
bf["mouse"] # => false
|
|
64
67
|
|
|
65
|
-
bf.stats
|
|
66
|
-
#
|
|
67
|
-
#
|
|
68
|
-
#
|
|
69
|
-
#
|
|
68
|
+
puts bf.stats
|
|
69
|
+
# Number of filter buckets (m): 100
|
|
70
|
+
# Number of set bits (n): 4
|
|
71
|
+
# Number of filter hashes (k): 2
|
|
72
|
+
# Predicted false positive rate: 10.87%
|
|
70
73
|
```
|
|
71
74
|
|
|
72
75
|
## Credits
|
|
@@ -17,8 +17,7 @@ static unsigned int *salts = crc_table;
|
|
|
17
17
|
static VALUE cBloomFilter;
|
|
18
18
|
|
|
19
19
|
struct BloomFilter {
|
|
20
|
-
int m; /* # of
|
|
21
|
-
int b; /* # of bits in a bloom filter bucket */
|
|
20
|
+
int m; /* # of bits in a bloom filter */
|
|
22
21
|
int k; /* # of hash functions */
|
|
23
22
|
unsigned char *ptr; /* bits data */
|
|
24
23
|
int bytes; /* size of byte data */
|
|
@@ -72,7 +71,6 @@ static VALUE bf_alloc(VALUE klass) {
|
|
|
72
71
|
VALUE obj = TypedData_Make_Struct(klass, struct BloomFilter, &bf_type, bf);
|
|
73
72
|
|
|
74
73
|
bf->m = 0;
|
|
75
|
-
bf->b = 0;
|
|
76
74
|
bf->k = 0;
|
|
77
75
|
bf->ptr = NULL;
|
|
78
76
|
bf->bytes = 0;
|
|
@@ -80,52 +78,24 @@ static VALUE bf_alloc(VALUE klass) {
|
|
|
80
78
|
return obj;
|
|
81
79
|
}
|
|
82
80
|
|
|
83
|
-
void
|
|
84
|
-
int byte_offset =
|
|
85
|
-
int bit_offset =
|
|
86
|
-
unsigned int c = bf->ptr[byte_offset];
|
|
87
|
-
c += bf->ptr[byte_offset + 1] << 8;
|
|
88
|
-
unsigned int mask = ((1 << bf->b) - 1) << bit_offset;
|
|
89
|
-
if ((c & mask) == 0) {
|
|
90
|
-
// do nothing
|
|
91
|
-
} else {
|
|
92
|
-
// reduce the counter: 11 00 => 10 00 (suppose bf->b is 2)
|
|
93
|
-
c -= (1 << bit_offset) & ((1 << 8) -1);
|
|
94
|
-
// shift the bitmap right by 1 bit: 10 00 => 01 00
|
|
95
|
-
c = (~mask & c) | ((c & mask) >> (bit_offset + 1) << bit_offset);
|
|
96
|
-
|
|
97
|
-
bf->ptr[byte_offset] = c & ((1 << 8) - 1);
|
|
98
|
-
bf->ptr[byte_offset + 1] = (c & ((1 << 16) - 1)) >> 8;
|
|
99
|
-
}
|
|
100
|
-
}
|
|
81
|
+
static void bucket_set(struct BloomFilter *bf, int index) {
|
|
82
|
+
int byte_offset = index / 8;
|
|
83
|
+
int bit_offset = index % 8;
|
|
101
84
|
|
|
102
|
-
|
|
103
|
-
int byte_offset = (index * bf->b) / 8;
|
|
104
|
-
int bit_offset = (index * bf->b) % 8;
|
|
105
|
-
unsigned int c = bf->ptr[byte_offset];
|
|
106
|
-
c += bf->ptr[byte_offset + 1] << 8;
|
|
107
|
-
unsigned int mask = ((1 << bf->b) - 1) << bit_offset;
|
|
108
|
-
if ((c & mask) != mask) {
|
|
109
|
-
c = c + ((1 << bit_offset) & ((1 << 8) -1)) | c;
|
|
110
|
-
bf->ptr[byte_offset] = c & ((1 << 8) - 1);
|
|
111
|
-
bf->ptr[byte_offset + 1] = (c & ((1 << 16) - 1)) >> 8;
|
|
112
|
-
}
|
|
85
|
+
bf->ptr[byte_offset] |= (unsigned char) (1U << bit_offset);
|
|
113
86
|
}
|
|
114
87
|
|
|
115
|
-
int bucket_check(struct BloomFilter *bf, int index) {
|
|
116
|
-
int byte_offset =
|
|
117
|
-
int bit_offset =
|
|
118
|
-
unsigned int c = bf->ptr[byte_offset];
|
|
119
|
-
c += bf->ptr[byte_offset + 1] << 8;
|
|
88
|
+
static int bucket_check(struct BloomFilter *bf, int index) {
|
|
89
|
+
int byte_offset = index / 8;
|
|
90
|
+
int bit_offset = index % 8;
|
|
120
91
|
|
|
121
|
-
|
|
122
|
-
return (c & mask) >> bit_offset;
|
|
92
|
+
return (bf->ptr[byte_offset] >> bit_offset) & 1;
|
|
123
93
|
}
|
|
124
94
|
|
|
125
95
|
static VALUE bf_initialize(int argc, VALUE *argv, VALUE self) {
|
|
126
96
|
struct BloomFilter *bf;
|
|
127
97
|
VALUE arg1, arg2;
|
|
128
|
-
int m, k
|
|
98
|
+
int m, k;
|
|
129
99
|
|
|
130
100
|
bf = bf_ptr(self);
|
|
131
101
|
|
|
@@ -143,21 +113,20 @@ static VALUE bf_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
143
113
|
|
|
144
114
|
m = FIX2INT(arg1);
|
|
145
115
|
k = FIX2INT(arg2);
|
|
146
|
-
b = 1;
|
|
147
116
|
|
|
148
117
|
if (m < 1)
|
|
149
118
|
rb_raise(rb_eArgError, "array size");
|
|
150
119
|
if (k < 1)
|
|
151
120
|
rb_raise(rb_eArgError, "hash length");
|
|
152
121
|
|
|
153
|
-
bf->b = b;
|
|
154
122
|
bf->m = m;
|
|
155
123
|
bf->k = k;
|
|
156
124
|
|
|
157
125
|
ruby_xfree(bf->ptr);
|
|
158
126
|
bf->ptr = NULL;
|
|
159
127
|
bf->bytes = 0;
|
|
160
|
-
|
|
128
|
+
/* Preserve the existing serialized bitmap length, including one padding byte. */
|
|
129
|
+
bf->bytes = (m + 15) / 8;
|
|
161
130
|
bf->ptr = ALLOC_N(unsigned char, bf->bytes);
|
|
162
131
|
|
|
163
132
|
/* initialize the bits with zeros */
|
|
@@ -194,7 +163,7 @@ static VALUE bf_set_bits(VALUE self){
|
|
|
194
163
|
return INT2FIX(count);
|
|
195
164
|
}
|
|
196
165
|
|
|
197
|
-
static VALUE
|
|
166
|
+
static VALUE bf_add(VALUE self, VALUE key) {
|
|
198
167
|
VALUE skey;
|
|
199
168
|
unsigned long hash;
|
|
200
169
|
int index;
|
|
@@ -268,57 +237,34 @@ static VALUE bf_or(VALUE self, VALUE other) {
|
|
|
268
237
|
return obj;
|
|
269
238
|
}
|
|
270
239
|
|
|
271
|
-
static VALUE bf_include(
|
|
240
|
+
static VALUE bf_include(VALUE self, VALUE key) {
|
|
241
|
+
VALUE skey;
|
|
272
242
|
unsigned long hash;
|
|
273
|
-
int i, len, m, k;
|
|
274
243
|
int index;
|
|
275
|
-
|
|
244
|
+
int i, len, m, k;
|
|
276
245
|
char *ckey;
|
|
277
|
-
|
|
278
|
-
struct BloomFilter *bf;
|
|
246
|
+
struct BloomFilter *bf = bf_ptr(self);
|
|
279
247
|
|
|
280
|
-
|
|
248
|
+
skey = rb_obj_as_string(key);
|
|
249
|
+
ckey = StringValuePtr(skey);
|
|
250
|
+
len = (int) (RSTRING_LEN(skey)); /* length of the string in bytes */
|
|
281
251
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
hash = (unsigned long) djb2(ckey, len);
|
|
294
|
-
for (i = 0; i <= k - 1; i++) {
|
|
295
|
-
index = (int) ((hash ^ salts[i]) % (unsigned int) (m));
|
|
296
|
-
|
|
297
|
-
/* check the bit at the index */
|
|
298
|
-
if (!bucket_check(bf, index)) {
|
|
299
|
-
return Qfalse; /* i.e., it is a new entry ; escape the loop */
|
|
300
|
-
}
|
|
252
|
+
m = bf->m;
|
|
253
|
+
k = bf->k;
|
|
254
|
+
|
|
255
|
+
hash = (unsigned long) djb2(ckey, len);
|
|
256
|
+
for (i = 0; i <= k - 1; i++) {
|
|
257
|
+
index = (int) ((hash ^ salts[i]) % (unsigned int) (m));
|
|
258
|
+
|
|
259
|
+
/* check the bit at the index */
|
|
260
|
+
if (!bucket_check(bf, index)) {
|
|
261
|
+
return Qfalse; /* i.e., it is a new entry ; escape the loop */
|
|
301
262
|
}
|
|
302
263
|
}
|
|
303
264
|
|
|
304
265
|
return Qtrue;
|
|
305
266
|
}
|
|
306
267
|
|
|
307
|
-
static VALUE bf_to_s(VALUE self) {
|
|
308
|
-
struct BloomFilter *bf = bf_ptr(self);
|
|
309
|
-
unsigned char *ptr;
|
|
310
|
-
int i;
|
|
311
|
-
VALUE str;
|
|
312
|
-
|
|
313
|
-
str = rb_str_new(0, bf->m);
|
|
314
|
-
|
|
315
|
-
ptr = (unsigned char *) RSTRING_PTR(str);
|
|
316
|
-
for (i = 0; i < bf->m; i++)
|
|
317
|
-
*ptr++ = bucket_check(bf, i) ? '1' : '0';
|
|
318
|
-
|
|
319
|
-
return str;
|
|
320
|
-
}
|
|
321
|
-
|
|
322
268
|
static VALUE bf_bitmap(VALUE self) {
|
|
323
269
|
struct BloomFilter *bf = bf_ptr(self);
|
|
324
270
|
|
|
@@ -346,15 +292,13 @@ void Init_cbloomfilter(void) {
|
|
|
346
292
|
rb_define_method(cBloomFilter, "m", bf_m, 0);
|
|
347
293
|
rb_define_method(cBloomFilter, "k", bf_k, 0);
|
|
348
294
|
rb_define_method(cBloomFilter, "set_bits", bf_set_bits, 0);
|
|
349
|
-
|
|
350
|
-
rb_define_method(cBloomFilter, "
|
|
351
|
-
rb_define_method(cBloomFilter, "include?", bf_include, -1);
|
|
295
|
+
rb_define_method(cBloomFilter, "add", bf_add, 1);
|
|
296
|
+
rb_define_method(cBloomFilter, "include?", bf_include, 1);
|
|
352
297
|
rb_define_method(cBloomFilter, "clear", bf_clear, 0);
|
|
353
|
-
rb_define_method(cBloomFilter, "merge
|
|
298
|
+
rb_define_method(cBloomFilter, "merge", bf_merge, 1);
|
|
354
299
|
rb_define_method(cBloomFilter, "&", bf_and, 1);
|
|
355
300
|
rb_define_method(cBloomFilter, "|", bf_or, 1);
|
|
356
301
|
|
|
357
|
-
rb_define_method(cBloomFilter, "to_s", bf_to_s, 0);
|
|
358
302
|
rb_define_method(cBloomFilter, "bitmap", bf_bitmap, 0);
|
|
359
303
|
rb_define_method(cBloomFilter, "load", bf_load, 1);
|
|
360
304
|
|
data/lib/bloom_fit/version.rb
CHANGED
data/lib/bloom_fit.rb
CHANGED
|
@@ -1,63 +1,110 @@
|
|
|
1
|
+
require "forwardable"
|
|
2
|
+
|
|
1
3
|
require "cbloomfilter"
|
|
4
|
+
require "bloom_fit/configuration_mismatch"
|
|
2
5
|
require "bloom_fit/version"
|
|
3
6
|
|
|
4
7
|
class BloomFit
|
|
5
|
-
|
|
6
|
-
end
|
|
8
|
+
extend Forwardable
|
|
7
9
|
|
|
8
10
|
attr_reader :bf
|
|
9
11
|
|
|
12
|
+
# @param size [Integer] number of buckets in a bloom filter
|
|
13
|
+
# @param hashes [Integer] number of hash functions
|
|
10
14
|
def initialize(size: 1_000, hashes: 4)
|
|
11
|
-
@
|
|
12
|
-
|
|
15
|
+
@bf = CBloomFilter.new(size, hashes)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def_delegators :@bf, :m, :k, :bitmap, :include?, :clear, :set_bits
|
|
19
|
+
|
|
20
|
+
alias size m
|
|
21
|
+
alias hashes k
|
|
22
|
+
alias key? include?
|
|
23
|
+
alias [] include?
|
|
24
|
+
alias n set_bits
|
|
13
25
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
@bf = CBloomFilter.new(@size, @hashes)
|
|
26
|
+
def empty?
|
|
27
|
+
set_bits.zero?
|
|
17
28
|
end
|
|
18
29
|
|
|
19
|
-
|
|
20
|
-
|
|
30
|
+
# Adds the given key to the set and returns +self+. Mimics the behavior of
|
|
31
|
+
# +Set#add+
|
|
32
|
+
def add(key)
|
|
33
|
+
@bf.add(key)
|
|
34
|
+
self
|
|
21
35
|
end
|
|
22
|
-
alias
|
|
36
|
+
alias << add
|
|
23
37
|
|
|
24
|
-
|
|
25
|
-
|
|
38
|
+
# Adds the given key to the set if the value is truthy. Mimics the behavior of
|
|
39
|
+
# +Hash#[]=+
|
|
40
|
+
def []=(key, value)
|
|
41
|
+
@bf.add(key) if value
|
|
26
42
|
end
|
|
27
|
-
alias key? include?
|
|
28
|
-
alias [] include?
|
|
29
43
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def
|
|
44
|
+
# Adds the given key to the set and returns +self+. If the key is already
|
|
45
|
+
# in the set, returns +nil+. Mimics the behavior of +Set#add?+
|
|
46
|
+
def add?(key)
|
|
47
|
+
return nil if include?(key) # rubocop:disable Style/ReturnNilInPredicateMethodDefinition
|
|
48
|
+
add(key)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Returns a string of the set bits in hex format
|
|
52
|
+
def to_hex
|
|
53
|
+
length = ((size / 8.0).ceil * 8 / 4)
|
|
54
|
+
bitmap.unpack1("H*")[0...length]
|
|
55
|
+
end
|
|
33
56
|
|
|
34
|
-
# Returns
|
|
35
|
-
def
|
|
36
|
-
|
|
57
|
+
# Returns a string of the set bits in binary format
|
|
58
|
+
def to_binary
|
|
59
|
+
bitmap.unpack1("B*")[0...size]
|
|
37
60
|
end
|
|
38
61
|
|
|
39
|
-
#
|
|
40
|
-
#
|
|
41
|
-
|
|
62
|
+
# Adds the set from another BloomFit filter or adds all the elements from an
|
|
63
|
+
# enumerable. Mimics the behavior of +Set#merge+
|
|
64
|
+
def merge(other)
|
|
65
|
+
if other.is_a?(BloomFit)
|
|
66
|
+
raise BloomFit::ConfigurationMismatch unless same_parameters?(other)
|
|
67
|
+
@bf.merge(other.bf)
|
|
68
|
+
elsif other.respond_to?(:each_key)
|
|
69
|
+
other.each { |k, v| add(k) if v }
|
|
70
|
+
elsif other.is_a?(Enumerable)
|
|
71
|
+
other.each { |k| add(k) }
|
|
72
|
+
else
|
|
73
|
+
raise ArgumentError, "value must be enumerable or another BloomFit filter"
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Computes the intersection of two Bloom filters. It requires that both
|
|
78
|
+
# filters have the same size; otherwise, +BloomFit::ConfigurationMismatch+
|
|
79
|
+
# is raised.
|
|
42
80
|
def &(other)
|
|
43
81
|
raise BloomFit::ConfigurationMismatch unless same_parameters?(other)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
82
|
+
self.class.new(size:, hashes:).tap do |result|
|
|
83
|
+
result.instance_variable_set(:@bf, @bf.&(other.bf))
|
|
84
|
+
end
|
|
47
85
|
end
|
|
86
|
+
alias intersection &
|
|
48
87
|
|
|
49
|
-
# Computes the union of two Bloom filters.
|
|
50
|
-
#
|
|
51
|
-
#
|
|
88
|
+
# Computes the union of two Bloom filters. It requires that both filters
|
|
89
|
+
# have the same size; otherwise, +BloomFit::ConfigurationMismatch+ is
|
|
90
|
+
# raised.
|
|
52
91
|
def |(other)
|
|
53
92
|
raise BloomFit::ConfigurationMismatch unless same_parameters?(other)
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
93
|
+
self.class.new(size:, hashes:).tap do |result|
|
|
94
|
+
result.instance_variable_set(:@bf, @bf.|(other.bf))
|
|
95
|
+
end
|
|
57
96
|
end
|
|
97
|
+
alias union |
|
|
58
98
|
|
|
59
|
-
def
|
|
60
|
-
|
|
99
|
+
def stats
|
|
100
|
+
fpr = ((1.0 - Math.exp(-(k * n).to_f / m))**k) * 100
|
|
101
|
+
|
|
102
|
+
(+"").tap do |s|
|
|
103
|
+
s << format("Number of filter buckets (m): %d\n", m)
|
|
104
|
+
s << format("Number of set bits (n): %d\n", n)
|
|
105
|
+
s << format("Number of filter hashes (k): %d\n", k)
|
|
106
|
+
s << format("Predicted false positive rate: %.2f%%\n", fpr)
|
|
107
|
+
end
|
|
61
108
|
end
|
|
62
109
|
|
|
63
110
|
def marshal_load(ary)
|
|
@@ -68,11 +115,11 @@ class BloomFit
|
|
|
68
115
|
end
|
|
69
116
|
|
|
70
117
|
def marshal_dump
|
|
71
|
-
[
|
|
118
|
+
[size, hashes, bitmap]
|
|
72
119
|
end
|
|
73
120
|
|
|
74
121
|
def self.load(filename)
|
|
75
|
-
Marshal.load(File.open(filename, "r"))
|
|
122
|
+
Marshal.load(File.open(filename, "r")) # rubocop:disable Security/MarshalLoad
|
|
76
123
|
end
|
|
77
124
|
|
|
78
125
|
def save(filename)
|
|
@@ -81,14 +128,6 @@ class BloomFit
|
|
|
81
128
|
end
|
|
82
129
|
end
|
|
83
130
|
|
|
84
|
-
def stats
|
|
85
|
-
fp = ((1.0 - Math.exp(-(@hashes * size).to_f / @size))**@hashes) * 100
|
|
86
|
-
printf "Number of filter buckets (m): %d\n", @size
|
|
87
|
-
printf "Number of set bits (n): %d\n", set_bits
|
|
88
|
-
printf "Number of filter hashes (k) : %d\n", @hashes
|
|
89
|
-
printf "Predicted false positive rate = %.2f%%\n", fp
|
|
90
|
-
end
|
|
91
|
-
|
|
92
131
|
protected
|
|
93
132
|
|
|
94
133
|
# Returns true if parameters of the +other+ filter are
|
data/lib/cbloomfilter.bundle
CHANGED
|
Binary file
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
require "test_helper"
|
|
2
|
+
|
|
3
|
+
class BloomFitTest < Minitest::Spec
|
|
4
|
+
subject { BloomFit.new(size: 100, hashes: 4) }
|
|
5
|
+
|
|
6
|
+
describe "#empty?" do
|
|
7
|
+
it "returns true when nothing set" do
|
|
8
|
+
assert_equal true, subject.empty? # rubocop:disable Minitest/AssertTruthy
|
|
9
|
+
assert_empty subject
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
it "returns false when something set" do
|
|
13
|
+
subject << "key"
|
|
14
|
+
assert_equal false, subject.empty? # rubocop:disable Minitest/RefuteFalse
|
|
15
|
+
refute_empty subject
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
describe "#add" do
|
|
20
|
+
it "adds the key and returns self" do
|
|
21
|
+
assert_equal subject, subject.add("test1")
|
|
22
|
+
assert_equal subject, subject.add("test2")
|
|
23
|
+
assert_includes subject, "test1"
|
|
24
|
+
assert_includes subject, "test2"
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
it "is aliased as #<<" do
|
|
28
|
+
subject << "test1" << "test2"
|
|
29
|
+
assert_includes subject, "test1"
|
|
30
|
+
assert_includes subject, "test2"
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
it "is aliased as #[]=, and handles truthy/falsey values" do
|
|
34
|
+
subject["dog"] = :bar
|
|
35
|
+
subject["cat"] = :foo
|
|
36
|
+
assert_includes subject, "dog"
|
|
37
|
+
assert_includes subject, "cat"
|
|
38
|
+
|
|
39
|
+
subject["bat"] = nil
|
|
40
|
+
subject["pig"] = false
|
|
41
|
+
refute_includes subject, "bat"
|
|
42
|
+
refute_includes subject, "pig"
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
it "casts using #to_s as necessary" do
|
|
46
|
+
subject << :symbol << true << 12_345
|
|
47
|
+
|
|
48
|
+
assert_includes subject, "symbol"
|
|
49
|
+
assert_includes subject, :symbol
|
|
50
|
+
assert_includes subject, "true"
|
|
51
|
+
assert_includes subject, "12345"
|
|
52
|
+
assert_includes subject, 12_345
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
describe "#add?" do
|
|
57
|
+
it "adds new key and returns self" do
|
|
58
|
+
assert_equal subject, subject.add("test1")
|
|
59
|
+
assert_equal subject, subject.add("test2")
|
|
60
|
+
assert_includes subject, "test1"
|
|
61
|
+
assert_includes subject, "test2"
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
it "return nil if the key already exists" do
|
|
65
|
+
subject << "test1"
|
|
66
|
+
subject << "test2"
|
|
67
|
+
assert_includes subject, "test1"
|
|
68
|
+
assert_includes subject, "test2"
|
|
69
|
+
assert_nil subject.add?("test1")
|
|
70
|
+
assert_nil subject.add?("test2")
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
describe "#include?" do
|
|
75
|
+
it "returns true when a key is in the set" do
|
|
76
|
+
subject << "test1"
|
|
77
|
+
subject << "test2"
|
|
78
|
+
assert_equal true, subject.include?("test1") # rubocop:disable Minitest/AssertTruthy
|
|
79
|
+
assert_equal true, subject.include?("test2") # rubocop:disable Minitest/AssertTruthy
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
it "returns false when a key is not in the set" do
|
|
83
|
+
assert_equal false, subject.include?("test") # rubocop:disable Minitest/RefuteFalse
|
|
84
|
+
assert_equal false, subject.include?("nada") # rubocop:disable Minitest/RefuteFalse
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it "is aliased as #key?" do
|
|
88
|
+
subject << "test1"
|
|
89
|
+
subject << "test2"
|
|
90
|
+
assert subject.key?("test1")
|
|
91
|
+
assert subject.key?("test2")
|
|
92
|
+
refute subject.key?("test3")
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
it "is aliased as #[]" do
|
|
96
|
+
subject << "test1"
|
|
97
|
+
subject << "test2"
|
|
98
|
+
assert subject["test1"]
|
|
99
|
+
assert subject["test2"]
|
|
100
|
+
refute subject["test3"]
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
describe "#clear" do
|
|
105
|
+
it "zeroes the bits" do
|
|
106
|
+
subject.add("test")
|
|
107
|
+
assert_includes subject, "test"
|
|
108
|
+
assert_includes subject.to_binary, "1"
|
|
109
|
+
subject.clear
|
|
110
|
+
refute_includes subject, "test"
|
|
111
|
+
refute_includes subject.to_binary, "1"
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
describe "#set_bits" do
|
|
116
|
+
it "returns the number of bits set to 1" do
|
|
117
|
+
bf = BloomFit.new(size: 100, hashes: 4)
|
|
118
|
+
bf.add("bits")
|
|
119
|
+
assert_equal 4, bf.set_bits
|
|
120
|
+
|
|
121
|
+
bf = BloomFit.new(size: 100, hashes: 1)
|
|
122
|
+
bf.add("bits")
|
|
123
|
+
assert_equal 1, bf.set_bits
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
describe "#bitmap" do
|
|
128
|
+
it "returns a binary bitmap of all zeros when empty (including a terminating byte)" do
|
|
129
|
+
bf = BloomFit.new(size: 16)
|
|
130
|
+
assert_equal "\x00\x00\x00".b, bf.bitmap
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
it "returns a binary bitmap representing the set" do
|
|
134
|
+
bf = BloomFit.new(size: 16, hashes: 4)
|
|
135
|
+
bf.add("something")
|
|
136
|
+
assert_equal "(\x82\x00".b, bf.bitmap
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
it "returns a binary bitmap representing the set even if not a multiple of 8 bits" do
|
|
140
|
+
bf = BloomFit.new(size: 20, hashes: 4)
|
|
141
|
+
bf.add("wow")
|
|
142
|
+
assert_equal "\x04\x14\x00\x00".b, bf.bitmap
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
describe "#to_hex" do
|
|
147
|
+
it "returns a hex bitmap of all zeros when empty" do
|
|
148
|
+
bf = BloomFit.new(size: 16)
|
|
149
|
+
assert_equal "0000", bf.to_hex
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
it "returns a hex bitmap of all zeros when empty if not a multiple of 8 bits" do
|
|
153
|
+
bf = BloomFit.new(size: 18)
|
|
154
|
+
assert_equal "000000", bf.to_hex
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
it "returns a hex bitmap representing the set" do
|
|
158
|
+
bf = BloomFit.new(size: 16, hashes: 4)
|
|
159
|
+
bf.add("cool")
|
|
160
|
+
assert_equal "1441", bf.to_hex
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
describe "#to_binary" do
|
|
165
|
+
it "returns a binary bitmap of all zeros when empty" do
|
|
166
|
+
bf = BloomFit.new(size: 16)
|
|
167
|
+
assert_equal "0000000000000000", bf.to_binary
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
it "returns a binary bitmap of all zeros when empty if not a multiple of 8 bits" do
|
|
171
|
+
bf = BloomFit.new(size: 19)
|
|
172
|
+
assert_equal "0000000000000000000", bf.to_binary
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
it "returns a binary bitmap representing the set" do
|
|
176
|
+
bf = BloomFit.new(size: 16, hashes: 4)
|
|
177
|
+
bf << "cool" << "cat"
|
|
178
|
+
assert_equal "1001011001101001", bf.to_binary
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
describe "#merge" do
|
|
183
|
+
it "merges another BloomFit filter" do
|
|
184
|
+
bf1 = BloomFit.new(size: 100, hashes: 2)
|
|
185
|
+
bf2 = BloomFit.new(size: 100, hashes: 2)
|
|
186
|
+
bf1 << "mouse"
|
|
187
|
+
bf2 << "cat" << "dog"
|
|
188
|
+
refute_includes bf1, "cat"
|
|
189
|
+
refute_includes bf1, "dog"
|
|
190
|
+
bf1.merge(bf2)
|
|
191
|
+
assert_includes bf1, "mouse"
|
|
192
|
+
assert_includes bf1, "cat"
|
|
193
|
+
assert_includes bf1, "dog"
|
|
194
|
+
refute_includes bf2, "mouse"
|
|
195
|
+
assert_includes bf2, "cat"
|
|
196
|
+
assert_includes bf2, "dog"
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
it "merges an array" do
|
|
200
|
+
subject << "mouse"
|
|
201
|
+
subject.merge %i[cat dog]
|
|
202
|
+
assert_includes subject, "mouse"
|
|
203
|
+
assert_includes subject, "cat"
|
|
204
|
+
assert_includes subject, "dog"
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
it "merges a set" do
|
|
208
|
+
subject << "mouse"
|
|
209
|
+
subject.merge Set.new(%w[cat dog])
|
|
210
|
+
assert_includes subject, "mouse"
|
|
211
|
+
assert_includes subject, "cat"
|
|
212
|
+
assert_includes subject, "dog"
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
it "merges a hash ignoring falsey values" do
|
|
216
|
+
subject << "mouse"
|
|
217
|
+
subject.merge({ cat: 1, dog: 2, ant: false, bug: nil })
|
|
218
|
+
assert_includes subject, "mouse"
|
|
219
|
+
assert_includes subject, "cat"
|
|
220
|
+
assert_includes subject, "dog"
|
|
221
|
+
refute_includes subject, "ant"
|
|
222
|
+
refute_includes subject, "bug"
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
it "raises when merge is between incompatible filters" do
|
|
226
|
+
bf1 = BloomFit.new(size: 10)
|
|
227
|
+
bf2 = BloomFit.new(size: 20)
|
|
228
|
+
assert_raises(BloomFit::ConfigurationMismatch) { bf1.merge(bf2) }
|
|
229
|
+
end
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
describe "#&" do
|
|
233
|
+
it "returns intersection of both filters" do
|
|
234
|
+
bf1 = BloomFit.new(size: 35, hashes: 4)
|
|
235
|
+
bf1.add("test")
|
|
236
|
+
bf1.add("test1")
|
|
237
|
+
|
|
238
|
+
bf2 = BloomFit.new(size: 35, hashes: 4)
|
|
239
|
+
bf2.add("test")
|
|
240
|
+
bf2.add("test2")
|
|
241
|
+
|
|
242
|
+
bf3 = bf1 & bf2
|
|
243
|
+
assert_equal 35, bf3.size
|
|
244
|
+
assert_equal 4, bf3.hashes
|
|
245
|
+
assert_includes bf3, "test"
|
|
246
|
+
refute_includes bf3, "test1"
|
|
247
|
+
refute_includes bf3, "test2"
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
it "is aliased as #intersection" do
|
|
251
|
+
bf1 = BloomFit.new(size: 20, hashes: 4)
|
|
252
|
+
bf1.add("test")
|
|
253
|
+
bf1.add("test1")
|
|
254
|
+
|
|
255
|
+
bf2 = BloomFit.new(size: 20, hashes: 4)
|
|
256
|
+
bf2.add("test")
|
|
257
|
+
|
|
258
|
+
bf3 = bf1.intersection(bf2)
|
|
259
|
+
assert_includes bf3, "test"
|
|
260
|
+
refute_includes bf3, "test1"
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
it "raises when intersection is between incompatible filters" do
|
|
264
|
+
bf1 = BloomFit.new(size: 10)
|
|
265
|
+
bf2 = BloomFit.new(size: 20)
|
|
266
|
+
assert_raises(BloomFit::ConfigurationMismatch) { bf1 & bf2 }
|
|
267
|
+
|
|
268
|
+
bf1 = BloomFit.new(size: 10, hashes: 2)
|
|
269
|
+
bf2 = BloomFit.new(size: 10, hashes: 4)
|
|
270
|
+
assert_raises(BloomFit::ConfigurationMismatch) { bf1 & bf2 }
|
|
271
|
+
end
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
describe "#|" do
|
|
275
|
+
it "returns union with other filter" do
|
|
276
|
+
bf1 = BloomFit.new
|
|
277
|
+
bf1.add("test")
|
|
278
|
+
bf1.add("test1")
|
|
279
|
+
|
|
280
|
+
bf2 = BloomFit.new
|
|
281
|
+
bf2.add("test")
|
|
282
|
+
bf2.add("test2")
|
|
283
|
+
|
|
284
|
+
bf3 = bf1 | bf2
|
|
285
|
+
assert_includes bf3, "test"
|
|
286
|
+
assert_includes bf3, "test1"
|
|
287
|
+
assert_includes bf3, "test2"
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
it "is aliased as #union" do
|
|
291
|
+
bf1 = BloomFit.new(size: 20, hashes: 4)
|
|
292
|
+
bf1.add("test")
|
|
293
|
+
bf1.add("test1")
|
|
294
|
+
|
|
295
|
+
bf2 = BloomFit.new(size: 20, hashes: 4)
|
|
296
|
+
bf2.add("test")
|
|
297
|
+
|
|
298
|
+
bf3 = bf1.union(bf2)
|
|
299
|
+
assert_includes bf3, "test"
|
|
300
|
+
assert_includes bf3, "test1"
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
it "raises when union is between incompatible filters" do
|
|
304
|
+
bf1 = BloomFit.new(size: 10)
|
|
305
|
+
bf2 = BloomFit.new(size: 20)
|
|
306
|
+
assert_raises(BloomFit::ConfigurationMismatch) { bf1 | bf2 }
|
|
307
|
+
end
|
|
308
|
+
end
|
|
309
|
+
|
|
310
|
+
describe "#stats" do
|
|
311
|
+
it "returns current stats" do
|
|
312
|
+
bf = BloomFit.new(size: 10, hashes: 3)
|
|
313
|
+
expected = <<~STATS
|
|
314
|
+
Number of filter buckets (m): 10
|
|
315
|
+
Number of set bits (n): 0
|
|
316
|
+
Number of filter hashes (k): 3
|
|
317
|
+
Predicted false positive rate: 0.00%
|
|
318
|
+
STATS
|
|
319
|
+
assert_equal expected, bf.stats
|
|
320
|
+
end
|
|
321
|
+
end
|
|
322
|
+
|
|
323
|
+
describe "serialization" do
|
|
324
|
+
after { File.unlink("bf.out") }
|
|
325
|
+
|
|
326
|
+
it "marshalls" do
|
|
327
|
+
bf = BloomFit.new
|
|
328
|
+
assert bf.save("bf.out")
|
|
329
|
+
end
|
|
330
|
+
|
|
331
|
+
it "loads from marshalled" do
|
|
332
|
+
subject.add("foo")
|
|
333
|
+
subject.add("bar")
|
|
334
|
+
subject.save("bf.out")
|
|
335
|
+
|
|
336
|
+
bf2 = BloomFit.load("bf.out")
|
|
337
|
+
assert_includes bf2, "foo"
|
|
338
|
+
assert_includes bf2, "bar"
|
|
339
|
+
refute_includes bf2, "baz"
|
|
340
|
+
|
|
341
|
+
assert subject.send(:same_parameters?, bf2)
|
|
342
|
+
end
|
|
343
|
+
end
|
|
344
|
+
end
|
data/test/test_helper.rb
ADDED
metadata
CHANGED
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bloom_fit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
|
-
- Ilya Grigorik
|
|
8
|
-
- Tatsuya Mori
|
|
9
7
|
- Ryan McGeary
|
|
10
8
|
- Beshad Talayeminaei
|
|
9
|
+
- Ilya Grigorik
|
|
10
|
+
- Tatsuya Mori
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
13
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
14
14
|
dependencies: []
|
|
15
15
|
email:
|
|
16
|
-
- ilya@grigorik.com
|
|
17
|
-
- valdzone@gmail.com
|
|
18
16
|
- ryan@mcgeary.org
|
|
19
17
|
- 'btalayeminaei@gmail.com '
|
|
18
|
+
- ilya@grigorik.com
|
|
19
|
+
- valdzone@gmail.com
|
|
20
20
|
executables: []
|
|
21
21
|
extensions:
|
|
22
22
|
- ext/cbloomfilter/extconf.rb
|
|
@@ -27,10 +27,11 @@ files:
|
|
|
27
27
|
- ext/cbloomfilter/crc32.h
|
|
28
28
|
- ext/cbloomfilter/extconf.rb
|
|
29
29
|
- lib/bloom_fit.rb
|
|
30
|
+
- lib/bloom_fit/configuration_mismatch.rb
|
|
30
31
|
- lib/bloom_fit/version.rb
|
|
31
32
|
- lib/cbloomfilter.bundle
|
|
32
|
-
-
|
|
33
|
-
-
|
|
33
|
+
- test/bloom_fit_test.rb
|
|
34
|
+
- test/test_helper.rb
|
|
34
35
|
homepage: https://github.com/rmm5t/bloom_fit
|
|
35
36
|
licenses: []
|
|
36
37
|
metadata:
|
data/spec/bloom_fit_spec.rb
DELETED
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
require "helper"
|
|
2
|
-
|
|
3
|
-
describe BloomFit do
|
|
4
|
-
it "clears" do
|
|
5
|
-
bf = BloomFit.new(size: 100, hashes: 2)
|
|
6
|
-
bf.insert("test")
|
|
7
|
-
expect(bf.include?("test")).to be true
|
|
8
|
-
bf.clear
|
|
9
|
-
expect(bf.include?("test")).to be false
|
|
10
|
-
end
|
|
11
|
-
|
|
12
|
-
it "merges" do
|
|
13
|
-
bf1 = BloomFit.new(size: 100, hashes: 2)
|
|
14
|
-
bf2 = BloomFit.new(size: 100, hashes: 2)
|
|
15
|
-
bf2.insert("test")
|
|
16
|
-
expect(bf1.include?("test")).to be false
|
|
17
|
-
bf1.merge!(bf2)
|
|
18
|
-
expect(bf1.include?("test")).to be true
|
|
19
|
-
expect(bf2.include?("test")).to be true
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
it "tests set membership" do
|
|
23
|
-
bf = BloomFit.new(size: 100, hashes: 2)
|
|
24
|
-
bf.insert("test")
|
|
25
|
-
bf.insert("test1")
|
|
26
|
-
|
|
27
|
-
expect(bf.include?("test")).to be true
|
|
28
|
-
expect(bf.include?("abcd")).to be false
|
|
29
|
-
expect(bf.include?("test", "test1")).to be true
|
|
30
|
-
expect(bf.include?("test1", "abcd")).to be false
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
it "works with any object's to_s" do
|
|
34
|
-
subject.insert(:test)
|
|
35
|
-
subject.insert(:test1)
|
|
36
|
-
subject.insert(12_345)
|
|
37
|
-
|
|
38
|
-
expect(subject.include?("test")).to be true
|
|
39
|
-
expect(subject.include?("abcd")).to be false
|
|
40
|
-
expect(subject.include?("12345")).to be true
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
it "returns the number of bits set to 1" do
|
|
44
|
-
bf = BloomFit.new(hashes: 4)
|
|
45
|
-
bf.insert("test")
|
|
46
|
-
expect(bf.set_bits).to eq 4
|
|
47
|
-
|
|
48
|
-
bf = BloomFit.new(hashes: 1)
|
|
49
|
-
bf.insert("test")
|
|
50
|
-
expect(bf.set_bits).to eq 1
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
it "returns intersection with other filter" do
|
|
54
|
-
bf1 = BloomFit.new
|
|
55
|
-
bf1.insert("test")
|
|
56
|
-
bf1.insert("test1")
|
|
57
|
-
|
|
58
|
-
bf2 = BloomFit.new
|
|
59
|
-
bf2.insert("test")
|
|
60
|
-
bf2.insert("test2")
|
|
61
|
-
|
|
62
|
-
bf3 = bf1 & bf2
|
|
63
|
-
expect(bf3.include?("test")).to be true
|
|
64
|
-
expect(bf3.include?("test1")).to be false
|
|
65
|
-
expect(bf3.include?("test2")).to be false
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
it "raises an exception when intersection is to be computed for incompatible filters" do
|
|
69
|
-
bf1 = BloomFit.new(size: 10)
|
|
70
|
-
bf1.insert("test")
|
|
71
|
-
|
|
72
|
-
bf2 = BloomFit.new(size: 20)
|
|
73
|
-
bf2.insert("test")
|
|
74
|
-
|
|
75
|
-
expect { bf1 & bf2 }.to raise_error(BloomFit::ConfigurationMismatch)
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
it "returns union with other filter" do
|
|
79
|
-
bf1 = BloomFit.new
|
|
80
|
-
bf1.insert("test")
|
|
81
|
-
bf1.insert("test1")
|
|
82
|
-
|
|
83
|
-
bf2 = BloomFit.new
|
|
84
|
-
bf2.insert("test")
|
|
85
|
-
bf2.insert("test2")
|
|
86
|
-
|
|
87
|
-
bf3 = bf1 | bf2
|
|
88
|
-
expect(bf3.include?("test")).to be true
|
|
89
|
-
expect(bf3.include?("test1")).to be true
|
|
90
|
-
expect(bf3.include?("test2")).to be true
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
it "raises an exception when union is to be computed for incompatible filters" do
|
|
94
|
-
bf1 = BloomFit.new(size: 10)
|
|
95
|
-
bf1.insert("test")
|
|
96
|
-
|
|
97
|
-
bf2 = BloomFit.new(size: 20)
|
|
98
|
-
bf2.insert("test")
|
|
99
|
-
|
|
100
|
-
expect { bf1 | bf2 }.to raise_error(BloomFit::ConfigurationMismatch)
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
it "outputs current stats" do
|
|
104
|
-
subject.insert("test")
|
|
105
|
-
expect { subject.stats }.not_to raise_error
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
context "serialization" do
|
|
109
|
-
after { File.unlink("bf.out") }
|
|
110
|
-
|
|
111
|
-
it "marshalls" do
|
|
112
|
-
bf = BloomFit.new
|
|
113
|
-
expect { bf.save("bf.out") }.not_to raise_error
|
|
114
|
-
end
|
|
115
|
-
|
|
116
|
-
it "loads from marshalled" do
|
|
117
|
-
subject.insert("foo")
|
|
118
|
-
subject.insert("bar")
|
|
119
|
-
subject.save("bf.out")
|
|
120
|
-
|
|
121
|
-
bf2 = BloomFit.load("bf.out")
|
|
122
|
-
expect(bf2.include?("foo")).to be true
|
|
123
|
-
expect(bf2.include?("bar")).to be true
|
|
124
|
-
expect(bf2.include?("baz")).to be false
|
|
125
|
-
|
|
126
|
-
expect(subject.send(:same_parameters?, bf2)).to be true
|
|
127
|
-
end
|
|
128
|
-
end
|
|
129
|
-
end
|
data/spec/helper.rb
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
require "bloom_fit"
|