bloom_fit 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -14
- data/ext/cbloomfilter/cbloomfilter.c +104 -194
- data/ext/cbloomfilter/extconf.rb +0 -1
- data/lib/bloom_fit/configuration_mismatch.rb +4 -0
- data/lib/bloom_fit/version.rb +1 -1
- data/lib/bloom_fit.rb +96 -65
- data/lib/cbloomfilter.bundle +0 -0
- data/test/bloom_fit_test.rb +344 -0
- data/test/test_helper.rb +6 -0
- metadata +12 -69
- data/Rakefile +0 -12
- data/spec/bloom_fit_spec.rb +0 -152
- data/spec/helper.rb +0 -2
data/lib/bloom_fit.rb
CHANGED
|
@@ -1,107 +1,138 @@
|
|
|
1
|
-
require
|
|
2
|
-
|
|
1
|
+
require "forwardable"
|
|
2
|
+
|
|
3
|
+
require "cbloomfilter"
|
|
4
|
+
require "bloom_fit/configuration_mismatch"
|
|
5
|
+
require "bloom_fit/version"
|
|
3
6
|
|
|
4
7
|
class BloomFit
|
|
5
|
-
|
|
8
|
+
extend Forwardable
|
|
6
9
|
|
|
7
10
|
attr_reader :bf
|
|
8
11
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
# @param size [Integer] number of buckets in a bloom filter
|
|
13
|
+
# @param hashes [Integer] number of hash functions
|
|
14
|
+
def initialize(size: 1_000, hashes: 4)
|
|
15
|
+
@bf = CBloomFilter.new(size, hashes)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def_delegators :@bf, :m, :k, :bitmap, :include?, :clear, :set_bits
|
|
19
|
+
|
|
20
|
+
alias size m
|
|
21
|
+
alias hashes k
|
|
22
|
+
alias key? include?
|
|
23
|
+
alias [] include?
|
|
24
|
+
alias n set_bits
|
|
16
25
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
# arg 4: r => raise : whether to raise on bucket overflow
|
|
26
|
+
def empty?
|
|
27
|
+
set_bits.zero?
|
|
28
|
+
end
|
|
21
29
|
|
|
22
|
-
|
|
30
|
+
# Adds the given key to the set and returns +self+. Mimics the behavior of
|
|
31
|
+
# +Set#add+
|
|
32
|
+
def add(key)
|
|
33
|
+
@bf.add(key)
|
|
34
|
+
self
|
|
23
35
|
end
|
|
36
|
+
alias << add
|
|
24
37
|
|
|
25
|
-
|
|
26
|
-
|
|
38
|
+
# Adds the given key to the set if the value is truthy. Mimics the behavior of
|
|
39
|
+
# +Hash#[]=+
|
|
40
|
+
def []=(key, value)
|
|
41
|
+
@bf.add(key) if value
|
|
27
42
|
end
|
|
28
|
-
alias :[]= :insert
|
|
29
43
|
|
|
30
|
-
|
|
31
|
-
|
|
44
|
+
# Adds the given key to the set and returns +self+. If the key is already
|
|
45
|
+
# in the set, returns +nil+. Mimics the behavior of +Set#add?+
|
|
46
|
+
def add?(key)
|
|
47
|
+
return nil if include?(key) # rubocop:disable Style/ReturnNilInPredicateMethodDefinition
|
|
48
|
+
add(key)
|
|
32
49
|
end
|
|
33
|
-
alias :key? :include?
|
|
34
|
-
alias :[] :include?
|
|
35
50
|
|
|
36
|
-
|
|
37
|
-
def
|
|
38
|
-
|
|
39
|
-
|
|
51
|
+
# Returns a string of the set bits in hex format
|
|
52
|
+
def to_hex
|
|
53
|
+
length = ((size / 8.0).ceil * 8 / 4)
|
|
54
|
+
bitmap.unpack1("H*")[0...length]
|
|
55
|
+
end
|
|
40
56
|
|
|
41
|
-
# Returns
|
|
42
|
-
def
|
|
43
|
-
|
|
57
|
+
# Returns a string of the set bits in binary format
|
|
58
|
+
def to_binary
|
|
59
|
+
bitmap.unpack1("B*")[0...size]
|
|
44
60
|
end
|
|
45
61
|
|
|
46
|
-
#
|
|
47
|
-
#
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
62
|
+
# Adds the set from another BloomFit filter or adds all the elements from an
|
|
63
|
+
# enumerable. Mimics the behavior of +Set#merge+
|
|
64
|
+
def merge(other)
|
|
65
|
+
if other.is_a?(BloomFit)
|
|
66
|
+
raise BloomFit::ConfigurationMismatch unless same_parameters?(other)
|
|
67
|
+
@bf.merge(other.bf)
|
|
68
|
+
elsif other.respond_to?(:each_key)
|
|
69
|
+
other.each { |k, v| add(k) if v }
|
|
70
|
+
elsif other.is_a?(Enumerable)
|
|
71
|
+
other.each { |k| add(k) }
|
|
72
|
+
else
|
|
73
|
+
raise ArgumentError, "value must be enumerable or another BloomFit filter"
|
|
74
|
+
end
|
|
54
75
|
end
|
|
55
76
|
|
|
56
|
-
# Computes the
|
|
57
|
-
#
|
|
58
|
-
#
|
|
59
|
-
def
|
|
60
|
-
raise BloomFit::ConfigurationMismatch
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
77
|
+
# Computes the intersection of two Bloom filters. It requires that both
|
|
78
|
+
# filters have the same size and number of hashes; otherwise, +BloomFit::ConfigurationMismatch+
|
|
79
|
+
# is raised.
|
|
80
|
+
def &(other)
|
|
81
|
+
raise BloomFit::ConfigurationMismatch unless same_parameters?(other)
|
|
82
|
+
self.class.new(size:, hashes:).tap do |result|
|
|
83
|
+
result.instance_variable_set(:@bf, @bf.&(other.bf))
|
|
84
|
+
end
|
|
64
85
|
end
|
|
86
|
+
alias intersection &
|
|
87
|
+
|
|
88
|
+
# Computes the union of two Bloom filters. It requires that both filters
|
|
89
|
+
# have the same size and number of hashes; otherwise, +BloomFit::ConfigurationMismatch+ is
|
|
90
|
+
# raised.
|
|
91
|
+
def |(other)
|
|
92
|
+
raise BloomFit::ConfigurationMismatch unless same_parameters?(other)
|
|
93
|
+
self.class.new(size:, hashes:).tap do |result|
|
|
94
|
+
result.instance_variable_set(:@bf, @bf.|(other.bf))
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
alias union |
|
|
98
|
+
|
|
99
|
+
def stats
|
|
100
|
+
fpr = ((1.0 - Math.exp(-(k * n).to_f / m))**k) * 100
|
|
65
101
|
|
|
66
|
-
|
|
67
|
-
|
|
102
|
+
(+"").tap do |s|
|
|
103
|
+
s << format("Number of filter buckets (m): %d\n", m)
|
|
104
|
+
s << format("Number of set bits (n): %d\n", n)
|
|
105
|
+
s << format("Number of filter hashes (k): %d\n", k)
|
|
106
|
+
s << format("Predicted false positive rate: %.2f%%\n", fpr)
|
|
107
|
+
end
|
|
68
108
|
end
|
|
69
109
|
|
|
70
110
|
def marshal_load(ary)
|
|
71
|
-
|
|
111
|
+
size, hashes, bitmap = *ary
|
|
72
112
|
|
|
73
|
-
initialize(
|
|
74
|
-
@bf.load(bitmap) if
|
|
113
|
+
initialize(size:, hashes:)
|
|
114
|
+
@bf.load(bitmap) if bitmap
|
|
75
115
|
end
|
|
76
116
|
|
|
77
117
|
def marshal_dump
|
|
78
|
-
[
|
|
118
|
+
[size, hashes, bitmap]
|
|
79
119
|
end
|
|
80
120
|
|
|
81
121
|
def self.load(filename)
|
|
82
|
-
Marshal.load(File.open(filename,
|
|
122
|
+
Marshal.load(File.open(filename, "r")) # rubocop:disable Security/MarshalLoad
|
|
83
123
|
end
|
|
84
124
|
|
|
85
125
|
def save(filename)
|
|
86
|
-
File.open(filename,
|
|
126
|
+
File.open(filename, "w") do |f|
|
|
87
127
|
f << Marshal.dump(self)
|
|
88
128
|
end
|
|
89
129
|
end
|
|
90
130
|
|
|
91
|
-
def stats
|
|
92
|
-
fp = ((1.0 - Math.exp(-(@opts[:hashes] * size).to_f / @opts[:size])) ** @opts[:hashes]) * 100
|
|
93
|
-
printf "Number of filter buckets (m): %d\n", @opts[:size]
|
|
94
|
-
printf "Number of bits per buckets (b): %d\n", @opts[:bucket]
|
|
95
|
-
printf "Number of set bits (n): %d\n", set_bits
|
|
96
|
-
printf "Number of filter hashes (k) : %d\n", @opts[:hashes]
|
|
97
|
-
printf "Predicted false positive rate = %.2f%%\n", fp
|
|
98
|
-
end
|
|
99
|
-
|
|
100
131
|
protected
|
|
101
132
|
|
|
102
|
-
# Returns true if parameters of the +
|
|
133
|
+
# Returns true if parameters of the +other+ filter are
|
|
103
134
|
# the same.
|
|
104
|
-
def same_parameters?(
|
|
105
|
-
|
|
135
|
+
def same_parameters?(other)
|
|
136
|
+
bf.m == other.bf.m && bf.k == other.bf.k
|
|
106
137
|
end
|
|
107
138
|
end
|
|
Binary file
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
require "test_helper"
|
|
2
|
+
|
|
3
|
+
class BloomFitTest < Minitest::Spec
|
|
4
|
+
subject { BloomFit.new(size: 100, hashes: 4) }
|
|
5
|
+
|
|
6
|
+
describe "#empty?" do
|
|
7
|
+
it "returns true when nothing set" do
|
|
8
|
+
assert_equal true, subject.empty? # rubocop:disable Minitest/AssertTruthy
|
|
9
|
+
assert_empty subject
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
it "returns false when something set" do
|
|
13
|
+
subject << "key"
|
|
14
|
+
assert_equal false, subject.empty? # rubocop:disable Minitest/RefuteFalse
|
|
15
|
+
refute_empty subject
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
describe "#add" do
|
|
20
|
+
it "adds the key and returns self" do
|
|
21
|
+
assert_equal subject, subject.add("test1")
|
|
22
|
+
assert_equal subject, subject.add("test2")
|
|
23
|
+
assert_includes subject, "test1"
|
|
24
|
+
assert_includes subject, "test2"
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
it "is aliased as #<<" do
|
|
28
|
+
subject << "test1" << "test2"
|
|
29
|
+
assert_includes subject, "test1"
|
|
30
|
+
assert_includes subject, "test2"
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
it "is aliased as #[]=, and handles truthy/falsey values" do
|
|
34
|
+
subject["dog"] = :bar
|
|
35
|
+
subject["cat"] = :foo
|
|
36
|
+
assert_includes subject, "dog"
|
|
37
|
+
assert_includes subject, "cat"
|
|
38
|
+
|
|
39
|
+
subject["bat"] = nil
|
|
40
|
+
subject["pig"] = false
|
|
41
|
+
refute_includes subject, "bat"
|
|
42
|
+
refute_includes subject, "pig"
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
it "casts using #to_s as necessary" do
|
|
46
|
+
subject << :symbol << true << 12_345
|
|
47
|
+
|
|
48
|
+
assert_includes subject, "symbol"
|
|
49
|
+
assert_includes subject, :symbol
|
|
50
|
+
assert_includes subject, "true"
|
|
51
|
+
assert_includes subject, "12345"
|
|
52
|
+
assert_includes subject, 12_345
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
describe "#add?" do
|
|
57
|
+
it "adds new key and returns self" do
|
|
58
|
+
assert_equal subject, subject.add("test1")
|
|
59
|
+
assert_equal subject, subject.add("test2")
|
|
60
|
+
assert_includes subject, "test1"
|
|
61
|
+
assert_includes subject, "test2"
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
it "return nil if the key already exists" do
|
|
65
|
+
subject << "test1"
|
|
66
|
+
subject << "test2"
|
|
67
|
+
assert_includes subject, "test1"
|
|
68
|
+
assert_includes subject, "test2"
|
|
69
|
+
assert_nil subject.add?("test1")
|
|
70
|
+
assert_nil subject.add?("test2")
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
describe "#include?" do
|
|
75
|
+
it "returns true when a key is in the set" do
|
|
76
|
+
subject << "test1"
|
|
77
|
+
subject << "test2"
|
|
78
|
+
assert_equal true, subject.include?("test1") # rubocop:disable Minitest/AssertTruthy
|
|
79
|
+
assert_equal true, subject.include?("test2") # rubocop:disable Minitest/AssertTruthy
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
it "returns false when a key is not in the set" do
|
|
83
|
+
assert_equal false, subject.include?("test") # rubocop:disable Minitest/RefuteFalse
|
|
84
|
+
assert_equal false, subject.include?("nada") # rubocop:disable Minitest/RefuteFalse
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
it "is aliased as #key?" do
|
|
88
|
+
subject << "test1"
|
|
89
|
+
subject << "test2"
|
|
90
|
+
assert subject.key?("test1")
|
|
91
|
+
assert subject.key?("test2")
|
|
92
|
+
refute subject.key?("test3")
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
it "is aliased as #[]" do
|
|
96
|
+
subject << "test1"
|
|
97
|
+
subject << "test2"
|
|
98
|
+
assert subject["test1"]
|
|
99
|
+
assert subject["test2"]
|
|
100
|
+
refute subject["test3"]
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
describe "#clear" do
|
|
105
|
+
it "zeroes the bits" do
|
|
106
|
+
subject.add("test")
|
|
107
|
+
assert_includes subject, "test"
|
|
108
|
+
assert_includes subject.to_binary, "1"
|
|
109
|
+
subject.clear
|
|
110
|
+
refute_includes subject, "test"
|
|
111
|
+
refute_includes subject.to_binary, "1"
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
describe "#set_bits" do
|
|
116
|
+
it "returns the number of bits set to 1" do
|
|
117
|
+
bf = BloomFit.new(size: 100, hashes: 4)
|
|
118
|
+
bf.add("bits")
|
|
119
|
+
assert_equal 4, bf.set_bits
|
|
120
|
+
|
|
121
|
+
bf = BloomFit.new(size: 100, hashes: 1)
|
|
122
|
+
bf.add("bits")
|
|
123
|
+
assert_equal 1, bf.set_bits
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
describe "#bitmap" do
|
|
128
|
+
it "returns a binary bitmap of all zeros when empty (including a terminating byte)" do
|
|
129
|
+
bf = BloomFit.new(size: 16)
|
|
130
|
+
assert_equal "\x00\x00\x00".b, bf.bitmap
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
it "returns a binary bitmap representing the set" do
|
|
134
|
+
bf = BloomFit.new(size: 16, hashes: 4)
|
|
135
|
+
bf.add("something")
|
|
136
|
+
assert_equal "(\x82\x00".b, bf.bitmap
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
it "returns a binary bitmap representing the set even if not a multiple of 8 bits" do
|
|
140
|
+
bf = BloomFit.new(size: 20, hashes: 4)
|
|
141
|
+
bf.add("wow")
|
|
142
|
+
assert_equal "\x04\x14\x00\x00".b, bf.bitmap
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
describe "#to_hex" do
|
|
147
|
+
it "returns a hex bitmap of all zeros when empty" do
|
|
148
|
+
bf = BloomFit.new(size: 16)
|
|
149
|
+
assert_equal "0000", bf.to_hex
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
it "returns a hex bitmap of all zeros when empty if not a multiple of 8 bits" do
|
|
153
|
+
bf = BloomFit.new(size: 18)
|
|
154
|
+
assert_equal "000000", bf.to_hex
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
it "returns a hex bitmap representing the set" do
|
|
158
|
+
bf = BloomFit.new(size: 16, hashes: 4)
|
|
159
|
+
bf.add("cool")
|
|
160
|
+
assert_equal "1441", bf.to_hex
|
|
161
|
+
end
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
describe "#to_binary" do
|
|
165
|
+
it "returns a binary bitmap of all zeros when empty" do
|
|
166
|
+
bf = BloomFit.new(size: 16)
|
|
167
|
+
assert_equal "0000000000000000", bf.to_binary
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
it "returns a binary bitmap of all zeros when empty if not a multiple of 8 bits" do
|
|
171
|
+
bf = BloomFit.new(size: 19)
|
|
172
|
+
assert_equal "0000000000000000000", bf.to_binary
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
it "returns a binary bitmap representing the set" do
|
|
176
|
+
bf = BloomFit.new(size: 16, hashes: 4)
|
|
177
|
+
bf << "cool" << "cat"
|
|
178
|
+
assert_equal "1001011001101001", bf.to_binary
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
describe "#merge" do
|
|
183
|
+
it "merges another BloomFit filter" do
|
|
184
|
+
bf1 = BloomFit.new(size: 100, hashes: 2)
|
|
185
|
+
bf2 = BloomFit.new(size: 100, hashes: 2)
|
|
186
|
+
bf1 << "mouse"
|
|
187
|
+
bf2 << "cat" << "dog"
|
|
188
|
+
refute_includes bf1, "cat"
|
|
189
|
+
refute_includes bf1, "dog"
|
|
190
|
+
bf1.merge(bf2)
|
|
191
|
+
assert_includes bf1, "mouse"
|
|
192
|
+
assert_includes bf1, "cat"
|
|
193
|
+
assert_includes bf1, "dog"
|
|
194
|
+
refute_includes bf2, "mouse"
|
|
195
|
+
assert_includes bf2, "cat"
|
|
196
|
+
assert_includes bf2, "dog"
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
it "merges an array" do
|
|
200
|
+
subject << "mouse"
|
|
201
|
+
subject.merge %i[cat dog]
|
|
202
|
+
assert_includes subject, "mouse"
|
|
203
|
+
assert_includes subject, "cat"
|
|
204
|
+
assert_includes subject, "dog"
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
it "merges a set" do
|
|
208
|
+
subject << "mouse"
|
|
209
|
+
subject.merge Set.new(%w[cat dog])
|
|
210
|
+
assert_includes subject, "mouse"
|
|
211
|
+
assert_includes subject, "cat"
|
|
212
|
+
assert_includes subject, "dog"
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
it "merges a hash ignoring falsey values" do
|
|
216
|
+
subject << "mouse"
|
|
217
|
+
subject.merge({ cat: 1, dog: 2, ant: false, bug: nil })
|
|
218
|
+
assert_includes subject, "mouse"
|
|
219
|
+
assert_includes subject, "cat"
|
|
220
|
+
assert_includes subject, "dog"
|
|
221
|
+
refute_includes subject, "ant"
|
|
222
|
+
refute_includes subject, "bug"
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
it "raises when merge is between incompatible filters" do
|
|
226
|
+
bf1 = BloomFit.new(size: 10)
|
|
227
|
+
bf2 = BloomFit.new(size: 20)
|
|
228
|
+
assert_raises(BloomFit::ConfigurationMismatch) { bf1.merge(bf2) }
|
|
229
|
+
end
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
describe "#&" do
|
|
233
|
+
it "returns intersection of both filters" do
|
|
234
|
+
bf1 = BloomFit.new(size: 35, hashes: 4)
|
|
235
|
+
bf1.add("test")
|
|
236
|
+
bf1.add("test1")
|
|
237
|
+
|
|
238
|
+
bf2 = BloomFit.new(size: 35, hashes: 4)
|
|
239
|
+
bf2.add("test")
|
|
240
|
+
bf2.add("test2")
|
|
241
|
+
|
|
242
|
+
bf3 = bf1 & bf2
|
|
243
|
+
assert_equal 35, bf3.size
|
|
244
|
+
assert_equal 4, bf3.hashes
|
|
245
|
+
assert_includes bf3, "test"
|
|
246
|
+
refute_includes bf3, "test1"
|
|
247
|
+
refute_includes bf3, "test2"
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
it "is aliased as #intersection" do
|
|
251
|
+
bf1 = BloomFit.new(size: 20, hashes: 4)
|
|
252
|
+
bf1.add("test")
|
|
253
|
+
bf1.add("test1")
|
|
254
|
+
|
|
255
|
+
bf2 = BloomFit.new(size: 20, hashes: 4)
|
|
256
|
+
bf2.add("test")
|
|
257
|
+
|
|
258
|
+
bf3 = bf1.intersection(bf2)
|
|
259
|
+
assert_includes bf3, "test"
|
|
260
|
+
refute_includes bf3, "test1"
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
it "raises when intersection is between incompatible filters" do
|
|
264
|
+
bf1 = BloomFit.new(size: 10)
|
|
265
|
+
bf2 = BloomFit.new(size: 20)
|
|
266
|
+
assert_raises(BloomFit::ConfigurationMismatch) { bf1 & bf2 }
|
|
267
|
+
|
|
268
|
+
bf1 = BloomFit.new(size: 10, hashes: 2)
|
|
269
|
+
bf2 = BloomFit.new(size: 10, hashes: 4)
|
|
270
|
+
assert_raises(BloomFit::ConfigurationMismatch) { bf1 & bf2 }
|
|
271
|
+
end
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
describe "#|" do
|
|
275
|
+
it "returns union with other filter" do
|
|
276
|
+
bf1 = BloomFit.new
|
|
277
|
+
bf1.add("test")
|
|
278
|
+
bf1.add("test1")
|
|
279
|
+
|
|
280
|
+
bf2 = BloomFit.new
|
|
281
|
+
bf2.add("test")
|
|
282
|
+
bf2.add("test2")
|
|
283
|
+
|
|
284
|
+
bf3 = bf1 | bf2
|
|
285
|
+
assert_includes bf3, "test"
|
|
286
|
+
assert_includes bf3, "test1"
|
|
287
|
+
assert_includes bf3, "test2"
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
it "is aliased as #union" do
|
|
291
|
+
bf1 = BloomFit.new(size: 20, hashes: 4)
|
|
292
|
+
bf1.add("test")
|
|
293
|
+
bf1.add("test1")
|
|
294
|
+
|
|
295
|
+
bf2 = BloomFit.new(size: 20, hashes: 4)
|
|
296
|
+
bf2.add("test")
|
|
297
|
+
|
|
298
|
+
bf3 = bf1.union(bf2)
|
|
299
|
+
assert_includes bf3, "test"
|
|
300
|
+
assert_includes bf3, "test1"
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
it "raises when union is between incompatible filters" do
|
|
304
|
+
bf1 = BloomFit.new(size: 10)
|
|
305
|
+
bf2 = BloomFit.new(size: 20)
|
|
306
|
+
assert_raises(BloomFit::ConfigurationMismatch) { bf1 | bf2 }
|
|
307
|
+
end
|
|
308
|
+
end
|
|
309
|
+
|
|
310
|
+
describe "#stats" do
|
|
311
|
+
it "returns current stats" do
|
|
312
|
+
bf = BloomFit.new(size: 10, hashes: 3)
|
|
313
|
+
expected = <<~STATS
|
|
314
|
+
Number of filter buckets (m): 10
|
|
315
|
+
Number of set bits (n): 0
|
|
316
|
+
Number of filter hashes (k): 3
|
|
317
|
+
Predicted false positive rate: 0.00%
|
|
318
|
+
STATS
|
|
319
|
+
assert_equal expected, bf.stats
|
|
320
|
+
end
|
|
321
|
+
end
|
|
322
|
+
|
|
323
|
+
describe "serialization" do
|
|
324
|
+
after { File.unlink("bf.out") }
|
|
325
|
+
|
|
326
|
+
it "marshalls" do
|
|
327
|
+
bf = BloomFit.new
|
|
328
|
+
assert bf.save("bf.out")
|
|
329
|
+
end
|
|
330
|
+
|
|
331
|
+
it "loads from marshalled" do
|
|
332
|
+
subject.add("foo")
|
|
333
|
+
subject.add("bar")
|
|
334
|
+
subject.save("bf.out")
|
|
335
|
+
|
|
336
|
+
bf2 = BloomFit.load("bf.out")
|
|
337
|
+
assert_includes bf2, "foo"
|
|
338
|
+
assert_includes bf2, "bar"
|
|
339
|
+
refute_includes bf2, "baz"
|
|
340
|
+
|
|
341
|
+
assert subject.send(:same_parameters?, bf2)
|
|
342
|
+
end
|
|
343
|
+
end
|
|
344
|
+
end
|
data/test/test_helper.rb
ADDED
metadata
CHANGED
|
@@ -1,92 +1,37 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bloom_fit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
|
-
- Ilya Grigorik
|
|
8
|
-
- Tatsuya Mori
|
|
9
7
|
- Ryan McGeary
|
|
10
8
|
- Beshad Talayeminaei
|
|
9
|
+
- Ilya Grigorik
|
|
10
|
+
- Tatsuya Mori
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
13
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
14
|
-
dependencies:
|
|
15
|
-
- !ruby/object:Gem::Dependency
|
|
16
|
-
name: irb
|
|
17
|
-
requirement: !ruby/object:Gem::Requirement
|
|
18
|
-
requirements:
|
|
19
|
-
- - ">="
|
|
20
|
-
- !ruby/object:Gem::Version
|
|
21
|
-
version: '0'
|
|
22
|
-
type: :development
|
|
23
|
-
prerelease: false
|
|
24
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
25
|
-
requirements:
|
|
26
|
-
- - ">="
|
|
27
|
-
- !ruby/object:Gem::Version
|
|
28
|
-
version: '0'
|
|
29
|
-
- !ruby/object:Gem::Dependency
|
|
30
|
-
name: rake
|
|
31
|
-
requirement: !ruby/object:Gem::Requirement
|
|
32
|
-
requirements:
|
|
33
|
-
- - ">="
|
|
34
|
-
- !ruby/object:Gem::Version
|
|
35
|
-
version: '0'
|
|
36
|
-
type: :development
|
|
37
|
-
prerelease: false
|
|
38
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
39
|
-
requirements:
|
|
40
|
-
- - ">="
|
|
41
|
-
- !ruby/object:Gem::Version
|
|
42
|
-
version: '0'
|
|
43
|
-
- !ruby/object:Gem::Dependency
|
|
44
|
-
name: rake-compiler
|
|
45
|
-
requirement: !ruby/object:Gem::Requirement
|
|
46
|
-
requirements:
|
|
47
|
-
- - ">="
|
|
48
|
-
- !ruby/object:Gem::Version
|
|
49
|
-
version: '0'
|
|
50
|
-
type: :development
|
|
51
|
-
prerelease: false
|
|
52
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
53
|
-
requirements:
|
|
54
|
-
- - ">="
|
|
55
|
-
- !ruby/object:Gem::Version
|
|
56
|
-
version: '0'
|
|
57
|
-
- !ruby/object:Gem::Dependency
|
|
58
|
-
name: rspec
|
|
59
|
-
requirement: !ruby/object:Gem::Requirement
|
|
60
|
-
requirements:
|
|
61
|
-
- - ">="
|
|
62
|
-
- !ruby/object:Gem::Version
|
|
63
|
-
version: '3'
|
|
64
|
-
type: :development
|
|
65
|
-
prerelease: false
|
|
66
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
67
|
-
requirements:
|
|
68
|
-
- - ">="
|
|
69
|
-
- !ruby/object:Gem::Version
|
|
70
|
-
version: '3'
|
|
14
|
+
dependencies: []
|
|
71
15
|
email:
|
|
72
|
-
- ilya@grigorik.com
|
|
73
|
-
- valdzone@gmail.com
|
|
74
16
|
- ryan@mcgeary.org
|
|
75
17
|
- 'btalayeminaei@gmail.com '
|
|
18
|
+
- ilya@grigorik.com
|
|
19
|
+
- valdzone@gmail.com
|
|
76
20
|
executables: []
|
|
77
21
|
extensions:
|
|
78
22
|
- ext/cbloomfilter/extconf.rb
|
|
79
23
|
extra_rdoc_files: []
|
|
80
24
|
files:
|
|
81
25
|
- README.md
|
|
82
|
-
- Rakefile
|
|
83
26
|
- ext/cbloomfilter/cbloomfilter.c
|
|
84
27
|
- ext/cbloomfilter/crc32.h
|
|
85
28
|
- ext/cbloomfilter/extconf.rb
|
|
86
29
|
- lib/bloom_fit.rb
|
|
30
|
+
- lib/bloom_fit/configuration_mismatch.rb
|
|
87
31
|
- lib/bloom_fit/version.rb
|
|
88
|
-
-
|
|
89
|
-
-
|
|
32
|
+
- lib/cbloomfilter.bundle
|
|
33
|
+
- test/bloom_fit_test.rb
|
|
34
|
+
- test/test_helper.rb
|
|
90
35
|
homepage: https://github.com/rmm5t/bloom_fit
|
|
91
36
|
licenses: []
|
|
92
37
|
metadata:
|
|
@@ -103,7 +48,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
103
48
|
requirements:
|
|
104
49
|
- - ">="
|
|
105
50
|
- !ruby/object:Gem::Version
|
|
106
|
-
version:
|
|
51
|
+
version: 3.2.0
|
|
107
52
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
53
|
requirements:
|
|
109
54
|
- - ">="
|
|
@@ -114,6 +59,4 @@ rubygems_version: 4.0.9
|
|
|
114
59
|
specification_version: 4
|
|
115
60
|
summary: BloomFit helps you build correctly sized Bloom filters from expected set
|
|
116
61
|
size and target false positive rate.
|
|
117
|
-
test_files:
|
|
118
|
-
- spec/bloom_fit_spec.rb
|
|
119
|
-
- spec/helper.rb
|
|
62
|
+
test_files: []
|
data/Rakefile
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
require "bundler/gem_tasks"
|
|
2
|
-
require "bundler/setup"
|
|
3
|
-
require "rspec/core/rake_task"
|
|
4
|
-
require "rake/extensiontask"
|
|
5
|
-
|
|
6
|
-
Rake::ExtensionTask.new("cbloomfilter")
|
|
7
|
-
RSpec::Core::RakeTask.new(:spec)
|
|
8
|
-
Rake::Task[:spec].prerequisites << :clean
|
|
9
|
-
Rake::Task[:spec].prerequisites << :compile
|
|
10
|
-
|
|
11
|
-
desc "Default: run unit tests."
|
|
12
|
-
task default: :spec
|