bloomer 0.0.5 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +4 -4
- data/README.md +58 -27
- data/bloomer.gemspec +1 -0
- data/lib/bloomer/msgpackable.rb +65 -0
- data/lib/bloomer/version.rb +1 -1
- data/test/bloomer_test.rb +26 -0
- metadata +35 -41
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 683b2bd1b28f30606dd8f96ab85ea18a98e1a198d38b10f823563ba6893d1454
|
4
|
+
data.tar.gz: 8f26b8793a08d01b401d727fc0371990f948d2e69d01cdcb1d2e1dc83e742830
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 53af08a56920e8d6e146a44032151facc1e507c3bb9b21e0a9a203c60339039b2e0e9241a79cdafcb1d6f90ef326e8202252163af811e70373c30608ba3fa2f2
|
7
|
+
data.tar.gz: ce375fc18e7531cb2b6455df625034cf0881db9294a41945a9ef1789f2aa649ae7d8037a2205bc32e34113be765bc7f3e71c7b3be06a1cd7e47cb1553552c7ac
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,32 +1,36 @@
|
|
1
1
|
# Bloomer: Bloom filters with elastic
|
2
2
|
|
3
|
-
[](https://badge.fury.io/rb/bloomer)
|
4
|
+
[](http://travis-ci.org/mceachen/bloomer)
|
4
5
|
|
5
|
-
[Bloom filters](http://en.wikipedia.org/wiki/Bloom_filter) are great for quickly
|
6
|
-
a given string has been seen before--in constant time, and
|
7
|
-
|
8
|
-
|
6
|
+
[Bloom filters](http://en.wikipedia.org/wiki/Bloom_filter) are great for quickly
|
7
|
+
checking to see if a given string has been seen before--in constant time, and
|
8
|
+
using a fixed amount of RAM, as long as you know the expected number of elements
|
9
|
+
up front. If you add more than `capacity` elements to the filter, accuracy for
|
10
|
+
`include?` will drop below `false_positive_probability`.
|
9
11
|
|
10
|
-
[Scalable Bloom Filters](http://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf)
|
11
|
-
by using additional RAM as
|
12
|
+
[Scalable Bloom Filters](http://gsd.di.uminho.pt/members/cbm/ps/dbloom.pdf)
|
13
|
+
maintain a maximal `false_positive_probability` by using additional RAM as
|
14
|
+
needed.
|
12
15
|
|
13
|
-
|
16
|
+
`Bloomer` is a Bloom Filter. `Bloomer::Scalable` is a Scalable Bloom Filter.
|
14
17
|
|
15
|
-
Keep in mind that **false positives with Bloom filters are expected**, with a
|
16
|
-
False negatives, however, are not. In other words,
|
18
|
+
Keep in mind that **false positives with Bloom filters are expected**, with a
|
19
|
+
specified probability rate. False negatives, however, are not. In other words,
|
17
20
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
+
- if `include?` returns _false_, that string has _certainly not_ been `add`ed
|
22
|
+
- if `include?` returns _true_, it _might_ mean that string was `add`ed
|
23
|
+
(depending on the `false_positive_probability` parameter provided to the
|
24
|
+
constructor).
|
21
25
|
|
22
26
|
This implementation is unique in that Bloomer
|
23
27
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
- supports scalable bloom filters (SBF)
|
29
|
+
- uses triple hash chains (see [the paper](http://www.ccs.neu.edu/home/pete/pub/bloom-filters-verification.pdf))
|
30
|
+
- can marshal state quickly
|
31
|
+
- has rigorous tests
|
32
|
+
- is pure ruby
|
33
|
+
- does not require EM or Redis or something else unrelated to simply implementing a bloom filter
|
30
34
|
|
31
35
|
## Usage
|
32
36
|
|
@@ -52,30 +56,57 @@ bf.include? "badda"
|
|
52
56
|
#=> false
|
53
57
|
```
|
54
58
|
|
55
|
-
Serialization
|
59
|
+
Serialization can be done using
|
60
|
+
[MessagePack](https://github.com/msgpack/msgpack-ruby):
|
61
|
+
|
62
|
+
Notice, you'll need to require `bloomer/msgpackable` to enable serialization.
|
56
63
|
|
57
64
|
```ruby
|
65
|
+
require 'bloomer/msgpackable'
|
58
66
|
b = Bloomer.new(10)
|
59
67
|
b.add("a")
|
60
|
-
s =
|
61
|
-
new_b =
|
68
|
+
s = b.to_msgpack
|
69
|
+
new_b = Bloomer.from_msgpack(s)
|
62
70
|
new_b.include? "a"
|
63
71
|
#=> true
|
64
72
|
```
|
65
73
|
|
74
|
+
The original class will be preserved regardless of calling
|
75
|
+
`Bloomer.from_msgpack(s)` or `Bloomer::Scalable.from_msgpack(s)`:
|
76
|
+
|
77
|
+
```ruby
|
78
|
+
require 'bloomer/msgpackable'
|
79
|
+
b = Bloomer::Scalable.new
|
80
|
+
b.add("a")
|
81
|
+
s = b.to_msgpack
|
82
|
+
new_b = Bloomer.from_msgpack(s)
|
83
|
+
new_b.class == Bloomer::Scalable
|
84
|
+
#=> true
|
85
|
+
```
|
86
|
+
|
66
87
|
## Changelog
|
67
88
|
|
89
|
+
### 1.0.0
|
90
|
+
|
91
|
+
- Using msgpack for more secure deserialization. Marshal.load still works but is
|
92
|
+
not recommended
|
93
|
+
|
68
94
|
### 0.0.5
|
69
|
-
|
95
|
+
|
96
|
+
- Switched from rspec to minitest
|
70
97
|
|
71
98
|
### 0.0.4
|
72
|
-
|
99
|
+
|
100
|
+
- Fixed gem packaging
|
73
101
|
|
74
102
|
### 0.0.3
|
75
|
-
|
103
|
+
|
104
|
+
- Added support for scalable bloom filters (SBF)
|
76
105
|
|
77
106
|
### 0.0.2
|
78
|
-
|
107
|
+
|
108
|
+
- Switch to triple-hash chaining (simpler, faster, and better false-positive rate)
|
79
109
|
|
80
110
|
### 0.0.1
|
81
|
-
|
111
|
+
|
112
|
+
- Bloom, there it is.
|
data/bloomer.gemspec
CHANGED
data/lib/bloomer/msgpackable.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
require "msgpack"
|
2
|
+
|
3
|
+
module Msgpackable
|
4
|
+
def self.included(base)
|
5
|
+
base.extend(ClassMethods)
|
6
|
+
end
|
7
|
+
|
8
|
+
def to_msgpack
|
9
|
+
self.class.msgpack_factory.dump self
|
10
|
+
end
|
11
|
+
|
12
|
+
module ClassMethods
|
13
|
+
def from_msgpack(data)
|
14
|
+
msgpack_factory.load(data)
|
15
|
+
end
|
16
|
+
|
17
|
+
def msgpack_factory
|
18
|
+
@msgpack_factory ||= ::MessagePack::Factory.new.tap do |factory|
|
19
|
+
factory.register_type(0x01, ::Bloomer)
|
20
|
+
factory.register_type(0x02, ::Bloomer::Scalable)
|
21
|
+
factory.freeze
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
# Patch Bloomer and Scalable to make them msgpackable
|
28
|
+
class Bloomer
|
29
|
+
include Msgpackable
|
30
|
+
|
31
|
+
def to_msgpack_ext
|
32
|
+
self.class.msgpack_factory.dump([@capacity, @count, @k, @ba.size, @ba.field])
|
33
|
+
end
|
34
|
+
|
35
|
+
def from_msgpack_ext(capacity, count, k, ba_size, ba_field)
|
36
|
+
@capacity, @count, @k = capacity, count, k
|
37
|
+
@ba = BitArray.new(ba_size, ba_field)
|
38
|
+
end
|
39
|
+
|
40
|
+
def self.from_msgpack_ext(data)
|
41
|
+
values = msgpack_factory.load(data)
|
42
|
+
::Bloomer.new(values[1]).tap do |b|
|
43
|
+
b.from_msgpack_ext(*values)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
class Scalable
|
48
|
+
include Msgpackable
|
49
|
+
|
50
|
+
def to_msgpack_ext
|
51
|
+
self.class.msgpack_factory.dump([@false_positive_probability, @bloomers])
|
52
|
+
end
|
53
|
+
|
54
|
+
def from_msgpack_ext(false_positive_probability, bloomers)
|
55
|
+
@false_positive_probability, @bloomers = false_positive_probability, bloomers
|
56
|
+
end
|
57
|
+
|
58
|
+
def self.from_msgpack_ext(data)
|
59
|
+
false_positive_probability, bloomers = msgpack_factory.load(data)
|
60
|
+
::Bloomer::Scalable.new.tap do |b|
|
61
|
+
b.from_msgpack_ext(false_positive_probability, bloomers)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
data/lib/bloomer/version.rb
CHANGED
data/test/bloomer_test.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require "test_helper"
|
2
2
|
|
3
3
|
C = ('a'..'z').to_a
|
4
|
+
|
4
5
|
def rand_word(length = 8)
|
5
6
|
C.shuffle.first(length).join # not random enough to cause hits.
|
6
7
|
end
|
@@ -43,6 +44,20 @@ def test_marshal_state(b)
|
|
43
44
|
inputs.each { |ea| new_b.must_include(ea) }
|
44
45
|
end
|
45
46
|
|
47
|
+
def test_msgpackable(b)
|
48
|
+
require "bloomer/msgpackable"
|
49
|
+
inputs = b.capacity.times.collect { rand_word }
|
50
|
+
inputs.each { |ea| b.add(ea) }
|
51
|
+
packed = b.to_msgpack
|
52
|
+
new_b = b.class.from_msgpack(packed)
|
53
|
+
new_b.count.must_equal b.count
|
54
|
+
new_b.capacity.must_equal b.capacity
|
55
|
+
inputs.each { |ea| new_b.must_include(ea) }
|
56
|
+
dump = Marshal.dump(b)
|
57
|
+
packed.size.must_be :<, dump.size
|
58
|
+
b.class.must_equal new_b.class
|
59
|
+
end
|
60
|
+
|
46
61
|
def test_simple(b)
|
47
62
|
b.add("a").must_equal true
|
48
63
|
b.add("a").must_equal false
|
@@ -68,6 +83,11 @@ describe Bloomer do
|
|
68
83
|
test_marshal_state(b)
|
69
84
|
end
|
70
85
|
|
86
|
+
it "serializes and deserializes correctly" do
|
87
|
+
b = Bloomer.new(10, 0.001)
|
88
|
+
test_msgpackable(b)
|
89
|
+
end
|
90
|
+
|
71
91
|
it "results in similar-to-expected false positives" do
|
72
92
|
max_false_prob = 0.001
|
73
93
|
size = 50_000
|
@@ -88,6 +108,12 @@ describe Bloomer::Scalable do
|
|
88
108
|
test_marshal_state(b)
|
89
109
|
end
|
90
110
|
|
111
|
+
it "serializes and deserializes correctly" do
|
112
|
+
b = Bloomer::Scalable.new(10, 0.001)
|
113
|
+
100.times.each { b.add(rand_word) }
|
114
|
+
test_msgpackable(b)
|
115
|
+
end
|
116
|
+
|
91
117
|
it "results in similar-to-expected false positives" do
|
92
118
|
max_false_prob = 0.001
|
93
119
|
size = 10_000
|
metadata
CHANGED
@@ -1,94 +1,97 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bloomer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
5
|
-
prerelease:
|
4
|
+
version: 1.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Matthew McEachen
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2018-09-13 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: bitarray
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - ">="
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: msgpack
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
28
39
|
- !ruby/object:Gem::Version
|
29
40
|
version: '0'
|
30
41
|
- !ruby/object:Gem::Dependency
|
31
42
|
name: rake
|
32
43
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
44
|
requirements:
|
35
|
-
- -
|
45
|
+
- - ">="
|
36
46
|
- !ruby/object:Gem::Version
|
37
47
|
version: '0'
|
38
48
|
type: :development
|
39
49
|
prerelease: false
|
40
50
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
51
|
requirements:
|
43
|
-
- -
|
52
|
+
- - ">="
|
44
53
|
- !ruby/object:Gem::Version
|
45
54
|
version: '0'
|
46
55
|
- !ruby/object:Gem::Dependency
|
47
56
|
name: yard
|
48
57
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
58
|
requirements:
|
51
|
-
- -
|
59
|
+
- - ">="
|
52
60
|
- !ruby/object:Gem::Version
|
53
61
|
version: '0'
|
54
62
|
type: :development
|
55
63
|
prerelease: false
|
56
64
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
65
|
requirements:
|
59
|
-
- -
|
66
|
+
- - ">="
|
60
67
|
- !ruby/object:Gem::Version
|
61
68
|
version: '0'
|
62
69
|
- !ruby/object:Gem::Dependency
|
63
70
|
name: minitest
|
64
71
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
72
|
requirements:
|
67
|
-
- -
|
73
|
+
- - ">="
|
68
74
|
- !ruby/object:Gem::Version
|
69
75
|
version: '0'
|
70
76
|
type: :development
|
71
77
|
prerelease: false
|
72
78
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
79
|
requirements:
|
75
|
-
- -
|
80
|
+
- - ">="
|
76
81
|
- !ruby/object:Gem::Version
|
77
82
|
version: '0'
|
78
83
|
- !ruby/object:Gem::Dependency
|
79
84
|
name: minitest-reporters
|
80
85
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
86
|
requirements:
|
83
|
-
- -
|
87
|
+
- - ">="
|
84
88
|
- !ruby/object:Gem::Version
|
85
89
|
version: '0'
|
86
90
|
type: :development
|
87
91
|
prerelease: false
|
88
92
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
93
|
requirements:
|
91
|
-
- -
|
94
|
+
- - ">="
|
92
95
|
- !ruby/object:Gem::Version
|
93
96
|
version: '0'
|
94
97
|
description: Bloom filters and Scalable Bloom filters (SBF) in pure ruby
|
@@ -98,48 +101,39 @@ executables: []
|
|
98
101
|
extensions: []
|
99
102
|
extra_rdoc_files: []
|
100
103
|
files:
|
101
|
-
- .gitignore
|
102
|
-
- .travis.yml
|
104
|
+
- ".gitignore"
|
105
|
+
- ".travis.yml"
|
103
106
|
- Gemfile
|
104
107
|
- MIT-LICENSE
|
105
108
|
- README.md
|
106
109
|
- Rakefile
|
107
110
|
- bloomer.gemspec
|
108
111
|
- lib/bloomer.rb
|
112
|
+
- lib/bloomer/msgpackable.rb
|
109
113
|
- lib/bloomer/version.rb
|
110
114
|
- test/bloomer_test.rb
|
111
115
|
- test/test_helper.rb
|
112
116
|
homepage: https://github.com/mceachen/bloomer
|
113
117
|
licenses: []
|
118
|
+
metadata: {}
|
114
119
|
post_install_message:
|
115
120
|
rdoc_options: []
|
116
121
|
require_paths:
|
117
122
|
- lib
|
118
123
|
required_ruby_version: !ruby/object:Gem::Requirement
|
119
|
-
none: false
|
120
124
|
requirements:
|
121
|
-
- -
|
125
|
+
- - ">="
|
122
126
|
- !ruby/object:Gem::Version
|
123
127
|
version: '0'
|
124
|
-
segments:
|
125
|
-
- 0
|
126
|
-
hash: 2624379326334183946
|
127
128
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
128
|
-
none: false
|
129
129
|
requirements:
|
130
|
-
- -
|
130
|
+
- - ">="
|
131
131
|
- !ruby/object:Gem::Version
|
132
132
|
version: '0'
|
133
|
-
segments:
|
134
|
-
- 0
|
135
|
-
hash: 2624379326334183946
|
136
133
|
requirements: []
|
137
134
|
rubyforge_project: bloomer
|
138
|
-
rubygems_version:
|
135
|
+
rubygems_version: 2.7.7
|
139
136
|
signing_key:
|
140
|
-
specification_version:
|
137
|
+
specification_version: 4
|
141
138
|
summary: Bloom filters and Scalable Bloom filters (SBF) in pure ruby
|
142
|
-
test_files:
|
143
|
-
- test/bloomer_test.rb
|
144
|
-
- test/test_helper.rb
|
145
|
-
has_rdoc:
|
139
|
+
test_files: []
|