redis-bloomfilter 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +20 -0
- data/README.md +105 -0
- data/Rakefile +9 -0
- data/benchmark/bf_100_000_flat.rb +23 -0
- data/benchmark/bf_10_000.rb +53 -0
- data/examples/basic.rb +39 -0
- data/lib/bloomfilter_driver/lua.rb +116 -0
- data/lib/bloomfilter_driver/ruby.rb +68 -0
- data/lib/bloomfilter_driver/ruby_test.rb +72 -0
- data/lib/redis-bloomfilter.rb +6 -0
- data/lib/redis/bloomfilter.rb +78 -0
- data/redis-bloomfilter.gemspec +29 -0
- data/spec/redis_bloomfilter_spec.rb +75 -0
- data/spec/spec_helper.rb +20 -0
- metadata +114 -0
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Francesco Laurita
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,105 @@
|
|
1
|
+
redis-bloomfilter
|
2
|
+
=============
|
3
|
+
Requires the redis gem.
|
4
|
+
|
5
|
+
Adds the Redis::Bloomfilter class, which can be used as a distributed Bloom filter implementation on Redis.
|
6
|
+
|
7
|
+
A Bloom filter is a space-efficient probabilistic data structure that is used to test whether an element is a member of a set.
|
8
|
+
|
9
|
+
|
10
|
+
Installation
|
11
|
+
----------------
|
12
|
+
$ gem install redis-bloomfilter
|
13
|
+
|
14
|
+
Testing
|
15
|
+
----------------
|
16
|
+
$ bundle install
|
17
|
+
$ rake
|
18
|
+
|
19
|
+
Drivers
|
20
|
+
-----------------
|
21
|
+
The library contains a set of different drivers.
|
22
|
+
* A pure Ruby implementation
|
23
|
+
* A server-side version based on lua available for Redis v. >= 2.6
|
24
|
+
|
25
|
+
How to use
|
26
|
+
-----------------
|
27
|
+
```ruby
|
28
|
+
require "redis-bloomfilter"
|
29
|
+
|
30
|
+
# It creates a Bloom Filter using the default ruby driver
|
31
|
+
# Number of elements expected : 10000
|
32
|
+
# Max error rate: 1%
|
33
|
+
# Key name on Redis: my-bloom-filter
|
34
|
+
# Redis: 127.0.0.1:6379 or an already existing connection
|
35
|
+
@bf = Redis::Bloomfilter.new(
|
36
|
+
:size => 10_000,
|
37
|
+
:error_rate => 0.01,
|
38
|
+
:key_name => 'my-bloom-filter'
|
39
|
+
)
|
40
|
+
|
41
|
+
# Insert an element
|
42
|
+
@bf.insert "foo"
|
43
|
+
# Check if an element exists
|
44
|
+
puts @bf.include?("foo") # => true
|
45
|
+
puts @bf.include?("bar") # => false
|
46
|
+
|
47
|
+
# Empty the BF and delete the key stored on redis
|
48
|
+
@bf.clear
|
49
|
+
|
50
|
+
# Using Lua's driver: only available on Redis >= 2.6.0
|
51
|
+
# This driver should be preferred because it is faster
|
52
|
+
@bf = Redis::Bloomfilter.new(
|
53
|
+
:size => 10_000,
|
54
|
+
:error_rate => 0.01,
|
55
|
+
:key_name => 'my-bloom-filter-lua',
|
56
|
+
:driver => 'lua'
|
57
|
+
)
|
58
|
+
|
59
|
+
# Specify a redis connection:
|
60
|
+
# @bf = Redis::Bloomfilter.new(
|
61
|
+
# :size => 10_000,
|
62
|
+
# :error_rate => 0.01,
|
63
|
+
# :key_name => 'my-bloom-filter-lua',
|
64
|
+
# :driver => 'lua',
|
65
|
+
# :redis => Redis.new(:host => "10.0.1.1", :port => 6380)
|
66
|
+
# )
|
67
|
+
```
|
68
|
+
|
69
|
+
Performance & Memory Usage
|
70
|
+
-----------------
|
71
|
+
```
|
72
|
+
---------------------------------------------
|
73
|
+
Benchmarking lua driver with 1000000 items
|
74
|
+
user system total real
|
75
|
+
insert: 38.620000 17.690000 56.310000 (160.377977)
|
76
|
+
include?: 43.420000 20.600000 64.020000 (175.055146)
|
77
|
+
|
78
|
+
---------------------------------------------
|
79
|
+
Benchmarking ruby driver with 1000000 items
|
80
|
+
user system total real
|
81
|
+
insert: 125.910000 20.250000 146.160000 (195.973994)
|
82
|
+
include?:121.230000 36.260000 157.490000 (231.360137)
|
83
|
+
```
|
84
|
+
The Lua version is about 3 times faster than the pure-Ruby version in user CPU time
|
85
|
+
|
86
|
+
Lua code is taken from https://github.com/ErikDubbelboer/redis-lua-scaling-bloom-filter
|
87
|
+
|
88
|
+
1.000.000 items ~= 1.5Mb occupied on Redis
|
89
|
+
|
90
|
+
Contributing to redis-bloomfilter
|
91
|
+
----------------
|
92
|
+
|
93
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
94
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
|
95
|
+
* Fork the project.
|
96
|
+
* Start a feature/bugfix branch.
|
97
|
+
* Commit and push until you are happy with your contribution.
|
98
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
99
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
|
100
|
+
|
101
|
+
Copyright
|
102
|
+
----------------
|
103
|
+
|
104
|
+
Copyright (c) 2013 Francesco Laurita. See LICENSE.txt for
|
105
|
+
further details.
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
$:.push File.expand_path("../lib", __FILE__)
|
2
|
+
require "redis-bloomfilter"
|
3
|
+
require "benchmark"
|
4
|
+
items = 100_000
|
5
|
+
error_rate = 0.01
|
6
|
+
%w(lua ruby).each do |driver|
|
7
|
+
bf = Redis::Bloomfilter.new(
|
8
|
+
{
|
9
|
+
:size => items,
|
10
|
+
:error_rate => error_rate,
|
11
|
+
:key_name => "bloom-filter-bench-flat-#{driver}",
|
12
|
+
:driver => driver
|
13
|
+
}
|
14
|
+
)
|
15
|
+
bf.clear
|
16
|
+
puts "---------------------------------------------"
|
17
|
+
puts "Benchmarking #{driver} driver with #{items} items"
|
18
|
+
Benchmark.bm(7) do |x|
|
19
|
+
x.report("insert: ") {items.times { |i| bf.insert(rand(items)) }}
|
20
|
+
x.report("include?:") {items.times { |i| bf.include?(rand(items)) }}
|
21
|
+
end
|
22
|
+
puts
|
23
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
$:.push File.expand_path("../lib", __FILE__)
|
2
|
+
require "redis-bloomfilter"
|
3
|
+
require "benchmark"
|
4
|
+
require "set"
|
5
|
+
|
6
|
+
def rand_word(length = 8)
|
7
|
+
@charset ||= ('a'..'z').to_a
|
8
|
+
@charset.shuffle.first(length).join
|
9
|
+
end
|
10
|
+
|
11
|
+
items = ARGV[0].nil? ? 10_000 : ARGV[0].to_i
|
12
|
+
error_rate = 0.01
|
13
|
+
|
14
|
+
['lua', 'ruby', 'ruby-test'].each do |driver|
|
15
|
+
puts "Testing #{driver} driver..."
|
16
|
+
|
17
|
+
bf = Redis::Bloomfilter.new(
|
18
|
+
{
|
19
|
+
:size => items,
|
20
|
+
:error_rate => error_rate,
|
21
|
+
:key_name => "bloom-filter-bench-#{driver}",
|
22
|
+
:driver => driver
|
23
|
+
}
|
24
|
+
)
|
25
|
+
bf.clear
|
26
|
+
error = 0
|
27
|
+
first_error_at = 0
|
28
|
+
visited = Set.new
|
29
|
+
|
30
|
+
Benchmark.bm(7) do |x|
|
31
|
+
x.report do
|
32
|
+
items.times do |i|
|
33
|
+
item = rand_word
|
34
|
+
|
35
|
+
if bf.include?(item) != visited.include?(item)
|
36
|
+
error += 1
|
37
|
+
first_error_at = i if error == 1
|
38
|
+
end
|
39
|
+
visited << item
|
40
|
+
bf.insert item
|
41
|
+
#print ".(#{"%.1f" % ((i.to_f/items.to_f) * 100)}%) " if i % 1000 == 0
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
bf.clear
|
46
|
+
puts "Bloomfilter no of Bits: #{bf.options[:bits]} in Mb: #{(bf.options[:bits].to_f / 8 / 1024 / 1024)}"
|
47
|
+
puts "Bloomfilter no of hashes used: #{bf.options[:hashes]}"
|
48
|
+
puts "Items added: #{items}"
|
49
|
+
puts "First error found at: #{first_error_at}"
|
50
|
+
puts "Error found: #{error}"
|
51
|
+
puts "Error rate: #{(error.to_f / items)}"
|
52
|
+
puts
|
53
|
+
end
|
data/examples/basic.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require "redis-bloomfilter"
|
2
|
+
|
3
|
+
# It creates a Bloom Filter using the default ruby driver
|
4
|
+
# Number of elements expected : 10000
|
5
|
+
# Max error rate: 1%
|
6
|
+
# Key name on Redis: my-bloom-filter
|
7
|
+
# Redis: 127.0.0.1:6379 or an already existing connection
|
8
|
+
@bf = Redis::Bloomfilter.new(
|
9
|
+
:size => 10_000,
|
10
|
+
:error_rate => 0.01,
|
11
|
+
:key_name => 'my-bloom-filter'
|
12
|
+
)
|
13
|
+
|
14
|
+
# Insert an element
|
15
|
+
@bf.insert "foo"
|
16
|
+
# Check if an element exists
|
17
|
+
puts @bf.include?("foo") # => true
|
18
|
+
puts @bf.include?("bar") # => false
|
19
|
+
|
20
|
+
# Empty the BF and delete the key stored on redis
|
21
|
+
@bf.clear
|
22
|
+
|
23
|
+
# Using Lua's driver: only available on Redis >= 2.6.0
|
24
|
+
# This driver should be preferred because it is faster
|
25
|
+
@bf = Redis::Bloomfilter.new(
|
26
|
+
:size => 10_000,
|
27
|
+
:error_rate => 0.01,
|
28
|
+
:key_name => 'my-bloom-filter-lua',
|
29
|
+
:driver => 'lua'
|
30
|
+
)
|
31
|
+
|
32
|
+
# Specify a redis connection:
|
33
|
+
# @bf = Redis::Bloomfilter.new(
|
34
|
+
# :size => 10_000,
|
35
|
+
# :error_rate => 0.01,
|
36
|
+
# :key_name => 'my-bloom-filter-lua',
|
37
|
+
# :driver => 'lua',
|
38
|
+
# :redis => Redis.new(:host => "10.0.1.1", :port => 6380)
|
39
|
+
# )
|
@@ -0,0 +1,116 @@
|
|
1
|
+
require "digest/sha1"
|
2
|
+
class Redis
|
3
|
+
module BloomfilterDriver
|
4
|
+
|
5
|
+
# It loads lua script into redis.
|
6
|
+
# BF implementation is done by lua scripting
|
7
|
+
# The algorithm is executed directly on Redis
|
8
|
+
# Credit for the Lua code goes to Erik Dubbelboer
|
9
|
+
# https://github.com/ErikDubbelboer/redis-lua-scaling-bloom-filter
|
10
|
+
class Lua
|
11
|
+
attr_accessor :redis
|
12
|
+
|
13
|
+
def initialize(options = {})
|
14
|
+
@options = options
|
15
|
+
@redis = @options[:redis]
|
16
|
+
lua_load
|
17
|
+
end
|
18
|
+
|
19
|
+
def insert(data)
|
20
|
+
set data, 1
|
21
|
+
end
|
22
|
+
|
23
|
+
def remove(data)
|
24
|
+
set data, 0
|
25
|
+
end
|
26
|
+
|
27
|
+
# It checks if a key is part of the set.
#
# Runs the previously loaded check script on Redis via EVALSHA, passing the
# filter's key name as KEYS[1] and the configured size, error rate and the
# element to test as ARGV.
#
# key - The element to look up.
#
# Returns true when the script replies with 1, false for any other reply.
def include?(key)
  reply = @redis.evalsha(@check_fnc_sha, :keys => [@options[:key_name]], :argv => [@options[:size], @options[:error_rate], key])
  # The comparison already yields a boolean; no ternary needed.
  reply == 1
end
|
31
|
+
|
32
|
+
def clear
|
33
|
+
@redis.keys("#{@options[:key_name]}:*").each {|k|@redis.del k}
|
34
|
+
end
|
35
|
+
|
36
|
+
protected
|
37
|
+
# It loads the script inside Redis
|
38
|
+
# Taken from https://github.com/ErikDubbelboer/redis-lua-scaling-bloom-filter
|
39
|
+
# This is a scalable implementation of BF. It means the initial size can vary
|
40
|
+
def lua_load
|
41
|
+
add_fnc = %q(
|
42
|
+
local entries = ARGV[1]
|
43
|
+
local precision = ARGV[2]
|
44
|
+
local set_value = ARGV[4]
|
45
|
+
local index = math.ceil(redis.call('INCR', KEYS[1] .. ':count') / entries)
|
46
|
+
local key = KEYS[1] .. ':' .. index
|
47
|
+
local bits = math.floor(-(entries * math.log(precision * math.pow(0.5, index))) / 0.480453013)
|
48
|
+
local k = math.floor(0.693147180 * bits / entries)
|
49
|
+
local hash = redis.sha1hex(ARGV[3])
|
50
|
+
local h = { }
|
51
|
+
h[0] = tonumber(string.sub(hash, 0 , 8 ), 16)
|
52
|
+
h[1] = tonumber(string.sub(hash, 8 , 16), 16)
|
53
|
+
h[2] = tonumber(string.sub(hash, 16, 24), 16)
|
54
|
+
h[3] = tonumber(string.sub(hash, 24, 32), 16)
|
55
|
+
for i=1, k do
|
56
|
+
redis.call('SETBIT', key, (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % bits, set_value)
|
57
|
+
end
|
58
|
+
)
|
59
|
+
|
60
|
+
check_fnc = %q(
|
61
|
+
|
62
|
+
local entries = ARGV[1]
|
63
|
+
local precision = ARGV[2]
|
64
|
+
local index = redis.call('GET', KEYS[1] .. ':count')
|
65
|
+
if not index then
|
66
|
+
return 0
|
67
|
+
end
|
68
|
+
index = math.ceil(redis.call('GET', KEYS[1] .. ':count') / entries)
|
69
|
+
local hash = redis.sha1hex(ARGV[3])
|
70
|
+
local h = { }
|
71
|
+
h[0] = tonumber(string.sub(hash, 0 , 8 ), 16)
|
72
|
+
h[1] = tonumber(string.sub(hash, 8 , 16), 16)
|
73
|
+
h[2] = tonumber(string.sub(hash, 16, 24), 16)
|
74
|
+
h[3] = tonumber(string.sub(hash, 24, 32), 16)
|
75
|
+
local maxk = math.floor(0.693147180 * math.floor((entries * math.log(precision * math.pow(0.5, index))) / -0.480453013) / entries)
|
76
|
+
local b = { }
|
77
|
+
for i=1, maxk do
|
78
|
+
table.insert(b, h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)])
|
79
|
+
end
|
80
|
+
for n=1, index do
|
81
|
+
local key = KEYS[1] .. ':' .. n
|
82
|
+
local found = true
|
83
|
+
local bits = math.floor((entries * math.log(precision * math.pow(0.5, n))) / -0.480453013)
|
84
|
+
local k = math.floor(0.693147180 * bits / entries)
|
85
|
+
|
86
|
+
for i=1, k do
|
87
|
+
if redis.call('GETBIT', key, b[i] % bits) == 0 then
|
88
|
+
found = false
|
89
|
+
break
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
if found then
|
94
|
+
return 1
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
return 0
|
99
|
+
)
|
100
|
+
|
101
|
+
@add_fnc_sha = Digest::SHA1.hexdigest(add_fnc)
|
102
|
+
@check_fnc_sha = Digest::SHA1.hexdigest(check_fnc)
|
103
|
+
|
104
|
+
loaded = @redis.script(:exists, [@add_fnc_sha, @check_fnc_sha]).uniq
|
105
|
+
if loaded.count != 1 || loaded.first != true
|
106
|
+
@add_fnc_sha = @redis.script(:load, add_fnc)
|
107
|
+
@check_fnc_sha = @redis.script(:load, check_fnc)
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
def set(data, val)
|
112
|
+
@redis.evalsha(@add_fnc_sha, :keys => [@options[:key_name]], :argv => [@options[:size], @options[:error_rate], data, val])
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require "digest/sha1"
|
2
|
+
class Redis
|
3
|
+
module BloomfilterDriver
|
4
|
+
class Ruby
|
5
|
+
|
6
|
+
# Faster Ruby version.
|
7
|
+
# This driver should be used if Redis version < 2.6
|
8
|
+
attr_accessor :redis
|
9
|
+
def initialize(options = {})
|
10
|
+
@options = options
|
11
|
+
end
|
12
|
+
|
13
|
+
# Insert a new element
|
14
|
+
def insert(data)
|
15
|
+
set data, 1
|
16
|
+
end
|
17
|
+
|
18
|
+
# It checks if a key is part of the set
|
19
|
+
def include?(key)
|
20
|
+
|
21
|
+
indexes = []
|
22
|
+
indexes_for(key).each { |idx| indexes << idx }
|
23
|
+
return false if @redis.getbit(@options[:key_name], indexes.shift) == 0
|
24
|
+
|
25
|
+
result = @redis.pipelined do
|
26
|
+
indexes.each {|idx| @redis.getbit(@options[:key_name], idx)}
|
27
|
+
end
|
28
|
+
|
29
|
+
!result.include?(0)
|
30
|
+
end
|
31
|
+
|
32
|
+
# It removes an element from the filter
|
33
|
+
def remove(data)
|
34
|
+
set data, 0
|
35
|
+
end
|
36
|
+
|
37
|
+
# It deletes a bloomfilter
|
38
|
+
def clear
|
39
|
+
@redis.del @options[:key_name]
|
40
|
+
end
|
41
|
+
|
42
|
+
protected
|
43
|
+
# Hashing strategy:
|
44
|
+
# http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
|
45
|
+
def indexes_for data
|
46
|
+
sha = Digest::SHA1.hexdigest(data.to_s)
|
47
|
+
h = []
|
48
|
+
h[0] = sha[0...8].to_i(16)
|
49
|
+
h[1] = sha[8...16].to_i(16)
|
50
|
+
h[2] = sha[16...24].to_i(16)
|
51
|
+
h[3] = sha[24...32].to_i(16)
|
52
|
+
idxs = []
|
53
|
+
|
54
|
+
(@options[:hashes]).times {|i|
|
55
|
+
v = (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % @options[:bits]
|
56
|
+
idxs << v
|
57
|
+
}
|
58
|
+
idxs
|
59
|
+
end
|
60
|
+
|
61
|
+
def set(key, val)
|
62
|
+
@redis.pipelined do
|
63
|
+
indexes_for(key).each {|i| @redis.setbit @options[:key_name], i, val}
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require "digest/md5"
|
2
|
+
require "digest/sha1"
|
3
|
+
require "zlib"
|
4
|
+
class Redis
|
5
|
+
module BloomfilterDriver
|
6
|
+
# It uses a different hash strategy
|
7
|
+
# Useful for benchmarking
|
8
|
+
class RubyTest
|
9
|
+
attr_accessor :redis
|
10
|
+
|
11
|
+
def initialize(options = {})
|
12
|
+
@options = options
|
13
|
+
end
|
14
|
+
|
15
|
+
# Insert a new element
|
16
|
+
def insert(data)
|
17
|
+
set data, 1
|
18
|
+
end
|
19
|
+
|
20
|
+
# It removes an element from the filter
|
21
|
+
def remove(data)
|
22
|
+
set data, 0
|
23
|
+
end
|
24
|
+
|
25
|
+
# It checks if a key is part of the set
|
26
|
+
def include?(key)
|
27
|
+
indexes = []
|
28
|
+
indexes_for(key) { |idx| indexes << idx }
|
29
|
+
|
30
|
+
return false if @redis.getbit(@options[:key_name], indexes.shift) == 0
|
31
|
+
|
32
|
+
result = @redis.pipelined do
|
33
|
+
indexes.each {|idx| @redis.getbit(@options[:key_name], idx)}
|
34
|
+
end
|
35
|
+
|
36
|
+
!result.include?(0)
|
37
|
+
end
|
38
|
+
|
39
|
+
# It deletes a bloomfilter
|
40
|
+
def clear
|
41
|
+
@redis.del @options[:key_name]
|
42
|
+
end
|
43
|
+
|
44
|
+
protected
|
45
|
+
def indexes_for(key, engine = nil)
|
46
|
+
engine ||= @options[:hash_engine]
|
47
|
+
@options[:hashes].times do |i|
|
48
|
+
yield self.send("engine_#{engine}", key.to_s, i)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
# A set of different hash functions
|
53
|
+
# Maps +data+ to a bit position using CRC32, salted with the round number +i+.
#
# Zlib.crc32 already returns an Integer, so the checksum is reduced modulo
# the filter size directly. The previous `.to_i(16)` call raised
# ArgumentError on every invocation because Integer#to_i takes no radix
# argument (only String#to_i does).
#
# data - The String to hash.
# i    - The Integer hash round; prefixed to the data as "#{i}-" so each
#        round produces a different checksum.
#
# Returns an Integer index; it lies in 0...@options[:bits] when :bits is
# positive.
def engine_crc32(data, i)
  Zlib.crc32("#{i}-#{data}") % @options[:bits]
end
|
56
|
+
|
57
|
+
def engine_md5(data, i)
|
58
|
+
Digest::MD5.hexdigest("#{i}-#{data}").to_i(16) % @options[:bits]
|
59
|
+
end
|
60
|
+
|
61
|
+
def engine_sha1(data, i)
|
62
|
+
Digest::SHA1.hexdigest("#{i}-#{data}").to_i(16) % @options[:bits]
|
63
|
+
end
|
64
|
+
|
65
|
+
def set(data, val)
|
66
|
+
@redis.pipelined do
|
67
|
+
indexes_for(data) { |i| @redis.setbit @options[:key_name], i, val }
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
class Redis
|
2
|
+
class Bloomfilter
|
3
|
+
|
4
|
+
VERSION = "0.0.1"
|
5
|
+
|
6
|
+
def self.version
|
7
|
+
"redis-bloomfilter version #{VERSION}"
|
8
|
+
end
|
9
|
+
|
10
|
+
attr_reader :options
|
11
|
+
attr_reader :driver
|
12
|
+
|
13
|
+
# Usage: Redis::Bloomfilter.new :size => 1000, :error_rate => 0.01
|
14
|
+
# It creates a bloomfilter with a capacity of 1000 items and an error rate of 1%
|
15
|
+
def initialize(options = {})
|
16
|
+
@options = {
|
17
|
+
:size => 1000,
|
18
|
+
:error_rate => 0.01,
|
19
|
+
:key_name => 'redis-bloomfilter',
|
20
|
+
:hash_engine => 'md5',
|
21
|
+
:redis => Redis.current,
|
22
|
+
:driver => 'ruby'
|
23
|
+
}.merge options
|
24
|
+
|
25
|
+
raise ArgumentError, "options[:size] && options[:error_rate] cannot be nil" if options[:error_rate].nil? || options[:size].nil?
|
26
|
+
|
27
|
+
#Size provided, compute hashes and bits
|
28
|
+
|
29
|
+
@options[:size] = options[:size]
|
30
|
+
@options[:error_rate] = options[:error_rate] ? options[:error_rate] : @options[:error_rate]
|
31
|
+
@options[:bits] = Bloomfilter.optimal_m options[:size], @options[:error_rate]
|
32
|
+
@options[:hashes] = Bloomfilter.optimal_k options[:size], @options[:bits]
|
33
|
+
|
34
|
+
@redis = @options[:redis] || Redis.current
|
35
|
+
@options[:hash_engine] = options[:hash_engine] if options[:hash_engine]
|
36
|
+
driver_class = Redis::BloomfilterDriver.const_get(driver_name)
|
37
|
+
@driver = driver_class.new @options
|
38
|
+
@driver.redis = @redis
|
39
|
+
end
|
40
|
+
|
41
|
+
# Methods used to calculate M and K
|
42
|
+
# Taken from http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
|
43
|
+
def self.optimal_m num_of_elements, false_positive_rate = 0.01
|
44
|
+
(-1 * (num_of_elements) * Math.log(false_positive_rate) / (Math.log(2) ** 2)).round
|
45
|
+
end
|
46
|
+
|
47
|
+
def self.optimal_k num_of_elements, bf_size
|
48
|
+
h = (Math.log(2) * (bf_size / num_of_elements)).round
|
49
|
+
h+=1 if h == 0
|
50
|
+
h
|
51
|
+
end
|
52
|
+
|
53
|
+
# Insert a new element
|
54
|
+
def insert(data)
|
55
|
+
@driver.insert data
|
56
|
+
end
|
57
|
+
|
58
|
+
# It checks if a key is part of the set
|
59
|
+
def include?(key)
|
60
|
+
@driver.include?(key)
|
61
|
+
end
|
62
|
+
|
63
|
+
def remove(key)
|
64
|
+
@driver.remove key if @driver.respond_to? :remove
|
65
|
+
end
|
66
|
+
|
67
|
+
# It deletes a bloomfilter
|
68
|
+
def clear
|
69
|
+
@driver.clear
|
70
|
+
end
|
71
|
+
|
72
|
+
protected
|
73
|
+
def driver_name
|
74
|
+
@options[:driver].downcase.split('-').collect{|t| t.gsub(/(\w+)/){|s|s.capitalize}}.join
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "redis-bloomfilter"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "redis-bloomfilter"
|
7
|
+
s.version = Redis::Bloomfilter::VERSION
|
8
|
+
s.authors = ["Francesco Laurita"]
|
9
|
+
s.email = ["francesco.laurita@gmail.com"]
|
10
|
+
s.homepage = "https://github.com/taganaka/redis-bloomfilter"
|
11
|
+
s.summary = %q{Distributed Bloom Filter implementation on Redis}
|
12
|
+
s.description = %q{
|
13
|
+
Adds Redis::Bloomfilter class which can be used as ditributed bloom filter implementation on Redis.
|
14
|
+
A Bloom filter is a space-efficient probabilistic data structure that is used to test whether an element is a member of a set.
|
15
|
+
}
|
16
|
+
|
17
|
+
s.rubyforge_project = "redis-bloomfilter"
|
18
|
+
|
19
|
+
s.files = `git ls-files`.split("\n")
|
20
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
21
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
22
|
+
s.require_paths = ["lib"]
|
23
|
+
|
24
|
+
s.add_dependency "hiredis", "~> 0.4.5"
|
25
|
+
s.add_dependency "redis", "~> 3.0.4"
|
26
|
+
|
27
|
+
s.add_development_dependency "rspec"
|
28
|
+
|
29
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
require "set"
|
3
|
+
|
4
|
+
|
5
|
+
def test_error_rate(bf,elems)
|
6
|
+
visited = Set.new
|
7
|
+
error = 0
|
8
|
+
elems.times do |i|
|
9
|
+
a = rand(elems)
|
10
|
+
error += 1 if bf.include?(a) != visited.include?(a)
|
11
|
+
visited << a
|
12
|
+
bf.insert a
|
13
|
+
end
|
14
|
+
error.to_f / elems
|
15
|
+
end
|
16
|
+
|
17
|
+
def factory options, driver
|
18
|
+
options[:driver] = driver
|
19
|
+
Redis::Bloomfilter.new options
|
20
|
+
end
|
21
|
+
|
22
|
+
describe Redis::Bloomfilter do
|
23
|
+
|
24
|
+
it 'should return the right version' do
|
25
|
+
Redis::Bloomfilter.version.should eq "redis-bloomfilter version #{Redis::Bloomfilter::VERSION}"
|
26
|
+
end
|
27
|
+
|
28
|
+
it 'should check for the initialize options' do
|
29
|
+
expect { Redis::Bloomfilter.new }.to raise_error(ArgumentError)
|
30
|
+
expect { Redis::Bloomfilter.new :size => 123 }.to raise_error(ArgumentError)
|
31
|
+
expect { Redis::Bloomfilter.new :error_rate => 0.01 }.to raise_error(ArgumentError)
|
32
|
+
expect { Redis::Bloomfilter.new :size => 123,:error_rate => 0.01, :driver => 'bibu' }.to raise_error(NameError)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'should create a Redis::Bloomfilter object' do
|
36
|
+
bf = factory({:size => 1000, :error_rate => 0.01, :key_name => 'ossom'}, 'ruby')
|
37
|
+
bf.should be
|
38
|
+
bf.options[:size].should eq 1000
|
39
|
+
bf.options[:bits].should eq 9585
|
40
|
+
bf.options[:hashes].should eq 6
|
41
|
+
bf.options[:key_name].should eq 'ossom'
|
42
|
+
bf.clear
|
43
|
+
end
|
44
|
+
|
45
|
+
%w(ruby lua ruby-test).each do |driver|
|
46
|
+
it 'should work' do
|
47
|
+
bf = factory({:size => 1000, :error_rate => 0.01, :key_name => '__test_bf'},driver)
|
48
|
+
bf.clear
|
49
|
+
bf.include?("asdlol").should be false
|
50
|
+
bf.insert "asdlol"
|
51
|
+
bf.include?("asdlol").should be true
|
52
|
+
bf.clear
|
53
|
+
bf.include?("asdlol").should be false
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'should honor the error rate' do
|
57
|
+
bf = factory({:size => 100, :error_rate => 0.01, :key_name => '__test_bf'},driver)
|
58
|
+
bf.clear
|
59
|
+
e = test_error_rate bf, 180
|
60
|
+
e.should be < bf.options[:error_rate]
|
61
|
+
bf.clear
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'should remove an elemnt from the filter' do
|
65
|
+
|
66
|
+
bf = factory({:size => 100, :error_rate => 0.01, :key_name => '__test_bf'},driver)
|
67
|
+
bf.insert "asdlolol"
|
68
|
+
bf.include?("asdlolol").should be true
|
69
|
+
bf.remove "asdlolol"
|
70
|
+
bf.include?("asdlolol").should be false
|
71
|
+
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# Require this file using `require "spec_helper"` to ensure that it is only
|
4
|
+
# loaded once.
|
5
|
+
#
|
6
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
+
RSpec.configure do |config|
|
8
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true
|
9
|
+
config.run_all_when_everything_filtered = true
|
10
|
+
config.filter_run :focus
|
11
|
+
|
12
|
+
# Run specs in random order to surface order dependencies. If you find an
|
13
|
+
# order dependency and want to debug it, you can fix the order by providing
|
14
|
+
# the seed, which is printed after each run.
|
15
|
+
# --seed 1234
|
16
|
+
config.order = 'random'
|
17
|
+
end
|
18
|
+
|
19
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
20
|
+
require 'redis-bloomfilter'
|
metadata
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: redis-bloomfilter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Francesco Laurita
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-06-07 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: hiredis
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 0.4.5
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.4.5
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: redis
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 3.0.4
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 3.0.4
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rspec
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
description: ! "\n Adds Redis::Bloomfilter class which can be used as ditributed
|
63
|
+
bloom filter implementation on Redis.\n A Bloom filter is a space-efficient probabilistic
|
64
|
+
data structure that is used to test whether an element is a member of a set.\n "
|
65
|
+
email:
|
66
|
+
- francesco.laurita@gmail.com
|
67
|
+
executables: []
|
68
|
+
extensions: []
|
69
|
+
extra_rdoc_files: []
|
70
|
+
files:
|
71
|
+
- .gitignore
|
72
|
+
- .rspec
|
73
|
+
- Gemfile
|
74
|
+
- LICENSE.txt
|
75
|
+
- README.md
|
76
|
+
- Rakefile
|
77
|
+
- benchmark/bf_100_000_flat.rb
|
78
|
+
- benchmark/bf_10_000.rb
|
79
|
+
- examples/basic.rb
|
80
|
+
- lib/bloomfilter_driver/lua.rb
|
81
|
+
- lib/bloomfilter_driver/ruby.rb
|
82
|
+
- lib/bloomfilter_driver/ruby_test.rb
|
83
|
+
- lib/redis-bloomfilter.rb
|
84
|
+
- lib/redis/bloomfilter.rb
|
85
|
+
- redis-bloomfilter.gemspec
|
86
|
+
- spec/redis_bloomfilter_spec.rb
|
87
|
+
- spec/spec_helper.rb
|
88
|
+
homepage: https://github.com/taganaka/redis-bloomfilter
|
89
|
+
licenses: []
|
90
|
+
post_install_message:
|
91
|
+
rdoc_options: []
|
92
|
+
require_paths:
|
93
|
+
- lib
|
94
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ! '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
102
|
+
requirements:
|
103
|
+
- - ! '>='
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
version: '0'
|
106
|
+
requirements: []
|
107
|
+
rubyforge_project: redis-bloomfilter
|
108
|
+
rubygems_version: 1.8.25
|
109
|
+
signing_key:
|
110
|
+
specification_version: 3
|
111
|
+
summary: Distributed Bloom Filter implementation on Redis
|
112
|
+
test_files:
|
113
|
+
- spec/redis_bloomfilter_spec.rb
|
114
|
+
- spec/spec_helper.rb
|