redis-bloomfilter 1.0.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bloomfilter_driver/lua.rb +8 -66
- data/lib/bloomfilter_driver/ruby.rb +3 -3
- data/lib/redis/bloomfilter/version.rb +1 -1
- data/vendor/assets/lua/add.lua +50 -0
- data/vendor/assets/lua/check.lua +61 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 73a3287e8ed9a6cd594471734509e07eb20b731cc6cabbcd67752490b81d78b1
|
4
|
+
data.tar.gz: 345d7a05680558b4d7a3415f910fa57ced190c2c11788882cb47c07a52583236
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d3862a987bf159b4aa3481df9b4eade7f0ee03d69c3c2fbd8fb841f91138a8fffd377af1a5dbf4025755e87852dcd8a365a5ec2a380b14f36ed6037ed400fdcc
|
7
|
+
data.tar.gz: ab51bc7f412025d262c49c8156fb56754a178a76866710b3c04ceaddccfebec3e10e942bf8f8f62f26dcc385b9baeef2a4f7462682abae518f447517ad24d08e
|
@@ -18,7 +18,7 @@ class Redis
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def insert(data)
|
21
|
-
set data
|
21
|
+
set data
|
22
22
|
end
|
23
23
|
|
24
24
|
def include?(key)
|
@@ -36,78 +36,20 @@ class Redis
|
|
36
36
|
# Taken from https://github.com/ErikDubbelboer/redis-lua-scaling-bloom-filter
|
37
37
|
# This is a scalable implementation of BF. It means the initial size can vary
|
38
38
|
def lua_load
|
39
|
-
add_fnc = "
|
40
|
-
|
41
|
-
local precision = ARGV[2]
|
42
|
-
local set_value = ARGV[4]
|
43
|
-
local index = math.ceil(redis.call('INCR', KEYS[1] .. ':count') / entries)
|
44
|
-
local key = KEYS[1] .. ':' .. index
|
45
|
-
local bits = math.floor(-(entries * math.log(precision * math.pow(0.5, index))) / 0.480453013)
|
46
|
-
local k = math.floor(0.693147180 * bits / entries)
|
47
|
-
local hash = redis.sha1hex(ARGV[3])
|
48
|
-
local h = { }
|
49
|
-
h[0] = tonumber(string.sub(hash, 0 , 8 ), 16)
|
50
|
-
h[1] = tonumber(string.sub(hash, 8 , 16), 16)
|
51
|
-
h[2] = tonumber(string.sub(hash, 16, 24), 16)
|
52
|
-
h[3] = tonumber(string.sub(hash, 24, 32), 16)
|
53
|
-
for i=1, k do
|
54
|
-
redis.call('SETBIT', key, (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % bits, set_value)
|
55
|
-
end
|
56
|
-
"
|
57
|
-
|
58
|
-
check_fnc = "
|
59
|
-
|
60
|
-
local entries = ARGV[1]
|
61
|
-
local precision = ARGV[2]
|
62
|
-
local index = redis.call('GET', KEYS[1] .. ':count')
|
63
|
-
if not index then
|
64
|
-
return 0
|
65
|
-
end
|
66
|
-
index = math.ceil(redis.call('GET', KEYS[1] .. ':count') / entries)
|
67
|
-
local hash = redis.sha1hex(ARGV[3])
|
68
|
-
local h = { }
|
69
|
-
h[0] = tonumber(string.sub(hash, 0 , 8 ), 16)
|
70
|
-
h[1] = tonumber(string.sub(hash, 8 , 16), 16)
|
71
|
-
h[2] = tonumber(string.sub(hash, 16, 24), 16)
|
72
|
-
h[3] = tonumber(string.sub(hash, 24, 32), 16)
|
73
|
-
local maxk = math.floor(0.693147180 * math.floor((entries * math.log(precision * math.pow(0.5, index))) / -0.480453013) / entries)
|
74
|
-
local b = { }
|
75
|
-
for i=1, maxk do
|
76
|
-
table.insert(b, h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)])
|
77
|
-
end
|
78
|
-
for n=1, index do
|
79
|
-
local key = KEYS[1] .. ':' .. n
|
80
|
-
local found = true
|
81
|
-
local bits = math.floor((entries * math.log(precision * math.pow(0.5, n))) / -0.480453013)
|
82
|
-
local k = math.floor(0.693147180 * bits / entries)
|
83
|
-
|
84
|
-
for i=1, k do
|
85
|
-
if redis.call('GETBIT', key, b[i] % bits) == 0 then
|
86
|
-
found = false
|
87
|
-
break
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
if found then
|
92
|
-
return 1
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
return 0
|
97
|
-
"
|
39
|
+
add_fnc = File.read File.expand_path("../../vendor/assets/lua/add.lua", __dir__)
|
40
|
+
check_fnc = File.read File.expand_path("../../vendor/assets/lua/check.lua", __dir__)
|
98
41
|
|
99
42
|
@add_fnc_sha = Digest::SHA1.hexdigest(add_fnc)
|
100
43
|
@check_fnc_sha = Digest::SHA1.hexdigest(check_fnc)
|
101
44
|
|
102
45
|
loaded = @redis.script(:exists, [@add_fnc_sha, @check_fnc_sha]).uniq
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
end
|
46
|
+
return unless loaded.count != 1 || loaded.first != true
|
47
|
+
@add_fnc_sha = @redis.script(:load, add_fnc)
|
48
|
+
@check_fnc_sha = @redis.script(:load, check_fnc)
|
107
49
|
end
|
108
50
|
|
109
|
-
def set(data
|
110
|
-
@redis.evalsha(@add_fnc_sha, keys: [@options[:key_name]], argv: [@options[:size], @options[:error_rate], data
|
51
|
+
def set(data)
|
52
|
+
@redis.evalsha(@add_fnc_sha, keys: [@options[:key_name]], argv: [@options[:size], @options[:error_rate], data])
|
111
53
|
end
|
112
54
|
end
|
113
55
|
end
|
@@ -13,7 +13,7 @@ class Redis
|
|
13
13
|
|
14
14
|
# Insert a new element
|
15
15
|
def insert(data)
|
16
|
-
set data
|
16
|
+
set data
|
17
17
|
end
|
18
18
|
|
19
19
|
# It checks if a key is part of the set
|
@@ -54,9 +54,9 @@ class Redis
|
|
54
54
|
idxs
|
55
55
|
end
|
56
56
|
|
57
|
-
def set(key
|
57
|
+
def set(key)
|
58
58
|
@redis.pipelined do
|
59
|
-
indexes_for(key).each { |i| @redis.setbit @options[:key_name], i,
|
59
|
+
indexes_for(key).each { |i| @redis.setbit @options[:key_name], i, 1 }
|
60
60
|
end
|
61
61
|
end
|
62
62
|
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
local entries = ARGV[1]
|
2
|
+
local precision = tonumber(ARGV[2])
|
3
|
+
local hash = redis.sha1hex(ARGV[3])
|
4
|
+
local countkey = KEYS[1] .. ':count'
|
5
|
+
local count = redis.call('GET', countkey)
|
6
|
+
if not count then
|
7
|
+
count = 1
|
8
|
+
else
|
9
|
+
count = count + 1
|
10
|
+
end
|
11
|
+
|
12
|
+
local factor = math.ceil((entries + count) / entries)
|
13
|
+
-- 0.69314718055995 = ln(2)
|
14
|
+
local index = math.ceil(math.log(factor) / 0.69314718055995)
|
15
|
+
local scale = math.pow(2, index - 1) * entries
|
16
|
+
local key = KEYS[1] .. ':' .. index
|
17
|
+
|
18
|
+
-- Based on the math from: http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
|
19
|
+
-- Combined with: http://www.sciencedirect.com/science/article/pii/S0020019006003127
|
20
|
+
-- 0.4804530139182 = ln(2)^2
|
21
|
+
local bits = math.floor(-(scale * math.log(precision * math.pow(0.5, index))) / 0.4804530139182)
|
22
|
+
|
23
|
+
-- 0.69314718055995 = ln(2)
|
24
|
+
local k = math.floor(0.69314718055995 * bits / scale)
|
25
|
+
|
26
|
+
-- This uses a variation on:
|
27
|
+
-- 'Less Hashing, Same Performance: Building a Better Bloom Filter'
|
28
|
+
-- https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
|
29
|
+
local h = { }
|
30
|
+
h[0] = tonumber(string.sub(hash, 1 , 8 ), 16)
|
31
|
+
h[1] = tonumber(string.sub(hash, 9 , 16), 16)
|
32
|
+
h[2] = tonumber(string.sub(hash, 17, 24), 16)
|
33
|
+
h[3] = tonumber(string.sub(hash, 25, 32), 16)
|
34
|
+
|
35
|
+
local found = true
|
36
|
+
for i=1, k do
|
37
|
+
if redis.call('SETBIT', key, (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % bits, 1) == 0 then
|
38
|
+
found = false
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
-- We only increment the count key when we actually added the item to the filter.
|
43
|
+
-- This doesn't mean count is accurate. Since this is a scaling bloom filter
|
44
|
+
-- it is possible the item was already present in one of the filters in a lower index.
|
45
|
+
-- If you really want to make sure an item isn't added multiple times you
|
46
|
+
-- can use cas.lua (Check And Set).
|
47
|
+
if found == false then
|
48
|
+
-- INCR is a little bit faster than SET.
|
49
|
+
redis.call('INCR', countkey)
|
50
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
local entries = ARGV[1]
|
2
|
+
local precision = ARGV[2]
|
3
|
+
local count = redis.call('GET', KEYS[1] .. ':count')
|
4
|
+
|
5
|
+
if not count then
|
6
|
+
return 0
|
7
|
+
end
|
8
|
+
|
9
|
+
local factor = math.ceil((entries + count) / entries)
|
10
|
+
-- 0.69314718055995 = ln(2)
|
11
|
+
local index = math.ceil(math.log(factor) / 0.69314718055995)
|
12
|
+
local scale = math.pow(2, index - 1) * entries
|
13
|
+
|
14
|
+
local hash = redis.sha1hex(ARGV[3])
|
15
|
+
|
16
|
+
-- This uses a variation on:
|
17
|
+
-- 'Less Hashing, Same Performance: Building a Better Bloom Filter'
|
18
|
+
-- https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
|
19
|
+
local h = { }
|
20
|
+
h[0] = tonumber(string.sub(hash, 1 , 8 ), 16)
|
21
|
+
h[1] = tonumber(string.sub(hash, 9 , 16), 16)
|
22
|
+
h[2] = tonumber(string.sub(hash, 17, 24), 16)
|
23
|
+
h[3] = tonumber(string.sub(hash, 25, 32), 16)
|
24
|
+
|
25
|
+
-- Based on the math from: http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
|
26
|
+
-- Combined with: http://www.sciencedirect.com/science/article/pii/S0020019006003127
|
27
|
+
-- 0.4804530139182 = ln(2)^2
|
28
|
+
local maxbits = math.floor((scale * math.log(precision * math.pow(0.5, index))) / -0.4804530139182)
|
29
|
+
|
30
|
+
-- 0.69314718055995 = ln(2)
|
31
|
+
local maxk = math.floor(0.69314718055995 * maxbits / scale)
|
32
|
+
local b = { }
|
33
|
+
|
34
|
+
for i=1, maxk do
|
35
|
+
table.insert(b, h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)])
|
36
|
+
end
|
37
|
+
|
38
|
+
for n=1, index do
|
39
|
+
local key = KEYS[1] .. ':' .. n
|
40
|
+
local found = true
|
41
|
+
local scalen = math.pow(2, n - 1) * entries
|
42
|
+
|
43
|
+
-- 0.4804530139182 = ln(2)^2
|
44
|
+
local bits = math.floor((scalen * math.log(precision * math.pow(0.5, n))) / -0.4804530139182)
|
45
|
+
|
46
|
+
-- 0.69314718055995 = ln(2)
|
47
|
+
local k = math.floor(0.69314718055995 * bits / scalen)
|
48
|
+
|
49
|
+
for i=1, k do
|
50
|
+
if redis.call('GETBIT', key, b[i] % bits) == 0 then
|
51
|
+
found = false
|
52
|
+
break
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
if found then
|
57
|
+
return 1
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
return 0
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redis-bloomfilter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francesco Laurita
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-07-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis
|
@@ -100,6 +100,8 @@ files:
|
|
100
100
|
- redis-bloomfilter.gemspec
|
101
101
|
- spec/redis_bloomfilter_spec.rb
|
102
102
|
- spec/spec_helper.rb
|
103
|
+
- vendor/assets/lua/add.lua
|
104
|
+
- vendor/assets/lua/check.lua
|
103
105
|
homepage: https://github.com/taganaka/redis-bloomfilter
|
104
106
|
licenses:
|
105
107
|
- MIT
|