redis-bloomfilter 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bloomfilter_driver/lua.rb +8 -66
- data/lib/bloomfilter_driver/ruby.rb +3 -3
- data/lib/redis/bloomfilter/version.rb +1 -1
- data/vendor/assets/lua/add.lua +50 -0
- data/vendor/assets/lua/check.lua +61 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 73a3287e8ed9a6cd594471734509e07eb20b731cc6cabbcd67752490b81d78b1
|
4
|
+
data.tar.gz: 345d7a05680558b4d7a3415f910fa57ced190c2c11788882cb47c07a52583236
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d3862a987bf159b4aa3481df9b4eade7f0ee03d69c3c2fbd8fb841f91138a8fffd377af1a5dbf4025755e87852dcd8a365a5ec2a380b14f36ed6037ed400fdcc
|
7
|
+
data.tar.gz: ab51bc7f412025d262c49c8156fb56754a178a76866710b3c04ceaddccfebec3e10e942bf8f8f62f26dcc385b9baeef2a4f7462682abae518f447517ad24d08e
|
@@ -18,7 +18,7 @@ class Redis
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def insert(data)
|
21
|
-
set data
|
21
|
+
set data
|
22
22
|
end
|
23
23
|
|
24
24
|
def include?(key)
|
@@ -36,78 +36,20 @@ class Redis
|
|
36
36
|
# Taken from https://github.com/ErikDubbelboer/redis-lua-scaling-bloom-filter
|
37
37
|
# This is a scalable implementation of BF. It means the initial size can vary
|
38
38
|
def lua_load
|
39
|
-
add_fnc = "
|
40
|
-
|
41
|
-
local precision = ARGV[2]
|
42
|
-
local set_value = ARGV[4]
|
43
|
-
local index = math.ceil(redis.call('INCR', KEYS[1] .. ':count') / entries)
|
44
|
-
local key = KEYS[1] .. ':' .. index
|
45
|
-
local bits = math.floor(-(entries * math.log(precision * math.pow(0.5, index))) / 0.480453013)
|
46
|
-
local k = math.floor(0.693147180 * bits / entries)
|
47
|
-
local hash = redis.sha1hex(ARGV[3])
|
48
|
-
local h = { }
|
49
|
-
h[0] = tonumber(string.sub(hash, 0 , 8 ), 16)
|
50
|
-
h[1] = tonumber(string.sub(hash, 8 , 16), 16)
|
51
|
-
h[2] = tonumber(string.sub(hash, 16, 24), 16)
|
52
|
-
h[3] = tonumber(string.sub(hash, 24, 32), 16)
|
53
|
-
for i=1, k do
|
54
|
-
redis.call('SETBIT', key, (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % bits, set_value)
|
55
|
-
end
|
56
|
-
"
|
57
|
-
|
58
|
-
check_fnc = "
|
59
|
-
|
60
|
-
local entries = ARGV[1]
|
61
|
-
local precision = ARGV[2]
|
62
|
-
local index = redis.call('GET', KEYS[1] .. ':count')
|
63
|
-
if not index then
|
64
|
-
return 0
|
65
|
-
end
|
66
|
-
index = math.ceil(redis.call('GET', KEYS[1] .. ':count') / entries)
|
67
|
-
local hash = redis.sha1hex(ARGV[3])
|
68
|
-
local h = { }
|
69
|
-
h[0] = tonumber(string.sub(hash, 0 , 8 ), 16)
|
70
|
-
h[1] = tonumber(string.sub(hash, 8 , 16), 16)
|
71
|
-
h[2] = tonumber(string.sub(hash, 16, 24), 16)
|
72
|
-
h[3] = tonumber(string.sub(hash, 24, 32), 16)
|
73
|
-
local maxk = math.floor(0.693147180 * math.floor((entries * math.log(precision * math.pow(0.5, index))) / -0.480453013) / entries)
|
74
|
-
local b = { }
|
75
|
-
for i=1, maxk do
|
76
|
-
table.insert(b, h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)])
|
77
|
-
end
|
78
|
-
for n=1, index do
|
79
|
-
local key = KEYS[1] .. ':' .. n
|
80
|
-
local found = true
|
81
|
-
local bits = math.floor((entries * math.log(precision * math.pow(0.5, n))) / -0.480453013)
|
82
|
-
local k = math.floor(0.693147180 * bits / entries)
|
83
|
-
|
84
|
-
for i=1, k do
|
85
|
-
if redis.call('GETBIT', key, b[i] % bits) == 0 then
|
86
|
-
found = false
|
87
|
-
break
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
if found then
|
92
|
-
return 1
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
return 0
|
97
|
-
"
|
39
|
+
add_fnc = File.read File.expand_path("../../vendor/assets/lua/add.lua", __dir__)
|
40
|
+
check_fnc = File.read File.expand_path("../../vendor/assets/lua/check.lua", __dir__)
|
98
41
|
|
99
42
|
@add_fnc_sha = Digest::SHA1.hexdigest(add_fnc)
|
100
43
|
@check_fnc_sha = Digest::SHA1.hexdigest(check_fnc)
|
101
44
|
|
102
45
|
loaded = @redis.script(:exists, [@add_fnc_sha, @check_fnc_sha]).uniq
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
end
|
46
|
+
return unless loaded.count != 1 || loaded.first != true
|
47
|
+
@add_fnc_sha = @redis.script(:load, add_fnc)
|
48
|
+
@check_fnc_sha = @redis.script(:load, check_fnc)
|
107
49
|
end
|
108
50
|
|
109
|
-
def set(data
|
110
|
-
@redis.evalsha(@add_fnc_sha, keys: [@options[:key_name]], argv: [@options[:size], @options[:error_rate], data
|
51
|
+
def set(data)
|
52
|
+
@redis.evalsha(@add_fnc_sha, keys: [@options[:key_name]], argv: [@options[:size], @options[:error_rate], data])
|
111
53
|
end
|
112
54
|
end
|
113
55
|
end
|
@@ -13,7 +13,7 @@ class Redis
|
|
13
13
|
|
14
14
|
# Insert a new element
|
15
15
|
def insert(data)
|
16
|
-
set data
|
16
|
+
set data
|
17
17
|
end
|
18
18
|
|
19
19
|
# It checks if a key is part of the set
|
@@ -54,9 +54,9 @@ class Redis
|
|
54
54
|
idxs
|
55
55
|
end
|
56
56
|
|
57
|
-
def set(key
|
57
|
+
def set(key)
|
58
58
|
@redis.pipelined do
|
59
|
-
indexes_for(key).each { |i| @redis.setbit @options[:key_name], i,
|
59
|
+
indexes_for(key).each { |i| @redis.setbit @options[:key_name], i, 1 }
|
60
60
|
end
|
61
61
|
end
|
62
62
|
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
local entries = ARGV[1]
|
2
|
+
local precision = tonumber(ARGV[2])
|
3
|
+
local hash = redis.sha1hex(ARGV[3])
|
4
|
+
local countkey = KEYS[1] .. ':count'
|
5
|
+
local count = redis.call('GET', countkey)
|
6
|
+
if not count then
|
7
|
+
count = 1
|
8
|
+
else
|
9
|
+
count = count + 1
|
10
|
+
end
|
11
|
+
|
12
|
+
local factor = math.ceil((entries + count) / entries)
|
13
|
+
-- 0.69314718055995 = ln(2)
|
14
|
+
local index = math.ceil(math.log(factor) / 0.69314718055995)
|
15
|
+
local scale = math.pow(2, index - 1) * entries
|
16
|
+
local key = KEYS[1] .. ':' .. index
|
17
|
+
|
18
|
+
-- Based on the math from: http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
|
19
|
+
-- Combined with: http://www.sciencedirect.com/science/article/pii/S0020019006003127
|
20
|
+
-- 0.4804530139182 = ln(2)^2
|
21
|
+
local bits = math.floor(-(scale * math.log(precision * math.pow(0.5, index))) / 0.4804530139182)
|
22
|
+
|
23
|
+
-- 0.69314718055995 = ln(2)
|
24
|
+
local k = math.floor(0.69314718055995 * bits / scale)
|
25
|
+
|
26
|
+
-- This uses a variation on:
|
27
|
+
-- 'Less Hashing, Same Performance: Building a Better Bloom Filter'
|
28
|
+
-- https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
|
29
|
+
local h = { }
|
30
|
+
h[0] = tonumber(string.sub(hash, 1 , 8 ), 16)
|
31
|
+
h[1] = tonumber(string.sub(hash, 9 , 16), 16)
|
32
|
+
h[2] = tonumber(string.sub(hash, 17, 24), 16)
|
33
|
+
h[3] = tonumber(string.sub(hash, 25, 32), 16)
|
34
|
+
|
35
|
+
local found = true
|
36
|
+
for i=1, k do
|
37
|
+
if redis.call('SETBIT', key, (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % bits, 1) == 0 then
|
38
|
+
found = false
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
-- We only increment the count key when we actually added the item to the filter.
|
43
|
+
-- This doesn't mean count is accurate. Since this is a scaling bloom filter
|
44
|
+
-- it is possible the item was already present in one of the filters in a lower index.
|
45
|
+
-- If you really want to make sure an item isn't added multiple times you
|
46
|
+
-- can use cas.lua (Check And Set).
|
47
|
+
if found == false then
|
48
|
+
-- INCR is a little bit faster than SET.
|
49
|
+
redis.call('INCR', countkey)
|
50
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
local entries = ARGV[1]
|
2
|
+
local precision = ARGV[2]
|
3
|
+
local count = redis.call('GET', KEYS[1] .. ':count')
|
4
|
+
|
5
|
+
if not count then
|
6
|
+
return 0
|
7
|
+
end
|
8
|
+
|
9
|
+
local factor = math.ceil((entries + count) / entries)
|
10
|
+
-- 0.69314718055995 = ln(2)
|
11
|
+
local index = math.ceil(math.log(factor) / 0.69314718055995)
|
12
|
+
local scale = math.pow(2, index - 1) * entries
|
13
|
+
|
14
|
+
local hash = redis.sha1hex(ARGV[3])
|
15
|
+
|
16
|
+
-- This uses a variation on:
|
17
|
+
-- 'Less Hashing, Same Performance: Building a Better Bloom Filter'
|
18
|
+
-- https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
|
19
|
+
local h = { }
|
20
|
+
h[0] = tonumber(string.sub(hash, 1 , 8 ), 16)
|
21
|
+
h[1] = tonumber(string.sub(hash, 9 , 16), 16)
|
22
|
+
h[2] = tonumber(string.sub(hash, 17, 24), 16)
|
23
|
+
h[3] = tonumber(string.sub(hash, 25, 32), 16)
|
24
|
+
|
25
|
+
-- Based on the math from: http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
|
26
|
+
-- Combined with: http://www.sciencedirect.com/science/article/pii/S0020019006003127
|
27
|
+
-- 0.4804530139182 = ln(2)^2
|
28
|
+
local maxbits = math.floor((scale * math.log(precision * math.pow(0.5, index))) / -0.4804530139182)
|
29
|
+
|
30
|
+
-- 0.69314718055995 = ln(2)
|
31
|
+
local maxk = math.floor(0.69314718055995 * maxbits / scale)
|
32
|
+
local b = { }
|
33
|
+
|
34
|
+
for i=1, maxk do
|
35
|
+
table.insert(b, h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)])
|
36
|
+
end
|
37
|
+
|
38
|
+
for n=1, index do
|
39
|
+
local key = KEYS[1] .. ':' .. n
|
40
|
+
local found = true
|
41
|
+
local scalen = math.pow(2, n - 1) * entries
|
42
|
+
|
43
|
+
-- 0.4804530139182 = ln(2)^2
|
44
|
+
local bits = math.floor((scalen * math.log(precision * math.pow(0.5, n))) / -0.4804530139182)
|
45
|
+
|
46
|
+
-- 0.69314718055995 = ln(2)
|
47
|
+
local k = math.floor(0.69314718055995 * bits / scalen)
|
48
|
+
|
49
|
+
for i=1, k do
|
50
|
+
if redis.call('GETBIT', key, b[i] % bits) == 0 then
|
51
|
+
found = false
|
52
|
+
break
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
if found then
|
57
|
+
return 1
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
return 0
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redis-bloomfilter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francesco Laurita
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-07-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis
|
@@ -100,6 +100,8 @@ files:
|
|
100
100
|
- redis-bloomfilter.gemspec
|
101
101
|
- spec/redis_bloomfilter_spec.rb
|
102
102
|
- spec/spec_helper.rb
|
103
|
+
- vendor/assets/lua/add.lua
|
104
|
+
- vendor/assets/lua/check.lua
|
103
105
|
homepage: https://github.com/taganaka/redis-bloomfilter
|
104
106
|
licenses:
|
105
107
|
- MIT
|