redis-bloomfilter 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7ad9f7110655941d31af9601bb5f4bc039f02f2bee7d66ef09c52c32796487e2
4
- data.tar.gz: 57daaf41aaee08713cf7b9e0c5bb1e40772af503acf0c5111008ba4daa899b88
3
+ metadata.gz: 73a3287e8ed9a6cd594471734509e07eb20b731cc6cabbcd67752490b81d78b1
4
+ data.tar.gz: 345d7a05680558b4d7a3415f910fa57ced190c2c11788882cb47c07a52583236
5
5
  SHA512:
6
- metadata.gz: 1a68456cb710464e8e2021f2d859c9253a353db34c49f192af5751f8977a0bb1bb05b63d6ef6d8cea3bf836e3235bc97786fe6549fdfedba2eb0fd9ab23d1a05
7
- data.tar.gz: 697ce76ea20dbfb916d70787265ccc0c27c60fb726555a449e4d968117fc521685c5e3f3df3257b3951cfa550fd20a1a042daf4162f4ec70ffc6ec98b6f71fe3
6
+ metadata.gz: d3862a987bf159b4aa3481df9b4eade7f0ee03d69c3c2fbd8fb841f91138a8fffd377af1a5dbf4025755e87852dcd8a365a5ec2a380b14f36ed6037ed400fdcc
7
+ data.tar.gz: ab51bc7f412025d262c49c8156fb56754a178a76866710b3c04ceaddccfebec3e10e942bf8f8f62f26dcc385b9baeef2a4f7462682abae518f447517ad24d08e
@@ -18,7 +18,7 @@ class Redis
18
18
  end
19
19
 
20
20
  def insert(data)
21
- set data, 1
21
+ set data
22
22
  end
23
23
 
24
24
  def include?(key)
@@ -36,78 +36,20 @@ class Redis
36
36
  # Taken from https://github.com/ErikDubbelboer/redis-lua-scaling-bloom-filter
37
37
  # This is a scalable implementation of BF. It means the initial size can vary
38
38
  def lua_load
39
- add_fnc = "
40
- local entries = ARGV[1]
41
- local precision = ARGV[2]
42
- local set_value = ARGV[4]
43
- local index = math.ceil(redis.call('INCR', KEYS[1] .. ':count') / entries)
44
- local key = KEYS[1] .. ':' .. index
45
- local bits = math.floor(-(entries * math.log(precision * math.pow(0.5, index))) / 0.480453013)
46
- local k = math.floor(0.693147180 * bits / entries)
47
- local hash = redis.sha1hex(ARGV[3])
48
- local h = { }
49
- h[0] = tonumber(string.sub(hash, 0 , 8 ), 16)
50
- h[1] = tonumber(string.sub(hash, 8 , 16), 16)
51
- h[2] = tonumber(string.sub(hash, 16, 24), 16)
52
- h[3] = tonumber(string.sub(hash, 24, 32), 16)
53
- for i=1, k do
54
- redis.call('SETBIT', key, (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % bits, set_value)
55
- end
56
- "
57
-
58
- check_fnc = "
59
-
60
- local entries = ARGV[1]
61
- local precision = ARGV[2]
62
- local index = redis.call('GET', KEYS[1] .. ':count')
63
- if not index then
64
- return 0
65
- end
66
- index = math.ceil(redis.call('GET', KEYS[1] .. ':count') / entries)
67
- local hash = redis.sha1hex(ARGV[3])
68
- local h = { }
69
- h[0] = tonumber(string.sub(hash, 0 , 8 ), 16)
70
- h[1] = tonumber(string.sub(hash, 8 , 16), 16)
71
- h[2] = tonumber(string.sub(hash, 16, 24), 16)
72
- h[3] = tonumber(string.sub(hash, 24, 32), 16)
73
- local maxk = math.floor(0.693147180 * math.floor((entries * math.log(precision * math.pow(0.5, index))) / -0.480453013) / entries)
74
- local b = { }
75
- for i=1, maxk do
76
- table.insert(b, h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)])
77
- end
78
- for n=1, index do
79
- local key = KEYS[1] .. ':' .. n
80
- local found = true
81
- local bits = math.floor((entries * math.log(precision * math.pow(0.5, n))) / -0.480453013)
82
- local k = math.floor(0.693147180 * bits / entries)
83
-
84
- for i=1, k do
85
- if redis.call('GETBIT', key, b[i] % bits) == 0 then
86
- found = false
87
- break
88
- end
89
- end
90
-
91
- if found then
92
- return 1
93
- end
94
- end
95
-
96
- return 0
97
- "
39
+ add_fnc = File.read File.expand_path("../../vendor/assets/lua/add.lua", __dir__)
40
+ check_fnc = File.read File.expand_path("../../vendor/assets/lua/check.lua", __dir__)
98
41
 
99
42
  @add_fnc_sha = Digest::SHA1.hexdigest(add_fnc)
100
43
  @check_fnc_sha = Digest::SHA1.hexdigest(check_fnc)
101
44
 
102
45
  loaded = @redis.script(:exists, [@add_fnc_sha, @check_fnc_sha]).uniq
103
- if loaded.count != 1 || loaded.first != true
104
- @add_fnc_sha = @redis.script(:load, add_fnc)
105
- @check_fnc_sha = @redis.script(:load, check_fnc)
106
- end
46
+ return unless loaded.count != 1 || loaded.first != true
47
+ @add_fnc_sha = @redis.script(:load, add_fnc)
48
+ @check_fnc_sha = @redis.script(:load, check_fnc)
107
49
  end
108
50
 
109
- def set(data, val)
110
- @redis.evalsha(@add_fnc_sha, keys: [@options[:key_name]], argv: [@options[:size], @options[:error_rate], data, val])
51
+ def set(data)
52
+ @redis.evalsha(@add_fnc_sha, keys: [@options[:key_name]], argv: [@options[:size], @options[:error_rate], data])
111
53
  end
112
54
  end
113
55
  end
@@ -13,7 +13,7 @@ class Redis
13
13
 
14
14
  # Insert a new element
15
15
  def insert(data)
16
- set data, 1
16
+ set data
17
17
  end
18
18
 
19
19
  # It checks if a key is part of the set
@@ -54,9 +54,9 @@ class Redis
54
54
  idxs
55
55
  end
56
56
 
57
- def set(key, val)
57
+ def set(key)
58
58
  @redis.pipelined do
59
- indexes_for(key).each { |i| @redis.setbit @options[:key_name], i, val }
59
+ indexes_for(key).each { |i| @redis.setbit @options[:key_name], i, 1 }
60
60
  end
61
61
  end
62
62
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  class Redis
4
4
  class Bloomfilter
5
- VERSION = '1.0.1'
5
+ VERSION = '1.1.0'
6
6
  def self.version
7
7
  "redis-bloomfilter version #{VERSION}"
8
8
  end
@@ -0,0 +1,50 @@
1
+ local entries = ARGV[1]
2
+ local precision = tonumber(ARGV[2])
3
+ local hash = redis.sha1hex(ARGV[3])
4
+ local countkey = KEYS[1] .. ':count'
5
+ local count = redis.call('GET', countkey)
6
+ if not count then
7
+ count = 1
8
+ else
9
+ count = count + 1
10
+ end
11
+
12
+ local factor = math.ceil((entries + count) / entries)
13
+ -- 0.69314718055995 = ln(2)
14
+ local index = math.ceil(math.log(factor) / 0.69314718055995)
15
+ local scale = math.pow(2, index - 1) * entries
16
+ local key = KEYS[1] .. ':' .. index
17
+
18
+ -- Based on the math from: http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
19
+ -- Combined with: http://www.sciencedirect.com/science/article/pii/S0020019006003127
20
+ -- 0.4804530139182 = ln(2)^2
21
+ local bits = math.floor(-(scale * math.log(precision * math.pow(0.5, index))) / 0.4804530139182)
22
+
23
+ -- 0.69314718055995 = ln(2)
24
+ local k = math.floor(0.69314718055995 * bits / scale)
25
+
26
+ -- This uses a variation on:
27
+ -- 'Less Hashing, Same Performance: Building a Better Bloom Filter'
28
+ -- https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
29
+ local h = { }
30
+ h[0] = tonumber(string.sub(hash, 1 , 8 ), 16)
31
+ h[1] = tonumber(string.sub(hash, 9 , 16), 16)
32
+ h[2] = tonumber(string.sub(hash, 17, 24), 16)
33
+ h[3] = tonumber(string.sub(hash, 25, 32), 16)
34
+
35
+ local found = true
36
+ for i=1, k do
37
+ if redis.call('SETBIT', key, (h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)]) % bits, 1) == 0 then
38
+ found = false
39
+ end
40
+ end
41
+
42
+ -- We only increment the count key when we actually added the item to the filter.
43
+ -- This doesn't mean count is accurate. Since this is a scaling bloom filter
44
+ -- it is possible the item was already present in one of the filters in a lower index.
45
+ -- If you really want to make sure an item isn't added multiple times you
46
+ -- can use cas.lua (Check And Set).
47
+ if found == false then
48
+ -- INCR is a little bit faster than SET.
49
+ redis.call('INCR', countkey)
50
+ end
@@ -0,0 +1,61 @@
1
+ local entries = ARGV[1]
2
+ local precision = ARGV[2]
3
+ local count = redis.call('GET', KEYS[1] .. ':count')
4
+
5
+ if not count then
6
+ return 0
7
+ end
8
+
9
+ local factor = math.ceil((entries + count) / entries)
10
+ -- 0.69314718055995 = ln(2)
11
+ local index = math.ceil(math.log(factor) / 0.69314718055995)
12
+ local scale = math.pow(2, index - 1) * entries
13
+
14
+ local hash = redis.sha1hex(ARGV[3])
15
+
16
+ -- This uses a variation on:
17
+ -- 'Less Hashing, Same Performance: Building a Better Bloom Filter'
18
+ -- https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
19
+ local h = { }
20
+ h[0] = tonumber(string.sub(hash, 1 , 8 ), 16)
21
+ h[1] = tonumber(string.sub(hash, 9 , 16), 16)
22
+ h[2] = tonumber(string.sub(hash, 17, 24), 16)
23
+ h[3] = tonumber(string.sub(hash, 25, 32), 16)
24
+
25
+ -- Based on the math from: http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
26
+ -- Combined with: http://www.sciencedirect.com/science/article/pii/S0020019006003127
27
+ -- 0.4804530139182 = ln(2)^2
28
+ local maxbits = math.floor((scale * math.log(precision * math.pow(0.5, index))) / -0.4804530139182)
29
+
30
+ -- 0.69314718055995 = ln(2)
31
+ local maxk = math.floor(0.69314718055995 * maxbits / scale)
32
+ local b = { }
33
+
34
+ for i=1, maxk do
35
+ table.insert(b, h[i % 2] + i * h[2 + (((i + (i % 2)) % 4) / 2)])
36
+ end
37
+
38
+ for n=1, index do
39
+ local key = KEYS[1] .. ':' .. n
40
+ local found = true
41
+ local scalen = math.pow(2, n - 1) * entries
42
+
43
+ -- 0.4804530139182 = ln(2)^2
44
+ local bits = math.floor((scalen * math.log(precision * math.pow(0.5, n))) / -0.4804530139182)
45
+
46
+ -- 0.69314718055995 = ln(2)
47
+ local k = math.floor(0.69314718055995 * bits / scalen)
48
+
49
+ for i=1, k do
50
+ if redis.call('GETBIT', key, b[i] % bits) == 0 then
51
+ found = false
52
+ break
53
+ end
54
+ end
55
+
56
+ if found then
57
+ return 1
58
+ end
59
+ end
60
+
61
+ return 0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redis-bloomfilter
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francesco Laurita
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-02 00:00:00.000000000 Z
11
+ date: 2018-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis
@@ -100,6 +100,8 @@ files:
100
100
  - redis-bloomfilter.gemspec
101
101
  - spec/redis_bloomfilter_spec.rb
102
102
  - spec/spec_helper.rb
103
+ - vendor/assets/lua/add.lua
104
+ - vendor/assets/lua/check.lua
103
105
  homepage: https://github.com/taganaka/redis-bloomfilter
104
106
  licenses:
105
107
  - MIT