qbloom_filter 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/lib/bloom_filter.rb +32 -9
- data/lib/bloom_filter/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1a5c8c820e92cec2133acd196b0b42ff9713b5b3446226db27dfa1d8db7ba28f
|
4
|
+
data.tar.gz: f2074640b8228181c145500e39cba8467d7fd7d743c94fecc11c8aefc5a013ba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c76fb3bea4b43e1ede2ddfcc6570ace55bd7e3e6a1e5fd3f3b6cb1845f082b8f55716345d40b7202c9d6798e61b84619d0e83a2158c050b966be3451cec83a73
|
7
|
+
data.tar.gz: bb76e4786858a0b0c86fecbd49e6971cccad8b34aaeca4d802940184ca6b38343b39df76da9bdb37fbfe4e9e478260874b400fc2f1ad31b0c3b015cbd3fbc829
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ And two parameters can be used to describe the bloom filter:
|
|
30
30
|
bloom_filter = BloomFilter::Filter.new(1000, 0.001)
|
31
31
|
```
|
32
32
|
|
33
|
-
####
|
33
|
+
#### Methods
|
34
34
|
__add(value)__ - add item into filter
|
35
35
|
|
36
36
|
__includes?(value)__ - check if filter includes the value
|
@@ -39,6 +39,16 @@ __contains?(value)__ - alias of __includes?(value)__
|
|
39
39
|
|
40
40
|
__count__ - returns number of inserted items
|
41
41
|
|
42
|
+
##### Methods for union and intersection of several bloom filters
|
43
|
+
|
44
|
+
__bit_size__ - returns number of bits in the bit array
|
45
|
+
|
46
|
+
__get_bit(position)__ - returns the value of a bit (true/false) in the bit array; raises an error if position is out of range of the bit array
|
47
|
+
|
48
|
+
__set_bit(position)__ - sets a bit to TRUE in the bit array; raises an error if position is out of range of the bit array
|
49
|
+
|
50
|
+
__clear_bit(position)__ - sets a bit to FALSE in the bit array; raises an error if position is out of range of the bit array
|
51
|
+
|
42
52
|
## Contributing
|
43
53
|
|
44
54
|
Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/bloom_filter
|
data/lib/bloom_filter.rb
CHANGED
@@ -5,6 +5,8 @@ require 'digest/md5'
|
|
5
5
|
module BloomFilter
|
6
6
|
PRIME = 100_000_000_003
|
7
7
|
MAX_HASH_PARAM = 1000
|
8
|
+
OUT_OF_RANGE = "Position is out of range"
|
9
|
+
|
8
10
|
class Filter
|
9
11
|
attr_reader :count
|
10
12
|
|
@@ -19,20 +21,16 @@ module BloomFilter
|
|
19
21
|
|
20
22
|
#number of hash functions that minimizes the probability of false positives
|
21
23
|
@k = (Math.log(2) * (@m / capacity)).ceil
|
22
|
-
|
23
|
-
# a, b params for hash functions
|
24
|
-
@hash_params = []
|
25
|
-
@k.times { @hash_params.push([rand(1000), rand(1000)]) }
|
26
24
|
end
|
27
25
|
|
28
26
|
def add(value)
|
29
27
|
x = get_hash(value)
|
30
28
|
was_inserted = true
|
31
29
|
@k.times do |i|
|
32
|
-
a, b =
|
30
|
+
a, b = get_hash_params(i)
|
33
31
|
position = get_position(a, b, x)
|
34
|
-
was_inserted = false unless
|
35
|
-
|
32
|
+
was_inserted = false unless self.get_bit(position)
|
33
|
+
self.set_bit(position)
|
36
34
|
end
|
37
35
|
@count += 1 unless was_inserted
|
38
36
|
value
|
@@ -42,14 +40,30 @@ module BloomFilter
|
|
42
40
|
x = get_hash(value)
|
43
41
|
result = true
|
44
42
|
@k.times do |i|
|
45
|
-
a, b =
|
46
|
-
result = false unless
|
43
|
+
a, b = get_hash_params(i)
|
44
|
+
result = false unless self.get_bit(get_position(a, b, x))
|
47
45
|
end
|
48
46
|
|
49
47
|
result
|
50
48
|
end
|
51
49
|
alias :includes? :contains?
|
52
50
|
|
51
|
+
def get_bit(position)
|
52
|
+
@bitset[position] if is_valid_position(position)
|
53
|
+
end
|
54
|
+
|
55
|
+
def set_bit(position)
|
56
|
+
@bitset[position] = true if is_valid_position(position)
|
57
|
+
end
|
58
|
+
|
59
|
+
def clear_bit(position)
|
60
|
+
@bitset[position] = false if is_valid_position(position)
|
61
|
+
end
|
62
|
+
|
63
|
+
def bit_size
|
64
|
+
@m
|
65
|
+
end
|
66
|
+
|
53
67
|
private
|
54
68
|
|
55
69
|
def get_position(a, b, val)
|
@@ -59,5 +73,14 @@ module BloomFilter
|
|
59
73
|
def get_hash(value)
|
60
74
|
Digest::MD5.hexdigest(value.to_s).to_i(16)
|
61
75
|
end
|
76
|
+
|
77
|
+
def is_valid_position(position)
|
78
|
+
raise OUT_OF_RANGE if position >= @m
|
79
|
+
true
|
80
|
+
end
|
81
|
+
|
82
|
+
def get_hash_params(i)
|
83
|
+
return 2*i + 1, 2*i + 2
|
84
|
+
end
|
62
85
|
end
|
63
86
|
end
|
data/lib/bloom_filter/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: qbloom_filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- qaz
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Simple Bloom Filter
|
14
14
|
email:
|