qbloom_filter 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -1
- data/lib/bloom_filter.rb +32 -9
- data/lib/bloom_filter/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1a5c8c820e92cec2133acd196b0b42ff9713b5b3446226db27dfa1d8db7ba28f
|
4
|
+
data.tar.gz: f2074640b8228181c145500e39cba8467d7fd7d743c94fecc11c8aefc5a013ba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c76fb3bea4b43e1ede2ddfcc6570ace55bd7e3e6a1e5fd3f3b6cb1845f082b8f55716345d40b7202c9d6798e61b84619d0e83a2158c050b966be3451cec83a73
|
7
|
+
data.tar.gz: bb76e4786858a0b0c86fecbd49e6971cccad8b34aaeca4d802940184ca6b38343b39df76da9bdb37fbfe4e9e478260874b400fc2f1ad31b0c3b015cbd3fbc829
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ And two parameters can be used to describe the bloom filter:
|
|
30
30
|
bloom_filter = BloomFilter::Filter.new(1000, 0.001)
|
31
31
|
```
|
32
32
|
|
33
|
-
####
|
33
|
+
#### Methods
|
34
34
|
__add(value)__ - add item into filter
|
35
35
|
|
36
36
|
__includes?(value)__ - check if filter includes the value
|
@@ -39,6 +39,16 @@ __contains?(value)__ - alias of __includes?(value)__
|
|
39
39
|
|
40
40
|
__count__ - returns number of inserted items
|
41
41
|
|
42
|
+
##### Methods for union and intersection of several bloom filters
|
43
|
+
|
44
|
+
__bit_size__ - returns number of bits in the bit array
|
45
|
+
|
46
|
+
__get_bit(position)__ - returns the value of a bit (true/false) in the bit array, raises an error if position is out of range of the bit array
|
47
|
+
|
48
|
+
__set_bit(position)__ - set a bit to TRUE in the bit array, raises an error if position is out of range of the bit array
|
49
|
+
|
50
|
+
__clear_bit(position)__ - set a bit to FALSE in the bit array, raises an error if position is out of range of the bit array
|
51
|
+
|
42
52
|
## Contributing
|
43
53
|
|
44
54
|
Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/bloom_filter
|
data/lib/bloom_filter.rb
CHANGED
@@ -5,6 +5,8 @@ require 'digest/md5'
|
|
5
5
|
module BloomFilter
|
6
6
|
PRIME = 100_000_000_003
|
7
7
|
MAX_HASH_PARAM = 1000
|
8
|
+
OUT_OF_RANGE = "Position is out of range"
|
9
|
+
|
8
10
|
class Filter
|
9
11
|
attr_reader :count
|
10
12
|
|
@@ -19,20 +21,16 @@ module BloomFilter
|
|
19
21
|
|
20
22
|
#number of hash functions that minimizes the probability of false positives
|
21
23
|
@k = (Math.log(2) * (@m / capacity)).ceil
|
22
|
-
|
23
|
-
# a, b params for hash functions
|
24
|
-
@hash_params = []
|
25
|
-
@k.times { @hash_params.push([rand(1000), rand(1000)]) }
|
26
24
|
end
|
27
25
|
|
28
26
|
def add(value)
|
29
27
|
x = get_hash(value)
|
30
28
|
was_inserted = true
|
31
29
|
@k.times do |i|
|
32
|
-
a, b =
|
30
|
+
a, b = get_hash_params(i)
|
33
31
|
position = get_position(a, b, x)
|
34
|
-
was_inserted = false unless
|
35
|
-
|
32
|
+
was_inserted = false unless self.get_bit(position)
|
33
|
+
self.set_bit(position)
|
36
34
|
end
|
37
35
|
@count += 1 unless was_inserted
|
38
36
|
value
|
@@ -42,14 +40,30 @@ module BloomFilter
|
|
42
40
|
x = get_hash(value)
|
43
41
|
result = true
|
44
42
|
@k.times do |i|
|
45
|
-
a, b =
|
46
|
-
result = false unless
|
43
|
+
a, b = get_hash_params(i)
|
44
|
+
result = false unless self.get_bit(get_position(a, b, x))
|
47
45
|
end
|
48
46
|
|
49
47
|
result
|
50
48
|
end
|
51
49
|
alias :includes? :contains?
|
52
50
|
|
51
|
+
def get_bit(position)
|
52
|
+
@bitset[position] if is_valid_position(position)
|
53
|
+
end
|
54
|
+
|
55
|
+
def set_bit(position)
|
56
|
+
@bitset[position] = true if is_valid_position(position)
|
57
|
+
end
|
58
|
+
|
59
|
+
def clear_bit(position)
|
60
|
+
@bitset[position] = false if is_valid_position(position)
|
61
|
+
end
|
62
|
+
|
63
|
+
def bit_size
|
64
|
+
@m
|
65
|
+
end
|
66
|
+
|
53
67
|
private
|
54
68
|
|
55
69
|
def get_position(a, b, val)
|
@@ -59,5 +73,14 @@ module BloomFilter
|
|
59
73
|
def get_hash(value)
|
60
74
|
Digest::MD5.hexdigest(value.to_s).to_i(16)
|
61
75
|
end
|
76
|
+
|
77
|
+
def is_valid_position(position)
|
78
|
+
raise OUT_OF_RANGE if position >= @m
|
79
|
+
true
|
80
|
+
end
|
81
|
+
|
82
|
+
def get_hash_params(i)
|
83
|
+
return 2*i + 1, 2*i + 2
|
84
|
+
end
|
62
85
|
end
|
63
86
|
end
|
data/lib/bloom_filter/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: qbloom_filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- qaz
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Simple Bloom Filter
|
14
14
|
email:
|