qbloom_filter 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +7 -1
- data/lib/bloom_filter.rb +36 -7
- data/lib/bloom_filter/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4437f90c178eb577a2082d8f0dd39fd791821a66fe4f6158564104d09e0482fc
|
4
|
+
data.tar.gz: 31b0ef142afe7125b9ccd7b64714f3b5272d6d966e3ff1c5a15997b95486dd29
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1f0e7a61344a58862c5b489e8bf8268643983f3184320ae1778c083edf298d5f704996ef12c3c80bca234a9035f8ccb943cdd5ca41e5bab3989baed2af90e8f1
|
7
|
+
data.tar.gz: 488f71cd0878139b930103567962f66f671edabff8e69aa3080e9cce834aa9756c4936d52083e1bd9d9f5670911bf2517a13227dd9e2cde57a7a7b216b566a4f
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -39,7 +39,9 @@ __contains?(value)__ - alias of __includes?(value)__
|
|
39
39
|
|
40
40
|
__count__ - returns number of inserted items
|
41
41
|
|
42
|
-
|
42
|
+
__capacity__ - returns initial capacity
|
43
|
+
|
44
|
+
__probability__ - returns initial probability
|
43
45
|
|
44
46
|
__bit_size__ - returns number of bits in the bit array
|
45
47
|
|
@@ -49,6 +51,10 @@ __set_bit(position)__ - set a bit to TRUE in the bit array, rises an error if po
|
|
49
51
|
|
50
52
|
__clear_bit(position)__ - sets a bit to FALSE in the bit array, raises an error if position is out of range of the bit array
|
51
53
|
|
54
|
+
__union_with(bloom_filter)__ - unions the current bloom filter with another one; both bloom filters must be instances of this module and have the same initial params (capacity, probability)
|
55
|
+
|
56
|
+
__intersect_with(bloom_filter)__ - intersects the current bloom filter with another one; both bloom filters must be instances of this module and have the same initial params (capacity, probability)
|
57
|
+
|
52
58
|
## Contributing
|
53
59
|
|
54
60
|
Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/bloom_filter
|
data/lib/bloom_filter.rb
CHANGED
@@ -6,14 +6,19 @@ module BloomFilter
|
|
6
6
|
PRIME = 100_000_000_003
|
7
7
|
MAX_HASH_PARAM = 1000
|
8
8
|
OUT_OF_RANGE = "Position is out of range"
|
9
|
+
DIFFERENT_INITIAL_PARAMS = "Bloom filters have different initial params"
|
9
10
|
|
10
11
|
class Filter
|
11
|
-
attr_reader :count
|
12
|
+
attr_reader :count, :capacity, :probability
|
12
13
|
|
13
14
|
def initialize(capacity = 100, probability = 0.01)
|
14
15
|
# amount of inserted elements
|
15
16
|
@count = 0
|
16
17
|
|
18
|
+
# params of the filter, used for comparison with params of other bloom filters
|
19
|
+
@capacity = capacity
|
20
|
+
@probability = probability
|
21
|
+
|
17
22
|
#number of bits in the array
|
18
23
|
@m = (-(capacity * Math.log(probability)) / (Math.log(2) ** 2)).ceil
|
19
24
|
|
@@ -48,20 +53,39 @@ module BloomFilter
|
|
48
53
|
end
|
49
54
|
alias :includes? :contains?
|
50
55
|
|
56
|
+
def bit_size
|
57
|
+
@m
|
58
|
+
end
|
59
|
+
|
51
60
|
def get_bit(position)
|
52
|
-
|
61
|
+
valid_position?(position)
|
62
|
+
@bitset[position]
|
53
63
|
end
|
54
64
|
|
55
65
|
def set_bit(position)
|
56
|
-
|
66
|
+
valid_position?(position)
|
67
|
+
@bitset[position] = true
|
57
68
|
end
|
58
69
|
|
59
70
|
def clear_bit(position)
|
60
|
-
|
71
|
+
valid_position?(position)
|
72
|
+
@bitset[position] = false
|
61
73
|
end
|
62
74
|
|
63
|
-
def
|
64
|
-
|
75
|
+
def union_with(bloom_filter)
|
76
|
+
same_params?(bloom_filter)
|
77
|
+
|
78
|
+
@m.times do |i|
|
79
|
+
@bitset[i] = self.get_bit(i) || bloom_filter.get_bit(i)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def intersect_with(bloom_filter)
|
84
|
+
same_params?(bloom_filter)
|
85
|
+
|
86
|
+
@m.times do |i|
|
87
|
+
@bitset[i] = self.get_bit(i) && bloom_filter.get_bit(i)
|
88
|
+
end
|
65
89
|
end
|
66
90
|
|
67
91
|
private
|
@@ -74,11 +98,16 @@ module BloomFilter
|
|
74
98
|
Digest::MD5.hexdigest(value.to_s).to_i(16)
|
75
99
|
end
|
76
100
|
|
77
|
-
def
|
101
|
+
def valid_position?(position)
|
78
102
|
raise OUT_OF_RANGE if position >= @m
|
79
103
|
true
|
80
104
|
end
|
81
105
|
|
106
|
+
def same_params?(bf)
|
107
|
+
raise DIFFERENT_INITIAL_PARAMS if self.class != bf.class || bf.capacity != @capacity || bf.probability != @probability
|
108
|
+
true
|
109
|
+
end
|
110
|
+
|
82
111
|
def get_hash_params(i)
|
83
112
|
return 2*i + 1, 2*i + 2
|
84
113
|
end
|
data/lib/bloom_filter/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: qbloom_filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- qaz
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-29 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Simple Bloom Filter
|
14
14
|
email:
|