qbloom_filter 0.1.0 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +20 -4
- data/bin/console +1 -1
- data/lib/qbloom_filter.rb +115 -0
- data/lib/{bloom_filter → qbloom_filter}/version.rb +1 -1
- data/{bloom_filter.gemspec → qbloom_filter.gemspec} +5 -5
- metadata +10 -10
- data/lib/bloom_filter.rb +0 -63
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d3107815ff8ebf8aa327a4f2cd2cfb80d9f0aefbffd84e428adcfc504295d42c
|
4
|
+
data.tar.gz: fb29b08c1f68f5f50a06dfbc3b395c43ff9be36fbe0e2741cecf47c7ccf75f1e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2e6ded653a4c765234dd4f38e874efb50de33952fa7faef0ae9c9f97ad0142eabe5bae0c63029f621c9224f456a5bc04d7de12839d28f2748539f7b0d7d53bab
|
7
|
+
data.tar.gz: 40670964f95d373b7c4d413ae60ad5369760a34bcf511195b4e31a38fbc46782e76f8b8b9745607c010dfb32330e9803388d8e08c7f5ec95fc7ec30ffd514631
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -7,7 +7,7 @@ A Bloom filter is a space-efficient probabilistic data structure
|
|
7
7
|
Add this line to your application's Gemfile:
|
8
8
|
|
9
9
|
```ruby
|
10
|
-
gem '
|
10
|
+
gem 'qbloom_filter'
|
11
11
|
```
|
12
12
|
|
13
13
|
And then execute:
|
@@ -16,7 +16,7 @@ And then execute:
|
|
16
16
|
|
17
17
|
Or install it yourself as:
|
18
18
|
|
19
|
-
$ gem install
|
19
|
+
$ gem install qbloom_filter
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
@@ -30,7 +30,7 @@ And two parameters can be used to describe the bloom filter:
|
|
30
30
|
bloom_filter = BloomFilter::Filter.new(1000, 0.001)
|
31
31
|
```
|
32
32
|
|
33
|
-
####
|
33
|
+
#### Methods
|
34
34
|
__add(value)__ - add item into filter
|
35
35
|
|
36
36
|
__includes?(value)__ - check if filter includes the value
|
@@ -39,7 +39,23 @@ __contains?(value)__ - alias of __includes?(value)__
|
|
39
39
|
|
40
40
|
__count__ - returns number of inserted items
|
41
41
|
|
42
|
+
__capacity__ - returns initial capacity
|
43
|
+
|
44
|
+
__probability__ - returns initial probability
|
45
|
+
|
46
|
+
__bit_size__ - returns number of bits in the bit array
|
47
|
+
|
48
|
+
__get_bit(position)__ - returns the value of a bit (true/false) in the bit array; raises an error if position is out of range of the bit array
|
49
|
+
|
50
|
+
__set_bit(position)__ - sets a bit to TRUE in the bit array; raises an error if position is out of range of the bit array
|
51
|
+
|
52
|
+
__clear_bit(position)__ - sets a bit to FALSE in the bit array; raises an error if position is out of range of the bit array
|
53
|
+
|
54
|
+
__union_with(bloom_filter)__ - unions the current bloom filter with another one; both bloom filters should be instances of this module and have the same initial params (capacity, probability)
|
55
|
+
|
56
|
+
__intersect_with(bloom_filter)__ - intersects the current bloom filter with another one; both bloom filters should be instances of this module and have the same initial params (capacity, probability)
|
57
|
+
|
42
58
|
## Contributing
|
43
59
|
|
44
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/
|
60
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/qbloom_filter
|
45
61
|
|
data/bin/console
CHANGED
@@ -0,0 +1,115 @@
|
|
1
|
+
require "bloom_filter/version"
|
2
|
+
require "bitset"
|
3
|
+
require 'digest/md5'
|
4
|
+
|
5
|
+
module BloomFilter
|
6
|
+
PRIME = 100_000_000_003
|
7
|
+
MAX_HASH_PARAM = 1000
|
8
|
+
OUT_OF_RANGE = "Position is out of range"
|
9
|
+
DIFFERENT_INITIAL_PARAMS = "Bloom filters have different initial params"
|
10
|
+
|
11
|
+
class Filter
|
12
|
+
attr_reader :count, :capacity, :probability
|
13
|
+
|
14
|
+
def initialize(capacity = 100, probability = 0.01)
|
15
|
+
# amount of inserted elements
|
16
|
+
@count = 0
|
17
|
+
|
18
|
+
# params of the filter, used for comparison with params of other bloom filters
|
19
|
+
@capacity = capacity
|
20
|
+
@probability = probability
|
21
|
+
|
22
|
+
#number of bits in the array
|
23
|
+
@m = (-(capacity * Math.log(probability)) / (Math.log(2) ** 2)).ceil
|
24
|
+
|
25
|
+
@bitset = Bitset.new(@m)
|
26
|
+
|
27
|
+
#number of hash functions that minimizes the probability of false positives
|
28
|
+
@k = (Math.log(2) * (@m / capacity)).ceil
|
29
|
+
end
|
30
|
+
|
31
|
+
def add(value)
|
32
|
+
x = get_hash(value)
|
33
|
+
was_inserted = true
|
34
|
+
@k.times do |i|
|
35
|
+
a, b = get_hash_params(i)
|
36
|
+
position = get_position(a, b, x)
|
37
|
+
was_inserted = false unless self.get_bit(position)
|
38
|
+
self.set_bit(position)
|
39
|
+
end
|
40
|
+
@count += 1 unless was_inserted
|
41
|
+
value
|
42
|
+
end
|
43
|
+
|
44
|
+
def contains?(value)
|
45
|
+
x = get_hash(value)
|
46
|
+
result = true
|
47
|
+
@k.times do |i|
|
48
|
+
a, b = get_hash_params(i)
|
49
|
+
result = false unless self.get_bit(get_position(a, b, x))
|
50
|
+
end
|
51
|
+
|
52
|
+
result
|
53
|
+
end
|
54
|
+
alias :includes? :contains?
|
55
|
+
|
56
|
+
def bit_size
|
57
|
+
@m
|
58
|
+
end
|
59
|
+
|
60
|
+
def get_bit(position)
|
61
|
+
valid_position?(position)
|
62
|
+
@bitset[position]
|
63
|
+
end
|
64
|
+
|
65
|
+
def set_bit(position)
|
66
|
+
valid_position?(position)
|
67
|
+
@bitset[position] = true
|
68
|
+
end
|
69
|
+
|
70
|
+
def clear_bit(position)
|
71
|
+
valid_position?(position)
|
72
|
+
@bitset[position] = false
|
73
|
+
end
|
74
|
+
|
75
|
+
def union_with(bloom_filter)
|
76
|
+
same_params?(bloom_filter)
|
77
|
+
|
78
|
+
@m.times do |i|
|
79
|
+
@bitset[i] = self.get_bit(i) || bloom_filter.get_bit(i)
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def intersect_with(bloom_filter)
|
84
|
+
same_params?(bloom_filter)
|
85
|
+
|
86
|
+
@m.times do |i|
|
87
|
+
@bitset[i] = self.get_bit(i) && bloom_filter.get_bit(i)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
93
|
+
def get_position(a, b, val)
|
94
|
+
((a * val + b) % PRIME) % @m
|
95
|
+
end
|
96
|
+
|
97
|
+
def get_hash(value)
|
98
|
+
Digest::MD5.hexdigest(value.to_s).to_i(16)
|
99
|
+
end
|
100
|
+
|
101
|
+
def valid_position?(position)
|
102
|
+
raise OUT_OF_RANGE if position >= @m
|
103
|
+
true
|
104
|
+
end
|
105
|
+
|
106
|
+
def same_params?(bf)
|
107
|
+
raise DIFFERENT_INITIAL_PARAMS if self.class != bf.class || bf.capacity != @capacity || bf.probability != @probability
|
108
|
+
true
|
109
|
+
end
|
110
|
+
|
111
|
+
def get_hash_params(i)
|
112
|
+
return 2*i + 1, 2*i + 2
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require_relative 'lib/
|
1
|
+
require_relative 'lib/qbloom_filter/version'
|
2
2
|
|
3
3
|
Gem::Specification.new do |spec|
|
4
4
|
spec.name = "qbloom_filter"
|
@@ -8,15 +8,15 @@ Gem::Specification.new do |spec|
|
|
8
8
|
|
9
9
|
spec.licenses = ['MIT']
|
10
10
|
spec.summary = %q{Bloom Filter}
|
11
|
-
spec.description = %q{
|
12
|
-
spec.homepage = "https://github.com/superedriver/
|
11
|
+
spec.description = %q{Bloom Filter with union and intersection}
|
12
|
+
spec.homepage = "https://github.com/superedriver/qbloom_filter"
|
13
13
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
|
14
14
|
|
15
15
|
spec.metadata["allowed_push_host"] = "https://rubygems.org"
|
16
16
|
|
17
17
|
spec.metadata["homepage_uri"] = spec.homepage
|
18
|
-
spec.metadata["source_code_uri"] = "https://github.com/superedriver/
|
19
|
-
spec.metadata["changelog_uri"] = "https://github.com/superedriver/
|
18
|
+
spec.metadata["source_code_uri"] = "https://github.com/superedriver/qbloom_filter"
|
19
|
+
spec.metadata["changelog_uri"] = "https://github.com/superedriver/qbloom_filter"
|
20
20
|
|
21
21
|
# Specify which files should be added to the gem when it is released.
|
22
22
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: qbloom_filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- qaz
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-29 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description:
|
13
|
+
description: Bloom Filter with union and intersection
|
14
14
|
email:
|
15
15
|
- qaz@qaz.qaz
|
16
16
|
executables: []
|
@@ -30,17 +30,17 @@ files:
|
|
30
30
|
- bin/rake
|
31
31
|
- bin/rspec
|
32
32
|
- bin/setup
|
33
|
-
-
|
34
|
-
- lib/
|
35
|
-
-
|
36
|
-
homepage: https://github.com/superedriver/
|
33
|
+
- lib/qbloom_filter.rb
|
34
|
+
- lib/qbloom_filter/version.rb
|
35
|
+
- qbloom_filter.gemspec
|
36
|
+
homepage: https://github.com/superedriver/qbloom_filter
|
37
37
|
licenses:
|
38
38
|
- MIT
|
39
39
|
metadata:
|
40
40
|
allowed_push_host: https://rubygems.org
|
41
|
-
homepage_uri: https://github.com/superedriver/
|
42
|
-
source_code_uri: https://github.com/superedriver/
|
43
|
-
changelog_uri: https://github.com/superedriver/
|
41
|
+
homepage_uri: https://github.com/superedriver/qbloom_filter
|
42
|
+
source_code_uri: https://github.com/superedriver/qbloom_filter
|
43
|
+
changelog_uri: https://github.com/superedriver/qbloom_filter
|
44
44
|
post_install_message:
|
45
45
|
rdoc_options: []
|
46
46
|
require_paths:
|
data/lib/bloom_filter.rb
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
require "bloom_filter/version"
|
2
|
-
require "bitset"
|
3
|
-
require 'digest/md5'
|
4
|
-
|
5
|
-
module BloomFilter
|
6
|
-
PRIME = 100_000_000_003
|
7
|
-
MAX_HASH_PARAM = 1000
|
8
|
-
class Filter
|
9
|
-
attr_reader :count
|
10
|
-
|
11
|
-
def initialize(capacity = 100, probability = 0.01)
|
12
|
-
# amount of inserted elements
|
13
|
-
@count = 0
|
14
|
-
|
15
|
-
#number of bits in the array
|
16
|
-
@m = (-(capacity * Math.log(probability)) / (Math.log(2) ** 2)).ceil
|
17
|
-
|
18
|
-
@bitset = Bitset.new(@m)
|
19
|
-
|
20
|
-
#number of hash functions that minimizes the probability of false positives
|
21
|
-
@k = (Math.log(2) * (@m / capacity)).ceil
|
22
|
-
|
23
|
-
# a, b params for hash functions
|
24
|
-
@hash_params = []
|
25
|
-
@k.times { @hash_params.push([rand(1000), rand(1000)]) }
|
26
|
-
end
|
27
|
-
|
28
|
-
def add(value)
|
29
|
-
x = get_hash(value)
|
30
|
-
was_inserted = true
|
31
|
-
@k.times do |i|
|
32
|
-
a, b = @hash_params[i]
|
33
|
-
position = get_position(a, b, x)
|
34
|
-
was_inserted = false unless @bitset[position]
|
35
|
-
@bitset[position] = true
|
36
|
-
end
|
37
|
-
@count += 1 unless was_inserted
|
38
|
-
value
|
39
|
-
end
|
40
|
-
|
41
|
-
def contains?(value)
|
42
|
-
x = get_hash(value)
|
43
|
-
result = true
|
44
|
-
@k.times do |i|
|
45
|
-
a, b = @hash_params[i]
|
46
|
-
result = false unless @bitset[get_position(a, b, x)]
|
47
|
-
end
|
48
|
-
|
49
|
-
result
|
50
|
-
end
|
51
|
-
alias :includes? :contains?
|
52
|
-
|
53
|
-
private
|
54
|
-
|
55
|
-
def get_position(a, b, val)
|
56
|
-
((a * val + b) % PRIME) % @m
|
57
|
-
end
|
58
|
-
|
59
|
-
def get_hash(value)
|
60
|
-
Digest::MD5.hexdigest(value.to_s).to_i(16)
|
61
|
-
end
|
62
|
-
end
|
63
|
-
end
|