qbloom_filter 0.1.0 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8384171d7c3a67af223b2425c08ec9fa8ccf4c784d658f5951b37642708a384
4
- data.tar.gz: a50a3260fcd55e0284e85a1b78f4f34dce3f5ff9bf4a56add2cdebc3aecd1d07
3
+ metadata.gz: d3107815ff8ebf8aa327a4f2cd2cfb80d9f0aefbffd84e428adcfc504295d42c
4
+ data.tar.gz: fb29b08c1f68f5f50a06dfbc3b395c43ff9be36fbe0e2741cecf47c7ccf75f1e
5
5
  SHA512:
6
- metadata.gz: 9fe8aeae6469952e99be203cbe4c12cf03317b7eb9499a2bde069cf11c980d3f7746d29fcb2853451ae82ff5d244f87491103bd15d9125e445f44bb63bbabbfb
7
- data.tar.gz: 4a43600e93388a498797dcb6706801b105c470602e0449da6bd37faf885ea660f17c3a9a5fcbfede9f1860af9c0cc8fc47a960073ba893e523fca13d4a8e6590
6
+ metadata.gz: 2e6ded653a4c765234dd4f38e874efb50de33952fa7faef0ae9c9f97ad0142eabe5bae0c63029f621c9224f456a5bc04d7de12839d28f2748539f7b0d7d53bab
7
+ data.tar.gz: 40670964f95d373b7c4d413ae60ad5369760a34bcf511195b4e31a38fbc46782e76f8b8b9745607c010dfb32330e9803388d8e08c7f5ec95fc7ec30ffd514631
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- qbloom_filter (0.1.0)
4
+ qbloom_filter (0.3.4)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -7,7 +7,7 @@ A Bloom filter is a space-efficient probabilistic data structure
7
7
  Add this line to your application's Gemfile:
8
8
 
9
9
  ```ruby
10
- gem 'bloom_filter'
10
+ gem 'qbloom_filter'
11
11
  ```
12
12
 
13
13
  And then execute:
@@ -16,7 +16,7 @@ And then execute:
16
16
 
17
17
  Or install it yourself as:
18
18
 
19
- $ gem install bloom_filter
19
+ $ gem install qbloom_filter
20
20
 
21
21
  ## Usage
22
22
 
@@ -30,7 +30,7 @@ And two parameters can be used to describe the bloom filter:
30
30
  bloom_filter = BloomFilter::Filter.new(1000, 0.001)
31
31
  ```
32
32
 
33
- #### API
33
+ #### Methods
34
34
  __add(value)__ - add item into filter
35
35
 
36
36
  __includes?(value)__ - check if filter includes the value
@@ -39,7 +39,23 @@ __contains?(value)__ - alias of __includes?(value)__
39
39
 
40
40
  __count__ - returns number of inserted items
41
41
 
42
+ __capacity__ - returns initial capacity
43
+
44
+ __probability__ - returns initial probability
45
+
46
+ __bit_size__ - returns number of bits in the bit array
47
+
48
+ __get_bit(position)__ - returns the value of a bit (true/false) in the bit array; raises an error if position is out of range of the bit array
49
+
50
+ __set_bit(position)__ - sets a bit to TRUE in the bit array; raises an error if position is out of range of the bit array
51
+
52
+ __clear_bit(position)__ - sets a bit to FALSE in the bit array; raises an error if position is out of range of the bit array
53
+
54
+ __union_with(bloom_filter)__ - unions the current bloom filter with another one; both bloom filters should be instances of this module and have the same initial params (capacity, probability)
55
+
56
+ __intersect_with(bloom_filter)__ - intersects the current bloom filter with another one; both bloom filters should be instances of this module and have the same initial params (capacity, probability)
57
+
42
58
  ## Contributing
43
59
 
44
- Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/bloom_filter
60
+ Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/qbloom_filter
45
61
 
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require "bundler/setup"
4
- require "bloom_filter"
4
+ require "qbloom_filter"
5
5
 
6
6
  # You can add fixtures and/or initialization code here to make experimenting
7
7
  # with your gem easier. You can also use a different console, if you like.
@@ -0,0 +1,115 @@
1
+ require "bloom_filter/version"
2
+ require "bitset"
3
+ require 'digest/md5'
4
+
5
+ module BloomFilter
6
+ PRIME = 100_000_000_003
7
+ MAX_HASH_PARAM = 1000
8
+ OUT_OF_RANGE = "Position is out of range"
9
+ DIFFERENT_INITIAL_PARAMS = "Bloom filters have different initial params"
10
+
11
+ class Filter
12
+ attr_reader :count, :capacity, :probability
13
+
14
+ def initialize(capacity = 100, probability = 0.01)
15
+ # amount of inserted elements
16
+ @count = 0
17
+
18
+ # params ob filter, are used for comparison with params of other bloom filters
19
+ @capacity = capacity
20
+ @probability = probability
21
+
22
+ #number of bits in the array
23
+ @m = (-(capacity * Math.log(probability)) / (Math.log(2) ** 2)).ceil
24
+
25
+ @bitset = Bitset.new(@m)
26
+
27
+ #number of hash functions that minimizes the probability of false positives
28
+ @k = (Math.log(2) * (@m / capacity)).ceil
29
+ end
30
+
31
+ def add(value)
32
+ x = get_hash(value)
33
+ was_inserted = true
34
+ @k.times do |i|
35
+ a, b = get_hash_params(i)
36
+ position = get_position(a, b, x)
37
+ was_inserted = false unless self.get_bit(position)
38
+ self.set_bit(position)
39
+ end
40
+ @count += 1 unless was_inserted
41
+ value
42
+ end
43
+
44
+ def contains?(value)
45
+ x = get_hash(value)
46
+ result = true
47
+ @k.times do |i|
48
+ a, b = get_hash_params(i)
49
+ result = false unless self.get_bit(get_position(a, b, x))
50
+ end
51
+
52
+ result
53
+ end
54
+ alias :includes? :contains?
55
+
56
+ def bit_size
57
+ @m
58
+ end
59
+
60
+ def get_bit(position)
61
+ valid_position?(position)
62
+ @bitset[position]
63
+ end
64
+
65
+ def set_bit(position)
66
+ valid_position?(position)
67
+ @bitset[position] = true
68
+ end
69
+
70
+ def clear_bit(position)
71
+ valid_position?(position)
72
+ @bitset[position] = false
73
+ end
74
+
75
+ def union_with(bloom_filter)
76
+ same_params?(bloom_filter)
77
+
78
+ @m.times do |i|
79
+ @bitset[i] = self.get_bit(i) || bloom_filter.get_bit(i)
80
+ end
81
+ end
82
+
83
+ def intersect_with(bloom_filter)
84
+ same_params?(bloom_filter)
85
+
86
+ @m.times do |i|
87
+ @bitset[i] = self.get_bit(i) && bloom_filter.get_bit(i)
88
+ end
89
+ end
90
+
91
+ private
92
+
93
+ def get_position(a, b, val)
94
+ ((a * val + b) % PRIME) % @m
95
+ end
96
+
97
+ def get_hash(value)
98
+ Digest::MD5.hexdigest(value.to_s).to_i(16)
99
+ end
100
+
101
+ def valid_position?(position)
102
+ raise OUT_OF_RANGE if position >= @m
103
+ true
104
+ end
105
+
106
+ def same_params?(bf)
107
+ raise DIFFERENT_INITIAL_PARAMS if self.class != bf.class || bf.capacity != @capacity || bf.probability != @probability
108
+ true
109
+ end
110
+
111
+ def get_hash_params(i)
112
+ return 2*i + 1, 2*i + 2
113
+ end
114
+ end
115
+ end
@@ -1,3 +1,3 @@
1
1
  module BloomFilter
2
- VERSION = "0.1.0"
2
+ VERSION = "0.3.4"
3
3
  end
@@ -1,4 +1,4 @@
1
- require_relative 'lib/bloom_filter/version'
1
+ require_relative 'lib/qbloom_filter/version'
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "qbloom_filter"
@@ -8,15 +8,15 @@ Gem::Specification.new do |spec|
8
8
 
9
9
  spec.licenses = ['MIT']
10
10
  spec.summary = %q{Bloom Filter}
11
- spec.description = %q{Simple Bloom Filter}
12
- spec.homepage = "https://github.com/superedriver/bloom-filter"
11
+ spec.description = %q{Bloom Filter with union and intersection}
12
+ spec.homepage = "https://github.com/superedriver/qbloom_filter"
13
13
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14
14
 
15
15
  spec.metadata["allowed_push_host"] = "https://rubygems.org"
16
16
 
17
17
  spec.metadata["homepage_uri"] = spec.homepage
18
- spec.metadata["source_code_uri"] = "https://github.com/superedriver/bloom-filter"
19
- spec.metadata["changelog_uri"] = "https://github.com/superedriver/bloom-filter"
18
+ spec.metadata["source_code_uri"] = "https://github.com/superedriver/qbloom_filter"
19
+ spec.metadata["changelog_uri"] = "https://github.com/superedriver/qbloom_filter"
20
20
 
21
21
  # Specify which files should be added to the gem when it is released.
22
22
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: qbloom_filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - qaz
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-09-27 00:00:00.000000000 Z
11
+ date: 2020-09-29 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: Simple Bloom Filter
13
+ description: Bloom Filter with union and intersection
14
14
  email:
15
15
  - qaz@qaz.qaz
16
16
  executables: []
@@ -30,17 +30,17 @@ files:
30
30
  - bin/rake
31
31
  - bin/rspec
32
32
  - bin/setup
33
- - bloom_filter.gemspec
34
- - lib/bloom_filter.rb
35
- - lib/bloom_filter/version.rb
36
- homepage: https://github.com/superedriver/bloom-filter
33
+ - lib/qbloom_filter.rb
34
+ - lib/qbloom_filter/version.rb
35
+ - qbloom_filter.gemspec
36
+ homepage: https://github.com/superedriver/qbloom_filter
37
37
  licenses:
38
38
  - MIT
39
39
  metadata:
40
40
  allowed_push_host: https://rubygems.org
41
- homepage_uri: https://github.com/superedriver/bloom-filter
42
- source_code_uri: https://github.com/superedriver/bloom-filter
43
- changelog_uri: https://github.com/superedriver/bloom-filter
41
+ homepage_uri: https://github.com/superedriver/qbloom_filter
42
+ source_code_uri: https://github.com/superedriver/qbloom_filter
43
+ changelog_uri: https://github.com/superedriver/qbloom_filter
44
44
  post_install_message:
45
45
  rdoc_options: []
46
46
  require_paths:
@@ -1,63 +0,0 @@
1
- require "bloom_filter/version"
2
- require "bitset"
3
- require 'digest/md5'
4
-
5
- module BloomFilter
6
- PRIME = 100_000_000_003
7
- MAX_HASH_PARAM = 1000
8
- class Filter
9
- attr_reader :count
10
-
11
- def initialize(capacity = 100, probability = 0.01)
12
- # amount of inserted elements
13
- @count = 0
14
-
15
- #number of bits in the array
16
- @m = (-(capacity * Math.log(probability)) / (Math.log(2) ** 2)).ceil
17
-
18
- @bitset = Bitset.new(@m)
19
-
20
- #number of hash functions that minimizes the probability of false positives
21
- @k = (Math.log(2) * (@m / capacity)).ceil
22
-
23
- # a, b params for hash functions
24
- @hash_params = []
25
- @k.times { @hash_params.push([rand(1000), rand(1000)]) }
26
- end
27
-
28
- def add(value)
29
- x = get_hash(value)
30
- was_inserted = true
31
- @k.times do |i|
32
- a, b = @hash_params[i]
33
- position = get_position(a, b, x)
34
- was_inserted = false unless @bitset[position]
35
- @bitset[position] = true
36
- end
37
- @count += 1 unless was_inserted
38
- value
39
- end
40
-
41
- def contains?(value)
42
- x = get_hash(value)
43
- result = true
44
- @k.times do |i|
45
- a, b = @hash_params[i]
46
- result = false unless @bitset[get_position(a, b, x)]
47
- end
48
-
49
- result
50
- end
51
- alias :includes? :contains?
52
-
53
- private
54
-
55
- def get_position(a, b, val)
56
- ((a * val + b) % PRIME) % @m
57
- end
58
-
59
- def get_hash(value)
60
- Digest::MD5.hexdigest(value.to_s).to_i(16)
61
- end
62
- end
63
- end