qbloom_filter 0.1.0 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8384171d7c3a67af223b2425c08ec9fa8ccf4c784d658f5951b37642708a384
4
- data.tar.gz: a50a3260fcd55e0284e85a1b78f4f34dce3f5ff9bf4a56add2cdebc3aecd1d07
3
+ metadata.gz: d3107815ff8ebf8aa327a4f2cd2cfb80d9f0aefbffd84e428adcfc504295d42c
4
+ data.tar.gz: fb29b08c1f68f5f50a06dfbc3b395c43ff9be36fbe0e2741cecf47c7ccf75f1e
5
5
  SHA512:
6
- metadata.gz: 9fe8aeae6469952e99be203cbe4c12cf03317b7eb9499a2bde069cf11c980d3f7746d29fcb2853451ae82ff5d244f87491103bd15d9125e445f44bb63bbabbfb
7
- data.tar.gz: 4a43600e93388a498797dcb6706801b105c470602e0449da6bd37faf885ea660f17c3a9a5fcbfede9f1860af9c0cc8fc47a960073ba893e523fca13d4a8e6590
6
+ metadata.gz: 2e6ded653a4c765234dd4f38e874efb50de33952fa7faef0ae9c9f97ad0142eabe5bae0c63029f621c9224f456a5bc04d7de12839d28f2748539f7b0d7d53bab
7
+ data.tar.gz: 40670964f95d373b7c4d413ae60ad5369760a34bcf511195b4e31a38fbc46782e76f8b8b9745607c010dfb32330e9803388d8e08c7f5ec95fc7ec30ffd514631
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- qbloom_filter (0.1.0)
4
+ qbloom_filter (0.3.4)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -7,7 +7,7 @@ A Bloom filter is a space-efficient probabilistic data structure
7
7
  Add this line to your application's Gemfile:
8
8
 
9
9
  ```ruby
10
- gem 'bloom_filter'
10
+ gem 'qbloom_filter'
11
11
  ```
12
12
 
13
13
  And then execute:
@@ -16,7 +16,7 @@ And then execute:
16
16
 
17
17
  Or install it yourself as:
18
18
 
19
- $ gem install bloom_filter
19
+ $ gem install qbloom_filter
20
20
 
21
21
  ## Usage
22
22
 
@@ -30,7 +30,7 @@ And two parameters can be used to describe the bloom filter:
30
30
  bloom_filter = BloomFilter::Filter.new(1000, 0.001)
31
31
  ```
32
32
 
33
- #### API
33
+ #### Methods
34
34
  __add(value)__ - add item into filter
35
35
 
36
36
  __includes?(value)__ - check if filter includes the value
@@ -39,7 +39,23 @@ __contains?(value)__ - alias of __includes?(value)__
39
39
 
40
40
  __count__ - returns number of inserted items
41
41
 
42
+ __capacity__ - returns initial capacity
43
+
44
+ __probability__ - returns initial probability
45
+
46
+ __bit_size__ - returns number of bits in the bit array
47
+
48
+ __get_bit(position)__ - returns the value of a bit (true/false) in the bit array; raises an error if position is out of range of the bit array
49
+
50
+ __set_bit(position)__ - sets a bit to TRUE in the bit array; raises an error if position is out of range of the bit array
51
+
52
+ __clear_bit(position)__ - sets a bit to FALSE in the bit array; raises an error if position is out of range of the bit array
53
+
54
+ __union_with(bloom_filter)__ - unions the current bloom filter with another one; both bloom filters must be instances of this module's filter and have the same initial params (capacity, probability)
55
+
56
+ __intersect_with(bloom_filter)__ - intersects the current bloom filter with another one; both bloom filters must be instances of this module's filter and have the same initial params (capacity, probability)
57
+
42
58
  ## Contributing
43
59
 
44
- Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/bloom_filter
60
+ Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/qbloom_filter
45
61
 
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require "bundler/setup"
4
- require "bloom_filter"
4
+ require "qbloom_filter"
5
5
 
6
6
  # You can add fixtures and/or initialization code here to make experimenting
7
7
  # with your gem easier. You can also use a different console, if you like.
@@ -0,0 +1,115 @@
1
+ require "bloom_filter/version"
2
+ require "bitset"
3
+ require 'digest/md5'
4
+
5
+ module BloomFilter
6
+ PRIME = 100_000_000_003
7
+ MAX_HASH_PARAM = 1000
8
+ OUT_OF_RANGE = "Position is out of range"
9
+ DIFFERENT_INITIAL_PARAMS = "Bloom filters have different initial params"
10
+
11
+ class Filter
12
+ attr_reader :count, :capacity, :probability
13
+
14
+ def initialize(capacity = 100, probability = 0.01)
15
+ # amount of inserted elements
16
+ @count = 0
17
+
18
+ # params ob filter, are used for comparison with params of other bloom filters
19
+ @capacity = capacity
20
+ @probability = probability
21
+
22
+ #number of bits in the array
23
+ @m = (-(capacity * Math.log(probability)) / (Math.log(2) ** 2)).ceil
24
+
25
+ @bitset = Bitset.new(@m)
26
+
27
+ #number of hash functions that minimizes the probability of false positives
28
+ @k = (Math.log(2) * (@m / capacity)).ceil
29
+ end
30
+
31
+ def add(value)
32
+ x = get_hash(value)
33
+ was_inserted = true
34
+ @k.times do |i|
35
+ a, b = get_hash_params(i)
36
+ position = get_position(a, b, x)
37
+ was_inserted = false unless self.get_bit(position)
38
+ self.set_bit(position)
39
+ end
40
+ @count += 1 unless was_inserted
41
+ value
42
+ end
43
+
44
+ def contains?(value)
45
+ x = get_hash(value)
46
+ result = true
47
+ @k.times do |i|
48
+ a, b = get_hash_params(i)
49
+ result = false unless self.get_bit(get_position(a, b, x))
50
+ end
51
+
52
+ result
53
+ end
54
+ alias :includes? :contains?
55
+
56
+ def bit_size
57
+ @m
58
+ end
59
+
60
+ def get_bit(position)
61
+ valid_position?(position)
62
+ @bitset[position]
63
+ end
64
+
65
+ def set_bit(position)
66
+ valid_position?(position)
67
+ @bitset[position] = true
68
+ end
69
+
70
+ def clear_bit(position)
71
+ valid_position?(position)
72
+ @bitset[position] = false
73
+ end
74
+
75
+ def union_with(bloom_filter)
76
+ same_params?(bloom_filter)
77
+
78
+ @m.times do |i|
79
+ @bitset[i] = self.get_bit(i) || bloom_filter.get_bit(i)
80
+ end
81
+ end
82
+
83
+ def intersect_with(bloom_filter)
84
+ same_params?(bloom_filter)
85
+
86
+ @m.times do |i|
87
+ @bitset[i] = self.get_bit(i) && bloom_filter.get_bit(i)
88
+ end
89
+ end
90
+
91
+ private
92
+
93
+ def get_position(a, b, val)
94
+ ((a * val + b) % PRIME) % @m
95
+ end
96
+
97
+ def get_hash(value)
98
+ Digest::MD5.hexdigest(value.to_s).to_i(16)
99
+ end
100
+
101
+ def valid_position?(position)
102
+ raise OUT_OF_RANGE if position >= @m
103
+ true
104
+ end
105
+
106
+ def same_params?(bf)
107
+ raise DIFFERENT_INITIAL_PARAMS if self.class != bf.class || bf.capacity != @capacity || bf.probability != @probability
108
+ true
109
+ end
110
+
111
+ def get_hash_params(i)
112
+ return 2*i + 1, 2*i + 2
113
+ end
114
+ end
115
+ end
@@ -1,3 +1,3 @@
1
1
  module BloomFilter
2
- VERSION = "0.1.0"
2
+ VERSION = "0.3.4"
3
3
  end
@@ -1,4 +1,4 @@
1
- require_relative 'lib/bloom_filter/version'
1
+ require_relative 'lib/qbloom_filter/version'
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "qbloom_filter"
@@ -8,15 +8,15 @@ Gem::Specification.new do |spec|
8
8
 
9
9
  spec.licenses = ['MIT']
10
10
  spec.summary = %q{Bloom Filter}
11
- spec.description = %q{Simple Bloom Filter}
12
- spec.homepage = "https://github.com/superedriver/bloom-filter"
11
+ spec.description = %q{Bloom Filter with union and intersection}
12
+ spec.homepage = "https://github.com/superedriver/qbloom_filter"
13
13
  spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
14
14
 
15
15
  spec.metadata["allowed_push_host"] = "https://rubygems.org"
16
16
 
17
17
  spec.metadata["homepage_uri"] = spec.homepage
18
- spec.metadata["source_code_uri"] = "https://github.com/superedriver/bloom-filter"
19
- spec.metadata["changelog_uri"] = "https://github.com/superedriver/bloom-filter"
18
+ spec.metadata["source_code_uri"] = "https://github.com/superedriver/qbloom_filter"
19
+ spec.metadata["changelog_uri"] = "https://github.com/superedriver/qbloom_filter"
20
20
 
21
21
  # Specify which files should be added to the gem when it is released.
22
22
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: qbloom_filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - qaz
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-09-27 00:00:00.000000000 Z
11
+ date: 2020-09-29 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: Simple Bloom Filter
13
+ description: Bloom Filter with union and intersection
14
14
  email:
15
15
  - qaz@qaz.qaz
16
16
  executables: []
@@ -30,17 +30,17 @@ files:
30
30
  - bin/rake
31
31
  - bin/rspec
32
32
  - bin/setup
33
- - bloom_filter.gemspec
34
- - lib/bloom_filter.rb
35
- - lib/bloom_filter/version.rb
36
- homepage: https://github.com/superedriver/bloom-filter
33
+ - lib/qbloom_filter.rb
34
+ - lib/qbloom_filter/version.rb
35
+ - qbloom_filter.gemspec
36
+ homepage: https://github.com/superedriver/qbloom_filter
37
37
  licenses:
38
38
  - MIT
39
39
  metadata:
40
40
  allowed_push_host: https://rubygems.org
41
- homepage_uri: https://github.com/superedriver/bloom-filter
42
- source_code_uri: https://github.com/superedriver/bloom-filter
43
- changelog_uri: https://github.com/superedriver/bloom-filter
41
+ homepage_uri: https://github.com/superedriver/qbloom_filter
42
+ source_code_uri: https://github.com/superedriver/qbloom_filter
43
+ changelog_uri: https://github.com/superedriver/qbloom_filter
44
44
  post_install_message:
45
45
  rdoc_options: []
46
46
  require_paths:
@@ -1,63 +0,0 @@
1
- require "bloom_filter/version"
2
- require "bitset"
3
- require 'digest/md5'
4
-
5
- module BloomFilter
6
- PRIME = 100_000_000_003
7
- MAX_HASH_PARAM = 1000
8
- class Filter
9
- attr_reader :count
10
-
11
- def initialize(capacity = 100, probability = 0.01)
12
- # amount of inserted elements
13
- @count = 0
14
-
15
- #number of bits in the array
16
- @m = (-(capacity * Math.log(probability)) / (Math.log(2) ** 2)).ceil
17
-
18
- @bitset = Bitset.new(@m)
19
-
20
- #number of hash functions that minimizes the probability of false positives
21
- @k = (Math.log(2) * (@m / capacity)).ceil
22
-
23
- # a, b params for hash functions
24
- @hash_params = []
25
- @k.times { @hash_params.push([rand(1000), rand(1000)]) }
26
- end
27
-
28
- def add(value)
29
- x = get_hash(value)
30
- was_inserted = true
31
- @k.times do |i|
32
- a, b = @hash_params[i]
33
- position = get_position(a, b, x)
34
- was_inserted = false unless @bitset[position]
35
- @bitset[position] = true
36
- end
37
- @count += 1 unless was_inserted
38
- value
39
- end
40
-
41
- def contains?(value)
42
- x = get_hash(value)
43
- result = true
44
- @k.times do |i|
45
- a, b = @hash_params[i]
46
- result = false unless @bitset[get_position(a, b, x)]
47
- end
48
-
49
- result
50
- end
51
- alias :includes? :contains?
52
-
53
- private
54
-
55
- def get_position(a, b, val)
56
- ((a * val + b) % PRIME) % @m
57
- end
58
-
59
- def get_hash(value)
60
- Digest::MD5.hexdigest(value.to_s).to_i(16)
61
- end
62
- end
63
- end