qbloom_filter 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8384171d7c3a67af223b2425c08ec9fa8ccf4c784d658f5951b37642708a384
4
- data.tar.gz: a50a3260fcd55e0284e85a1b78f4f34dce3f5ff9bf4a56add2cdebc3aecd1d07
3
+ metadata.gz: 1a5c8c820e92cec2133acd196b0b42ff9713b5b3446226db27dfa1d8db7ba28f
4
+ data.tar.gz: f2074640b8228181c145500e39cba8467d7fd7d743c94fecc11c8aefc5a013ba
5
5
  SHA512:
6
- metadata.gz: 9fe8aeae6469952e99be203cbe4c12cf03317b7eb9499a2bde069cf11c980d3f7746d29fcb2853451ae82ff5d244f87491103bd15d9125e445f44bb63bbabbfb
7
- data.tar.gz: 4a43600e93388a498797dcb6706801b105c470602e0449da6bd37faf885ea660f17c3a9a5fcbfede9f1860af9c0cc8fc47a960073ba893e523fca13d4a8e6590
6
+ metadata.gz: c76fb3bea4b43e1ede2ddfcc6570ace55bd7e3e6a1e5fd3f3b6cb1845f082b8f55716345d40b7202c9d6798e61b84619d0e83a2158c050b966be3451cec83a73
7
+ data.tar.gz: bb76e4786858a0b0c86fecbd49e6971cccad8b34aaeca4d802940184ca6b38343b39df76da9bdb37fbfe4e9e478260874b400fc2f1ad31b0c3b015cbd3fbc829
data/README.md CHANGED
@@ -30,7 +30,7 @@ And two parameters can be used to describe the bloom filter:
30
30
  bloom_filter = BloomFilter::Filter.new(1000, 0.001)
31
31
  ```
32
32
 
33
- #### API
33
+ #### Methods
34
34
  __add(value)__ - add item into filter
35
35
 
36
36
  __includes?(value)__ - check if filter includes the value
@@ -39,6 +39,16 @@ __contains?(value)__ - alias of __includes?(value)__
39
39
 
40
40
  __count__ - returns number of inserted items
41
41
 
42
+ ##### Methods for union and intersection of several bloom filters
43
+
44
+ __bit_size__ - returns number of bits in the bit array
45
+
46
+ __get_bit(position)__ - returns the value of a bit (true/false) in the bit array; raises an error if position is out of range of the bit array
47
+
48
+ __set_bit(position)__ - sets a bit to TRUE in the bit array; raises an error if position is out of range of the bit array
49
+
50
+ __clear_bit(position)__ - sets a bit to FALSE in the bit array; raises an error if position is out of range of the bit array
51
+
42
52
  ## Contributing
43
53
 
44
54
  Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/bloom_filter
@@ -5,6 +5,8 @@ require 'digest/md5'
5
5
  module BloomFilter
6
6
  PRIME = 100_000_000_003
7
7
  MAX_HASH_PARAM = 1000
8
+ OUT_OF_RANGE = "Position is out of range"
9
+
8
10
  class Filter
9
11
  attr_reader :count
10
12
 
@@ -19,20 +21,16 @@ module BloomFilter
19
21
 
20
22
  #number of hash functions that minimizes the probability of false positives
21
23
  @k = (Math.log(2) * (@m / capacity)).ceil
22
-
23
- # a, b params for hash functions
24
- @hash_params = []
25
- @k.times { @hash_params.push([rand(1000), rand(1000)]) }
26
24
  end
27
25
 
28
26
  def add(value)
29
27
  x = get_hash(value)
30
28
  was_inserted = true
31
29
  @k.times do |i|
32
- a, b = @hash_params[i]
30
+ a, b = get_hash_params(i)
33
31
  position = get_position(a, b, x)
34
- was_inserted = false unless @bitset[position]
35
- @bitset[position] = true
32
+ was_inserted = false unless self.get_bit(position)
33
+ self.set_bit(position)
36
34
  end
37
35
  @count += 1 unless was_inserted
38
36
  value
@@ -42,14 +40,30 @@ module BloomFilter
42
40
  x = get_hash(value)
43
41
  result = true
44
42
  @k.times do |i|
45
- a, b = @hash_params[i]
46
- result = false unless @bitset[get_position(a, b, x)]
43
+ a, b = get_hash_params(i)
44
+ result = false unless self.get_bit(get_position(a, b, x))
47
45
  end
48
46
 
49
47
  result
50
48
  end
51
49
  alias :includes? :contains?
52
50
 
51
+ def get_bit(position)
52
+ @bitset[position] if is_valid_position(position)
53
+ end
54
+
55
+ def set_bit(position)
56
+ @bitset[position] = true if is_valid_position(position)
57
+ end
58
+
59
+ def clear_bit(position)
60
+ @bitset[position] = false if is_valid_position(position)
61
+ end
62
+
63
+ def bit_size
64
+ @m
65
+ end
66
+
53
67
  private
54
68
 
55
69
  def get_position(a, b, val)
@@ -59,5 +73,14 @@ module BloomFilter
59
73
  def get_hash(value)
60
74
  Digest::MD5.hexdigest(value.to_s).to_i(16)
61
75
  end
76
+
77
+ def is_valid_position(position)
78
+ raise OUT_OF_RANGE if position >= @m
79
+ true
80
+ end
81
+
82
+ def get_hash_params(i)
83
+ return 2*i + 1, 2*i + 2
84
+ end
62
85
  end
63
86
  end
@@ -1,3 +1,3 @@
1
1
  module BloomFilter
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: qbloom_filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - qaz
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-09-27 00:00:00.000000000 Z
11
+ date: 2020-09-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Simple Bloom Filter
14
14
  email: