qbloom_filter 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8384171d7c3a67af223b2425c08ec9fa8ccf4c784d658f5951b37642708a384
4
- data.tar.gz: a50a3260fcd55e0284e85a1b78f4f34dce3f5ff9bf4a56add2cdebc3aecd1d07
3
+ metadata.gz: 1a5c8c820e92cec2133acd196b0b42ff9713b5b3446226db27dfa1d8db7ba28f
4
+ data.tar.gz: f2074640b8228181c145500e39cba8467d7fd7d743c94fecc11c8aefc5a013ba
5
5
  SHA512:
6
- metadata.gz: 9fe8aeae6469952e99be203cbe4c12cf03317b7eb9499a2bde069cf11c980d3f7746d29fcb2853451ae82ff5d244f87491103bd15d9125e445f44bb63bbabbfb
7
- data.tar.gz: 4a43600e93388a498797dcb6706801b105c470602e0449da6bd37faf885ea660f17c3a9a5fcbfede9f1860af9c0cc8fc47a960073ba893e523fca13d4a8e6590
6
+ metadata.gz: c76fb3bea4b43e1ede2ddfcc6570ace55bd7e3e6a1e5fd3f3b6cb1845f082b8f55716345d40b7202c9d6798e61b84619d0e83a2158c050b966be3451cec83a73
7
+ data.tar.gz: bb76e4786858a0b0c86fecbd49e6971cccad8b34aaeca4d802940184ca6b38343b39df76da9bdb37fbfe4e9e478260874b400fc2f1ad31b0c3b015cbd3fbc829
data/README.md CHANGED
@@ -30,7 +30,7 @@ And two parameters can be used to describe the bloom filter:
30
30
  bloom_filter = BloomFilter::Filter.new(1000, 0.001)
31
31
  ```
32
32
 
33
- #### API
33
+ #### Methods
34
34
  __add(value)__ - add item into filter
35
35
 
36
36
  __includes?(value)__ - check if filter includes the value
@@ -39,6 +39,16 @@ __contains?(value)__ - alias of __includes?(value)__
39
39
 
40
40
  __count__ - returns number of inserted items
41
41
 
42
+ ##### Methods for union and intersection of several bloom filters
43
+
44
+ __bit_size__ - returns number of bits in the bit array
45
+
46
+ __get_bit(position)__ - returns the value of a bit (true/false) in the bit array, raises an error if position is out of range of the bit array
47
+
48
+ __set_bit(position)__ - sets a bit to TRUE in the bit array, raises an error if position is out of range of the bit array
49
+
50
+ __clear_bit(position)__ - sets a bit to FALSE in the bit array, raises an error if position is out of range of the bit array
51
+
42
52
  ## Contributing
43
53
 
44
54
  Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/bloom_filter
@@ -5,6 +5,8 @@ require 'digest/md5'
5
5
  module BloomFilter
6
6
  PRIME = 100_000_000_003
7
7
  MAX_HASH_PARAM = 1000
8
+ OUT_OF_RANGE = "Position is out of range"
9
+
8
10
  class Filter
9
11
  attr_reader :count
10
12
 
@@ -19,20 +21,16 @@ module BloomFilter
19
21
 
20
22
  #number of hash functions that minimizes the probability of false positives
21
23
  @k = (Math.log(2) * (@m / capacity)).ceil
22
-
23
- # a, b params for hash functions
24
- @hash_params = []
25
- @k.times { @hash_params.push([rand(1000), rand(1000)]) }
26
24
  end
27
25
 
28
26
  def add(value)
29
27
  x = get_hash(value)
30
28
  was_inserted = true
31
29
  @k.times do |i|
32
- a, b = @hash_params[i]
30
+ a, b = get_hash_params(i)
33
31
  position = get_position(a, b, x)
34
- was_inserted = false unless @bitset[position]
35
- @bitset[position] = true
32
+ was_inserted = false unless self.get_bit(position)
33
+ self.set_bit(position)
36
34
  end
37
35
  @count += 1 unless was_inserted
38
36
  value
@@ -42,14 +40,30 @@ module BloomFilter
42
40
  x = get_hash(value)
43
41
  result = true
44
42
  @k.times do |i|
45
- a, b = @hash_params[i]
46
- result = false unless @bitset[get_position(a, b, x)]
43
+ a, b = get_hash_params(i)
44
+ result = false unless self.get_bit(get_position(a, b, x))
47
45
  end
48
46
 
49
47
  result
50
48
  end
51
49
  alias :includes? :contains?
52
50
 
51
+ def get_bit(position)
52
+ @bitset[position] if is_valid_position(position)
53
+ end
54
+
55
+ def set_bit(position)
56
+ @bitset[position] = true if is_valid_position(position)
57
+ end
58
+
59
+ def clear_bit(position)
60
+ @bitset[position] = false if is_valid_position(position)
61
+ end
62
+
63
+ def bit_size
64
+ @m
65
+ end
66
+
53
67
  private
54
68
 
55
69
  def get_position(a, b, val)
@@ -59,5 +73,14 @@ module BloomFilter
59
73
  def get_hash(value)
60
74
  Digest::MD5.hexdigest(value.to_s).to_i(16)
61
75
  end
76
+
77
+ def is_valid_position(position)
78
+ raise OUT_OF_RANGE if position >= @m
79
+ true
80
+ end
81
+
82
+ def get_hash_params(i)
83
+ return 2*i + 1, 2*i + 2
84
+ end
62
85
  end
63
86
  end
@@ -1,3 +1,3 @@
1
1
  module BloomFilter
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: qbloom_filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - qaz
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-09-27 00:00:00.000000000 Z
11
+ date: 2020-09-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Simple Bloom Filter
14
14
  email: