bloomfilter-rb 2.1.1 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f6473c63de973faec078172e7d3817280d448b8b1d3dd963e44a300a2aff60b2
4
+ data.tar.gz: c92fedd09bcea80a7ff93bff4ccb7a0bfe87b2e55af2d714b96193a4d5bbc870
5
+ SHA512:
6
+ metadata.gz: 5bac15299e3a82b183c24095771ff3e302e2f39ffefbeb20185118e37fe1a50170dd4675043761695dd963f3b93764ac16629ccfe22c8b5b71c6b1e14c75e28b
7
+ data.tar.gz: 86b59a43ae46eac06d8392fb772b0f661531e31c697907775898dd02a67b7c79f020e99b987b6fb5319e7b4636949101e6a5911c24712e8e7a25bbb22dc59ceb
data/.gitignore CHANGED
@@ -1,4 +1,10 @@
1
1
  *.o
2
2
  *.bundle
3
3
  *.swp
4
+ .rvmrc
5
+ .DS_Store
4
6
  ext/Makefile
7
+ lib/cbloomfilter.so
8
+ tmp
9
+ Gemfile.lock
10
+ pkg
data/Gemfile CHANGED
@@ -1,3 +1,3 @@
1
1
  source "http://rubygems.org"
2
2
 
3
- gemspec
3
+ gemspec
data/README.md CHANGED
@@ -23,26 +23,28 @@ Performance of the Bloom filter depends on a number of variables:
23
23
 
24
24
  MRI/C implementation which creates an in-memory filter which can be saved and reloaded from disk.
25
25
 
26
- require 'bloomfilter'
26
+ ```ruby
27
+ require 'bloomfilter-rb'
27
28
 
28
- bf = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
29
- bf.insert("test")
30
- bf.include?("test") # => true
31
- bf.include?("blah") # => false
29
+ bf = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
30
+ bf.insert("test")
31
+ bf.include?("test") # => true
32
+ bf.include?("blah") # => false
32
33
 
33
- bf.delete("test")
34
- bf.include?("test") # => false
34
+ bf.delete("test")
35
+ bf.include?("test") # => false
35
36
 
36
- # Hash with a bloom filter!
37
- bf["test2"] = "bar"
38
- bf["test2"] # => true
39
- bf["test3"] # => false
37
+ # Hash with a bloom filter!
38
+ bf["test2"] = "bar"
39
+ bf["test2"] # => true
40
+ bf["test3"] # => false
40
41
 
41
- bf.stats
42
- Number of filter bits (m): 10
43
- Number of filter elements (n): 2
44
- Number of filter hashes (k) : 2
45
- Predicted false positive rate = 10.87%
42
+ bf.stats
43
+ # => Number of filter bits (m): 10
44
+ # => Number of filter elements (n): 2
45
+ # => Number of filter hashes (k) : 2
46
+ # => Predicted false positive rate = 10.87%
47
+ ```
46
48
 
47
49
  ***
48
50
 
@@ -50,14 +52,16 @@ MRI/C implementation which creates an in-memory filter which can be saved and re
50
52
 
51
53
  Uses [getbit](http://redis.io/commands/getbit)/[setbit](http://redis.io/commands/setbit) on Redis strings - efficient, fast, can be shared by multiple/concurrent processes.
52
54
 
53
- bf = BloomFilter::Redis.new
55
+ ```ruby
56
+ bf = BloomFilter::Redis.new
54
57
 
55
- bf.insert('test')
56
- bf.include?('test') # => true
57
- bf.include?('blah') # => false
58
+ bf.insert('test')
59
+ bf.include?('test') # => true
60
+ bf.include?('blah') # => false
58
61
 
59
- bf.delete('test')
60
- bf.include?('test') # => false
62
+ bf.delete('test')
63
+ bf.include?('test') # => false
64
+ ```
61
65
 
62
66
  ### Memory footprint
63
67
 
@@ -67,16 +71,18 @@ Uses [getbit](http://redis.io/commands/getbit)/[setbit](http://redis.io/commands
67
71
 
68
72
  ***
69
73
 
70
- ## Redis-backed counting bloom filter with TTL's
74
+ ## Redis-backed counting bloom filter with TTLs
71
75
  Uses regular Redis get/set counters to implement a counting filter with optional TTL expiry. Because each "bit" requires its own key in Redis, you do incur a much larger memory overhead.
72
76
 
73
- bf = BloomFilter::CountingRedis.new(:ttl => 2)
77
+ ```ruby
78
+ bf = BloomFilter::CountingRedis.new(:ttl => 2)
74
79
 
75
- bf.insert('test')
76
- bf.include?('test') # => true
80
+ bf.insert('test')
81
+ bf.include?('test') # => true
77
82
 
78
- sleep(2)
79
- bf.include?('test') # => false
83
+ sleep(2)
84
+ bf.include?('test') # => false
85
+ ```
80
86
 
81
87
  ## Credits
82
88
 
@@ -84,4 +90,4 @@ Tatsuya Mori <valdzone@gmail.com> (Original C implementation: http://vald.x0.com
84
90
 
85
91
  ## License
86
92
 
87
- (MIT License) - Copyright (c) 2011 Ilya Grigorik
93
+ MIT License - Copyright (c) 2011 Ilya Grigorik
data/Rakefile CHANGED
@@ -1,9 +1,11 @@
1
+ require 'bundler/gem_tasks'
1
2
  require 'rake'
2
3
  require 'rspec'
3
4
  require 'rspec/core/rake_task'
4
5
  require 'rake/extensiontask'
5
- require 'bundler'
6
6
 
7
7
  Bundler::GemHelper.install_tasks
8
+ Rake::ExtensionTask.new('cbloomfilter')
8
9
  RSpec::Core::RakeTask.new(:spec)
9
- Rake::ExtensionTask.new('cbloomfilter')
10
+ Rake::Task[:spec].prerequisites << :clean
11
+ Rake::Task[:spec].prerequisites << :compile
@@ -8,14 +8,15 @@ Gem::Specification.new do |s|
8
8
  s.platform = Gem::Platform::RUBY
9
9
  s.authors = ["Ilya Grigorik", "Tatsuya Mori"]
10
10
  s.email = ["ilya@igvita.com"]
11
- s.homepage = "http://github.com/igrigorik/bloomfilter"
11
+ s.homepage = "http://github.com/igrigorik/bloomfilter-rb"
12
12
  s.summary = "Counting Bloom Filter implemented in Ruby"
13
13
  s.description = s.summary
14
14
  s.rubyforge_project = "bloomfilter-rb"
15
15
 
16
- s.add_dependency "redis"
17
- s.add_development_dependency "rspec"
16
+ s.add_development_dependency "redis"
17
+ s.add_development_dependency "rspec", ">= 3"
18
18
  s.add_development_dependency "rake"
19
+ s.add_development_dependency "rake-compiler"
19
20
 
20
21
  s.extensions = ["ext/cbloomfilter/extconf.rb"]
21
22
 
@@ -23,4 +24,4 @@ Gem::Specification.new do |s|
23
24
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
24
25
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
25
26
  s.require_paths = ["lib"]
26
- end
27
+ end
@@ -10,8 +10,8 @@ class BloomFilter
10
10
  def initialize(max_entries, num_hashes, seed)
11
11
  @num_hashes = num_hashes
12
12
  @size = max_entries.to_i
13
- @bitmap = BitSet.new(@size)
14
- @__mask = BitSet.new(@size)
13
+ @bitmap = Bitset.new(@size)
14
+ @__mask = Bitset.new(@size)
15
15
  @seed = seed
16
16
  end
17
17
 
@@ -41,7 +41,7 @@ def main
41
41
  while line = ARGF.gets
42
42
  data = line.chop
43
43
 
44
- if bf.new_entry?(data)
44
+ if bf.new?(data)
45
45
  num += 1
46
46
  bf.insert(data)
47
47
  end
@@ -19,7 +19,6 @@ struct BloomFilter {
19
19
  int k; /* # of hash functions */
20
20
  int s; /* # seed of hash functions */
21
21
  int r; /* # raise on bucket overflow? */
22
- int num_set; /* # of set bits */
23
22
  unsigned char *ptr; /* bits data */
24
23
  int bytes; /* size of byte data */
25
24
  };
@@ -37,8 +36,13 @@ void bucket_unset(struct BloomFilter *bf, int index) {
37
36
  if ((c & mask) == 0) {
38
37
  // do nothing
39
38
  } else {
40
- bf->ptr[byte_offset] -= (1 << bit_offset) & ((1 << 8) - 1);
41
- bf->ptr[byte_offset + 1] -= ((1 << bit_offset) & ((1 << 16) - 1)) >> 8;
39
+ // reduce the counter: 11 00 => 10 00 (suppose bf->b is 2)
40
+ c -= (1 << bit_offset) & ((1 << 8) -1);
41
+ // shift the bitmap right by 1 bit: 10 00 => 01 00
42
+ c = (~mask & c) | ((c & mask) >> (bit_offset + 1) << bit_offset);
43
+
44
+ bf->ptr[byte_offset] = c & ((1 << 8) - 1);
45
+ bf->ptr[byte_offset + 1] = (c & ((1 << 16) - 1)) >> 8;
42
46
  }
43
47
 
44
48
  }
@@ -52,8 +56,9 @@ void bucket_set(struct BloomFilter *bf, int index) {
52
56
  if ((c & mask) == mask) {
53
57
  if (bf->r == 1) rb_raise(rb_eRuntimeError, "bucket got filled up");
54
58
  } else {
55
- bf->ptr[byte_offset] += (1 << bit_offset) & ((1 << 8) - 1);
56
- bf->ptr[byte_offset + 1] += ((1 << bit_offset) & ((1 << 16) - 1)) >> 8;
59
+ c = c + ((1 << bit_offset) & ((1 << 8) -1)) | c;
60
+ bf->ptr[byte_offset] = c & ((1 << 8) - 1);
61
+ bf->ptr[byte_offset + 1] = (c & ((1 << 16) - 1)) >> 8;
57
62
  }
58
63
  }
59
64
 
@@ -127,7 +132,6 @@ static VALUE bf_s_new(int argc, VALUE *argv, VALUE self) {
127
132
  bf->k = k;
128
133
  bf->s = s;
129
134
  bf->r = r;
130
- bf->num_set = 0;
131
135
 
132
136
  bf->bytes = ((m * b) + 15) / 8;
133
137
  bf->ptr = ALLOC_N(unsigned char, bf->bytes);
@@ -170,10 +174,22 @@ static VALUE bf_r(VALUE self) {
170
174
  return bf->r == 0 ? Qfalse : Qtrue;
171
175
  }
172
176
 
173
- static VALUE bf_num_set(VALUE self) {
177
+ static VALUE bf_s(VALUE self) {
178
+ struct BloomFilter *bf;
179
+ Data_Get_Struct(self, struct BloomFilter, bf);
180
+ return INT2FIX(bf->s);
181
+ }
182
+
183
+ static VALUE bf_set_bits(VALUE self){
174
184
  struct BloomFilter *bf;
185
+ int i,j,count = 0;
175
186
  Data_Get_Struct(self, struct BloomFilter, bf);
176
- return INT2FIX(bf->num_set);
187
+ for (i = 0; i < bf->bytes; i++) {
188
+ for (j = 0; j < 8; j++) {
189
+ count += (bf->ptr[i] >> j) & 1;
190
+ }
191
+ }
192
+ return INT2FIX(count);
177
193
  }
178
194
 
179
195
  static VALUE bf_insert(VALUE self, VALUE key) {
@@ -203,21 +219,64 @@ static VALUE bf_insert(VALUE self, VALUE key) {
203
219
  bucket_set(bf, index);
204
220
  }
205
221
 
206
- bf->num_set += 1;
207
222
  return Qnil;
208
223
  }
209
224
 
210
225
  static VALUE bf_merge(VALUE self, VALUE other) {
211
226
  struct BloomFilter *bf, *target;
227
+ int i;
212
228
  Data_Get_Struct(self, struct BloomFilter, bf);
213
229
  Data_Get_Struct(other, struct BloomFilter, target);
214
- int i;
215
230
  for (i = 0; i < bf->bytes; i++) {
216
231
  bf->ptr[i] |= target->ptr[i];
217
232
  }
218
233
  return Qnil;
219
234
  }
220
235
 
236
+ static VALUE bf_and(VALUE self, VALUE other) {
237
+ struct BloomFilter *bf, *bf_other, *target;
238
+ VALUE klass, obj, args[5];
239
+ int i;
240
+
241
+ Data_Get_Struct(self, struct BloomFilter, bf);
242
+ Data_Get_Struct(other, struct BloomFilter, bf_other);
243
+ args[0] = INT2FIX(bf->m);
244
+ args[1] = INT2FIX(bf->k);
245
+ args[2] = INT2FIX(bf->s);
246
+ args[3] = INT2FIX(bf->b);
247
+ args[4] = INT2FIX(bf->r);
248
+ klass = rb_funcall(self,rb_intern("class"),0);
249
+ obj = bf_s_new(5,args,klass);
250
+ Data_Get_Struct(obj, struct BloomFilter, target);
251
+ for (i = 0; i < bf->bytes; i++){
252
+ target->ptr[i] = bf->ptr[i] & bf_other->ptr[i];
253
+ }
254
+
255
+ return obj;
256
+ }
257
+
258
+ static VALUE bf_or(VALUE self, VALUE other) {
259
+ struct BloomFilter *bf, *bf_other, *target;
260
+ VALUE klass, obj, args[5];
261
+ int i;
262
+
263
+ Data_Get_Struct(self, struct BloomFilter, bf);
264
+ Data_Get_Struct(other, struct BloomFilter, bf_other);
265
+ args[0] = INT2FIX(bf->m);
266
+ args[1] = INT2FIX(bf->k);
267
+ args[2] = INT2FIX(bf->s);
268
+ args[3] = INT2FIX(bf->b);
269
+ args[4] = INT2FIX(bf->r);
270
+ klass = rb_funcall(self,rb_intern("class"),0);
271
+ obj = bf_s_new(5,args,klass);
272
+ Data_Get_Struct(obj, struct BloomFilter, target);
273
+ for (i = 0; i < bf->bytes; i++){
274
+ target->ptr[i] = bf->ptr[i] | bf_other->ptr[i];
275
+ }
276
+
277
+ return obj;
278
+ }
279
+
221
280
  static VALUE bf_delete(VALUE self, VALUE key) {
222
281
  int index, seed;
223
282
  int i, len, m, k, s;
@@ -245,7 +304,6 @@ static VALUE bf_delete(VALUE self, VALUE key) {
245
304
  bucket_unset(bf, index);
246
305
  }
247
306
 
248
- bf->num_set += 1;
249
307
  return Qnil;
250
308
  }
251
309
 
@@ -309,13 +367,11 @@ static VALUE bf_bitmap(VALUE self) {
309
367
  struct BloomFilter *bf;
310
368
  Data_Get_Struct(self, struct BloomFilter, bf);
311
369
 
312
- VALUE str = rb_str_new(0, bf->m);
370
+ VALUE str = rb_str_new(0, bf->bytes);
313
371
  unsigned char* ptr = (unsigned char *) RSTRING_PTR(str);
314
372
 
315
- int i;
316
- for (i = 0; i < bf->m; i++)
317
- *ptr++ = bucket_get(bf, i);
318
-
373
+ memcpy(ptr, bf->ptr, bf->bytes);
374
+
319
375
  return str;
320
376
  }
321
377
 
@@ -324,11 +380,7 @@ static VALUE bf_load(VALUE self, VALUE bitmap) {
324
380
  Data_Get_Struct(self, struct BloomFilter, bf);
325
381
  unsigned char* ptr = (unsigned char *) RSTRING_PTR(bitmap);
326
382
 
327
- int i;
328
- for (i = 0; i < bf->m; i++) {
329
- if (*ptr++)
330
- bucket_set(bf, i);
331
- }
383
+ memcpy(bf->ptr, ptr, bf->bytes);
332
384
 
333
385
  return Qnil;
334
386
  }
@@ -340,12 +392,15 @@ void Init_cbloomfilter(void) {
340
392
  rb_define_method(cBloomFilter, "k", bf_k, 0);
341
393
  rb_define_method(cBloomFilter, "b", bf_b, 0);
342
394
  rb_define_method(cBloomFilter, "r", bf_r, 0);
343
- rb_define_method(cBloomFilter, "num_set", bf_num_set, 0);
395
+ rb_define_method(cBloomFilter, "set_bits", bf_set_bits, 0);
396
+ rb_define_method(cBloomFilter, "s", bf_s, 0);
344
397
  rb_define_method(cBloomFilter, "insert", bf_insert, 1);
345
398
  rb_define_method(cBloomFilter, "delete", bf_delete, 1);
346
399
  rb_define_method(cBloomFilter, "include?", bf_include, -1);
347
400
  rb_define_method(cBloomFilter, "clear", bf_clear, 0);
348
401
  rb_define_method(cBloomFilter, "merge!", bf_merge, 1);
402
+ rb_define_method(cBloomFilter, "&", bf_and, 1);
403
+ rb_define_method(cBloomFilter, "|", bf_or, 1);
349
404
 
350
405
  rb_define_method(cBloomFilter, "to_s", bf_to_s, 0);
351
406
  rb_define_method(cBloomFilter, "bitmap", bf_bitmap, 0);
@@ -353,7 +408,5 @@ void Init_cbloomfilter(void) {
353
408
 
354
409
  /* functions that have not been implemented, yet */
355
410
 
356
- // rb_define_method(cBloomFilter, "&", bf_and, 1);
357
- // rb_define_method(cBloomFilter, "|", bf_or, 1);
358
411
  // rb_define_method(cBloomFilter, "<=>", bf_cmp, 1);
359
412
  }
@@ -1,4 +1,3 @@
1
- require 'redis'
2
1
  require 'zlib'
3
2
 
4
3
  require 'cbloomfilter'
@@ -6,4 +5,4 @@ require 'bloomfilter/filter'
6
5
  require 'bloomfilter/native'
7
6
  require 'bloomfilter/counting_redis'
8
7
  require 'bloomfilter/redis'
9
- require 'bloomfilter/version'
8
+ require 'bloomfilter/version'
@@ -0,0 +1 @@
1
+ require 'bloomfilter-rb'
@@ -3,14 +3,15 @@ module BloomFilter
3
3
 
4
4
  def initialize(opts = {})
5
5
  @opts = {
6
- :size => 100,
7
- :hashes => 4,
8
- :seed => Time.now.to_i,
9
- :bucket => 3,
10
- :ttl => false,
11
- :server => {}
6
+ :identifier => 'rbloom',
7
+ :size => 100,
8
+ :hashes => 4,
9
+ :seed => Time.now.to_i,
10
+ :bucket => 3,
11
+ :ttl => false,
12
+ :server => {}
12
13
  }.merge opts
13
- @db = ::Redis.new(@opts[:server])
14
+ @db = @opts.delete(:db) || ::Redis.new(@opts[:server])
14
15
  end
15
16
 
16
17
  def insert(key, ttl=nil)
@@ -25,8 +26,10 @@ module BloomFilter
25
26
 
26
27
  def delete(key)
27
28
  indexes_for(key).each do |idx|
28
- if @db.decr(idx).to_i <= 0
29
+ count = @db.decr(idx).to_i
30
+ if count <= 0
29
31
  @db.del(idx)
32
+ @db.setbit(idx, 0) if count < 0
30
33
  end
31
34
  end
32
35
  end
@@ -38,7 +41,7 @@ module BloomFilter
38
41
  alias :key? :include?
39
42
 
40
43
  def num_set
41
- @db.keys("rbloom:*").size
44
+ @db.eval("return #redis.call('keys', '#{@opts[:identifier]}:*')")
42
45
  end
43
46
  alias :size :num_set
44
47
 
@@ -52,7 +55,7 @@ module BloomFilter
52
55
  def indexes_for(key)
53
56
  indexes = []
54
57
  @opts[:hashes].times do |i|
55
- indexes.push "rbloom:" + (Zlib.crc32("#{key}:#{i+@opts[:seed]}") % @opts[:size]).to_s
58
+ indexes.push @opts[:identifier] + ":" + (Zlib.crc32("#{key}:#{i+@opts[:seed]}") % @opts[:size]).to_s
56
59
  end
57
60
 
58
61
  indexes
@@ -1,7 +1,9 @@
1
1
  module BloomFilter
2
+ BloomFilter::ConfigurationMismatch = Class.new(ArgumentError)
3
+
2
4
  class Native < Filter
3
5
  attr_reader :bf
4
-
6
+
5
7
  def initialize(opts = {})
6
8
  @opts = {
7
9
  :size => 100,
@@ -33,9 +35,34 @@ module BloomFilter
33
35
 
34
36
  def delete(key); @bf.delete(key); end
35
37
  def clear; @bf.clear; end
36
- def size; @bf.num_set; end
38
+ def size; @bf.set_bits; end
37
39
  def merge!(o); @bf.merge!(o.bf); end
38
40
 
41
+ # Returns the number of bits that are set to 1 in the filter.
42
+ def set_bits
43
+ @bf.set_bits
44
+ end
45
+
46
+ # Computes the intersection of two Bloom filters.
47
+ # It assumes that both filters share the same parameters (m, k, s and b) -
48
+ # if this is not true +BloomFilter::ConfigurationMismatch+ is raised.
49
+ def &(o)
50
+ raise BloomFilter::ConfigurationMismatch.new unless same_parameters?(o)
51
+ result = self.class.new
52
+ result.instance_variable_set(:@bf,@bf.&(o.bf))
53
+ result
54
+ end
55
+
56
+ # Computes the union of two Bloom filters.
57
+ # It assumes that both filters have the same size -
58
+ # if this is not true +BloomFilter::ConfigurationMismatch+ is raised.
59
+ def |(o)
60
+ raise BloomFilter::ConfigurationMismatch.new unless same_parameters?(o)
61
+ result = self.class.new
62
+ result.instance_variable_set(:@bf,@bf.|(o.bf))
63
+ result
64
+ end
65
+
39
66
  def bitmap
40
67
  @bf.bitmap
41
68
  end
@@ -43,8 +70,8 @@ module BloomFilter
43
70
  def marshal_load(ary)
44
71
  opts, bitmap = *ary
45
72
 
46
- @bf = Native.new(opts)
47
- @bf.bf.load(bitmap) if !bitmap.nil?
73
+ initialize(opts)
74
+ @bf.load(bitmap) if !bitmap.nil?
48
75
  end
49
76
 
50
77
  def marshal_dump
@@ -61,5 +88,14 @@ module BloomFilter
61
88
  end
62
89
  end
63
90
 
91
+ protected
92
+
93
+ # Returns true if parameters of the +o+ther filter are
94
+ # the same.
95
+ def same_parameters?(o)
96
+ @bf.m == o.bf.m && @bf.k == o.bf.k &&
97
+ @bf.s == o.bf.s && @bf.b == o.bf.b
98
+ end
99
+
64
100
  end
65
101
  end
@@ -10,7 +10,7 @@ module BloomFilter
10
10
  :eager => false,
11
11
  :server => {}
12
12
  }.merge opts
13
- @db = ::Redis.new(@opts[:server])
13
+ @db = @opts.delete(:db) || ::Redis.new(@opts[:server])
14
14
 
15
15
  if @opts[:eager]
16
16
  @db.setbit @opts[:namespace], @opts[:size]+1, 1
@@ -45,11 +45,7 @@ module BloomFilter
45
45
  alias :key? :include?
46
46
 
47
47
  def delete(key)
48
- @db.pipelined do
49
- indexes_for(key) do |idx|
50
- @db.setbit @opts[:namespace], idx, 0
51
- end
52
- end
48
+ warn "Deletes are disabled on non-counting filter, see: https://github.com/igrigorik/bloomfilter-rb/issues/37. This method will be deprecated in a future release."
53
49
  end
54
50
 
55
51
  def clear
@@ -1,3 +1,3 @@
1
1
  module BloomFilter
2
- VERSION = "2.1.1"
2
+ VERSION = "2.1.2"
3
3
  end
@@ -1,52 +1,61 @@
1
1
  require 'helper'
2
2
 
3
3
  describe BloomFilter::CountingRedis do
4
- include BloomFilter
5
4
 
6
- context "use Redis for storage" do
7
- it "should store data in Redis" do
8
- bf = CountingRedis.new
5
+ it "should connect to remote redis server" do
6
+ expect { BloomFilter::CountingRedis.new }.not_to raise_error
7
+ end
9
8
 
10
- bf.insert(:abcd)
11
- bf.insert('test')
12
- bf.include?('test').should be_true
13
- bf.key?('test').should be_true
9
+ it "should allow redis client instance to be passed in" do
10
+ redis_client = double("Redis")
11
+ bf = BloomFilter::CountingRedis.new(:db => redis_client)
12
+ expect(bf.instance_variable_get(:@db)).to be(redis_client)
13
+ end
14
14
 
15
- bf.include?('test', 'test2').should be_false
16
- bf.include?('test', 'abcd').should be_true
15
+ context "a default CountingRedis instance" do
16
+ before do
17
+ # clear all redis databases
18
+ subject.instance_variable_get(:@db).flushall
17
19
  end
20
+
21
+ it "should store data in Redis" do
22
+ subject.insert(:abcd)
23
+ subject.insert('test')
18
24
 
19
- it "should accept a TTL value for a key" do
20
- bf = CountingRedis.new(:ttl => 1)
21
-
22
- bf.insert('test')
23
- bf.include?('test').should be_true
25
+ expect(subject.include?('test')).to be true
26
+ expect(subject.key?('test')).to be true
24
27
 
25
- sleep(2)
26
- bf.include?('test').should be_false
28
+ expect(subject.include?('test', 'test2')).to be false
29
+ expect(subject.include?('test', 'abcd')).to be true
30
+ expect(subject.include?('test', 'abcd', 'nada')).to be false
27
31
  end
28
32
 
29
33
  it "should delete keys from Redis" do
30
- bf = CountingRedis.new
31
-
32
- bf.insert('test')
33
- bf.include?('test').should be_true
34
+ subject.insert('test')
35
+ expect(subject.include?('test')).to be true
34
36
 
35
- bf.delete('test')
36
- bf.include?('test').should be_false
37
+ subject.delete('test')
38
+ expect(subject.include?('test')).to be false
37
39
  end
38
40
 
39
41
  it "should output current stats" do
40
- bf = CountingRedis.new
41
- bf.clear
42
-
43
- bf.insert('test')
44
- bf.size.should == 4
45
- lambda { bf.stats }.should_not raise_error
42
+ subject.insert('test')
43
+ expect(subject.size).to eq(4)
44
+ expect { subject.stats }.not_to raise_error
46
45
  end
46
+ end
47
47
 
48
- it "should connect to remote redis server" do
49
- lambda { CountingRedis.new }.should_not raise_error
48
+ context "a TTL 1 instance" do
49
+ subject { BloomFilter::CountingRedis.new(:ttl => 1) }
50
+
51
+ it "should accept a TTL value for a key" do
52
+ subject.instance_variable_get(:@db).flushall
53
+
54
+ subject.insert('test')
55
+ expect(subject.include?('test')).to be true
56
+
57
+ sleep(2)
58
+ expect(subject.include?('test')).to be false
50
59
  end
51
60
  end
52
- end
61
+ end
data/spec/helper.rb CHANGED
@@ -1,2 +1,3 @@
1
+ require 'redis'
1
2
  require 'bundler/setup'
2
- require 'bloomfilter-rb'
3
+ require 'bloomfilter-rb'
data/spec/native_spec.rb CHANGED
@@ -1,58 +1,116 @@
1
1
  require 'helper'
2
2
 
3
3
  describe BloomFilter::Native do
4
- include BloomFilter
5
4
 
6
5
  it "should clear" do
7
- bf = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
6
+ bf = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
8
7
  bf.insert("test")
9
- bf.include?("test").should be_true
8
+ expect(bf.include?("test")).to be true
10
9
  bf.clear
11
- bf.include?("test").should be_false
10
+ expect(bf.include?("test")).to be false
12
11
  end
13
12
 
14
13
  it "should merge" do
15
- bf1 = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
16
- bf2 = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
14
+ bf1 = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
15
+ bf2 = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
17
16
  bf2.insert("test")
18
- bf1.include?("test").should be_false
17
+ expect(bf1.include?("test")).to be false
19
18
  bf1.merge!(bf2)
20
- bf1.include?("test").should be_true
21
- bf2.include?("test").should be_true
19
+ expect(bf1.include?("test")).to be true
20
+ expect(bf2.include?("test")).to be true
22
21
  end
23
22
 
24
23
  context "behave like a bloomfilter" do
25
- it "should test set memerbship" do
26
- bf = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
24
+ it "should test set membership" do
25
+ bf = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
27
26
  bf.insert("test")
28
27
  bf.insert("test1")
29
28
 
30
- bf.include?("test").should be_true
31
- bf.include?("abcd").should be_false
32
- bf.include?("test", "test1").should be_true
29
+ expect(bf.include?("test")).to be true
30
+ expect(bf.include?("abcd")).to be false
31
+ expect(bf.include?("test", "test1")).to be true
33
32
  end
34
33
 
35
34
  it "should work with any object's to_s" do
36
- bf = Native.new
37
- bf.insert(:test)
38
- bf.insert(:test1)
39
- bf.insert(12345)
40
-
41
- bf.include?("test").should be_true
42
- bf.include?("abcd").should be_false
43
- bf.include?("test", "test1", '12345').should be_true
35
+ subject.insert(:test)
36
+ subject.insert(:test1)
37
+ subject.insert(12345)
38
+
39
+ expect(subject.include?("test")).to be true
40
+ expect(subject.include?("abcd")).to be false
41
+ expect(subject.include?("test", "test1", '12345')).to be true
42
+ end
43
+
44
+ it "should return the number of bits set to 1" do
45
+ bf = BloomFilter::Native.new(:hashes => 4)
46
+ bf.insert("test")
47
+ expect(bf.set_bits).to be == 4
48
+ bf.delete("test")
49
+ expect(bf.set_bits).to be == 0
50
+
51
+ bf = BloomFilter::Native.new(:hashes => 1)
52
+ bf.insert("test")
53
+ expect(bf.set_bits).to be == 1
54
+ end
55
+
56
+ it "should return intersection with other filter" do
57
+ bf1 = BloomFilter::Native.new(:seed => 1)
58
+ bf1.insert("test")
59
+ bf1.insert("test1")
60
+
61
+ bf2 = BloomFilter::Native.new(:seed => 1)
62
+ bf2.insert("test")
63
+ bf2.insert("test2")
64
+
65
+ bf3 = bf1 & bf2
66
+ expect(bf3.include?("test")).to be true
67
+ expect(bf3.include?("test1")).to be false
68
+ expect(bf3.include?("test2")).to be false
69
+ end
70
+
71
+ it "should raise an exception when intersection is to be computed for incompatible filters" do
72
+ bf1 = BloomFilter::Native.new(:size => 10)
73
+ bf1.insert("test")
74
+
75
+ bf2 = BloomFilter::Native.new(:size => 20)
76
+ bf2.insert("test")
77
+
78
+ expect { bf1 & bf2 }.to raise_error(BloomFilter::ConfigurationMismatch)
79
+ end
80
+
81
+ it "should return union with other filter" do
82
+ bf1 = BloomFilter::Native.new(:seed => 1)
83
+ bf1.insert("test")
84
+ bf1.insert("test1")
85
+
86
+ bf2 = BloomFilter::Native.new(:seed => 1)
87
+ bf2.insert("test")
88
+ bf2.insert("test2")
89
+
90
+ bf3 = bf1 | bf2
91
+ expect(bf3.include?("test")).to be true
92
+ expect(bf3.include?("test1")).to be true
93
+ expect(bf3.include?("test2")).to be true
94
+ end
95
+
96
+ it "should raise an exception when union is to be computed for incompatible filters" do
97
+ bf1 = BloomFilter::Native.new(:size => 10)
98
+ bf1.insert("test")
99
+
100
+ bf2 = BloomFilter::Native.new(:size => 20)
101
+ bf2.insert("test")
102
+
103
+ expect {bf1 | bf2}.to raise_error(BloomFilter::ConfigurationMismatch)
44
104
  end
45
105
  end
46
106
 
47
107
  context "behave like counting bloom filter" do
48
108
  it "should delete / decrement keys" do
49
- bf = Native.new
109
+ subject.insert("test")
110
+ expect(subject.include?("test")).to be true
50
111
 
51
- bf.insert("test")
52
- bf.include?("test").should be_true
53
-
54
- bf.delete("test")
55
- bf.include?("test").should be_false
112
+ subject.delete("test")
113
+ expect(subject.include?("test")).to be false
56
114
  end
57
115
  end
58
116
 
@@ -60,20 +118,32 @@ describe BloomFilter::Native do
60
118
  after(:each) { File.unlink('bf.out') }
61
119
 
62
120
  it "should marshall the bloomfilter" do
63
- bf = Native.new
64
- lambda { bf.save('bf.out') }.should_not raise_error
121
+ bf = BloomFilter::Native.new
122
+ expect { bf.save('bf.out') }.not_to raise_error
65
123
  end
66
124
 
67
125
  it "should load marshalled bloomfilter" do
68
- bf = Native.new
69
- bf.insert('foo')
70
- bf.insert('bar')
71
- bf.save('bf.out')
72
-
73
- bf = Native.load('bf.out')
74
- bf.include?('foo').should be_true
75
- bf.include?('bar').should be_true
76
- bf.include?('baz').should be_false
126
+ subject.insert('foo')
127
+ subject.insert('bar')
128
+ subject.save('bf.out')
129
+
130
+ bf2 = BloomFilter::Native.load('bf.out')
131
+ expect(bf2.include?('foo')).to be true
132
+ expect(bf2.include?('bar')).to be true
133
+ expect(bf2.include?('baz')).to be false
134
+
135
+ expect(subject.send(:same_parameters?, bf2)).to be true
77
136
  end
137
+
138
+ it "should serialize to a file size proporational its bucket size" do
139
+ fs_size = 0
140
+ 8.times do |i|
141
+ bf = BloomFilter::Native.new(size: 10_000, bucket: i+1)
142
+ bf.save('bf.out')
143
+ prev_size, fs_size = fs_size, File.size('bf.out')
144
+ expect(prev_size).to be < fs_size
145
+ end
146
+ end
147
+
78
148
  end
79
- end
149
+ end
data/spec/redis_spec.rb CHANGED
@@ -1,54 +1,62 @@
1
1
  require 'helper'
2
2
 
3
3
  describe BloomFilter::Redis do
4
- include BloomFilter
5
4
 
6
5
  context "use Redis bitstring for storage" do
7
- let(:bf) { Redis.new }
6
+ before do
7
+ # clear all redis databases
8
+ subject.instance_variable_get(:@db).flushall
9
+ end
8
10
 
9
11
  it "should store data in Redis" do
10
- bf.insert(:abcd)
11
- bf.insert('test')
12
- bf.include?('test').should be_true
13
- bf.key?('test').should be_true
12
+ subject.insert(:abcd)
13
+ subject.insert('test')
14
+ expect(subject.include?('test')).to be true
15
+ expect(subject.key?('test')).to be true
14
16
 
15
- bf.include?('test', 'test2').should be_false
16
- bf.include?('test', 'abcd').should be_true
17
+ expect(subject.include?('test', 'test2')).to be false
18
+ expect(subject.include?('test', 'abcd')).to be true
17
19
  end
18
20
 
19
- it "should delete keys from Redis" do
20
- bf.insert('test')
21
- bf.include?('test').should be_true
21
+ it "should not delete keys from Redis" do
22
+ subject.insert('test')
23
+ expect(subject.include?('test')).to be true
22
24
 
23
- bf.delete('test')
24
- bf.include?('test').should be_false
25
+ subject.delete('test')
26
+ expect(subject.include?('test')).to be true
25
27
  end
26
28
 
27
29
  it "should clear Redis filter" do
28
- bf.insert('test')
29
- bf.include?('test').should be_true
30
+ subject.insert('test')
31
+ expect(subject.include?('test')).to be true
30
32
 
31
- bf.clear
32
- bf.include?('test').should be_false
33
+ subject.clear
34
+ expect(subject.include?('test')).to be false
33
35
  end
34
36
 
35
37
  it "should output current stats" do
36
- bf.clear
37
- bf.insert('test')
38
- lambda { bf.stats }.should_not raise_error
38
+ subject.clear
39
+ subject.insert('test')
40
+ expect { subject.stats }.not_to raise_error
39
41
  end
40
42
 
41
43
  it "should connect to remote redis server" do
42
- lambda { Redis.new }.should_not raise_error
44
+ expect { BloomFilter::Redis.new }.not_to raise_error
45
+ end
46
+
47
+ it "should allow redis client instance to be passed in" do
48
+ redis_client = double("Redis")
49
+ bf = BloomFilter::Redis.new(:db => redis_client)
50
+ expect(bf.instance_variable_get(:@db)).to be redis_client
43
51
  end
44
52
 
45
53
  it "should allow namespaced BloomFilters" do
46
- bf1 = Redis.new(:namespace => :a)
47
- bf2 = Redis.new(:namespace => :b)
54
+ bf1 = BloomFilter::Redis.new(:namespace => :a)
55
+ bf2 = BloomFilter::Redis.new(:namespace => :b)
48
56
 
49
57
  bf1.insert('test')
50
- bf1.include?('test').should be_true
51
- bf2.include?('test').should be_false
58
+ expect(bf1.include?('test')).to be true
59
+ expect(bf2.include?('test')).to be false
52
60
  end
53
61
  end
54
- end
62
+ end
metadata CHANGED
@@ -1,66 +1,83 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bloomfilter-rb
3
- version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 2.1.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.1.2
6
5
  platform: ruby
7
- authors:
6
+ authors:
8
7
  - Ilya Grigorik
9
8
  - Tatsuya Mori
10
- autorequire:
9
+ autorequire:
11
10
  bindir: bin
12
11
  cert_chain: []
13
-
14
- date: 2011-03-31 00:00:00 -04:00
15
- default_executable:
16
- dependencies:
17
- - !ruby/object:Gem::Dependency
12
+ date: 2021-07-10 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
18
15
  name: redis
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :development
19
22
  prerelease: false
20
- requirement: &id001 !ruby/object:Gem::Requirement
21
- none: false
22
- requirements:
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
23
25
  - - ">="
24
- - !ruby/object:Gem::Version
25
- version: "0"
26
- type: :runtime
27
- version_requirements: *id001
28
- - !ruby/object:Gem::Dependency
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
29
  name: rspec
30
- prerelease: false
31
- requirement: &id002 !ruby/object:Gem::Requirement
32
- none: false
33
- requirements:
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
34
32
  - - ">="
35
- - !ruby/object:Gem::Version
36
- version: "0"
33
+ - !ruby/object:Gem::Version
34
+ version: '3'
37
35
  type: :development
38
- version_requirements: *id002
39
- - !ruby/object:Gem::Dependency
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '3'
42
+ - !ruby/object:Gem::Dependency
40
43
  name: rake
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
41
50
  prerelease: false
42
- requirement: &id003 !ruby/object:Gem::Requirement
43
- none: false
44
- requirements:
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rake-compiler
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
45
60
  - - ">="
46
- - !ruby/object:Gem::Version
47
- version: "0"
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
48
63
  type: :development
49
- version_requirements: *id003
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
50
70
  description: Counting Bloom Filter implemented in Ruby
51
- email:
71
+ email:
52
72
  - ilya@igvita.com
53
73
  executables: []
54
-
55
- extensions:
74
+ extensions:
56
75
  - ext/cbloomfilter/extconf.rb
57
76
  extra_rdoc_files: []
58
-
59
- files:
60
- - .gitignore
61
- - .rspec
77
+ files:
78
+ - ".gitignore"
79
+ - ".rspec"
62
80
  - Gemfile
63
- - Gemfile.lock
64
81
  - README.md
65
82
  - Rakefile
66
83
  - benchmark/redis-bm.rb
@@ -74,6 +91,7 @@ files:
74
91
  - ext/cbloomfilter/crc32.h
75
92
  - ext/cbloomfilter/extconf.rb
76
93
  - lib/bloomfilter-rb.rb
94
+ - lib/bloomfilter.rb
77
95
  - lib/bloomfilter/counting_redis.rb
78
96
  - lib/bloomfilter/filter.rb
79
97
  - lib/bloomfilter/native.rb
@@ -83,35 +101,29 @@ files:
83
101
  - spec/helper.rb
84
102
  - spec/native_spec.rb
85
103
  - spec/redis_spec.rb
86
- has_rdoc: true
87
- homepage: http://github.com/igrigorik/bloomfilter
104
+ homepage: http://github.com/igrigorik/bloomfilter-rb
88
105
  licenses: []
89
-
90
- post_install_message:
106
+ metadata: {}
107
+ post_install_message:
91
108
  rdoc_options: []
92
-
93
- require_paths:
109
+ require_paths:
94
110
  - lib
95
- required_ruby_version: !ruby/object:Gem::Requirement
96
- none: false
97
- requirements:
111
+ required_ruby_version: !ruby/object:Gem::Requirement
112
+ requirements:
98
113
  - - ">="
99
- - !ruby/object:Gem::Version
100
- version: "0"
101
- required_rubygems_version: !ruby/object:Gem::Requirement
102
- none: false
103
- requirements:
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ requirements:
104
118
  - - ">="
105
- - !ruby/object:Gem::Version
106
- version: "0"
119
+ - !ruby/object:Gem::Version
120
+ version: '0'
107
121
  requirements: []
108
-
109
- rubyforge_project: bloomfilter-rb
110
- rubygems_version: 1.6.2
111
- signing_key:
112
- specification_version: 3
122
+ rubygems_version: 3.0.3
123
+ signing_key:
124
+ specification_version: 4
113
125
  summary: Counting Bloom Filter implemented in Ruby
114
- test_files:
126
+ test_files:
115
127
  - spec/counting_redis_spec.rb
116
128
  - spec/helper.rb
117
129
  - spec/native_spec.rb
data/Gemfile.lock DELETED
@@ -1,28 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- bloomfilter-rb (2.0.0)
5
- redis
6
-
7
- GEM
8
- remote: http://rubygems.org/
9
- specs:
10
- diff-lcs (1.1.2)
11
- rake (0.8.7)
12
- redis (2.2.0)
13
- rspec (2.5.0)
14
- rspec-core (~> 2.5.0)
15
- rspec-expectations (~> 2.5.0)
16
- rspec-mocks (~> 2.5.0)
17
- rspec-core (2.5.1)
18
- rspec-expectations (2.5.0)
19
- diff-lcs (~> 1.1.2)
20
- rspec-mocks (2.5.0)
21
-
22
- PLATFORMS
23
- ruby
24
-
25
- DEPENDENCIES
26
- bloomfilter-rb!
27
- rake
28
- rspec