bloomfilter-rb 2.1.1 → 2.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f6473c63de973faec078172e7d3817280d448b8b1d3dd963e44a300a2aff60b2
4
+ data.tar.gz: c92fedd09bcea80a7ff93bff4ccb7a0bfe87b2e55af2d714b96193a4d5bbc870
5
+ SHA512:
6
+ metadata.gz: 5bac15299e3a82b183c24095771ff3e302e2f39ffefbeb20185118e37fe1a50170dd4675043761695dd963f3b93764ac16629ccfe22c8b5b71c6b1e14c75e28b
7
+ data.tar.gz: 86b59a43ae46eac06d8392fb772b0f661531e31c697907775898dd02a67b7c79f020e99b987b6fb5319e7b4636949101e6a5911c24712e8e7a25bbb22dc59ceb
data/.gitignore CHANGED
@@ -1,4 +1,10 @@
1
1
  *.o
2
2
  *.bundle
3
3
  *.swp
4
+ .rvmrc
5
+ .DS_Store
4
6
  ext/Makefile
7
+ lib/cbloomfilter.so
8
+ tmp
9
+ Gemfile.lock
10
+ pkg
data/Gemfile CHANGED
@@ -1,3 +1,3 @@
1
1
  source "http://rubygems.org"
2
2
 
3
- gemspec
3
+ gemspec
data/README.md CHANGED
@@ -23,26 +23,28 @@ Performance of the Bloom filter depends on a number of variables:
23
23
 
24
24
  MRI/C implementation which creates an in-memory filter which can be saved and reloaded from disk.
25
25
 
26
- require 'bloomfilter'
26
+ ```ruby
27
+ require 'bloomfilter-rb'
27
28
 
28
- bf = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
29
- bf.insert("test")
30
- bf.include?("test") # => true
31
- bf.include?("blah") # => false
29
+ bf = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
30
+ bf.insert("test")
31
+ bf.include?("test") # => true
32
+ bf.include?("blah") # => false
32
33
 
33
- bf.delete("test")
34
- bf.include?("test") # => false
34
+ bf.delete("test")
35
+ bf.include?("test") # => false
35
36
 
36
- # Hash with a bloom filter!
37
- bf["test2"] = "bar"
38
- bf["test2"] # => true
39
- bf["test3"] # => false
37
+ # Hash with a bloom filter!
38
+ bf["test2"] = "bar"
39
+ bf["test2"] # => true
40
+ bf["test3"] # => false
40
41
 
41
- bf.stats
42
- Number of filter bits (m): 10
43
- Number of filter elements (n): 2
44
- Number of filter hashes (k) : 2
45
- Predicted false positive rate = 10.87%
42
+ bf.stats
43
+ # => Number of filter bits (m): 10
44
+ # => Number of filter elements (n): 2
45
+ # => Number of filter hashes (k) : 2
46
+ # => Predicted false positive rate = 10.87%
47
+ ```
46
48
 
47
49
  ***
48
50
 
@@ -50,14 +52,16 @@ MRI/C implementation which creates an in-memory filter which can be saved and re
50
52
 
51
53
  Uses [getbit](http://redis.io/commands/getbit)/[setbit](http://redis.io/commands/setbit) on Redis strings - efficient, fast, can be shared by multiple/concurrent processes.
52
54
 
53
- bf = BloomFilter::Redis.new
55
+ ```ruby
56
+ bf = BloomFilter::Redis.new
54
57
 
55
- bf.insert('test')
56
- bf.include?('test') # => true
57
- bf.include?('blah') # => false
58
+ bf.insert('test')
59
+ bf.include?('test') # => true
60
+ bf.include?('blah') # => false
58
61
 
59
- bf.delete('test')
60
- bf.include?('test') # => false
62
+ bf.delete('test')
63
+ bf.include?('test') # => false
64
+ ```
61
65
 
62
66
  ### Memory footprint
63
67
 
@@ -67,16 +71,18 @@ Uses [getbit](http://redis.io/commands/getbit)/[setbit](http://redis.io/commands
67
71
 
68
72
  ***
69
73
 
70
- ## Redis-backed counting bloom filter with TTL's
74
+ ## Redis-backed counting bloom filter with TTLs
71
75
  Uses regular Redis get/set counters to implement a counting filter with optional TTL expiry. Because each "bit" requires its own key in Redis, you do incur a much larger memory overhead.
72
76
 
73
- bf = BloomFilter::CountingRedis.new(:ttl => 2)
77
+ ```ruby
78
+ bf = BloomFilter::CountingRedis.new(:ttl => 2)
74
79
 
75
- bf.insert('test')
76
- bf.include?('test') # => true
80
+ bf.insert('test')
81
+ bf.include?('test') # => true
77
82
 
78
- sleep(2)
79
- bf.include?('test') # => false
83
+ sleep(2)
84
+ bf.include?('test') # => false
85
+ ```
80
86
 
81
87
  ## Credits
82
88
 
@@ -84,4 +90,4 @@ Tatsuya Mori <valdzone@gmail.com> (Original C implementation: http://vald.x0.com
84
90
 
85
91
  ## License
86
92
 
87
- (MIT License) - Copyright (c) 2011 Ilya Grigorik
93
+ MIT License - Copyright (c) 2011 Ilya Grigorik
data/Rakefile CHANGED
@@ -1,9 +1,11 @@
1
+ require 'bundler/gem_tasks'
1
2
  require 'rake'
2
3
  require 'rspec'
3
4
  require 'rspec/core/rake_task'
4
5
  require 'rake/extensiontask'
5
- require 'bundler'
6
6
 
7
7
  Bundler::GemHelper.install_tasks
8
+ Rake::ExtensionTask.new('cbloomfilter')
8
9
  RSpec::Core::RakeTask.new(:spec)
9
- Rake::ExtensionTask.new('cbloomfilter')
10
+ Rake::Task[:spec].prerequisites << :clean
11
+ Rake::Task[:spec].prerequisites << :compile
@@ -8,14 +8,15 @@ Gem::Specification.new do |s|
8
8
  s.platform = Gem::Platform::RUBY
9
9
  s.authors = ["Ilya Grigorik", "Tatsuya Mori"]
10
10
  s.email = ["ilya@igvita.com"]
11
- s.homepage = "http://github.com/igrigorik/bloomfilter"
11
+ s.homepage = "http://github.com/igrigorik/bloomfilter-rb"
12
12
  s.summary = "Counting Bloom Filter implemented in Ruby"
13
13
  s.description = s.summary
14
14
  s.rubyforge_project = "bloomfilter-rb"
15
15
 
16
- s.add_dependency "redis"
17
- s.add_development_dependency "rspec"
16
+ s.add_development_dependency "redis"
17
+ s.add_development_dependency "rspec", ">= 3"
18
18
  s.add_development_dependency "rake"
19
+ s.add_development_dependency "rake-compiler"
19
20
 
20
21
  s.extensions = ["ext/cbloomfilter/extconf.rb"]
21
22
 
@@ -23,4 +24,4 @@ Gem::Specification.new do |s|
23
24
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
24
25
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
25
26
  s.require_paths = ["lib"]
26
- end
27
+ end
@@ -10,8 +10,8 @@ class BloomFilter
10
10
  def initialize(max_entries, num_hashes, seed)
11
11
  @num_hashes = num_hashes
12
12
  @size = max_entries.to_i
13
- @bitmap = BitSet.new(@size)
14
- @__mask = BitSet.new(@size)
13
+ @bitmap = Bitset.new(@size)
14
+ @__mask = Bitset.new(@size)
15
15
  @seed = seed
16
16
  end
17
17
 
@@ -41,7 +41,7 @@ def main
41
41
  while line = ARGF.gets
42
42
  data = line.chop
43
43
 
44
- if bf.new_entry?(data)
44
+ if bf.new?(data)
45
45
  num += 1
46
46
  bf.insert(data)
47
47
  end
@@ -19,7 +19,6 @@ struct BloomFilter {
19
19
  int k; /* # of hash functions */
20
20
  int s; /* # seed of hash functions */
21
21
  int r; /* # raise on bucket overflow? */
22
- int num_set; /* # of set bits */
23
22
  unsigned char *ptr; /* bits data */
24
23
  int bytes; /* size of byte data */
25
24
  };
@@ -37,8 +36,13 @@ void bucket_unset(struct BloomFilter *bf, int index) {
37
36
  if ((c & mask) == 0) {
38
37
  // do nothing
39
38
  } else {
40
- bf->ptr[byte_offset] -= (1 << bit_offset) & ((1 << 8) - 1);
41
- bf->ptr[byte_offset + 1] -= ((1 << bit_offset) & ((1 << 16) - 1)) >> 8;
39
+ // reduce the counter: 11 00 => 10 00 (suppose bf->b is 2)
40
+ c -= (1 << bit_offset) & ((1 << 8) -1);
41
+ // shift the bitmap right by 1 bit: 10 00 => 01 00
42
+ c = (~mask & c) | ((c & mask) >> (bit_offset + 1) << bit_offset);
43
+
44
+ bf->ptr[byte_offset] = c & ((1 << 8) - 1);
45
+ bf->ptr[byte_offset + 1] = (c & ((1 << 16) - 1)) >> 8;
42
46
  }
43
47
 
44
48
  }
@@ -52,8 +56,9 @@ void bucket_set(struct BloomFilter *bf, int index) {
52
56
  if ((c & mask) == mask) {
53
57
  if (bf->r == 1) rb_raise(rb_eRuntimeError, "bucket got filled up");
54
58
  } else {
55
- bf->ptr[byte_offset] += (1 << bit_offset) & ((1 << 8) - 1);
56
- bf->ptr[byte_offset + 1] += ((1 << bit_offset) & ((1 << 16) - 1)) >> 8;
59
+ c = c + ((1 << bit_offset) & ((1 << 8) -1)) | c;
60
+ bf->ptr[byte_offset] = c & ((1 << 8) - 1);
61
+ bf->ptr[byte_offset + 1] = (c & ((1 << 16) - 1)) >> 8;
57
62
  }
58
63
  }
59
64
 
@@ -127,7 +132,6 @@ static VALUE bf_s_new(int argc, VALUE *argv, VALUE self) {
127
132
  bf->k = k;
128
133
  bf->s = s;
129
134
  bf->r = r;
130
- bf->num_set = 0;
131
135
 
132
136
  bf->bytes = ((m * b) + 15) / 8;
133
137
  bf->ptr = ALLOC_N(unsigned char, bf->bytes);
@@ -170,10 +174,22 @@ static VALUE bf_r(VALUE self) {
170
174
  return bf->r == 0 ? Qfalse : Qtrue;
171
175
  }
172
176
 
173
- static VALUE bf_num_set(VALUE self) {
177
+ static VALUE bf_s(VALUE self) {
178
+ struct BloomFilter *bf;
179
+ Data_Get_Struct(self, struct BloomFilter, bf);
180
+ return INT2FIX(bf->s);
181
+ }
182
+
183
+ static VALUE bf_set_bits(VALUE self){
174
184
  struct BloomFilter *bf;
185
+ int i,j,count = 0;
175
186
  Data_Get_Struct(self, struct BloomFilter, bf);
176
- return INT2FIX(bf->num_set);
187
+ for (i = 0; i < bf->bytes; i++) {
188
+ for (j = 0; j < 8; j++) {
189
+ count += (bf->ptr[i] >> j) & 1;
190
+ }
191
+ }
192
+ return INT2FIX(count);
177
193
  }
178
194
 
179
195
  static VALUE bf_insert(VALUE self, VALUE key) {
@@ -203,21 +219,64 @@ static VALUE bf_insert(VALUE self, VALUE key) {
203
219
  bucket_set(bf, index);
204
220
  }
205
221
 
206
- bf->num_set += 1;
207
222
  return Qnil;
208
223
  }
209
224
 
210
225
  static VALUE bf_merge(VALUE self, VALUE other) {
211
226
  struct BloomFilter *bf, *target;
227
+ int i;
212
228
  Data_Get_Struct(self, struct BloomFilter, bf);
213
229
  Data_Get_Struct(other, struct BloomFilter, target);
214
- int i;
215
230
  for (i = 0; i < bf->bytes; i++) {
216
231
  bf->ptr[i] |= target->ptr[i];
217
232
  }
218
233
  return Qnil;
219
234
  }
220
235
 
236
+ static VALUE bf_and(VALUE self, VALUE other) {
237
+ struct BloomFilter *bf, *bf_other, *target;
238
+ VALUE klass, obj, args[5];
239
+ int i;
240
+
241
+ Data_Get_Struct(self, struct BloomFilter, bf);
242
+ Data_Get_Struct(other, struct BloomFilter, bf_other);
243
+ args[0] = INT2FIX(bf->m);
244
+ args[1] = INT2FIX(bf->k);
245
+ args[2] = INT2FIX(bf->s);
246
+ args[3] = INT2FIX(bf->b);
247
+ args[4] = INT2FIX(bf->r);
248
+ klass = rb_funcall(self,rb_intern("class"),0);
249
+ obj = bf_s_new(5,args,klass);
250
+ Data_Get_Struct(obj, struct BloomFilter, target);
251
+ for (i = 0; i < bf->bytes; i++){
252
+ target->ptr[i] = bf->ptr[i] & bf_other->ptr[i];
253
+ }
254
+
255
+ return obj;
256
+ }
257
+
258
+ static VALUE bf_or(VALUE self, VALUE other) {
259
+ struct BloomFilter *bf, *bf_other, *target;
260
+ VALUE klass, obj, args[5];
261
+ int i;
262
+
263
+ Data_Get_Struct(self, struct BloomFilter, bf);
264
+ Data_Get_Struct(other, struct BloomFilter, bf_other);
265
+ args[0] = INT2FIX(bf->m);
266
+ args[1] = INT2FIX(bf->k);
267
+ args[2] = INT2FIX(bf->s);
268
+ args[3] = INT2FIX(bf->b);
269
+ args[4] = INT2FIX(bf->r);
270
+ klass = rb_funcall(self,rb_intern("class"),0);
271
+ obj = bf_s_new(5,args,klass);
272
+ Data_Get_Struct(obj, struct BloomFilter, target);
273
+ for (i = 0; i < bf->bytes; i++){
274
+ target->ptr[i] = bf->ptr[i] | bf_other->ptr[i];
275
+ }
276
+
277
+ return obj;
278
+ }
279
+
221
280
  static VALUE bf_delete(VALUE self, VALUE key) {
222
281
  int index, seed;
223
282
  int i, len, m, k, s;
@@ -245,7 +304,6 @@ static VALUE bf_delete(VALUE self, VALUE key) {
245
304
  bucket_unset(bf, index);
246
305
  }
247
306
 
248
- bf->num_set += 1;
249
307
  return Qnil;
250
308
  }
251
309
 
@@ -309,13 +367,11 @@ static VALUE bf_bitmap(VALUE self) {
309
367
  struct BloomFilter *bf;
310
368
  Data_Get_Struct(self, struct BloomFilter, bf);
311
369
 
312
- VALUE str = rb_str_new(0, bf->m);
370
+ VALUE str = rb_str_new(0, bf->bytes);
313
371
  unsigned char* ptr = (unsigned char *) RSTRING_PTR(str);
314
372
 
315
- int i;
316
- for (i = 0; i < bf->m; i++)
317
- *ptr++ = bucket_get(bf, i);
318
-
373
+ memcpy(ptr, bf->ptr, bf->bytes);
374
+
319
375
  return str;
320
376
  }
321
377
 
@@ -324,11 +380,7 @@ static VALUE bf_load(VALUE self, VALUE bitmap) {
324
380
  Data_Get_Struct(self, struct BloomFilter, bf);
325
381
  unsigned char* ptr = (unsigned char *) RSTRING_PTR(bitmap);
326
382
 
327
- int i;
328
- for (i = 0; i < bf->m; i++) {
329
- if (*ptr++)
330
- bucket_set(bf, i);
331
- }
383
+ memcpy(bf->ptr, ptr, bf->bytes);
332
384
 
333
385
  return Qnil;
334
386
  }
@@ -340,12 +392,15 @@ void Init_cbloomfilter(void) {
340
392
  rb_define_method(cBloomFilter, "k", bf_k, 0);
341
393
  rb_define_method(cBloomFilter, "b", bf_b, 0);
342
394
  rb_define_method(cBloomFilter, "r", bf_r, 0);
343
- rb_define_method(cBloomFilter, "num_set", bf_num_set, 0);
395
+ rb_define_method(cBloomFilter, "set_bits", bf_set_bits, 0);
396
+ rb_define_method(cBloomFilter, "s", bf_s, 0);
344
397
  rb_define_method(cBloomFilter, "insert", bf_insert, 1);
345
398
  rb_define_method(cBloomFilter, "delete", bf_delete, 1);
346
399
  rb_define_method(cBloomFilter, "include?", bf_include, -1);
347
400
  rb_define_method(cBloomFilter, "clear", bf_clear, 0);
348
401
  rb_define_method(cBloomFilter, "merge!", bf_merge, 1);
402
+ rb_define_method(cBloomFilter, "&", bf_and, 1);
403
+ rb_define_method(cBloomFilter, "|", bf_or, 1);
349
404
 
350
405
  rb_define_method(cBloomFilter, "to_s", bf_to_s, 0);
351
406
  rb_define_method(cBloomFilter, "bitmap", bf_bitmap, 0);
@@ -353,7 +408,5 @@ void Init_cbloomfilter(void) {
353
408
 
354
409
  /* functions that have not been implemented, yet */
355
410
 
356
- // rb_define_method(cBloomFilter, "&", bf_and, 1);
357
- // rb_define_method(cBloomFilter, "|", bf_or, 1);
358
411
  // rb_define_method(cBloomFilter, "<=>", bf_cmp, 1);
359
412
  }
@@ -1,4 +1,3 @@
1
- require 'redis'
2
1
  require 'zlib'
3
2
 
4
3
  require 'cbloomfilter'
@@ -6,4 +5,4 @@ require 'bloomfilter/filter'
6
5
  require 'bloomfilter/native'
7
6
  require 'bloomfilter/counting_redis'
8
7
  require 'bloomfilter/redis'
9
- require 'bloomfilter/version'
8
+ require 'bloomfilter/version'
@@ -0,0 +1 @@
1
+ require 'bloomfilter-rb'
@@ -3,14 +3,15 @@ module BloomFilter
3
3
 
4
4
  def initialize(opts = {})
5
5
  @opts = {
6
- :size => 100,
7
- :hashes => 4,
8
- :seed => Time.now.to_i,
9
- :bucket => 3,
10
- :ttl => false,
11
- :server => {}
6
+ :identifier => 'rbloom',
7
+ :size => 100,
8
+ :hashes => 4,
9
+ :seed => Time.now.to_i,
10
+ :bucket => 3,
11
+ :ttl => false,
12
+ :server => {}
12
13
  }.merge opts
13
- @db = ::Redis.new(@opts[:server])
14
+ @db = @opts.delete(:db) || ::Redis.new(@opts[:server])
14
15
  end
15
16
 
16
17
  def insert(key, ttl=nil)
@@ -25,8 +26,10 @@ module BloomFilter
25
26
 
26
27
  def delete(key)
27
28
  indexes_for(key).each do |idx|
28
- if @db.decr(idx).to_i <= 0
29
+ count = @db.decr(idx).to_i
30
+ if count <= 0
29
31
  @db.del(idx)
32
+ @db.setbit(idx, 0) if count < 0
30
33
  end
31
34
  end
32
35
  end
@@ -38,7 +41,7 @@ module BloomFilter
38
41
  alias :key? :include?
39
42
 
40
43
  def num_set
41
- @db.keys("rbloom:*").size
44
+ @db.eval("return #redis.call('keys', '#{@opts[:identifier]}:*')")
42
45
  end
43
46
  alias :size :num_set
44
47
 
@@ -52,7 +55,7 @@ module BloomFilter
52
55
  def indexes_for(key)
53
56
  indexes = []
54
57
  @opts[:hashes].times do |i|
55
- indexes.push "rbloom:" + (Zlib.crc32("#{key}:#{i+@opts[:seed]}") % @opts[:size]).to_s
58
+ indexes.push @opts[:identifier] + ":" + (Zlib.crc32("#{key}:#{i+@opts[:seed]}") % @opts[:size]).to_s
56
59
  end
57
60
 
58
61
  indexes
@@ -1,7 +1,9 @@
1
1
  module BloomFilter
2
+ BloomFilter::ConfigurationMismatch = Class.new(ArgumentError)
3
+
2
4
  class Native < Filter
3
5
  attr_reader :bf
4
-
6
+
5
7
  def initialize(opts = {})
6
8
  @opts = {
7
9
  :size => 100,
@@ -33,9 +35,34 @@ module BloomFilter
33
35
 
34
36
  def delete(key); @bf.delete(key); end
35
37
  def clear; @bf.clear; end
36
- def size; @bf.num_set; end
38
+ def size; @bf.set_bits; end
37
39
  def merge!(o); @bf.merge!(o.bf); end
38
40
 
41
+ # Returns the number of bits that are set to 1 in the filter.
42
+ def set_bits
43
+ @bf.set_bits
44
+ end
45
+
46
+ # Computes the intersection of two Bloom filters.
47
+ # Both filters must have the same size, number of hashes, seed and bucket width -
48
+ # if this is not true +BloomFilter::ConfigurationMismatch+ is raised.
49
+ def &(o)
50
+ raise BloomFilter::ConfigurationMismatch.new unless same_parameters?(o)
51
+ result = self.class.new
52
+ result.instance_variable_set(:@bf,@bf.&(o.bf))
53
+ result
54
+ end
55
+
56
+ # Computes the union of two Bloom filters.
57
+ # Both filters must have the same size, number of hashes, seed and bucket width -
58
+ # if this is not true +BloomFilter::ConfigurationMismatch+ is raised.
59
+ def |(o)
60
+ raise BloomFilter::ConfigurationMismatch.new unless same_parameters?(o)
61
+ result = self.class.new
62
+ result.instance_variable_set(:@bf,@bf.|(o.bf))
63
+ result
64
+ end
65
+
39
66
  def bitmap
40
67
  @bf.bitmap
41
68
  end
@@ -43,8 +70,8 @@ module BloomFilter
43
70
  def marshal_load(ary)
44
71
  opts, bitmap = *ary
45
72
 
46
- @bf = Native.new(opts)
47
- @bf.bf.load(bitmap) if !bitmap.nil?
73
+ initialize(opts)
74
+ @bf.load(bitmap) if !bitmap.nil?
48
75
  end
49
76
 
50
77
  def marshal_dump
@@ -61,5 +88,14 @@ module BloomFilter
61
88
  end
62
89
  end
63
90
 
91
+ protected
92
+
93
+ # Returns true if parameters of the +o+ther filter are
94
+ # the same.
95
+ def same_parameters?(o)
96
+ @bf.m == o.bf.m && @bf.k == o.bf.k &&
97
+ @bf.s == o.bf.s && @bf.b == o.bf.b
98
+ end
99
+
64
100
  end
65
101
  end
@@ -10,7 +10,7 @@ module BloomFilter
10
10
  :eager => false,
11
11
  :server => {}
12
12
  }.merge opts
13
- @db = ::Redis.new(@opts[:server])
13
+ @db = @opts.delete(:db) || ::Redis.new(@opts[:server])
14
14
 
15
15
  if @opts[:eager]
16
16
  @db.setbit @opts[:namespace], @opts[:size]+1, 1
@@ -45,11 +45,7 @@ module BloomFilter
45
45
  alias :key? :include?
46
46
 
47
47
  def delete(key)
48
- @db.pipelined do
49
- indexes_for(key) do |idx|
50
- @db.setbit @opts[:namespace], idx, 0
51
- end
52
- end
48
+ warn "Deletes are disabled on non-counting filter, see: https://github.com/igrigorik/bloomfilter-rb/issues/37. This method will be deprecated in a future release."
53
49
  end
54
50
 
55
51
  def clear
@@ -1,3 +1,3 @@
1
1
  module BloomFilter
2
- VERSION = "2.1.1"
2
+ VERSION = "2.1.2"
3
3
  end
@@ -1,52 +1,61 @@
1
1
  require 'helper'
2
2
 
3
3
  describe BloomFilter::CountingRedis do
4
- include BloomFilter
5
4
 
6
- context "use Redis for storage" do
7
- it "should store data in Redis" do
8
- bf = CountingRedis.new
5
+ it "should connect to remote redis server" do
6
+ expect { BloomFilter::CountingRedis.new }.not_to raise_error
7
+ end
9
8
 
10
- bf.insert(:abcd)
11
- bf.insert('test')
12
- bf.include?('test').should be_true
13
- bf.key?('test').should be_true
9
+ it "should allow redis client instance to be passed in" do
10
+ redis_client = double("Redis")
11
+ bf = BloomFilter::CountingRedis.new(:db => redis_client)
12
+ expect(bf.instance_variable_get(:@db)).to be(redis_client)
13
+ end
14
14
 
15
- bf.include?('test', 'test2').should be_false
16
- bf.include?('test', 'abcd').should be_true
15
+ context "a default CountingRedis instance" do
16
+ before do
17
+ # clear all redis databases
18
+ subject.instance_variable_get(:@db).flushall
17
19
  end
20
+
21
+ it "should store data in Redis" do
22
+ subject.insert(:abcd)
23
+ subject.insert('test')
18
24
 
19
- it "should accept a TTL value for a key" do
20
- bf = CountingRedis.new(:ttl => 1)
21
-
22
- bf.insert('test')
23
- bf.include?('test').should be_true
25
+ expect(subject.include?('test')).to be true
26
+ expect(subject.key?('test')).to be true
24
27
 
25
- sleep(2)
26
- bf.include?('test').should be_false
28
+ expect(subject.include?('test', 'test2')).to be false
29
+ expect(subject.include?('test', 'abcd')).to be true
30
+ expect(subject.include?('test', 'abcd', 'nada')).to be false
27
31
  end
28
32
 
29
33
  it "should delete keys from Redis" do
30
- bf = CountingRedis.new
31
-
32
- bf.insert('test')
33
- bf.include?('test').should be_true
34
+ subject.insert('test')
35
+ expect(subject.include?('test')).to be true
34
36
 
35
- bf.delete('test')
36
- bf.include?('test').should be_false
37
+ subject.delete('test')
38
+ expect(subject.include?('test')).to be false
37
39
  end
38
40
 
39
41
  it "should output current stats" do
40
- bf = CountingRedis.new
41
- bf.clear
42
-
43
- bf.insert('test')
44
- bf.size.should == 4
45
- lambda { bf.stats }.should_not raise_error
42
+ subject.insert('test')
43
+ expect(subject.size).to eq(4)
44
+ expect { subject.stats }.not_to raise_error
46
45
  end
46
+ end
47
47
 
48
- it "should connect to remote redis server" do
49
- lambda { CountingRedis.new }.should_not raise_error
48
+ context "a TTL 1 instance" do
49
+ subject { BloomFilter::CountingRedis.new(:ttl => 1) }
50
+
51
+ it "should accept a TTL value for a key" do
52
+ subject.instance_variable_get(:@db).flushall
53
+
54
+ subject.insert('test')
55
+ expect(subject.include?('test')).to be true
56
+
57
+ sleep(2)
58
+ expect(subject.include?('test')).to be false
50
59
  end
51
60
  end
52
- end
61
+ end
data/spec/helper.rb CHANGED
@@ -1,2 +1,3 @@
1
+ require 'redis'
1
2
  require 'bundler/setup'
2
- require 'bloomfilter-rb'
3
+ require 'bloomfilter-rb'
data/spec/native_spec.rb CHANGED
@@ -1,58 +1,116 @@
1
1
  require 'helper'
2
2
 
3
3
  describe BloomFilter::Native do
4
- include BloomFilter
5
4
 
6
5
  it "should clear" do
7
- bf = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
6
+ bf = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
8
7
  bf.insert("test")
9
- bf.include?("test").should be_true
8
+ expect(bf.include?("test")).to be true
10
9
  bf.clear
11
- bf.include?("test").should be_false
10
+ expect(bf.include?("test")).to be false
12
11
  end
13
12
 
14
13
  it "should merge" do
15
- bf1 = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
16
- bf2 = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
14
+ bf1 = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
15
+ bf2 = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
17
16
  bf2.insert("test")
18
- bf1.include?("test").should be_false
17
+ expect(bf1.include?("test")).to be false
19
18
  bf1.merge!(bf2)
20
- bf1.include?("test").should be_true
21
- bf2.include?("test").should be_true
19
+ expect(bf1.include?("test")).to be true
20
+ expect(bf2.include?("test")).to be true
22
21
  end
23
22
 
24
23
  context "behave like a bloomfilter" do
25
- it "should test set memerbship" do
26
- bf = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
24
+ it "should test set membership" do
25
+ bf = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
27
26
  bf.insert("test")
28
27
  bf.insert("test1")
29
28
 
30
- bf.include?("test").should be_true
31
- bf.include?("abcd").should be_false
32
- bf.include?("test", "test1").should be_true
29
+ expect(bf.include?("test")).to be true
30
+ expect(bf.include?("abcd")).to be false
31
+ expect(bf.include?("test", "test1")).to be true
33
32
  end
34
33
 
35
34
  it "should work with any object's to_s" do
36
- bf = Native.new
37
- bf.insert(:test)
38
- bf.insert(:test1)
39
- bf.insert(12345)
40
-
41
- bf.include?("test").should be_true
42
- bf.include?("abcd").should be_false
43
- bf.include?("test", "test1", '12345').should be_true
35
+ subject.insert(:test)
36
+ subject.insert(:test1)
37
+ subject.insert(12345)
38
+
39
+ expect(subject.include?("test")).to be true
40
+ expect(subject.include?("abcd")).to be false
41
+ expect(subject.include?("test", "test1", '12345')).to be true
42
+ end
43
+
44
+ it "should return the number of bits set to 1" do
45
+ bf = BloomFilter::Native.new(:hashes => 4)
46
+ bf.insert("test")
47
+ expect(bf.set_bits).to be == 4
48
+ bf.delete("test")
49
+ expect(bf.set_bits).to be == 0
50
+
51
+ bf = BloomFilter::Native.new(:hashes => 1)
52
+ bf.insert("test")
53
+ expect(bf.set_bits).to be == 1
54
+ end
55
+
56
+ it "should return intersection with other filter" do
57
+ bf1 = BloomFilter::Native.new(:seed => 1)
58
+ bf1.insert("test")
59
+ bf1.insert("test1")
60
+
61
+ bf2 = BloomFilter::Native.new(:seed => 1)
62
+ bf2.insert("test")
63
+ bf2.insert("test2")
64
+
65
+ bf3 = bf1 & bf2
66
+ expect(bf3.include?("test")).to be true
67
+ expect(bf3.include?("test1")).to be false
68
+ expect(bf3.include?("test2")).to be false
69
+ end
70
+
71
+ it "should raise an exception when intersection is to be computed for incompatible filters" do
72
+ bf1 = BloomFilter::Native.new(:size => 10)
73
+ bf1.insert("test")
74
+
75
+ bf2 = BloomFilter::Native.new(:size => 20)
76
+ bf2.insert("test")
77
+
78
+ expect { bf1 & bf2 }.to raise_error(BloomFilter::ConfigurationMismatch)
79
+ end
80
+
81
+ it "should return union with other filter" do
82
+ bf1 = BloomFilter::Native.new(:seed => 1)
83
+ bf1.insert("test")
84
+ bf1.insert("test1")
85
+
86
+ bf2 = BloomFilter::Native.new(:seed => 1)
87
+ bf2.insert("test")
88
+ bf2.insert("test2")
89
+
90
+ bf3 = bf1 | bf2
91
+ expect(bf3.include?("test")).to be true
92
+ expect(bf3.include?("test1")).to be true
93
+ expect(bf3.include?("test2")).to be true
94
+ end
95
+
96
+ it "should raise an exception when union is to be computed for incompatible filters" do
97
+ bf1 = BloomFilter::Native.new(:size => 10)
98
+ bf1.insert("test")
99
+
100
+ bf2 = BloomFilter::Native.new(:size => 20)
101
+ bf2.insert("test")
102
+
103
+ expect {bf1 | bf2}.to raise_error(BloomFilter::ConfigurationMismatch)
44
104
  end
45
105
  end
46
106
 
47
107
  context "behave like counting bloom filter" do
48
108
  it "should delete / decrement keys" do
49
- bf = Native.new
109
+ subject.insert("test")
110
+ expect(subject.include?("test")).to be true
50
111
 
51
- bf.insert("test")
52
- bf.include?("test").should be_true
53
-
54
- bf.delete("test")
55
- bf.include?("test").should be_false
112
+ subject.delete("test")
113
+ expect(subject.include?("test")).to be false
56
114
  end
57
115
  end
58
116
 
@@ -60,20 +118,32 @@ describe BloomFilter::Native do
60
118
  after(:each) { File.unlink('bf.out') }
61
119
 
62
120
  it "should marshall the bloomfilter" do
63
- bf = Native.new
64
- lambda { bf.save('bf.out') }.should_not raise_error
121
+ bf = BloomFilter::Native.new
122
+ expect { bf.save('bf.out') }.not_to raise_error
65
123
  end
66
124
 
67
125
  it "should load marshalled bloomfilter" do
68
- bf = Native.new
69
- bf.insert('foo')
70
- bf.insert('bar')
71
- bf.save('bf.out')
72
-
73
- bf = Native.load('bf.out')
74
- bf.include?('foo').should be_true
75
- bf.include?('bar').should be_true
76
- bf.include?('baz').should be_false
126
+ subject.insert('foo')
127
+ subject.insert('bar')
128
+ subject.save('bf.out')
129
+
130
+ bf2 = BloomFilter::Native.load('bf.out')
131
+ expect(bf2.include?('foo')).to be true
132
+ expect(bf2.include?('bar')).to be true
133
+ expect(bf2.include?('baz')).to be false
134
+
135
+ expect(subject.send(:same_parameters?, bf2)).to be true
77
136
  end
137
+
138
+ it "should serialize to a file size proporational its bucket size" do
139
+ fs_size = 0
140
+ 8.times do |i|
141
+ bf = BloomFilter::Native.new(size: 10_000, bucket: i+1)
142
+ bf.save('bf.out')
143
+ prev_size, fs_size = fs_size, File.size('bf.out')
144
+ expect(prev_size).to be < fs_size
145
+ end
146
+ end
147
+
78
148
  end
79
- end
149
+ end
data/spec/redis_spec.rb CHANGED
@@ -1,54 +1,62 @@
1
1
  require 'helper'
2
2
 
3
3
  describe BloomFilter::Redis do
4
- include BloomFilter
5
4
 
6
5
  context "use Redis bitstring for storage" do
7
- let(:bf) { Redis.new }
6
+ before do
7
+ # clear all redis databases
8
+ subject.instance_variable_get(:@db).flushall
9
+ end
8
10
 
9
11
  it "should store data in Redis" do
10
- bf.insert(:abcd)
11
- bf.insert('test')
12
- bf.include?('test').should be_true
13
- bf.key?('test').should be_true
12
+ subject.insert(:abcd)
13
+ subject.insert('test')
14
+ expect(subject.include?('test')).to be true
15
+ expect(subject.key?('test')).to be true
14
16
 
15
- bf.include?('test', 'test2').should be_false
16
- bf.include?('test', 'abcd').should be_true
17
+ expect(subject.include?('test', 'test2')).to be false
18
+ expect(subject.include?('test', 'abcd')).to be true
17
19
  end
18
20
 
19
- it "should delete keys from Redis" do
20
- bf.insert('test')
21
- bf.include?('test').should be_true
21
+ it "should not delete keys from Redis" do
22
+ subject.insert('test')
23
+ expect(subject.include?('test')).to be true
22
24
 
23
- bf.delete('test')
24
- bf.include?('test').should be_false
25
+ subject.delete('test')
26
+ expect(subject.include?('test')).to be true
25
27
  end
26
28
 
27
29
  it "should clear Redis filter" do
28
- bf.insert('test')
29
- bf.include?('test').should be_true
30
+ subject.insert('test')
31
+ expect(subject.include?('test')).to be true
30
32
 
31
- bf.clear
32
- bf.include?('test').should be_false
33
+ subject.clear
34
+ expect(subject.include?('test')).to be false
33
35
  end
34
36
 
35
37
  it "should output current stats" do
36
- bf.clear
37
- bf.insert('test')
38
- lambda { bf.stats }.should_not raise_error
38
+ subject.clear
39
+ subject.insert('test')
40
+ expect { subject.stats }.not_to raise_error
39
41
  end
40
42
 
41
43
  it "should connect to remote redis server" do
42
- lambda { Redis.new }.should_not raise_error
44
+ expect { BloomFilter::Redis.new }.not_to raise_error
45
+ end
46
+
47
+ it "should allow redis client instance to be passed in" do
48
+ redis_client = double("Redis")
49
+ bf = BloomFilter::Redis.new(:db => redis_client)
50
+ expect(bf.instance_variable_get(:@db)).to be redis_client
43
51
  end
44
52
 
45
53
  it "should allow namespaced BloomFilters" do
46
- bf1 = Redis.new(:namespace => :a)
47
- bf2 = Redis.new(:namespace => :b)
54
+ bf1 = BloomFilter::Redis.new(:namespace => :a)
55
+ bf2 = BloomFilter::Redis.new(:namespace => :b)
48
56
 
49
57
  bf1.insert('test')
50
- bf1.include?('test').should be_true
51
- bf2.include?('test').should be_false
58
+ expect(bf1.include?('test')).to be true
59
+ expect(bf2.include?('test')).to be false
52
60
  end
53
61
  end
54
- end
62
+ end
metadata CHANGED
@@ -1,66 +1,83 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bloomfilter-rb
3
- version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 2.1.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.1.2
6
5
  platform: ruby
7
- authors:
6
+ authors:
8
7
  - Ilya Grigorik
9
8
  - Tatsuya Mori
10
- autorequire:
9
+ autorequire:
11
10
  bindir: bin
12
11
  cert_chain: []
13
-
14
- date: 2011-03-31 00:00:00 -04:00
15
- default_executable:
16
- dependencies:
17
- - !ruby/object:Gem::Dependency
12
+ date: 2021-07-10 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
18
15
  name: redis
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: '0'
21
+ type: :development
19
22
  prerelease: false
20
- requirement: &id001 !ruby/object:Gem::Requirement
21
- none: false
22
- requirements:
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
23
25
  - - ">="
24
- - !ruby/object:Gem::Version
25
- version: "0"
26
- type: :runtime
27
- version_requirements: *id001
28
- - !ruby/object:Gem::Dependency
26
+ - !ruby/object:Gem::Version
27
+ version: '0'
28
+ - !ruby/object:Gem::Dependency
29
29
  name: rspec
30
- prerelease: false
31
- requirement: &id002 !ruby/object:Gem::Requirement
32
- none: false
33
- requirements:
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
34
32
  - - ">="
35
- - !ruby/object:Gem::Version
36
- version: "0"
33
+ - !ruby/object:Gem::Version
34
+ version: '3'
37
35
  type: :development
38
- version_requirements: *id002
39
- - !ruby/object:Gem::Dependency
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '3'
42
+ - !ruby/object:Gem::Dependency
40
43
  name: rake
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
41
50
  prerelease: false
42
- requirement: &id003 !ruby/object:Gem::Requirement
43
- none: false
44
- requirements:
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rake-compiler
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
45
60
  - - ">="
46
- - !ruby/object:Gem::Version
47
- version: "0"
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
48
63
  type: :development
49
- version_requirements: *id003
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
50
70
  description: Counting Bloom Filter implemented in Ruby
51
- email:
71
+ email:
52
72
  - ilya@igvita.com
53
73
  executables: []
54
-
55
- extensions:
74
+ extensions:
56
75
  - ext/cbloomfilter/extconf.rb
57
76
  extra_rdoc_files: []
58
-
59
- files:
60
- - .gitignore
61
- - .rspec
77
+ files:
78
+ - ".gitignore"
79
+ - ".rspec"
62
80
  - Gemfile
63
- - Gemfile.lock
64
81
  - README.md
65
82
  - Rakefile
66
83
  - benchmark/redis-bm.rb
@@ -74,6 +91,7 @@ files:
74
91
  - ext/cbloomfilter/crc32.h
75
92
  - ext/cbloomfilter/extconf.rb
76
93
  - lib/bloomfilter-rb.rb
94
+ - lib/bloomfilter.rb
77
95
  - lib/bloomfilter/counting_redis.rb
78
96
  - lib/bloomfilter/filter.rb
79
97
  - lib/bloomfilter/native.rb
@@ -83,35 +101,29 @@ files:
83
101
  - spec/helper.rb
84
102
  - spec/native_spec.rb
85
103
  - spec/redis_spec.rb
86
- has_rdoc: true
87
- homepage: http://github.com/igrigorik/bloomfilter
104
+ homepage: http://github.com/igrigorik/bloomfilter-rb
88
105
  licenses: []
89
-
90
- post_install_message:
106
+ metadata: {}
107
+ post_install_message:
91
108
  rdoc_options: []
92
-
93
- require_paths:
109
+ require_paths:
94
110
  - lib
95
- required_ruby_version: !ruby/object:Gem::Requirement
96
- none: false
97
- requirements:
111
+ required_ruby_version: !ruby/object:Gem::Requirement
112
+ requirements:
98
113
  - - ">="
99
- - !ruby/object:Gem::Version
100
- version: "0"
101
- required_rubygems_version: !ruby/object:Gem::Requirement
102
- none: false
103
- requirements:
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ requirements:
104
118
  - - ">="
105
- - !ruby/object:Gem::Version
106
- version: "0"
119
+ - !ruby/object:Gem::Version
120
+ version: '0'
107
121
  requirements: []
108
-
109
- rubyforge_project: bloomfilter-rb
110
- rubygems_version: 1.6.2
111
- signing_key:
112
- specification_version: 3
122
+ rubygems_version: 3.0.3
123
+ signing_key:
124
+ specification_version: 4
113
125
  summary: Counting Bloom Filter implemented in Ruby
114
- test_files:
126
+ test_files:
115
127
  - spec/counting_redis_spec.rb
116
128
  - spec/helper.rb
117
129
  - spec/native_spec.rb
data/Gemfile.lock DELETED
@@ -1,28 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- bloomfilter-rb (2.0.0)
5
- redis
6
-
7
- GEM
8
- remote: http://rubygems.org/
9
- specs:
10
- diff-lcs (1.1.2)
11
- rake (0.8.7)
12
- redis (2.2.0)
13
- rspec (2.5.0)
14
- rspec-core (~> 2.5.0)
15
- rspec-expectations (~> 2.5.0)
16
- rspec-mocks (~> 2.5.0)
17
- rspec-core (2.5.1)
18
- rspec-expectations (2.5.0)
19
- diff-lcs (~> 1.1.2)
20
- rspec-mocks (2.5.0)
21
-
22
- PLATFORMS
23
- ruby
24
-
25
- DEPENDENCIES
26
- bloomfilter-rb!
27
- rake
28
- rspec