bloomfilter-rb 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.o
2
+ *.bundle
3
+ *.swp
4
+ ext/Makefile
data/.rspec ADDED
File without changes
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "http://rubygems.org"
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,29 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ bloomfilter-rb (2.0.0)
5
+ redis (>= 2.1.1)
6
+
7
+ GEM
8
+ remote: http://rubygems.org/
9
+ specs:
10
+ diff-lcs (1.1.2)
11
+ rake (0.8.7)
12
+ redis (2.1.1)
13
+ rspec (2.3.0)
14
+ rspec-core (~> 2.3.0)
15
+ rspec-expectations (~> 2.3.0)
16
+ rspec-mocks (~> 2.3.0)
17
+ rspec-core (2.3.1)
18
+ rspec-expectations (2.3.0)
19
+ diff-lcs (~> 1.1.2)
20
+ rspec-mocks (2.3.0)
21
+
22
+ PLATFORMS
23
+ ruby
24
+
25
+ DEPENDENCIES
26
+ bloomfilter-rb!
27
+ rake
28
+ redis (>= 2.1.1)
29
+ rspec
data/README.md ADDED
@@ -0,0 +1,87 @@
1
+ # BloomFilter(s) in Ruby
2
+
3
+ - Native (MRI/C) counting bloom filter
4
+ - Redis-backed getbit/setbit non-counting bloom filter
5
+ - Redis-backed set-based counting (+TTL) bloom filter
6
+
7
+ A Bloom filter is a space-efficient probabilistic data structure that is used to test whether an element is a member of a set. False positives are possible, but false negatives are not. For more detail, check the [Wikipedia article](http://en.wikipedia.org/wiki/Bloom_filter). Instead of using k different hash functions, this implementation seeds the CRC32 hash with k different initial values (0, 1, ..., k-1). This may or may not give you a good distribution; it all depends on the data.
8
+
9
+ Performance of the Bloom filter depends on a number of variables:
10
+
11
+ - size of the bit array
12
+ - size of the counter bucket
13
+ - number of hash functions
14
+
15
+ ## Resources
16
+
17
+ - Determining parameters: [Scalable Datasets: Bloom Filters in Ruby](http://www.igvita.com/2008/12/27/scalable-datasets-bloom-filters-in-ruby/)
18
+ - Applications & reasons behind bloom filter: [Flow analysis: Time based bloom filter](http://www.igvita.com/2010/01/06/flow-analysis-time-based-bloom-filters/)
19
+
20
+ ***
21
+
22
+ ## MRI/C API Example
23
+
24
+ MRI/C implementation which creates an in-memory filter which can be saved and reloaded from disk.
25
+
26
+ require 'bloomfilter-rb'
27
+
28
+ bf = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
29
+ bf.insert("test")
30
+ bf.include?("test") # => true
31
+ bf.include?("blah") # => false
32
+
33
+ bf.delete("test")
34
+ bf.include?("test") # => false
35
+
36
+ # Hash with a bloom filter!
37
+ bf["test2"] = "bar"
38
+ bf["test2"] # => true
39
+ bf["test3"] # => false
40
+
41
+ bf.stats
42
+ Number of filter bits (m): 10
43
+ Number of filter elements (n): 2
44
+ Number of filter hashes (k) : 2
45
+ Predicted false positive rate = 10.87%
46
+
47
+ ***
48
+
49
+ ## Redis-backed setbit/getbit bloom filter
50
+
51
+ Uses [getbit](http://redis.io/commands/getbit)/[setbit](http://redis.io/commands/setbit) on Redis strings - efficient, fast, can be shared by multiple/concurrent processes.
52
+
53
+ bf = BloomFilter::Redis.new
54
+
55
+ bf.insert('test')
56
+ bf.include?('test') # => true
57
+ bf.include?('blah') # => false
58
+
59
+ bf.delete('test')
60
+ bf.include?('test') # => false
61
+
62
+ ### Memory footprint
63
+
64
+ - 1.0% error rate for 1M items, 10 bits/item: *2.5 mb*
65
+ - 1.0% error rate for 150M items, 10 bits per item: *358.52 mb*
66
+ - 0.1% error rate for 150M items, 15 bits per item: *537.33 mb*
67
+
68
+ ***
69
+
70
+ ## Redis-backed counting bloom filter with TTLs
71
+ Uses regular Redis get/set counters to implement a counting filter with optional TTL expiry. Because each "bit" requires its own key in Redis, you do incur a much larger memory overhead.
72
+
73
+ bf = BloomFilter::CountingRedis.new(:ttl => 2)
74
+
75
+ bf.insert('test')
76
+ bf.include?('test') # => true
77
+
78
+ sleep(2)
79
+ bf.include?('test') # => false
80
+
81
+ ## Credits
82
+
83
+ Tatsuya Mori <valdzone@gmail.com> (Original C implementation: http://vald.x0.com/sb/)
84
+
85
+ ## License
86
+
87
+ (MIT License) - Copyright (c) 2011 Ilya Grigorik
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require 'rake'
2
+ require 'rspec'
3
+ require 'rspec/core/rake_task'
4
+ require 'rake/extensiontask'
5
+ require 'bundler'
6
+
7
+ Bundler::GemHelper.install_tasks
8
+ RSpec::Core::RakeTask.new(:spec)
9
+ Rake::ExtensionTask.new('cbloomfilter')
@@ -0,0 +1,26 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "bloomfilter/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "bloomfilter-rb"
7
+ s.version = BloomFilter::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["Ilya Grigorik", "Tatsuya Mori"]
10
+ s.email = ["ilya@igvita.com"]
11
+ s.homepage = "http://github.com/igrigorik/bloomfilter"
12
+ s.summary = "Counting Bloom Filter implemented in Ruby"
13
+ s.description = s.summary
14
+ s.rubyforge_project = "bloomfilter-rb"
15
+
16
+ s.add_dependency "redis", ">= 2.1.1"
17
+ s.add_development_dependency "rspec"
18
+ s.add_development_dependency "rake"
19
+
20
+ s.extensions = ["ext/cbloomfilter/extconf.rb"]
21
+
22
+ s.files = `git ls-files`.split("\n")
23
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
24
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
25
+ s.require_paths = ["lib"]
26
+ end
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bloomfilter'
3
+
4
+ bf = BloomFilter::CountingRedis.new(:ttl => 2, :server => {:host => 'localhost'})
5
+
6
+ bf.insert('test')
7
+ puts bf.include?('test')
8
+
9
+ sleep(3)
10
+ puts bf.include?('test')
11
+
12
+ puts bf.stats
@@ -0,0 +1,52 @@
1
+ #
2
+ # Pure ruby implementation of a Bloom filter, just for kicks
3
+ #
4
+
5
+ require 'bitset'
6
+ require 'zlib'
7
+
8
# Pure-Ruby Bloom filter that keeps its bits in a BitSet and derives its
# probe positions from seeded CRC32 checksums.
class BloomFilter

  # max_entries - size of the bitmap (converted with to_i)
  # num_hashes  - number of CRC32 probes computed per key
  # seed        - base value; probe i is seeded with i + seed
  def initialize(max_entries, num_hashes, seed)
    @size       = max_entries.to_i
    @num_hashes = num_hashes
    @seed       = seed
    @bitmap     = BitSet.new(@size)
    # Scratch bitset reused by make_mask on every call.
    @__mask     = BitSet.new(@size)
  end

  # ORs the probe mask of +key+ into the bitmap.
  def insert(key)
    @bitmap |= make_mask(key)
  end

  # True when at least one probe bit of +key+ is not set in the bitmap yet.
  def new?(key)
    probe = make_mask(key)
    (@bitmap & probe) != probe
  end

  # Rebuilds the shared scratch mask for +key+ and returns it. The returned
  # object is overwritten by the next call.
  def make_mask(key)
    @__mask.clear
    @num_hashes.to_i.times do |offset|
      position = Zlib.crc32(key, offset + @seed) % @size
      @__mask.set(position, 1)
    end
    @__mask
  end
end
37
+
38
# Reads lines from ARGF, counts the ones the filter reports as not seen before
# and prints that count.
#
# The membership predicate defined on BloomFilter is `new?`; the previous call
# to `new_entry?` raised NoMethodError on the first input line.
def main
  bf = BloomFilter.new(1000000, 4, 0)
  num = 0
  while (line = ARGF.gets)
    # chomp strips only a trailing line terminator; chop would also eat the
    # last real character of a final line that has no newline.
    data = line.chomp
    next unless bf.new?(data)

    num += 1
    bf.insert(data)
  end
  print "#element = #{num}\n"
end
51
+
52
+ main
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+ require 'bloomfilter-rb'
3
+
4
+ WORDS = %w(duck penguin bear panda)
5
+ TEST = %w(penguin moose racooon)
6
+
7
+ bf = BloomFilter::Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
8
+
9
+ WORDS.each { |w| bf.insert(w) }
10
+ TEST.each do |w|
11
+ puts "#{w}: #{bf.include?(w)}"
12
+ end
13
+
14
+ bf.stats
15
+
16
+ # penguin: true
17
+ # moose: false
18
+ # racooon: false
19
+ #
20
+ # Number of filter buckets (m): 100
21
+ # Number of bits per buckets (b): 1
22
+ # Number of filter elements (n): 4
23
+ # Number of filter hashes (k) : 4
24
+ # Raise on overflow? (r) : false
25
+ # Predicted false positive rate = 0.05%
@@ -0,0 +1,31 @@
1
+ require 'set'
2
+ require 'lib/bloomfilter-rb'
3
+
4
+ items = 1_000_00
5
+ bits = 1
6
+
7
+ # p BloomFilter::Redis.new(:size => items*bits, :hashes => 7) # 2.5 mb
8
+ # p BloomFilter::Redis.new(:size => items*bits*5, :hashes => 7) # 13 mb
9
+ # p BloomFilter::Redis.new(:size => items*bits*30, :hashes => 7) # 73 mb
10
+
11
+ # 1% error rate for 5M items/day, 10 bits per item, for 30 days of data: 358.52 mb
12
+ # 0.1% error rate for 5M items/day, 15 bits per item, for 30 days of data: 537.33 mb
13
+
14
+ bf = BloomFilter::Redis.new(:size => items*bits, :hashes => 7) # 2.5 mb
15
+
16
+ seen = Set.new
17
+ err = 0
18
+ num = 100000
19
+
20
+ num.times do
21
+ item = rand(items)
22
+
23
+ if bf.include?(item) != seen.include?(item)
24
+ err += 1
25
+ end
26
+
27
+ seen << item
28
+ bf.insert(item)
29
+ end
30
+
31
+ p [:error_rate, (err.to_f / num) * 100]
@@ -0,0 +1,359 @@
1
+ /*
2
+ * cbloomfilter.c - simple Bloom Filter
3
+ * (c) Tatsuya Mori <valdzone@gmail.com>
4
+ */
5
+
6
#include <limits.h>

#include "ruby.h"
#include "crc32.h"
8
+
9
+ #if !defined(RSTRING_LEN)
10
+ # define RSTRING_LEN(x) (RSTRING(x)->len)
11
+ # define RSTRING_PTR(x) (RSTRING(x)->ptr)
12
+ #endif
13
+
14
+ static VALUE cBloomFilter;
15
+
16
+ struct BloomFilter {
17
+ int m; /* # of buckets in a bloom filter */
18
+ int b; /* # of bits in a bloom filter bucket */
19
+ int k; /* # of hash functions */
20
+ int s; /* # seed of hash functions */
21
+ int r; /* # raise on bucket overflow? */
22
+ int num_set; /* # of set bits */
23
+ unsigned char *ptr; /* bits data */
24
+ int bytes; /* size of byte data */
25
+ };
26
+
27
+ void bits_free(struct BloomFilter *bf) {
28
+ ruby_xfree(bf->ptr);
29
+ }
30
+
31
+ void bucket_unset(struct BloomFilter *bf, int index) {
32
+ int byte_offset = (index * bf->b) / 8;
33
+ int bit_offset = (index * bf->b) % 8;
34
+ unsigned int c = bf->ptr[byte_offset];
35
+ c += bf->ptr[byte_offset + 1] << 8;
36
+ unsigned int mask = ((1 << bf->b) - 1) << bit_offset;
37
+ if ((c & mask) == 0) {
38
+ // do nothing
39
+ } else {
40
+ bf->ptr[byte_offset] -= (1 << bit_offset) & ((1 << 8) - 1);
41
+ bf->ptr[byte_offset + 1] -= ((1 << bit_offset) & ((1 << 16) - 1)) >> 8;
42
+ }
43
+
44
+ }
45
+
46
+ void bucket_set(struct BloomFilter *bf, int index) {
47
+ int byte_offset = (index * bf->b) / 8;
48
+ int bit_offset = (index * bf->b) % 8;
49
+ unsigned int c = bf->ptr[byte_offset];
50
+ c += bf->ptr[byte_offset + 1] << 8;
51
+ unsigned int mask = ((1 << bf->b) - 1) << bit_offset;
52
+ if ((c & mask) == mask) {
53
+ if (bf->r == 1) rb_raise(rb_eRuntimeError, "bucket got filled up");
54
+ } else {
55
+ bf->ptr[byte_offset] += (1 << bit_offset) & ((1 << 8) - 1);
56
+ bf->ptr[byte_offset + 1] += ((1 << bit_offset) & ((1 << 16) - 1)) >> 8;
57
+ }
58
+ }
59
+
60
+ int bucket_check(struct BloomFilter *bf, int index) {
61
+ int byte_offset = (index * bf->b) / 8;
62
+ int bit_offset = (index * bf->b) % 8;
63
+ unsigned int c = bf->ptr[byte_offset];
64
+ c += bf->ptr[byte_offset + 1] << 8;
65
+
66
+ unsigned int mask = ((1 << bf->b) - 1) << bit_offset;
67
+ return (c & mask) >> bit_offset;
68
+ }
69
+
70
+ int bucket_get(struct BloomFilter *bf, int index) {
71
+ int byte_offset = (index * bf->b) / 8;
72
+ int bit_offset = (index * bf->b) % 8;
73
+ unsigned int c = bf->ptr[byte_offset];
74
+ c += bf->ptr[byte_offset + 1] << 8;
75
+
76
+ unsigned int mask = ((1 << bf->b) - 1) << bit_offset;
77
+ return (c & mask) >> bit_offset;
78
+ }
79
+
80
+ static VALUE bf_s_new(int argc, VALUE *argv, VALUE self) {
81
+ struct BloomFilter *bf;
82
+ VALUE arg1, arg2, arg3, arg4, arg5, obj;
83
+ int m, k, s, b, r, bytes;
84
+
85
+ obj = Data_Make_Struct(self, struct BloomFilter, NULL, bits_free, bf);
86
+
87
+ /* default = Fugou approach :-) */
88
+ arg1 = INT2FIX(100000000);
89
+ arg2 = INT2FIX(4);
90
+ arg3 = INT2FIX(0);
91
+ arg4 = INT2FIX(1);
92
+ arg5 = INT2FIX(0);
93
+
94
+ switch (argc) {
95
+ case 5:
96
+ if (argv[4] == Qtrue) {
97
+ arg5 = INT2FIX(1);
98
+ }
99
+ case 4:
100
+ arg4 = argv[3];
101
+ case 3:
102
+ arg3 = argv[2];
103
+ case 2:
104
+ arg2 = argv[1];
105
+ case 1:
106
+ arg1 = argv[0];
107
+ break;
108
+ }
109
+
110
+ m = FIX2INT(arg1);
111
+ k = FIX2INT(arg2);
112
+ s = FIX2INT(arg3);
113
+ b = FIX2INT(arg4);
114
+ r = FIX2INT(arg5);
115
+
116
+ if (b < 1 || b > 8)
117
+ rb_raise(rb_eArgError, "bucket size");
118
+ if (m < 1)
119
+ rb_raise(rb_eArgError, "array size");
120
+ if (k < 1)
121
+ rb_raise(rb_eArgError, "hash length");
122
+ if (s < 0)
123
+ rb_raise(rb_eArgError, "random seed");
124
+
125
+ bf->b = b;
126
+ bf->m = m;
127
+ bf->k = k;
128
+ bf->s = s;
129
+ bf->r = r;
130
+ bf->num_set = 0;
131
+
132
+ bf->bytes = ((m * b) + 15) / 8;
133
+ bf->ptr = ALLOC_N(unsigned char, bf->bytes);
134
+
135
+ /* initialize the bits with zeros */
136
+ memset(bf->ptr, 0, bf->bytes);
137
+ rb_iv_set(obj, "@hash_value", rb_hash_new());
138
+
139
+ return obj;
140
+ }
141
+
142
+ static VALUE bf_clear(VALUE self) {
143
+ struct BloomFilter *bf;
144
+ Data_Get_Struct(self, struct BloomFilter, bf);
145
+ memset(bf->ptr, 0, bf->bytes);
146
+ return Qtrue;
147
+ }
148
+
149
/* CBloomFilter#m - number of buckets in the filter. */
static VALUE bf_m(VALUE self) {
    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);
    return INT2FIX(bf->m);
}

/* CBloomFilter#k - number of hash functions. */
static VALUE bf_k(VALUE self) {
    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);
    return INT2FIX(bf->k);
}

/* CBloomFilter#b - number of bits per bucket. */
static VALUE bf_b(VALUE self) {
    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);
    return INT2FIX(bf->b);
}

/* CBloomFilter#r - true when the filter raises on bucket overflow. */
static VALUE bf_r(VALUE self) {
    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);
    return bf->r == 0 ? Qfalse : Qtrue;
}

/* CBloomFilter#num_set - value of the struct's num_set counter. */
static VALUE bf_num_set(VALUE self) {
    struct BloomFilter *bf;
    Data_Get_Struct(self, struct BloomFilter, bf);
    return INT2FIX(bf->num_set);
}
178
+
179
+ static VALUE bf_insert(VALUE self, VALUE key) {
180
+ VALUE skey;
181
+ int index, seed;
182
+ int i, len, m, k, s;
183
+ char *ckey;
184
+ struct BloomFilter *bf;
185
+ Data_Get_Struct(self, struct BloomFilter, bf);
186
+
187
+ skey = rb_obj_as_string(key);
188
+ ckey = StringValuePtr(skey);
189
+ len = (int) (RSTRING_LEN(skey)); /* length of the string in bytes */
190
+
191
+ m = bf->m;
192
+ k = bf->k;
193
+ s = bf->s;
194
+
195
+ for (i = 0; i <= k - 1; i++) {
196
+ /* seeds for hash functions */
197
+ seed = i + s;
198
+
199
+ /* hash */
200
+ index = (int) (crc32((unsigned int) (seed), ckey, len) % (unsigned int) (m));
201
+
202
+ /* set a bit at the index */
203
+ bucket_set(bf, index);
204
+ }
205
+
206
+ bf->num_set += 1;
207
+ return Qnil;
208
+ }
209
+
210
+ static VALUE bf_merge(VALUE self, VALUE other) {
211
+ struct BloomFilter *bf, *target;
212
+ Data_Get_Struct(self, struct BloomFilter, bf);
213
+ Data_Get_Struct(other, struct BloomFilter, target);
214
+ int i;
215
+ for (i = 0; i < bf->bytes; i++) {
216
+ bf->ptr[i] |= target->ptr[i];
217
+ }
218
+ return Qnil;
219
+ }
220
+
221
+ static VALUE bf_delete(VALUE self, VALUE key) {
222
+ int index, seed;
223
+ int i, len, m, k, s;
224
+ char *ckey;
225
+ VALUE skey;
226
+ struct BloomFilter *bf;
227
+ Data_Get_Struct(self, struct BloomFilter, bf);
228
+
229
+ skey = rb_obj_as_string(key);
230
+ ckey = StringValuePtr(skey);
231
+ len = (int) (RSTRING_LEN(skey)); /* length of the string in bytes */
232
+
233
+ m = bf->m;
234
+ k = bf->k;
235
+ s = bf->s;
236
+
237
+ for (i = 0; i <= k - 1; i++) {
238
+ /* seeds for hash functions */
239
+ seed = i + s;
240
+
241
+ /* hash */
242
+ index = (int) (crc32((unsigned int) (seed), ckey, len) % (unsigned int) (m));
243
+
244
+ /* set a bit at the index */
245
+ bucket_unset(bf, index);
246
+ }
247
+
248
+ bf->num_set += 1;
249
+ return Qnil;
250
+ }
251
+
252
+
253
+ static VALUE bf_include(int argc, VALUE* argv, VALUE self) {
254
+ int index, seed;
255
+ int i, len, m, k, s, tests_idx, vlen;
256
+ char *ckey;
257
+ VALUE tests, key, skey;
258
+ struct BloomFilter *bf;
259
+
260
+ rb_scan_args(argc, argv, "*", &tests);
261
+
262
+ Data_Get_Struct(self, struct BloomFilter, bf);
263
+ vlen = RARRAY_LEN(tests);
264
+ for(tests_idx = 0; tests_idx < vlen; tests_idx++) {
265
+ key = rb_ary_entry(tests, tests_idx);
266
+ skey = rb_obj_as_string(key);
267
+ ckey = StringValuePtr(skey);
268
+ len = (int) (RSTRING_LEN(skey)); /* length of the string in bytes */
269
+
270
+ m = bf->m;
271
+ k = bf->k;
272
+ s = bf->s;
273
+
274
+ for (i = 0; i <= k - 1; i++) {
275
+ /* seeds for hash functions */
276
+ seed = i + s;
277
+
278
+ /* hash */
279
+ index = (int) (crc32((unsigned int) (seed), ckey, len) % (unsigned int) (m));
280
+
281
+ /* check the bit at the index */
282
+ if (!bucket_check(bf, index)) {
283
+ return Qfalse; /* i.e., it is a new entry ; escape the loop */
284
+ }
285
+ }
286
+
287
+ return Qtrue;
288
+ }
289
+
290
+ }
291
+
292
+ static VALUE bf_to_s(VALUE self) {
293
+ struct BloomFilter *bf;
294
+ unsigned char *ptr;
295
+ int i;
296
+ VALUE str;
297
+
298
+ Data_Get_Struct(self, struct BloomFilter, bf);
299
+ str = rb_str_new(0, bf->m);
300
+
301
+ ptr = (unsigned char *) RSTRING_PTR(str);
302
+ for (i = 0; i < bf->m; i++)
303
+ *ptr++ = bucket_get(bf, i) ? '1' : '0';
304
+
305
+ return str;
306
+ }
307
+
308
+ static VALUE bf_bitmap(VALUE self) {
309
+ struct BloomFilter *bf;
310
+ Data_Get_Struct(self, struct BloomFilter, bf);
311
+
312
+ VALUE str = rb_str_new(0, bf->m);
313
+ unsigned char* ptr = (unsigned char *) RSTRING_PTR(str);
314
+
315
+ int i;
316
+ for (i = 0; i < bf->m; i++)
317
+ *ptr++ = bucket_get(bf, i);
318
+
319
+ return str;
320
+ }
321
+
322
+ static VALUE bf_load(VALUE self, VALUE bitmap) {
323
+ struct BloomFilter *bf;
324
+ Data_Get_Struct(self, struct BloomFilter, bf);
325
+ unsigned char* ptr = (unsigned char *) RSTRING_PTR(bitmap);
326
+
327
+ int i;
328
+ for (i = 0; i < bf->m; i++) {
329
+ if (*ptr++)
330
+ bucket_set(bf, i);
331
+ }
332
+
333
+ return Qnil;
334
+ }
335
+
336
/*
 * Extension entry point: defines the top-level CBloomFilter class and binds
 * its Ruby methods to the C functions in this file.
 */
void Init_cbloomfilter(void) {
    cBloomFilter = rb_define_class("CBloomFilter", rb_cObject);
    /* arity -1: both `new` and `include?` take a variable argument list */
    rb_define_singleton_method(cBloomFilter, "new", bf_s_new, -1);
    rb_define_method(cBloomFilter, "m", bf_m, 0);
    rb_define_method(cBloomFilter, "k", bf_k, 0);
    rb_define_method(cBloomFilter, "b", bf_b, 0);
    rb_define_method(cBloomFilter, "r", bf_r, 0);
    rb_define_method(cBloomFilter, "num_set", bf_num_set, 0);
    rb_define_method(cBloomFilter, "insert", bf_insert, 1);
    rb_define_method(cBloomFilter, "delete", bf_delete, 1);
    rb_define_method(cBloomFilter, "include?", bf_include, -1);
    rb_define_method(cBloomFilter, "clear", bf_clear, 0);
    rb_define_method(cBloomFilter, "merge!", bf_merge, 1);

    rb_define_method(cBloomFilter, "to_s", bf_to_s, 0);
    rb_define_method(cBloomFilter, "bitmap", bf_bitmap, 0);
    rb_define_method(cBloomFilter, "load", bf_load, 1);

    /* functions that have not been implemented, yet */

    // rb_define_method(cBloomFilter, "&", bf_and, 1);
    // rb_define_method(cBloomFilter, "|", bf_or, 1);
    // rb_define_method(cBloomFilter, "<=>", bf_cmp, 1);
}
@@ -0,0 +1,32 @@
1
+ /* simple CRC32 code */
2
+ /*
3
+ * Copyright 2005 Aris Adamantiadis
4
+ *
5
+ * This file is part of the SSH Library
6
+ *
7
+ * The SSH Library is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU Lesser General Public License as published by
9
+ * the Free Software Foundation; either version 2.1 of the License, or (at your
10
+ * option) any later version.
11
+ *
12
+ *
13
+ * The SSH Library is distributed in the hope that it will be useful, but
14
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16
+ * License for more details.
17
+ *
18
+ * You should have received a copy of the GNU Lesser General Public License
19
+ * along with the SSH Library; see the file COPYING. If not, write to
20
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21
+ * MA 02111-1307, USA. */
22
+
23
+ #include "crc32.h"
24
+
25
+ unsigned int crc32(unsigned int crc, char *buf, int len) {
26
+ while (len > 0) {
27
+ crc = crc_table[(crc ^ *buf) & 0xff] ^ (crc >> 8);
28
+ --len;
29
+ ++buf;
30
+ }
31
+ return crc;
32
+ }
@@ -0,0 +1,78 @@
1
+ /* simple CRC32 code */
2
+ /*
3
+ * Copyright 2005 Aris Adamantiadis
4
+ *
5
+ * This file is part of the SSH Library
6
+ *
7
+ * The SSH Library is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU Lesser General Public License as published by
9
+ * the Free Software Foundation; either version 2.1 of the License, or (at your
10
+ * option) any later version.
11
+ *
12
+ *
13
+ * The SSH Library is distributed in the hope that it will be useful, but
14
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16
+ * License for more details.
17
+ *
18
+ * You should have received a copy of the GNU Lesser General Public License
19
+ * along with the SSH Library; see the file COPYING. If not, write to
20
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
21
+ * MA 02111-1307, USA. */
22
+
23
+ static unsigned int crc_table[] = {
24
+ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
25
+ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
26
+ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
27
+ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
28
+ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
29
+ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
30
+ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
31
+ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
32
+ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
33
+ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
34
+ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
35
+ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
36
+ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
37
+ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
38
+ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
39
+ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
40
+ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
41
+ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
42
+ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
43
+ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
44
+ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
45
+ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
46
+ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
47
+ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
48
+ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
49
+ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
50
+ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
51
+ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
52
+ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
53
+ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
54
+ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
55
+ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
56
+ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
57
+ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
58
+ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
59
+ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
60
+ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
61
+ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
62
+ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
63
+ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
64
+ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
65
+ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
66
+ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
67
+ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
68
+ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
69
+ 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
70
+ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
71
+ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
72
+ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
73
+ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
74
+ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
75
+ 0x2d02ef8dUL
76
+ };
77
+
78
+ unsigned int crc32(unsigned int crc, char *buf, int len);
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require "mkmf"
3
+
4
+ create_makefile("cbloomfilter")
@@ -0,0 +1,9 @@
1
+ require 'redis'
2
+ require 'zlib'
3
+
4
+ require 'cbloomfilter'
5
+ require 'bloomfilter/filter'
6
+ require 'bloomfilter/native'
7
+ require 'bloomfilter/counting_redis'
8
+ require 'bloomfilter/redis'
9
+ require 'bloomfilter/version'
@@ -0,0 +1,61 @@
1
module BloomFilter
  # Counting Bloom filter that stores every bucket as its own Redis counter
  # key named "rbloom:<index>", with optional per-insert TTL expiry.
  #
  # Options: :size (number of buckets), :hashes (probes per key), :seed,
  # :bucket, :ttl (seconds, or false for no expiry) and :server (hash passed
  # to Redis.new).
  class CountingRedis < Filter

    def initialize(opts = {})
      @opts = {
        :size    => 100,
        :hashes  => 4,
        :seed    => Time.now.to_i,
        :bucket  => 3,
        :ttl     => false,
        :server  => {}
      }.merge opts
      @db = ::Redis.new(@opts[:server])
    end

    # Increments every bucket of +key+. When +ttl+ (or the :ttl option) is
    # set, each touched bucket key is given that expiry.
    def insert(key, ttl=nil)
      ttl = @opts[:ttl] if ttl.nil?

      indexes_for(key).each do |idx|
        @db.incr idx
        @db.expire(idx, ttl) if ttl
      end
    end
    alias :[]= :insert

    # Decrements every bucket of +key+ and removes counters that reach zero
    # (or below), so that include? sees them as empty.
    def delete(key)
      indexes_for(key).each do |idx|
        @db.del(idx) if @db.decr(idx).to_i <= 0
      end
    end

    # True when every bucket of every given key currently exists in Redis.
    def include?(*keys)
      indexes = keys.map { |key| indexes_for(key) }.flatten
      !@db.mget(*indexes).include?(nil)
    end
    alias :key? :include?

    # Number of bucket keys currently present in Redis.
    def num_set
      @db.keys("rbloom:*").size
    end
    alias :size :num_set

    # Removes this filter's bucket keys only. The previous implementation ran
    # FLUSHDB, which also destroyed every unrelated key in the same Redis
    # database.
    def clear
      bucket_keys = @db.keys("rbloom:*")
      @db.del(*bucket_keys) unless bucket_keys.empty?
    end

    private

    # compute index offsets for provided key
    def indexes_for(key)
      indexes = []
      @opts[:hashes].times do |i|
        indexes.push "rbloom:" + (Zlib.crc32("#{key}:#{i+@opts[:seed]}") % @opts[:size]).to_s
      end

      indexes
    end
  end
end
@@ -0,0 +1,13 @@
1
module BloomFilter
  # Base class shared by the filter implementations. Subclasses are expected
  # to set @opts and to respond to #size.
  class Filter
    # Prints the filter parameters and the predicted false positive rate,
    # (1 - e^(-k*n/m))^k, to standard output. Returns nil.
    #
    # Each line is formatted exactly once. The earlier version fed the already
    # formatted text to printf as a second format string and relied on an
    # incomplete trailing "%" specifier for the percent sign.
    def stats
      fp = ((1.0 - Math.exp(-(@opts[:hashes] * size).to_f / @opts[:size])) ** @opts[:hashes]) * 100
      puts format("Number of filter buckets (m): %d", @opts[:size])
      puts format("Number of bits per buckets (b): %d", @opts[:bucket])
      puts format("Number of filter elements (n): %d", size)
      puts format("Number of filter hashes (k) : %d", @opts[:hashes])
      puts format("Raise on overflow? (r) : %s", @opts[:raise].to_s)
      puts format("Predicted false positive rate = %.2f%%", fp)
    end
  end
end
@@ -0,0 +1,65 @@
1
module BloomFilter
  # In-memory counting Bloom filter backed by the CBloomFilter C extension.
  #
  # Options: :size (buckets), :hashes, :seed, :bucket (bits per bucket) and
  # :raise (raise on bucket overflow).
  class Native < Filter
    attr_reader :bf

    def initialize(opts = {})
      @opts = {
        :size    => 100,
        :hashes  => 4,
        :seed    => Time.now.to_i,
        :bucket  => 3,
        :raise   => false
      }.merge(opts)

      # arg 1: m => size : number of buckets in a bloom filter
      # arg 2: k => hashes : number of hash functions
      # arg 3: s => seed : seed of hash functions
      # arg 4: b => bucket : number of bits in a bloom filter bucket
      # arg 5: r => raise : raise on bucket overflow?

      @bf = CBloomFilter.new(@opts[:size], @opts[:hashes], @opts[:seed], @opts[:bucket], @opts[:raise])
    end

    def insert(key)
      @bf.insert(key)
    end

    # Hash-style insertion: bf[key] = anything. The assigned value is ignored.
    # This used to be a plain alias of the one-argument #insert, so every
    # bf[key] = value call raised ArgumentError.
    def []=(key, _value)
      insert(key)
    end

    def include?(*keys)
      @bf.include?(*keys)
    end
    alias :key? :include?
    alias :[] :include?

    def delete(key); @bf.delete(key); end
    def clear; @bf.clear; end
    def size; @bf.num_set; end
    def merge!(o); @bf.merge!(o.bf); end

    def bitmap
      @bf.bitmap
    end

    # Rebuilds the filter from [opts, bitmap] as produced by #marshal_dump.
    # Re-running initialize restores @opts and a real CBloomFilter in @bf;
    # the old code left @opts unset and stored a nested Native in @bf, which
    # broke #size and #stats on loaded filters.
    def marshal_load(ary)
      opts, bitmap = *ary

      initialize(opts)
      @bf.load(bitmap) unless bitmap.nil?
    end

    def marshal_dump
      [@opts, @bf.bitmap]
    end

    # Loads a filter previously written with #save.
    # NOTE(review): Marshal.load can instantiate arbitrary objects; only use
    # this on files from a trusted source.
    def self.load(filename)
      # Block form closes the handle; binary mode keeps the dump byte-exact.
      File.open(filename, 'rb') { |f| Marshal.load(f) }
    end

    # Writes the marshalled filter to +filename+ in binary mode.
    def save(filename)
      File.open(filename, 'wb') do |f|
        f << Marshal.dump(self)
      end
    end

  end
end
@@ -0,0 +1,69 @@
1
module BloomFilter
  # Non-counting Bloom filter kept in a single Redis string and addressed with
  # SETBIT/GETBIT.
  #
  # Options: :size (number of bits), :hashes (probes per key), :seed,
  # :namespace (the Redis key holding the bits), :eager and :server (hash
  # passed to Redis.new).
  class Redis < Filter

    def initialize(opts = {})
      @opts = {
        :size      => 100,
        :hashes    => 4,
        :seed      => Time.now.to_i,
        :namespace => 'redis',
        :eager     => true,
        :server    => {}
      }.merge opts
      @db = ::Redis.new(@opts[:server])

      if @opts[:eager]
        # allocate the memory immediately
        @db.setbit @opts[:namespace], @opts[:size], 1
        @db.setbit @opts[:namespace], @opts[:size], 0
      end
    end

    # Sets every probe bit of +key+. +ttl+ is accepted but not used here.
    def insert(key, ttl=nil)
      indexes_for(key) { |idx| @db.setbit @opts[:namespace], idx, 1 }
    end
    alias :[]= :insert

    # True when every probe bit of every given key is set.
    def include?(*keys)
      keys.each do |key|
        indexes_for(key) do |idx|
          return false if @db.getbit(@opts[:namespace], idx).zero?
        end
      end

      true
    end
    alias :key? :include?

    # Clears every probe bit of +key+.
    def delete(key)
      indexes_for(key) do |idx|
        @db.setbit @opts[:namespace], idx, 0
      end
    end

    # Removes the bit string so that every bit reads as 0. The previous
    # `set namespace, 0` stored the one-character string "0" (byte 0x30),
    # which left bits 2 and 3 set after a "clear".
    def clear
      @db.del @opts[:namespace]
    end

    # Length in bytes of the Redis string that holds the bits.
    def num_set
      @db.strlen @opts[:namespace]
    end
    alias :size :num_set

    def stats
      puts format("Number of filter buckets (m): %d", @opts[:size])
      puts format("Number of filter hashes (k) : %d", @opts[:hashes])
    end

    private

    # yields the index offset of each probe for the provided key
    def indexes_for(key)
      @opts[:hashes].times do |i|
        yield Zlib.crc32("#{key}:#{i+@opts[:seed]}") % @opts[:size]
      end
    end

  end
end
@@ -0,0 +1,3 @@
1
+ module BloomFilter
2
+ VERSION = "2.0.0"
3
+ end
@@ -0,0 +1,52 @@
1
+ require 'helper'
2
+
3
+ describe BloomFilter::CountingRedis do
4
+ include BloomFilter
5
+
6
+ context "use Redis for storage" do
7
+ it "should store data in Redis" do
8
+ bf = CountingRedis.new
9
+
10
+ bf.insert(:abcd)
11
+ bf.insert('test')
12
+ bf.include?('test').should be_true
13
+ bf.key?('test').should be_true
14
+
15
+ bf.include?('test', 'test2').should be_false
16
+ bf.include?('test', 'abcd').should be_true
17
+ end
18
+
19
+ it "should accept a TTL value for a key" do
20
+ bf = CountingRedis.new(:ttl => 1)
21
+
22
+ bf.insert('test')
23
+ bf.include?('test').should be_true
24
+
25
+ sleep(2)
26
+ bf.include?('test').should be_false
27
+ end
28
+
29
+ it "should delete keys from Redis" do
30
+ bf = CountingRedis.new
31
+
32
+ bf.insert('test')
33
+ bf.include?('test').should be_true
34
+
35
+ bf.delete('test')
36
+ bf.include?('test').should be_false
37
+ end
38
+
39
+ it "should output current stats" do
40
+ bf = CountingRedis.new
41
+ bf.clear
42
+
43
+ bf.insert('test')
44
+ bf.size.should == 4
45
+ lambda { bf.stats }.should_not raise_error
46
+ end
47
+
48
+ it "should connect to remote redis server" do
49
+ lambda { CountingRedis.new }.should_not raise_error
50
+ end
51
+ end
52
+ end
data/spec/helper.rb ADDED
@@ -0,0 +1,2 @@
1
+ require 'bundler/setup'
2
+ require 'bloomfilter-rb'
@@ -0,0 +1,79 @@
1
+ require 'helper'
2
+
3
# Examples for the native counting bloom filter.
describe BloomFilter::Native do
  include BloomFilter

  it "should clear" do
    bf = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
    bf.insert("test")
    bf.include?("test").should be_true
    bf.clear
    bf.include?("test").should be_false
  end

  it "should merge" do
    bf1 = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
    bf2 = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
    bf2.insert("test")
    bf1.include?("test").should be_false
    bf1.merge!(bf2)
    # Both the merge target and the merge source must report the key.
    bf1.include?("test").should be_true
    bf2.include?("test").should be_true
  end

  context "behave like a bloomfilter" do
    it "should test set membership" do
      bf = Native.new(:size => 100, :hashes => 2, :seed => 1, :bucket => 3, :raise => false)
      bf.insert("test")
      bf.insert("test1")

      bf.include?("test").should be_true
      bf.include?("abcd").should be_false
      bf.include?("test", "test1").should be_true
    end

    it "should work with any object's to_s" do
      bf = Native.new
      bf.insert(:test)
      bf.insert(:test1)
      bf.insert(12345)

      # Keys inserted as symbols and integers are looked up as strings.
      bf.include?("test").should be_true
      bf.include?("abcd").should be_false
      bf.include?("test", "test1", '12345').should be_true
    end
  end

  context "behave like counting bloom filter" do
    it "should delete / decrement keys" do
      bf = Native.new

      bf.insert("test")
      bf.include?("test").should be_true

      bf.delete("test")
      bf.include?("test").should be_false
    end
  end

  context "serialize" do
    # Remove the dump file only when it exists, so that a failed save is
    # reported as itself rather than being followed by an ENOENT error
    # raised from this hook.
    after(:each) { File.unlink('bf.out') if File.exist?('bf.out') }

    it "should marshall the bloomfilter" do
      bf = Native.new
      lambda { bf.save('bf.out') }.should_not raise_error
    end

    it "should load marshalled bloomfilter" do
      bf = Native.new
      bf.insert('foo')
      bf.insert('bar')
      bf.save('bf.out')

      bf = Native.load('bf.out')
      bf.include?('foo').should be_true
      bf.include?('bar').should be_true
      bf.include?('baz').should be_false
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require 'helper'
2
+
3
# Examples for the Redis bitstring-backed filter.
# NOTE(review): these presumably need a reachable Redis server - confirm.
describe BloomFilter::Redis do
  include BloomFilter

  context "use Redis bitstring for storage" do
    # Filter built with default options; RSpec builds it once per example.
    let(:bf) { Redis.new }

    it "should store data in Redis" do
      [:abcd, 'test'].each { |key| bf.insert(key) }

      bf.include?('test').should be_true
      bf.key?('test').should be_true

      # The first expectation includes a key that was never inserted; the
      # second looks up 'abcd', which was inserted as the symbol :abcd.
      bf.include?('test', 'test2').should be_false
      bf.include?('test', 'abcd').should be_true
    end

    it "should delete keys from Redis" do
      bf.insert('test')
      bf.include?('test').should be_true

      bf.delete('test')
      bf.include?('test').should be_false
    end

    it "should clear Redis filter" do
      bf.insert('test')
      bf.include?('test').should be_true

      bf.clear
      bf.include?('test').should be_false
    end

    it "should output current stats" do
      bf.clear
      bf.insert('test')
      lambda { bf.stats }.should_not raise_error
    end

    it "should connect to remote redis server" do
      lambda { Redis.new }.should_not raise_error
    end

    it "should allow namespaced BloomFilters" do
      first = Redis.new(:namespace => :a)
      second = Redis.new(:namespace => :b)

      first.insert('test')
      # Only the filter that received the insert may report the key.
      first.include?('test').should be_true
      second.include?('test').should be_false
    end
  end
end
metadata ADDED
@@ -0,0 +1,133 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bloomfilter-rb
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 2
7
+ - 0
8
+ - 0
9
+ version: 2.0.0
10
+ platform: ruby
11
+ authors:
12
+ - Ilya Grigorik
13
+ - Tatsuya Mori
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-01-05 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: redis
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ segments:
30
+ - 2
31
+ - 1
32
+ - 1
33
+ version: 2.1.1
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ - !ruby/object:Gem::Dependency
37
+ name: rspec
38
+ prerelease: false
39
+ requirement: &id002 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :development
48
+ version_requirements: *id002
49
+ - !ruby/object:Gem::Dependency
50
+ name: rake
51
+ prerelease: false
52
+ requirement: &id003 !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ type: :development
61
+ version_requirements: *id003
62
+ description: Counting Bloom Filter implemented in Ruby
63
+ email:
64
+ - ilya@igvita.com
65
+ executables: []
66
+
67
+ extensions:
68
+ - ext/cbloomfilter/extconf.rb
69
+ extra_rdoc_files: []
70
+
71
+ files:
72
+ - .gitignore
73
+ - .rspec
74
+ - Gemfile
75
+ - Gemfile.lock
76
+ - README.md
77
+ - Rakefile
78
+ - bloomfilter-rb.gemspec
79
+ - examples/counting-redis.rb
80
+ - examples/pure-ruby-bf.rb
81
+ - examples/simple-native.rb
82
+ - examples/simple-redis.rb
83
+ - ext/cbloomfilter/cbloomfilter.c
84
+ - ext/cbloomfilter/crc32.c
85
+ - ext/cbloomfilter/crc32.h
86
+ - ext/cbloomfilter/extconf.rb
87
+ - lib/bloomfilter-rb.rb
88
+ - lib/bloomfilter/counting_redis.rb
89
+ - lib/bloomfilter/filter.rb
90
+ - lib/bloomfilter/native.rb
91
+ - lib/bloomfilter/redis.rb
92
+ - lib/bloomfilter/version.rb
93
+ - spec/counting_redis_spec.rb
94
+ - spec/helper.rb
95
+ - spec/native_spec.rb
96
+ - spec/redis_spec.rb
97
+ has_rdoc: true
98
+ homepage: http://github.com/igrigorik/bloomfilter
99
+ licenses: []
100
+
101
+ post_install_message:
102
+ rdoc_options: []
103
+
104
+ require_paths:
105
+ - lib
106
+ required_ruby_version: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ">="
110
+ - !ruby/object:Gem::Version
111
+ segments:
112
+ - 0
113
+ version: "0"
114
+ required_rubygems_version: !ruby/object:Gem::Requirement
115
+ none: false
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ segments:
120
+ - 0
121
+ version: "0"
122
+ requirements: []
123
+
124
+ rubyforge_project: bloomfilter-rb
125
+ rubygems_version: 1.3.7
126
+ signing_key:
127
+ specification_version: 3
128
+ summary: Counting Bloom Filter implemented in Ruby
129
+ test_files:
130
+ - spec/counting_redis_spec.rb
131
+ - spec/helper.rb
132
+ - spec/native_spec.rb
133
+ - spec/redis_spec.rb