bloom_fit 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5f1a3e06592409a17a287562a4f51910e4c103cc7e7a95fa18b051e684ec2f72
4
- data.tar.gz: 85dab6561d4626f1ece379cbdd63678befac1e19d8f3cc403a270a13da5ca049
3
+ metadata.gz: efa22c92049e3607485a8fcfe471b15cca6e85e6da0c7b19b65f74b9f6ad5fe9
4
+ data.tar.gz: 5e8432456b1258111671d536165217bc3e82e0e430c3bc63112abc4670f91e78
5
5
  SHA512:
6
- metadata.gz: 2553c5c3ce8bff634d2d2f79bc58d9d05ee96426e681c3bdb3295c762fc01c9b2de0fe5aa97cc6755037a7c8a80cc4d367c31b6457e8b087489d36797ad1a598
7
- data.tar.gz: 803c578af7494501775e52bb4db1ea46b5311c7280d20aaa632185d486dcd35ce60390a48d3ca4af2cb6adec4b9cbcc7aa7c922fcc5d9e58b0d7cdeb60a12fa7
6
+ metadata.gz: 72738a57ccb3a1a8989e86993490c3ba6a4f90925c834c1acd70ba104df8ef2bb318d5c66830786ba662e88df09f9ce46d7184810e3d4ec1c6b4cc0b41fcec44
7
+ data.tar.gz: 7472e370d1a66a6034ecb2f0d4720b9edd12f21e181a37cae2869e0e34c70a829366e7ee6caf880f4c4d8c789bc18bdbe2f83e6699617ccc77460320f2a2a1af
data/README.md CHANGED
@@ -4,7 +4,12 @@
4
4
  [![CI](https://github.com/rmm5t/bloom_fit/actions/workflows/ci.yml/badge.svg)](https://github.com/rmm5t/bloom_fit/actions/workflows/ci.yml)
5
5
  [![Gem Downloads](https://img.shields.io/gem/dt/bloom_fit.svg)](https://rubygems.org/gems/bloom_fit)
6
6
 
7
- BloomFit provides a MRI/C-based non-counting bloom filter for use in your Ruby projects. It is heavily based on [bloomfilter-rb]'s native implementation, but provides a better hashing distribution by using DJB2 over CRC32, avoids the need to supply a seed, removes counting abilities, improves performance for very large datasets, and will automatically calculate the bit size (m) and the number of hashes (k) when given a capacity and false-positive-rate.
7
+ BloomFit provides an MRI/C-based non-counting bloom filter for use in your Ruby projects. It is heavily based on [bloomfilter-rb]'s native implementation, but differs in the following ways:
8
+
9
+ - uses DJB2 over CRC32 yielding better hash distribution
10
+ - improves performance for very large datasets
11
+ - avoids the need to supply a seed
12
+ - automatically calculates the bit size (m) and the number of hashes (k) when given a capacity and false-positive-rate
8
13
 
9
14
  A [Bloom filter](http://en.wikipedia.org/wiki/Bloom_filter) is a space-efficient probabilistic data structure that is used to test whether an element is a member of a set. False positives are possible, but false negatives are not. Instead of using k different hash functions, this implementation uses a DJB2 hash with k seeds from the CRC table.
10
15
 
@@ -13,8 +18,6 @@ Performance of the Bloom filter depends on the following:
13
18
  - size of the bit array
14
19
  - number of hash functions
15
20
 
16
- BloomFit is a fork of [bloomfilter-rb].
17
-
18
21
  ## Resources
19
22
 
20
23
  - Background: [Bloom filter](http://en.wikipedia.org/wiki/Bloom_filter)
@@ -25,7 +28,7 @@ BloomFit is a fork of [bloomfilter-rb].
25
28
 
26
29
  MRI/C implementation which creates an in-memory filter which can be saved and reloaded from disk.
27
30
 
28
- (COMING SOON) If you'd like to specify an expected item count and a false-positive rate that you can tolerate:
31
+ (COMING SOON) If you'd like to specify an expected item count and a false-positive rate that you can tolerate (visit the [Bloom Filter Calculator](https://hur.st/bloomfilter/) to learn more):
29
32
 
30
33
  ```ruby
31
34
  require "bloom_fit"
@@ -40,11 +43,11 @@ bf["bird"] = "bar"
40
43
  bf["bird"] # => true
41
44
  bf["mouse"] # => false
42
45
 
43
- bf.stats
44
- # => Number of filter bits (m): 3600
45
- # => Number of set bits (n): 20
46
- # => Number of filter hashes (k) : 10
47
- # => Predicted false positive rate = 0.00%
46
+ puts bf.stats
47
+ # Number of filter bits (m): 3600
48
+ # Number of set bits (n): 20
49
+ # Number of filter hashes (k) : 10
50
+ # Predicted false positive rate = 0.00%
48
51
  ```
49
52
 
50
53
  If you'd like more control over the traditional inputs like bit size and the number of hashes:
@@ -62,11 +65,11 @@ bf["bird"] = "bar"
62
65
  bf["bird"] # => true
63
66
  bf["mouse"] # => false
64
67
 
65
- bf.stats
66
- # => Number of filter bits (m): 100
67
- # => Number of set bits (n): 4
68
- # => Number of filter hashes (k) : 2
69
- # => Predicted false positive rate = 10.87%
68
+ puts bf.stats
69
+ # Number of filter bits (m): 100
70
+ # Number of set bits (n): 4
71
+ # Number of filter hashes (k) : 2
72
+ # Predicted false positive rate = 10.87%
70
73
  ```
71
74
 
72
75
  ## Credits
@@ -11,110 +11,99 @@
11
11
  # define RSTRING_PTR(x) (RSTRING(x)->ptr)
12
12
  #endif
13
13
 
14
- /* Reuse the standard CRC table for consistent seeds */
15
- static unsigned int *seeds = crc_table;
14
+ /* Reuse the standard CRC table for consistent salts */
15
+ static unsigned int *salts = crc_table;
16
16
 
17
17
  static VALUE cBloomFilter;
18
18
 
19
19
  struct BloomFilter {
20
- int m; /* # of buckets in a bloom filter */
21
- int b; /* # of bits in a bloom filter bucket */
20
+ int m; /* # of bits in a bloom filter */
22
21
  int k; /* # of hash functions */
23
- int r; /* # raise on bucket overflow? */
24
22
  unsigned char *ptr; /* bits data */
25
23
  int bytes; /* size of byte data */
26
24
  };
27
25
 
28
- unsigned long djb2(unsigned char *str, int len) {
26
+ unsigned long djb2(const char *str, int len) {
29
27
  unsigned long hash = 5381;
30
- unsigned char *c;
31
- c = (unsigned char *) str;
32
- while (len > 0) {
33
- hash = ((hash << 5) ^ hash) ^ (*c);
34
- --len;
35
- ++c;
28
+ for (int i = 0; i < len; i++) {
29
+ hash = ((hash << 5) + hash) + str[i];
36
30
  }
37
31
  return hash;
38
32
  }
39
33
 
40
- void bits_free(struct BloomFilter *bf) {
34
+ static void bf_free(void *ptr) {
35
+ struct BloomFilter *bf = ptr;
36
+
37
+ if (bf == NULL) {
38
+ return;
39
+ }
40
+
41
41
  ruby_xfree(bf->ptr);
42
+ ruby_xfree(bf);
42
43
  }
43
44
 
44
- void bucket_unset(struct BloomFilter *bf, int index) {
45
- int byte_offset = (index * bf->b) / 8;
46
- int bit_offset = (index * bf->b) % 8;
47
- unsigned int c = bf->ptr[byte_offset];
48
- c += bf->ptr[byte_offset + 1] << 8;
49
- unsigned int mask = ((1 << bf->b) - 1) << bit_offset;
50
- if ((c & mask) == 0) {
51
- // do nothing
52
- } else {
53
- // reduce the counter: 11 00 => 10 00 (suppose bf->b is 2)
54
- c -= (1 << bit_offset) & ((1 << 8) -1);
55
- // shift the bitmap right by 1 bit: 10 00 => 01 00
56
- c = (~mask & c) | ((c & mask) >> (bit_offset + 1) << bit_offset);
57
-
58
- bf->ptr[byte_offset] = c & ((1 << 8) - 1);
59
- bf->ptr[byte_offset + 1] = (c & ((1 << 16) - 1)) >> 8;
45
+ static size_t bf_memsize(const void *ptr) {
46
+ const struct BloomFilter *bf = ptr;
47
+
48
+ if (bf == NULL) {
49
+ return 0;
60
50
  }
51
+
52
+ return sizeof(*bf) + (bf->ptr == NULL ? 0 : (size_t) bf->bytes);
61
53
  }
62
54
 
63
- void bucket_set(struct BloomFilter *bf, int index) {
64
- int byte_offset = (index * bf->b) / 8;
65
- int bit_offset = (index * bf->b) % 8;
66
- unsigned int c = bf->ptr[byte_offset];
67
- c += bf->ptr[byte_offset + 1] << 8;
68
- unsigned int mask = ((1 << bf->b) - 1) << bit_offset;
69
- if ((c & mask) == mask) {
70
- if (bf->r == 1) rb_raise(rb_eRuntimeError, "bucket got filled up");
71
- } else {
72
- c = c + ((1 << bit_offset) & ((1 << 8) -1)) | c;
73
- bf->ptr[byte_offset] = c & ((1 << 8) - 1);
74
- bf->ptr[byte_offset + 1] = (c & ((1 << 16) - 1)) >> 8;
75
- }
55
+ static const rb_data_type_t bf_type = {
56
+ "CBloomFilter",
57
+ {0, bf_free, bf_memsize,},
58
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY,
59
+ };
60
+
61
+ static struct BloomFilter *bf_ptr(VALUE obj) {
62
+ struct BloomFilter *bf;
63
+
64
+ TypedData_Get_Struct(obj, struct BloomFilter, &bf_type, bf);
65
+
66
+ return bf;
67
+ }
68
+
69
+ static VALUE bf_alloc(VALUE klass) {
70
+ struct BloomFilter *bf;
71
+ VALUE obj = TypedData_Make_Struct(klass, struct BloomFilter, &bf_type, bf);
72
+
73
+ bf->m = 0;
74
+ bf->k = 0;
75
+ bf->ptr = NULL;
76
+ bf->bytes = 0;
77
+
78
+ return obj;
76
79
  }
77
80
 
78
- int bucket_check(struct BloomFilter *bf, int index) {
79
- int byte_offset = (index * bf->b) / 8;
80
- int bit_offset = (index * bf->b) % 8;
81
- unsigned int c = bf->ptr[byte_offset];
82
- c += bf->ptr[byte_offset + 1] << 8;
81
+ static void bucket_set(struct BloomFilter *bf, int index) {
82
+ int byte_offset = index / 8;
83
+ int bit_offset = index % 8;
83
84
 
84
- unsigned int mask = ((1 << bf->b) - 1) << bit_offset;
85
- return (c & mask) >> bit_offset;
85
+ bf->ptr[byte_offset] |= (unsigned char) (1U << bit_offset);
86
86
  }
87
87
 
88
- int bucket_get(struct BloomFilter *bf, int index) {
89
- int byte_offset = (index * bf->b) / 8;
90
- int bit_offset = (index * bf->b) % 8;
91
- unsigned int c = bf->ptr[byte_offset];
92
- c += bf->ptr[byte_offset + 1] << 8;
88
+ static int bucket_check(struct BloomFilter *bf, int index) {
89
+ int byte_offset = index / 8;
90
+ int bit_offset = index % 8;
93
91
 
94
- unsigned int mask = ((1 << bf->b) - 1) << bit_offset;
95
- return (c & mask) >> bit_offset;
92
+ return (bf->ptr[byte_offset] >> bit_offset) & 1;
96
93
  }
97
94
 
98
- static VALUE bf_s_new(int argc, VALUE *argv, VALUE self) {
95
+ static VALUE bf_initialize(int argc, VALUE *argv, VALUE self) {
99
96
  struct BloomFilter *bf;
100
- VALUE arg1, arg2, arg3, arg4, obj;
101
- int m, k, b, r;
97
+ VALUE arg1, arg2;
98
+ int m, k;
102
99
 
103
- obj = Data_Make_Struct(self, struct BloomFilter, NULL, bits_free, bf);
100
+ bf = bf_ptr(self);
104
101
 
105
102
  /* default = Fugou approach :-) */
106
- arg1 = INT2FIX(100000000);
103
+ arg1 = INT2FIX(1000);
107
104
  arg2 = INT2FIX(4);
108
- arg3 = INT2FIX(1);
109
- arg4 = INT2FIX(0);
110
105
 
111
106
  switch (argc) {
112
- case 4:
113
- if (argv[3] == Qtrue) {
114
- arg4 = INT2FIX(1);
115
- }
116
- case 3:
117
- arg3 = argv[2];
118
107
  case 2:
119
108
  arg2 = argv[1];
120
109
  case 1:
@@ -124,66 +113,48 @@ static VALUE bf_s_new(int argc, VALUE *argv, VALUE self) {
124
113
 
125
114
  m = FIX2INT(arg1);
126
115
  k = FIX2INT(arg2);
127
- b = FIX2INT(arg3);
128
- r = FIX2INT(arg4);
129
116
 
130
- if (b < 1 || b > 8)
131
- rb_raise(rb_eArgError, "bucket size");
132
117
  if (m < 1)
133
118
  rb_raise(rb_eArgError, "array size");
134
119
  if (k < 1)
135
120
  rb_raise(rb_eArgError, "hash length");
136
121
 
137
- bf->b = b;
138
122
  bf->m = m;
139
123
  bf->k = k;
140
- bf->r = r;
141
124
 
142
- bf->bytes = ((m * b) + 15) / 8;
125
+ ruby_xfree(bf->ptr);
126
+ bf->ptr = NULL;
127
+ bf->bytes = 0;
128
+ /* Preserve the existing serialized bitmap length, including one padding byte. */
129
+ bf->bytes = (m + 15) / 8;
143
130
  bf->ptr = ALLOC_N(unsigned char, bf->bytes);
144
131
 
145
132
  /* initialize the bits with zeros */
146
133
  memset(bf->ptr, 0, bf->bytes);
147
- rb_iv_set(obj, "@hash_value", rb_hash_new());
134
+ rb_iv_set(self, "@hash_value", rb_hash_new());
148
135
 
149
- return obj;
136
+ return self;
150
137
  }
151
138
 
152
139
  static VALUE bf_clear(VALUE self) {
153
- struct BloomFilter *bf;
154
- Data_Get_Struct(self, struct BloomFilter, bf);
140
+ struct BloomFilter *bf = bf_ptr(self);
155
141
  memset(bf->ptr, 0, bf->bytes);
156
142
  return Qtrue;
157
143
  }
158
144
 
159
145
  static VALUE bf_m(VALUE self) {
160
- struct BloomFilter *bf;
161
- Data_Get_Struct(self, struct BloomFilter, bf);
146
+ struct BloomFilter *bf = bf_ptr(self);
162
147
  return INT2FIX(bf->m);
163
148
  }
164
149
 
165
150
  static VALUE bf_k(VALUE self) {
166
- struct BloomFilter *bf;
167
- Data_Get_Struct(self, struct BloomFilter, bf);
151
+ struct BloomFilter *bf = bf_ptr(self);
168
152
  return INT2FIX(bf->k);
169
153
  }
170
154
 
171
- static VALUE bf_b(VALUE self) {
172
- struct BloomFilter *bf;
173
- Data_Get_Struct(self, struct BloomFilter, bf);
174
- return INT2FIX(bf->b);
175
- }
176
-
177
- static VALUE bf_r(VALUE self) {
178
- struct BloomFilter *bf;
179
- Data_Get_Struct(self, struct BloomFilter, bf);
180
- return bf->r == 0 ? Qfalse : Qtrue;
181
- }
182
-
183
155
  static VALUE bf_set_bits(VALUE self){
184
- struct BloomFilter *bf;
156
+ struct BloomFilter *bf = bf_ptr(self);
185
157
  int i,j,count = 0;
186
- Data_Get_Struct(self, struct BloomFilter, bf);
187
158
  for (i = 0; i < bf->bytes; i++) {
188
159
  for (j = 0; j < 8; j++) {
189
160
  count += (bf->ptr[i] >> j) & 1;
@@ -192,13 +163,13 @@ static VALUE bf_set_bits(VALUE self){
192
163
  return INT2FIX(count);
193
164
  }
194
165
 
195
- static VALUE bf_insert(VALUE self, VALUE key) {
166
+ static VALUE bf_add(VALUE self, VALUE key) {
196
167
  VALUE skey;
197
- unsigned long hash, index;
168
+ unsigned long hash;
169
+ int index;
198
170
  int i, len, m, k;
199
171
  char *ckey;
200
- struct BloomFilter *bf;
201
- Data_Get_Struct(self, struct BloomFilter, bf);
172
+ struct BloomFilter *bf = bf_ptr(self);
202
173
 
203
174
  skey = rb_obj_as_string(key);
204
175
  ckey = StringValuePtr(skey);
@@ -209,7 +180,7 @@ static VALUE bf_insert(VALUE self, VALUE key) {
209
180
 
210
181
  hash = (unsigned long) djb2(ckey, len);
211
182
  for (i = 0; i <= k - 1; i++) {
212
- index = (unsigned long) (hash ^ seeds[i]) % (unsigned int) (m);
183
+ index = (int) ((hash ^ salts[i]) % (unsigned int) (m));
213
184
 
214
185
  /* set a bit at the index */
215
186
  bucket_set(bf, index);
@@ -219,10 +190,9 @@ static VALUE bf_insert(VALUE self, VALUE key) {
219
190
  }
220
191
 
221
192
  static VALUE bf_merge(VALUE self, VALUE other) {
222
- struct BloomFilter *bf, *target;
193
+ struct BloomFilter *bf = bf_ptr(self);
194
+ struct BloomFilter *target = bf_ptr(other);
223
195
  int i;
224
- Data_Get_Struct(self, struct BloomFilter, bf);
225
- Data_Get_Struct(other, struct BloomFilter, target);
226
196
  for (i = 0; i < bf->bytes; i++) {
227
197
  bf->ptr[i] |= target->ptr[i];
228
198
  }
@@ -230,19 +200,17 @@ static VALUE bf_merge(VALUE self, VALUE other) {
230
200
  }
231
201
 
232
202
  static VALUE bf_and(VALUE self, VALUE other) {
233
- struct BloomFilter *bf, *bf_other, *target;
203
+ struct BloomFilter *bf = bf_ptr(self);
204
+ struct BloomFilter *bf_other = bf_ptr(other);
205
+ struct BloomFilter *target;
234
206
  VALUE klass, obj, args[5];
235
207
  int i;
236
208
 
237
- Data_Get_Struct(self, struct BloomFilter, bf);
238
- Data_Get_Struct(other, struct BloomFilter, bf_other);
239
209
  args[0] = INT2FIX(bf->m);
240
210
  args[1] = INT2FIX(bf->k);
241
- args[2] = INT2FIX(bf->b);
242
- args[3] = INT2FIX(bf->r);
243
211
  klass = rb_funcall(self,rb_intern("class"),0);
244
- obj = bf_s_new(4,args,klass);
245
- Data_Get_Struct(obj, struct BloomFilter, target);
212
+ obj = rb_class_new_instance(2, args, klass);
213
+ target = bf_ptr(obj);
246
214
  for (i = 0; i < bf->bytes; i++){
247
215
  target->ptr[i] = bf->ptr[i] & bf_other->ptr[i];
248
216
  }
@@ -251,19 +219,17 @@ static VALUE bf_and(VALUE self, VALUE other) {
251
219
  }
252
220
 
253
221
  static VALUE bf_or(VALUE self, VALUE other) {
254
- struct BloomFilter *bf, *bf_other, *target;
222
+ struct BloomFilter *bf = bf_ptr(self);
223
+ struct BloomFilter *bf_other = bf_ptr(other);
224
+ struct BloomFilter *target;
255
225
  VALUE klass, obj, args[5];
256
226
  int i;
257
227
 
258
- Data_Get_Struct(self, struct BloomFilter, bf);
259
- Data_Get_Struct(other, struct BloomFilter, bf_other);
260
228
  args[0] = INT2FIX(bf->m);
261
229
  args[1] = INT2FIX(bf->k);
262
- args[2] = INT2FIX(bf->b);
263
- args[3] = INT2FIX(bf->r);
264
230
  klass = rb_funcall(self,rb_intern("class"),0);
265
- obj = bf_s_new(4,args,klass);
266
- Data_Get_Struct(obj, struct BloomFilter, target);
231
+ obj = rb_class_new_instance(2, args, klass);
232
+ target = bf_ptr(obj);
267
233
  for (i = 0; i < bf->bytes; i++){
268
234
  target->ptr[i] = bf->ptr[i] | bf_other->ptr[i];
269
235
  }
@@ -271,13 +237,13 @@ static VALUE bf_or(VALUE self, VALUE other) {
271
237
  return obj;
272
238
  }
273
239
 
274
- static VALUE bf_delete(VALUE self, VALUE key) {
275
- unsigned long hash, index;
240
+ static VALUE bf_include(VALUE self, VALUE key) {
241
+ VALUE skey;
242
+ unsigned long hash;
243
+ int index;
276
244
  int i, len, m, k;
277
245
  char *ckey;
278
- VALUE skey;
279
- struct BloomFilter *bf;
280
- Data_Get_Struct(self, struct BloomFilter, bf);
246
+ struct BloomFilter *bf = bf_ptr(self);
281
247
 
282
248
  skey = rb_obj_as_string(key);
283
249
  ckey = StringValuePtr(skey);
@@ -288,69 +254,19 @@ static VALUE bf_delete(VALUE self, VALUE key) {
288
254
 
289
255
  hash = (unsigned long) djb2(ckey, len);
290
256
  for (i = 0; i <= k - 1; i++) {
291
- index = (unsigned long) (hash ^ seeds[i]) % (unsigned int) (m);
292
-
293
- /* set a bit at the index */
294
- bucket_unset(bf, index);
295
- }
296
-
297
- return Qnil;
298
- }
299
-
300
-
301
- static VALUE bf_include(int argc, VALUE* argv, VALUE self) {
302
- unsigned long hash, index;
303
- int i, len, m, k, tests_idx, vlen;
304
- char *ckey;
305
- VALUE tests, key, skey;
306
- struct BloomFilter *bf;
307
-
308
- rb_scan_args(argc, argv, "*", &tests);
309
-
310
- Data_Get_Struct(self, struct BloomFilter, bf);
311
- vlen = RARRAY_LEN(tests);
312
- for(tests_idx = 0; tests_idx < vlen; tests_idx++) {
313
- key = rb_ary_entry(tests, tests_idx);
314
- skey = rb_obj_as_string(key);
315
- ckey = StringValuePtr(skey);
316
- len = (int) (RSTRING_LEN(skey)); /* length of the string in bytes */
257
+ index = (int) ((hash ^ salts[i]) % (unsigned int) (m));
317
258
 
318
- m = bf->m;
319
- k = bf->k;
320
-
321
- hash = (unsigned long) djb2(ckey, len);
322
- for (i = 0; i <= k - 1; i++) {
323
- index = (unsigned long) (hash ^ seeds[i]) % (unsigned int) (m);
324
-
325
- /* check the bit at the index */
326
- if (!bucket_check(bf, index)) {
327
- return Qfalse; /* i.e., it is a new entry ; escape the loop */
328
- }
329
- }
330
-
331
- return Qtrue;
259
+ /* check the bit at the index */
260
+ if (!bucket_check(bf, index)) {
261
+ return Qfalse; /* i.e., it is a new entry ; escape the loop */
262
+ }
332
263
  }
333
- }
334
-
335
- static VALUE bf_to_s(VALUE self) {
336
- struct BloomFilter *bf;
337
- unsigned char *ptr;
338
- int i;
339
- VALUE str;
340
264
 
341
- Data_Get_Struct(self, struct BloomFilter, bf);
342
- str = rb_str_new(0, bf->m);
343
-
344
- ptr = (unsigned char *) RSTRING_PTR(str);
345
- for (i = 0; i < bf->m; i++)
346
- *ptr++ = bucket_get(bf, i) ? '1' : '0';
347
-
348
- return str;
265
+ return Qtrue;
349
266
  }
350
267
 
351
268
  static VALUE bf_bitmap(VALUE self) {
352
- struct BloomFilter *bf;
353
- Data_Get_Struct(self, struct BloomFilter, bf);
269
+ struct BloomFilter *bf = bf_ptr(self);
354
270
 
355
271
  VALUE str = rb_str_new(0, bf->bytes);
356
272
  unsigned char* ptr = (unsigned char *) RSTRING_PTR(str);
@@ -361,8 +277,7 @@ static VALUE bf_bitmap(VALUE self) {
361
277
  }
362
278
 
363
279
  static VALUE bf_load(VALUE self, VALUE bitmap) {
364
- struct BloomFilter *bf;
365
- Data_Get_Struct(self, struct BloomFilter, bf);
280
+ struct BloomFilter *bf = bf_ptr(self);
366
281
  unsigned char* ptr = (unsigned char *) RSTRING_PTR(bitmap);
367
282
 
368
283
  memcpy(bf->ptr, ptr, bf->bytes);
@@ -372,26 +287,21 @@ static VALUE bf_load(VALUE self, VALUE bitmap) {
372
287
 
373
288
  void Init_cbloomfilter(void) {
374
289
  cBloomFilter = rb_define_class("CBloomFilter", rb_cObject);
375
- rb_define_singleton_method(cBloomFilter, "new", bf_s_new, -1);
290
+ rb_define_alloc_func(cBloomFilter, bf_alloc);
291
+ rb_define_method(cBloomFilter, "initialize", bf_initialize, -1);
376
292
  rb_define_method(cBloomFilter, "m", bf_m, 0);
377
293
  rb_define_method(cBloomFilter, "k", bf_k, 0);
378
- rb_define_method(cBloomFilter, "b", bf_b, 0);
379
- rb_define_method(cBloomFilter, "r", bf_r, 0);
380
294
  rb_define_method(cBloomFilter, "set_bits", bf_set_bits, 0);
381
- /* rb_define_method(cBloomFilter, "s", bf_s, 0); */
382
- rb_define_method(cBloomFilter, "insert", bf_insert, 1);
383
- rb_define_method(cBloomFilter, "delete", bf_delete, 1);
384
- rb_define_method(cBloomFilter, "include?", bf_include, -1);
295
+ rb_define_method(cBloomFilter, "add", bf_add, 1);
296
+ rb_define_method(cBloomFilter, "include?", bf_include, 1);
385
297
  rb_define_method(cBloomFilter, "clear", bf_clear, 0);
386
- rb_define_method(cBloomFilter, "merge!", bf_merge, 1);
298
+ rb_define_method(cBloomFilter, "merge", bf_merge, 1);
387
299
  rb_define_method(cBloomFilter, "&", bf_and, 1);
388
300
  rb_define_method(cBloomFilter, "|", bf_or, 1);
389
301
 
390
- rb_define_method(cBloomFilter, "to_s", bf_to_s, 0);
391
302
  rb_define_method(cBloomFilter, "bitmap", bf_bitmap, 0);
392
303
  rb_define_method(cBloomFilter, "load", bf_load, 1);
393
304
 
394
305
  /* functions that have not been implemented, yet */
395
-
396
306
  // rb_define_method(cBloomFilter, "<=>", bf_cmp, 1);
397
307
  }
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env ruby
2
1
  require "mkmf"
3
2
 
4
3
  create_makefile("cbloomfilter")
@@ -0,0 +1,4 @@
1
+ class BloomFit
2
+ class ConfigurationMismatch < ArgumentError
3
+ end
4
+ end
@@ -1,3 +1,3 @@
1
1
  class BloomFit
2
- VERSION = "0.1.1".freeze
2
+ VERSION = "0.3.0".freeze
3
3
  end