bloom-filter 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG ADDED
@@ -0,0 +1,3 @@
1
+ === 0.1.0 (2012-01-25)
2
+
3
+ * Initial version.
data/README.md ADDED
@@ -0,0 +1,49 @@
1
+ # BloomFilter
2
+
3
+ BloomFilter is a Ruby library that implements an in-memory [Bloom Filter](http://en.wikipedia.org/wiki/Bloom_filter).
4
+
5
+ ## Dependencies
6
+
7
+ * ruby 1.9.1 or later
8
+
9
+ ## Installation
10
+
11
+ ```
12
+ gem install bloom-filter
13
+ ```
14
+
15
+ ## API
16
+
17
+ ```
18
+ BloomFilter
19
+ .new
20
+ .load
21
+ #dump
22
+ #insert
23
+ #exists?
24
+
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ```ruby
30
+ require 'bloom-filter'
31
+
32
+ filter = BloomFilter.new
33
+ filter = BloomFilter.new 100_000
34
+
35
+ filter.insert "foo"
36
+ filter.exists? "foo" #=> true
37
+ filter.exists? "bar" #=> false
38
+
39
+ filter.dump "/tmp/random.bloom"
40
+ filter = BloomFilter.load "/tmp/random.bloom", 100_000
41
+ ```
42
+
43
+ ## See Also
44
+
45
+ [https://github.com/igrigorik/bloomfilter-rb](https://github.com/igrigorik/bloomfilter-rb)
46
+
47
+ ## License
48
+
49
+ [Creative Commons Attribution - CC BY](http://creativecommons.org/licenses/by/3.0)
@@ -0,0 +1,265 @@
1
+ /*
2
+
3
+ Copyright (c) 2005-2008, Simon Howard
4
+
5
+ Permission to use, copy, modify, and/or distribute this software
6
+ for any purpose with or without fee is hereby granted, provided
7
+ that the above copyright notice and this permission notice appear
8
+ in all copies.
9
+
10
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11
+ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12
+ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13
+ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14
+ CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16
+ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17
+ CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+
19
+ */
20
+
21
+ #include <stdlib.h>
22
+ #include <string.h>
23
+
24
+ #include "bloom-filter.h"
25
+
26
+ /* malloc() / free() testing */
27
+
28
+ #ifdef ALLOC_TESTING
29
+ #include "alloc-testing.h"
30
+ #endif
31
+
32
+ /* Salt values. These salts are XORed with the output of the hash
33
+ * function to give multiple unique hashes. */
34
+
35
+ static const unsigned int salts[] = {
36
+ 0x5cee4612, 0xb5587b1c, 0xa250f2b0, 0xa3bf6d2a,
37
+ 0x7a81bd1a, 0x92888d7f, 0x1dc977c7, 0xedc96624,
38
+ 0x920c85d9, 0xf16066b3, 0xc6f0d4b3, 0x2b76eb86,
39
+ 0xcacb3893, 0x493d81c5, 0xf5a133ac, 0x039740bf,
40
+ 0x162b8224, 0xf841de90, 0xc3e5090d, 0x3bce93a7,
41
+ 0xf1860334, 0xe832b5f1, 0xf5b6535b, 0xe4cf4fa6,
42
+ 0x8357b769, 0x1442b07a, 0x21c5863d, 0xabc0d846,
43
+ 0x6dc0d77a, 0x23a3992c, 0xe12179ba, 0xd81d1e23,
44
+ 0xcff4727b, 0xe957ecfb, 0xee8f391a, 0x426efa23,
45
+ 0x3a34ff2c, 0x8b875d94, 0x34fd0f63, 0xf159daae,
46
+ 0xaabab8b3, 0xa83a07ba, 0x4e54fb33, 0xfb82fab8,
47
+ 0x2ae2888f, 0xd1a307a8, 0xbe33322d, 0x87c73f86,
48
+ 0x7270fa7e, 0x68673c55, 0x2c8026d0, 0xead8e422,
49
+ 0xa3ee5132, 0xecb67767, 0x1c3b1ae5, 0x47adf5b6,
50
+ 0xf4518d30, 0x46e62797, 0x9889aa76, 0x1405aadf,
51
+ 0xf62f9124, 0x5c435ac5, 0x35b8dfe3, 0x651c08c5,
52
+ };
53
+
54
+ BloomFilter *bloom_filter_new (unsigned int table_size, BloomFilterHashFunc hash_func, unsigned int num_functions) {
55
+ BloomFilter *filter;
56
+
57
+ /* There is a limit on the number of functions which can be
58
+ * applied, due to the table size */
59
+
60
+ if (num_functions > sizeof (salts) / sizeof (*salts)) {
61
+ return NULL;
62
+ }
63
+
64
+ /* Allocate bloom filter structure */
65
+
66
+ filter = malloc (sizeof (BloomFilter));
67
+
68
+ if (filter == NULL) {
69
+ return NULL;
70
+ }
71
+
72
+ /* Allocate table, each entry is one bit; these are packed into
73
+ * bytes. When allocating we must round the length up to the nearest
74
+ * byte. */
75
+
76
+ filter->table = calloc ((table_size + 7) / 8, 1);
77
+
78
+ if (filter->table == NULL) {
79
+ free (filter);
80
+ return NULL;
81
+ }
82
+
83
+ filter->hash_func = hash_func;
84
+ filter->num_functions = num_functions;
85
+ filter->table_size = table_size;
86
+
87
+ return filter;
88
+ }
89
+
90
+ void bloom_filter_free (BloomFilter * bloomfilter) {
91
+ free (bloomfilter->table);
92
+ free (bloomfilter);
93
+ }
94
+
95
+ void bloom_filter_insert (BloomFilter * bloomfilter, BloomFilterValue value) {
96
+ unsigned long hash;
97
+ unsigned long subhash;
98
+ unsigned int index;
99
+ unsigned int i;
100
+
101
+ /* Generate hash of the value to insert */
102
+
103
+ hash = bloomfilter->hash_func (value);
104
+
105
+ /* Generate multiple unique hashes by XORing with values in the
106
+ * salt table. */
107
+
108
+ for (i = 0; i < bloomfilter->num_functions; ++i) {
109
+
110
+ /* Generate a unique hash */
111
+
112
+ subhash = hash ^ salts[i];
113
+
114
+ /* Find the index into the table */
115
+
116
+ index = subhash % bloomfilter->table_size;
117
+
118
+ /* Insert into the table.
119
+ * index / 8 finds the byte index of the table,
120
+ * index % 8 gives the bit index within that byte to set. */
121
+
122
+ bloomfilter->table[index / 8] |= 1 << (index % 8);
123
+ }
124
+ }
125
+
126
+ int bloom_filter_query (BloomFilter * bloomfilter, BloomFilterValue value) {
127
+ unsigned long hash;
128
+ unsigned long subhash;
129
+ unsigned int index;
130
+ unsigned int i;
131
+ unsigned char b;
132
+ int bit;
133
+
134
+ /* Generate hash of the value to lookup */
135
+
136
+ hash = bloomfilter->hash_func (value);
137
+
138
+ /* Generate multiple unique hashes by XORing with values in the
139
+ * salt table. */
140
+
141
+ for (i = 0; i < bloomfilter->num_functions; ++i) {
142
+
143
+ /* Generate a unique hash */
144
+
145
+ subhash = hash ^ salts[i];
146
+
147
+ /* Find the index into the table to test */
148
+
149
+ index = subhash % bloomfilter->table_size;
150
+
151
+ /* The byte at index / 8 holds the value to test */
152
+
153
+ b = bloomfilter->table[index / 8];
154
+ bit = 1 << (index % 8);
155
+
156
+ /* Test if the particular bit is set; if it is not set,
157
+ * this value can not have been inserted. */
158
+
159
+ if ((b & bit) == 0) {
160
+ return 0;
161
+ }
162
+ }
163
+
164
+ /* All necessary bits were set. This may indicate that the value
165
+ * was inserted, or the values could have been set through other
166
+ * insertions. */
167
+
168
+ return 1;
169
+ }
170
+
171
+ void bloom_filter_read (BloomFilter * bloomfilter, unsigned char *array) {
172
+ unsigned int array_size;
173
+
174
+ /* The table is an array of bits, packed into bytes. Round up
175
+ * to the nearest byte. */
176
+
177
+ array_size = (bloomfilter->table_size + 7) / 8;
178
+
179
+ /* Copy into the buffer of the calling routine. */
180
+
181
+ memcpy (array, bloomfilter->table, array_size);
182
+ }
183
+
184
+ void bloom_filter_load (BloomFilter * bloomfilter, unsigned char *array) {
185
+ unsigned int array_size;
186
+
187
+ /* The table is an array of bits, packed into bytes. Round up
188
+ * to the nearest byte. */
189
+
190
+ array_size = (bloomfilter->table_size + 7) / 8;
191
+
192
+ /* Copy from the buffer of the calling routine. */
193
+
194
+ memcpy (bloomfilter->table, array, array_size);
195
+ }
196
+
197
+ BloomFilter *bloom_filter_union (BloomFilter * filter1, BloomFilter * filter2) {
198
+ BloomFilter *result;
199
+ unsigned int i;
200
+ unsigned int array_size;
201
+
202
+ /* To perform this operation, both filters must be created with
203
+ * the same values. */
204
+
205
+ if (filter1->table_size != filter2->table_size
206
+ || filter1->num_functions != filter2->num_functions || filter1->hash_func != filter2->hash_func) {
207
+ return NULL;
208
+ }
209
+
210
+ /* Create a new bloom filter for the result */
211
+
212
+ result = bloom_filter_new (filter1->table_size, filter1->hash_func, filter1->num_functions);
213
+
214
+ if (result == NULL) {
215
+ return NULL;
216
+ }
217
+
218
+ /* The table is an array of bits, packed into bytes. Round up
219
+ * to the nearest byte. */
220
+
221
+ array_size = (filter1->table_size + 7) / 8;
222
+
223
+ /* Populate the table of the new filter */
224
+
225
+ for (i = 0; i < array_size; ++i) {
226
+ result->table[i] = filter1->table[i] | filter2->table[i];
227
+ }
228
+
229
+ return result;
230
+ }
231
+
232
+ BloomFilter *bloom_filter_intersection (BloomFilter * filter1, BloomFilter * filter2) {
233
+ BloomFilter *result;
234
+ unsigned int i;
235
+ unsigned int array_size;
236
+
237
+ /* To perform this operation, both filters must be created with
238
+ * the same values. */
239
+
240
+ if (filter1->table_size != filter2->table_size
241
+ || filter1->num_functions != filter2->num_functions || filter1->hash_func != filter2->hash_func) {
242
+ return NULL;
243
+ }
244
+
245
+ /* Create a new bloom filter for the result */
246
+
247
+ result = bloom_filter_new (filter1->table_size, filter1->hash_func, filter1->num_functions);
248
+
249
+ if (result == NULL) {
250
+ return NULL;
251
+ }
252
+
253
+ /* The table is an array of bits, packed into bytes. Round up
254
+ * to the nearest byte. */
255
+
256
+ array_size = (filter1->table_size + 7) / 8;
257
+
258
+ /* Populate the table of the new filter */
259
+
260
+ for (i = 0; i < array_size; ++i) {
261
+ result->table[i] = filter1->table[i] & filter2->table[i];
262
+ }
263
+
264
+ return result;
265
+ }
@@ -0,0 +1,193 @@
1
+ /*
2
+
3
+ Copyright (c) 2005-2008, Simon Howard
4
+
5
+ Permission to use, copy, modify, and/or distribute this software
6
+ for any purpose with or without fee is hereby granted, provided
7
+ that the above copyright notice and this permission notice appear
8
+ in all copies.
9
+
10
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11
+ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12
+ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13
+ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14
+ CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16
+ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17
+ CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+
19
+ */
20
+
21
+ /**
22
+ * @file bloom-filter.h
23
+ *
24
+ * @brief Bloom filter
25
+ *
26
+ * A bloom filter is a space efficient data structure that can be
27
+ * used to test whether a given element is part of a set. Lookups
28
+ * will occasionally generate false positives, but never false
29
+ * negatives.
30
+ *
31
+ * To create a bloom filter, use @ref bloom_filter_new. To destroy a
32
+ * bloom filter, use @ref bloom_filter_free.
33
+ *
34
+ * To insert a value into a bloom filter, use @ref bloom_filter_insert.
35
+ *
36
+ * To query whether a value is part of the set, use
37
+ * @ref bloom_filter_query.
38
+ */
39
+
40
+ #ifndef ALGORITHM_BLOOM_FILTER_H
41
+ #define ALGORITHM_BLOOM_FILTER_H
42
+
43
+ #ifdef __cplusplus
44
+ extern "C" {
45
+ #endif
46
+
47
+ /**
48
+ * A value stored in a @ref BloomFilter.
49
+ */
50
+
51
+ typedef void *BloomFilterValue;
52
+
53
+ /**
54
+ * Hash function used to generate hash values for values inserted into a
55
+ * bloom filter.
56
+ *
57
+ * @param data The value to generate a hash value for.
58
+ * @return The hash value.
59
+ */
60
+
61
+ typedef unsigned long (*BloomFilterHashFunc) (BloomFilterValue data);
62
+
63
+ /**
64
+ * A bloom filter structure.
65
+ */
66
+
67
+ typedef struct BloomFilter {
68
+ BloomFilterHashFunc hash_func;
69
+ unsigned char *table;
70
+ unsigned int table_size;
71
+ unsigned int num_functions;
72
+ } BloomFilter;
73
+
74
+
75
+ /**
76
+ * Create a new bloom filter.
77
+ *
78
+ * @param table_size The size of the bloom filter. The greater
79
+ * the table size, the more elements can be
80
+ * stored, and the lesser the chance of false
81
+ * positives.
82
+ * @param hash_func Hash function to use on values stored in the
83
+ * filter.
84
+ * @param num_functions Number of hash functions to apply to each
85
+ * element on insertion. The running time for
86
+ * insertion and queries is proportional to this
87
+ * value. The more functions applied, the lesser
88
+ * the chance of false positives. The maximum
89
+ * number of functions is 64.
90
+ * @return A new bloom filter structure, or NULL if it
91
+ * was not possible to allocate the new bloom
92
+ * filter.
93
+ */
94
+
95
+ BloomFilter *bloom_filter_new (unsigned int table_size, BloomFilterHashFunc hash_func, unsigned int num_functions);
96
+
97
+ /**
98
+ * Destroy a bloom filter.
99
+ *
100
+ * @param bloomfilter The bloom filter to destroy.
101
+ */
102
+
103
+ void bloom_filter_free (BloomFilter * bloomfilter);
104
+
105
+ /**
106
+ * Insert a value into a bloom filter.
107
+ *
108
+ * @param bloomfilter The bloom filter.
109
+ * @param value The value to insert.
110
+ */
111
+
112
+ void bloom_filter_insert (BloomFilter * bloomfilter, BloomFilterValue value);
113
+
114
+ /**
115
+ * Query a bloom filter for a particular value.
116
+ *
117
+ * @param bloomfilter The bloom filter.
118
+ * @param value The value to look up.
119
+ * @return Zero if the value was definitely not
120
+ * inserted into the filter. Non-zero
121
+ * indicates that it either may or may not
122
+ * have been inserted.
123
+ */
124
+
125
+ int bloom_filter_query (BloomFilter * bloomfilter, BloomFilterValue value);
126
+
127
+ /**
128
+ * Read the contents of a bloom filter into an array.
129
+ *
130
+ * @param bloomfilter The bloom filter.
131
+ * @param array Pointer to the array to read into. This
132
+ * should be (table_size + 7) / 8 bytes in
133
+ * length.
134
+ */
135
+
136
+ void bloom_filter_read (BloomFilter * bloomfilter, unsigned char *array);
137
+
138
+ /**
139
+ * Load the contents of a bloom filter from an array.
140
+ * The data loaded should be the output read from @ref bloom_filter_read,
141
+ * from a bloom filter created using the same arguments used to create
142
+ * the original filter.
143
+ *
144
+ * @param bloomfilter The bloom filter.
145
+ * @param array Pointer to the array to load from. This
146
+ * should be (table_size + 7) / 8 bytes in
147
+ * length.
148
+ */
149
+
150
+ void bloom_filter_load (BloomFilter * bloomfilter, unsigned char *array);
151
+
152
+ /**
153
+ * Find the union of two bloom filters. Values are present in the
154
+ * resulting filter if they are present in either of the original
155
+ * filters.
156
+ *
157
+ * Both of the original filters must have been created using the
158
+ * same parameters to @ref bloom_filter_new.
159
+ *
160
+ * @param filter1 The first filter.
161
+ * @param filter2 The second filter.
162
+ * @return A new filter which is the union of the
163
+ * two filters, or NULL if it was not possible
164
+ * to allocate memory for the new filter, or
165
+ * if the two filters specified were created
166
+ * with different parameters.
167
+ */
168
+
169
+ BloomFilter *bloom_filter_union (BloomFilter * filter1, BloomFilter * filter2);
170
+
171
+ /**
172
+ * Find the intersection of two bloom filters. Values are only ever
173
+ * present in the resulting filter if they are present in both of the
174
+ * original filters.
175
+ *
176
+ * Both of the original filters must have been created using the
177
+ * same parameters to @ref bloom_filter_new.
178
+ *
179
+ * @param filter1 The first filter.
180
+ * @param filter2 The second filter.
181
+ * @return A new filter which is an intersection of the
182
+ * two filters, or NULL if it was not possible
183
+ * to allocate memory for the new filter, or
184
+ * if the two filters specified were created
185
+ * with different parameters.
186
+ */
187
+
188
+ BloomFilter *bloom_filter_intersection (BloomFilter * filter1, BloomFilter * filter2);
189
+
190
+ #ifdef __cplusplus
191
+ }
192
+ #endif
193
+ #endif /* #ifndef ALGORITHM_BLOOM_FILTER_H */
@@ -0,0 +1,150 @@
1
+ #include <sys/types.h>
2
+ #include <sys/stat.h>
3
+ #include <fcntl.h>
4
+ #include <unistd.h>
5
+ #include <errno.h>
6
+ #include <string.h>
7
+ #include <stdlib.h>
8
+
9
+ #include "ruby/ruby.h"
10
+ #include "bloom-filter.h"
11
+ #include "hash-string.h"
12
+ #include "version.h"
13
+
14
+ #define TO_S(v) rb_funcall(v, rb_intern("to_s"), 0)
15
+ #define CSTRING(v) RSTRING_PTR(TO_S(v))
16
+
17
+ static void bloom_free(BloomFilter *filter) {
18
+ if (filter)
19
+ bloom_filter_free(filter);
20
+ }
21
+
22
+ VALUE bloom_allocate(VALUE klass) {
23
+ BloomFilter *filter = 0;
24
+ return Data_Wrap_Struct(klass, 0, bloom_free, filter);
25
+ }
26
+
27
+ BloomFilter* bloom_handle(VALUE self) {
28
+ BloomFilter *filter = 0;
29
+ Data_Get_Struct(self, BloomFilter, filter);
30
+ if (!filter)
31
+ rb_raise(rb_eArgError, "invalid BloomFilter instance");
32
+ return filter;
33
+ }
34
+
35
+ VALUE bloom_initialize(int argc, VALUE *argv, VALUE self) {
36
+ size_t size;
37
+ VALUE table_size;
38
+ BloomFilter *filter = 0;
39
+
40
+ rb_scan_args(argc, argv, "01", &table_size);
41
+ if (NIL_P(table_size))
42
+ size = 1000000;
43
+ else
44
+ size = atol(CSTRING(table_size));
45
+
46
+ filter = bloom_filter_new(size, string_nocase_hash, 4);
47
+
48
+ if (!filter)
49
+ rb_raise(rb_eNoMemError, "unable to allocate memory for BloomFilter");
50
+
51
+ DATA_PTR(self) = filter;
52
+ }
53
+
54
+
55
+ VALUE bloom_insert(VALUE klass, VALUE string) {
56
+ BloomFilter *filter = bloom_handle(klass);
57
+ bloom_filter_insert(filter, (BloomFilterValue)CSTRING(string));
58
+ return Qtrue;
59
+ }
60
+
61
+ VALUE bloom_exists(VALUE klass, VALUE string) {
62
+ BloomFilter *filter = bloom_handle(klass);
63
+ return bloom_filter_query(filter, (BloomFilterValue)CSTRING(string)) ? Qtrue : Qfalse;
64
+ }
65
+
66
+ VALUE bloom_dump(VALUE klass, VALUE file) {
67
+ int fd;
68
+ BloomFilter *filter = bloom_handle(klass);
69
+
70
+ size_t size = (filter->table_size + 7) / 8;
71
+ void *buffer = malloc(size);
72
+
73
+ if (!buffer)
74
+ rb_raise(rb_eNoMemError, "out of memory dumping BloomFilter");
75
+
76
+ bloom_filter_read(filter, buffer);
77
+
78
+ fd = open(CSTRING(file), O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
79
+ if (fd == -1) {
80
+ free(buffer);
81
+ rb_raise(rb_eIOError, "unable to open file. %s", strerror(errno));
82
+ }
83
+
84
+ if (write(fd, buffer, size) != -1) {
85
+ free(buffer);
86
+ close(fd);
87
+ return Qtrue;
88
+ }
89
+ else {
90
+ free(buffer);
91
+ close(fd);
92
+ rb_raise(rb_eIOError, "error dumping BloomFilter: %s\n", strerror(errno));
93
+ return Qfalse; // not reachable
94
+ }
95
+ }
96
+
97
+ VALUE bloom_load(int argc, VALUE *argv, VALUE klass) {
98
+ int fd;
99
+ void *buffer;
100
+ size_t size, bytes;
101
+ BloomFilter *filter;
102
+ VALUE file, table_size, instance;
103
+
104
+ rb_scan_args(argc, argv, "11", &file, &table_size);
105
+ if (NIL_P(table_size)) {
106
+ size = 1000000;
107
+ table_size = INT2NUM(size);
108
+ }
109
+ else
110
+ size = atol(CSTRING(table_size));
111
+
112
+ fd = open(CSTRING(file), O_RDONLY);
113
+ if (fd == -1)
114
+ rb_raise(rb_eIOError, "unable to open file: %s", strerror(errno));
115
+
116
+ size = (size + 7) / 8;
117
+ buffer = malloc(size);
118
+ if (!buffer) {
119
+ close(fd);
120
+ rb_raise(rb_eNoMemError, "out of memory dumping BloomFilter");
121
+ }
122
+
123
+ bytes = read(fd, buffer, size);
124
+ if (bytes != size) {
125
+ free(buffer);
126
+ close(fd);
127
+ rb_raise(rb_eStandardError, "unable to load BloomFilter, expected %ld but got %ld bytes", size, bytes);
128
+ }
129
+
130
+ instance = bloom_allocate(klass);
131
+ bloom_initialize(1, &table_size, instance);
132
+
133
+ bloom_filter_load(bloom_handle(instance), buffer);
134
+ free(buffer);
135
+ close(fd);
136
+ return instance;
137
+ }
138
+
139
+ Init_bloom_filter() {
140
+ VALUE cBloom = rb_define_class("BloomFilter", rb_cObject);
141
+
142
+ rb_define_method(cBloom, "initialize", RUBY_METHOD_FUNC(bloom_initialize), -1);
143
+ rb_define_method(cBloom, "dump", RUBY_METHOD_FUNC(bloom_dump), 1);
144
+ rb_define_method(cBloom, "insert", RUBY_METHOD_FUNC(bloom_insert), 1);
145
+ rb_define_method(cBloom, "exists?", RUBY_METHOD_FUNC(bloom_exists), 1);
146
+
147
+ rb_define_alloc_func(cBloom, bloom_allocate);
148
+ rb_define_singleton_method(cBloom, "load", RUBY_METHOD_FUNC(bloom_load), -1);
149
+ rb_define_const(cBloom, "VERSION", rb_str_new2(RUBY_BLOOM_FILTER_VERSION));
150
+ }
data/ext/extconf.rb ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'mkmf'
4
+ create_makefile 'bloom_filter'
data/ext/hash-string.c ADDED
@@ -0,0 +1,58 @@
1
+ /*
2
+
3
+ Copyright (c) 2005-2008, Simon Howard
4
+
5
+ Permission to use, copy, modify, and/or distribute this software
6
+ for any purpose with or without fee is hereby granted, provided
7
+ that the above copyright notice and this permission notice appear
8
+ in all copies.
9
+
10
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11
+ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12
+ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13
+ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14
+ CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16
+ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17
+ CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+
19
+ */
20
+
21
+ #include <ctype.h>
22
+
23
+ #include "hash-string.h"
24
+
25
+ /* String hash function */
26
+
27
/* Hash a NUL-terminated string.
 *
 * A djb2-style shift-and-XOR hash: starting from 5381, each byte updates
 * the state as ((h << 5) ^ h) ^ byte. An empty string hashes to 5381. */

unsigned long string_hash (void *string) {
    const unsigned char *cursor = (const unsigned char *) string;
    unsigned long acc = 5381;

    for (; *cursor != '\0'; ++cursor) {
        acc = ((acc << 5) ^ acc) ^ (*cursor);
    }

    return acc;
}
42
+
43
+ /* The same function, with a tolower on every character so that
44
+ * case is ignored. This code is duplicated for performance. */
45
+
46
/* Hash a NUL-terminated string, ignoring letter case.
 *
 * Same shift-and-XOR scheme as string_hash (state starts at 5381 and is
 * updated as ((h << 5) ^ h) ^ byte), except that every byte is passed
 * through tolower() before being mixed in. */

unsigned long string_nocase_hash (void *string) {
    const unsigned char *cursor;
    unsigned long acc = 5381;

    for (cursor = (const unsigned char *) string; *cursor != '\0'; cursor++) {
        acc ^= acc << 5;
        acc ^= (unsigned long) tolower (*cursor);
    }

    return acc;
}
data/ext/hash-string.h ADDED
@@ -0,0 +1,56 @@
1
+ /*
2
+
3
+ Copyright (c) 2005-2008, Simon Howard
4
+
5
+ Permission to use, copy, modify, and/or distribute this software
6
+ for any purpose with or without fee is hereby granted, provided
7
+ that the above copyright notice and this permission notice appear
8
+ in all copies.
9
+
10
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11
+ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12
+ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13
+ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14
+ CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16
+ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17
+ CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+
19
+ */
20
+
21
+ /**
22
+ * @file hash-string.h
23
+ *
24
+ * Hash functions for text strings. For more information
25
+ * see @ref string_hash or @ref string_nocase_hash.
26
+ */
27
+
28
+ #ifndef ALGORITHM_HASH_STRING_H
29
+ #define ALGORITHM_HASH_STRING_H
30
+
31
+ #ifdef __cplusplus
32
+ extern "C" {
33
+ #endif
34
+
35
+ /**
36
+ * Generate a hash key from a string.
37
+ *
38
+ * @param string The string.
39
+ * @return A hash key for the string.
40
+ */
41
+
42
+ unsigned long string_hash (void *string);
43
+
44
+ /**
45
+ * Generate a hash key from a string, ignoring the case of letters.
46
+ *
47
+ * @param string The string.
48
+ * @return A hash key for the string.
49
+ */
50
+
51
+ unsigned long string_nocase_hash (void *string);
52
+
53
+ #ifdef __cplusplus
54
+ }
55
+ #endif
56
+ #endif /* #ifndef ALGORITHM_HASH_STRING_H */
data/ext/version.h ADDED
@@ -0,0 +1 @@
1
+ #define RUBY_BLOOM_FILTER_VERSION "0.1.0"
@@ -0,0 +1 @@
1
+ require 'bloom_filter'
Binary file
data/test/helper.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'bloom-filter'
2
+ require 'minitest/spec'
3
+
4
+ MiniTest::Unit.autorun
@@ -0,0 +1,23 @@
1
+ require 'helper'
2
+
3
+ describe 'BloomFilter primitives' do
4
+ it 'should create one with default size' do
5
+ assert BloomFilter.new
6
+ end
7
+
8
+ it 'should create one with given size' do
9
+ assert BloomFilter.new(100)
10
+ end
11
+
12
+ it 'should insert' do
13
+ assert filter = BloomFilter.new(100)
14
+ assert filter.insert("foo")
15
+ end
16
+
17
+ it 'should allow membership checks' do
18
+ assert filter = BloomFilter.new(100)
19
+ assert filter.insert("foo")
20
+ assert filter.exists?("foo")
21
+ assert !filter.exists?("bar")
22
+ end
23
+ end
data/test/test_io.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'helper'
2
+ require 'tempfile'
3
+
4
+ describe 'BloomFilter load & dump' do
5
+ it 'should dump and load a filter' do
6
+ file = Tempfile.new("bloom-filter-test")
7
+
8
+ assert filter = BloomFilter.new(100)
9
+ assert filter.insert("foo")
10
+ assert filter.dump(file.path), "dump filter"
11
+ assert filter = BloomFilter.load(file.path, 100)
12
+ assert filter.exists?("foo")
13
+ assert !filter.exists?("bar")
14
+ end
15
+ end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bloom-filter
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Bharanee Rathna
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2012-01-25 00:00:00 +11:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rake
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :development
32
+ version_requirements: *id001
33
+ - !ruby/object:Gem::Dependency
34
+ name: rake-compiler
35
+ prerelease: false
36
+ requirement: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 0
43
+ version: "0"
44
+ type: :development
45
+ version_requirements: *id002
46
+ description: A fast Bloom Filter library for Ruby for unices.
47
+ email:
48
+ - deepfryed@gmail.com
49
+ executables: []
50
+
51
+ extensions:
52
+ - ext/extconf.rb
53
+ extra_rdoc_files: []
54
+
55
+ files:
56
+ - ext/hash-string.c
57
+ - ext/bloom-filter.c
58
+ - ext/bloom_filter.c
59
+ - ext/bloom-filter.h
60
+ - ext/version.h
61
+ - ext/hash-string.h
62
+ - ext/extconf.rb
63
+ - test/test_io.rb
64
+ - test/helper.rb
65
+ - test/test_basic.rb
66
+ - README.md
67
+ - CHANGELOG
68
+ - lib/bloom-filter.rb
69
+ - lib/bloom_filter.so
70
+ has_rdoc: true
71
+ homepage: http://github.com/deepfryed/bloom-filter
72
+ licenses: []
73
+
74
+ post_install_message:
75
+ rdoc_options: []
76
+
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ segments:
85
+ - 0
86
+ version: "0"
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ segments:
93
+ - 0
94
+ version: "0"
95
+ requirements: []
96
+
97
+ rubyforge_project:
98
+ rubygems_version: 1.3.7
99
+ signing_key:
100
+ specification_version: 3
101
+ summary: A fast Bloom Filter library for Ruby.
102
+ test_files: []
103
+