bloom-filter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG ADDED
@@ -0,0 +1,3 @@
1
+ === 0.1.0 (2012-01-25)
2
+
3
+ * Initial version.
data/README.md ADDED
@@ -0,0 +1,49 @@
1
+ # BloomFilter
2
+
3
+ BloomFilter is a Ruby library that implements an in-memory [Bloom Filter](http://en.wikipedia.org/wiki/Bloom_filter).
4
+
5
+ ## Dependencies
6
+
7
+ * ruby 1.9.1 or later
8
+
9
+ ## Installation
10
+
11
+ ```
12
+ gem install bloom-filter
13
+ ```
14
+
15
+ ## API
16
+
17
+ ```
18
+ BloomFilter
19
+ .new
20
+ .load
21
+ #dump
22
+ #insert
23
+ #exists?
24
+
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ```ruby
30
+ require 'bloom-filter'
31
+
32
+ filter = BloomFilter.new
33
+ filter = BloomFilter.new 100_000
34
+
35
+ filter.insert "foo"
36
+ filter.exists? "foo" #=> true
37
+ filter.exists? "bar" #=> false
38
+
39
+ filter.dump "/tmp/random.bloom"
40
+ filter = BloomFilter.load "/tmp/random.bloom", 100_000
41
+ ```
42
+
43
+ ## See Also
44
+
45
+ [https://github.com/igrigorik/bloomfilter-rb](https://github.com/igrigorik/bloomfilter-rb)
46
+
47
+ ## License
48
+
49
+ [Creative Commons Attribution - CC BY](http://creativecommons.org/licenses/by/3.0)
@@ -0,0 +1,265 @@
1
+ /*
2
+
3
+ Copyright (c) 2005-2008, Simon Howard
4
+
5
+ Permission to use, copy, modify, and/or distribute this software
6
+ for any purpose with or without fee is hereby granted, provided
7
+ that the above copyright notice and this permission notice appear
8
+ in all copies.
9
+
10
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11
+ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12
+ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13
+ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14
+ CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16
+ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17
+ CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+
19
+ */
20
+
21
+ #include <stdlib.h>
22
+ #include <string.h>
23
+
24
+ #include "bloom-filter.h"
25
+
26
+ /* malloc() / free() testing */
27
+
28
+ #ifdef ALLOC_TESTING
29
+ #include "alloc-testing.h"
30
+ #endif
31
+
32
+ /* Salt values. These salts are XORed with the output of the hash
33
+ * function to give multiple unique hashes. */
34
+
35
+ static const unsigned int salts[] = {
36
+ 0x5cee4612, 0xb5587b1c, 0xa250f2b0, 0xa3bf6d2a,
37
+ 0x7a81bd1a, 0x92888d7f, 0x1dc977c7, 0xedc96624,
38
+ 0x920c85d9, 0xf16066b3, 0xc6f0d4b3, 0x2b76eb86,
39
+ 0xcacb3893, 0x493d81c5, 0xf5a133ac, 0x039740bf,
40
+ 0x162b8224, 0xf841de90, 0xc3e5090d, 0x3bce93a7,
41
+ 0xf1860334, 0xe832b5f1, 0xf5b6535b, 0xe4cf4fa6,
42
+ 0x8357b769, 0x1442b07a, 0x21c5863d, 0xabc0d846,
43
+ 0x6dc0d77a, 0x23a3992c, 0xe12179ba, 0xd81d1e23,
44
+ 0xcff4727b, 0xe957ecfb, 0xee8f391a, 0x426efa23,
45
+ 0x3a34ff2c, 0x8b875d94, 0x34fd0f63, 0xf159daae,
46
+ 0xaabab8b3, 0xa83a07ba, 0x4e54fb33, 0xfb82fab8,
47
+ 0x2ae2888f, 0xd1a307a8, 0xbe33322d, 0x87c73f86,
48
+ 0x7270fa7e, 0x68673c55, 0x2c8026d0, 0xead8e422,
49
+ 0xa3ee5132, 0xecb67767, 0x1c3b1ae5, 0x47adf5b6,
50
+ 0xf4518d30, 0x46e62797, 0x9889aa76, 0x1405aadf,
51
+ 0xf62f9124, 0x5c435ac5, 0x35b8dfe3, 0x651c08c5,
52
+ };
53
+
54
+ BloomFilter *bloom_filter_new (unsigned int table_size, BloomFilterHashFunc hash_func, unsigned int num_functions) {
55
+ BloomFilter *filter;
56
+
57
+ /* There is a limit on the number of functions which can be
58
+ * applied, due to the table size */
59
+
60
+ if (num_functions > sizeof (salts) / sizeof (*salts)) {
61
+ return NULL;
62
+ }
63
+
64
+ /* Allocate bloom filter structure */
65
+
66
+ filter = malloc (sizeof (BloomFilter));
67
+
68
+ if (filter == NULL) {
69
+ return NULL;
70
+ }
71
+
72
+ /* Allocate table, each entry is one bit; these are packed into
73
+ * bytes. When allocating we must round the length up to the nearest
74
+ * byte. */
75
+
76
+ filter->table = calloc ((table_size + 7) / 8, 1);
77
+
78
+ if (filter->table == NULL) {
79
+ free (filter);
80
+ return NULL;
81
+ }
82
+
83
+ filter->hash_func = hash_func;
84
+ filter->num_functions = num_functions;
85
+ filter->table_size = table_size;
86
+
87
+ return filter;
88
+ }
89
+
90
+ void bloom_filter_free (BloomFilter * bloomfilter) {
91
+ free (bloomfilter->table);
92
+ free (bloomfilter);
93
+ }
94
+
95
+ void bloom_filter_insert (BloomFilter * bloomfilter, BloomFilterValue value) {
96
+ unsigned long hash;
97
+ unsigned long subhash;
98
+ unsigned int index;
99
+ unsigned int i;
100
+
101
+ /* Generate hash of the value to insert */
102
+
103
+ hash = bloomfilter->hash_func (value);
104
+
105
+ /* Generate multiple unique hashes by XORing with values in the
106
+ * salt table. */
107
+
108
+ for (i = 0; i < bloomfilter->num_functions; ++i) {
109
+
110
+ /* Generate a unique hash */
111
+
112
+ subhash = hash ^ salts[i];
113
+
114
+ /* Find the index into the table */
115
+
116
+ index = subhash % bloomfilter->table_size;
117
+
118
+ /* Insert into the table.
119
+ * index / 8 finds the byte index of the table,
120
+ * index % 8 gives the bit index within that byte to set. */
121
+
122
+ bloomfilter->table[index / 8] |= 1 << (index % 8);
123
+ }
124
+ }
125
+
126
+ int bloom_filter_query (BloomFilter * bloomfilter, BloomFilterValue value) {
127
+ unsigned long hash;
128
+ unsigned long subhash;
129
+ unsigned int index;
130
+ unsigned int i;
131
+ unsigned char b;
132
+ int bit;
133
+
134
+ /* Generate hash of the value to lookup */
135
+
136
+ hash = bloomfilter->hash_func (value);
137
+
138
+ /* Generate multiple unique hashes by XORing with values in the
139
+ * salt table. */
140
+
141
+ for (i = 0; i < bloomfilter->num_functions; ++i) {
142
+
143
+ /* Generate a unique hash */
144
+
145
+ subhash = hash ^ salts[i];
146
+
147
+ /* Find the index into the table to test */
148
+
149
+ index = subhash % bloomfilter->table_size;
150
+
151
+ /* The byte at index / 8 holds the value to test */
152
+
153
+ b = bloomfilter->table[index / 8];
154
+ bit = 1 << (index % 8);
155
+
156
+ /* Test if the particular bit is set; if it is not set,
157
+ * this value can not have been inserted. */
158
+
159
+ if ((b & bit) == 0) {
160
+ return 0;
161
+ }
162
+ }
163
+
164
+ /* All necessary bits were set. This may indicate that the value
165
+ * was inserted, or the values could have been set through other
166
+ * insertions. */
167
+
168
+ return 1;
169
+ }
170
+
171
+ void bloom_filter_read (BloomFilter * bloomfilter, unsigned char *array) {
172
+ unsigned int array_size;
173
+
174
+ /* The table is an array of bits, packed into bytes. Round up
175
+ * to the nearest byte. */
176
+
177
+ array_size = (bloomfilter->table_size + 7) / 8;
178
+
179
+ /* Copy into the buffer of the calling routine. */
180
+
181
+ memcpy (array, bloomfilter->table, array_size);
182
+ }
183
+
184
+ void bloom_filter_load (BloomFilter * bloomfilter, unsigned char *array) {
185
+ unsigned int array_size;
186
+
187
+ /* The table is an array of bits, packed into bytes. Round up
188
+ * to the nearest byte. */
189
+
190
+ array_size = (bloomfilter->table_size + 7) / 8;
191
+
192
+ /* Copy from the buffer of the calling routine. */
193
+
194
+ memcpy (bloomfilter->table, array, array_size);
195
+ }
196
+
197
+ BloomFilter *bloom_filter_union (BloomFilter * filter1, BloomFilter * filter2) {
198
+ BloomFilter *result;
199
+ unsigned int i;
200
+ unsigned int array_size;
201
+
202
+ /* To perform this operation, both filters must be created with
203
+ * the same values. */
204
+
205
+ if (filter1->table_size != filter2->table_size
206
+ || filter1->num_functions != filter2->num_functions || filter1->hash_func != filter2->hash_func) {
207
+ return NULL;
208
+ }
209
+
210
+ /* Create a new bloom filter for the result */
211
+
212
+ result = bloom_filter_new (filter1->table_size, filter1->hash_func, filter1->num_functions);
213
+
214
+ if (result == NULL) {
215
+ return NULL;
216
+ }
217
+
218
+ /* The table is an array of bits, packed into bytes. Round up
219
+ * to the nearest byte. */
220
+
221
+ array_size = (filter1->table_size + 7) / 8;
222
+
223
+ /* Populate the table of the new filter */
224
+
225
+ for (i = 0; i < array_size; ++i) {
226
+ result->table[i] = filter1->table[i] | filter2->table[i];
227
+ }
228
+
229
+ return result;
230
+ }
231
+
232
+ BloomFilter *bloom_filter_intersection (BloomFilter * filter1, BloomFilter * filter2) {
233
+ BloomFilter *result;
234
+ unsigned int i;
235
+ unsigned int array_size;
236
+
237
+ /* To perform this operation, both filters must be created with
238
+ * the same values. */
239
+
240
+ if (filter1->table_size != filter2->table_size
241
+ || filter1->num_functions != filter2->num_functions || filter1->hash_func != filter2->hash_func) {
242
+ return NULL;
243
+ }
244
+
245
+ /* Create a new bloom filter for the result */
246
+
247
+ result = bloom_filter_new (filter1->table_size, filter1->hash_func, filter1->num_functions);
248
+
249
+ if (result == NULL) {
250
+ return NULL;
251
+ }
252
+
253
+ /* The table is an array of bits, packed into bytes. Round up
254
+ * to the nearest byte. */
255
+
256
+ array_size = (filter1->table_size + 7) / 8;
257
+
258
+ /* Populate the table of the new filter */
259
+
260
+ for (i = 0; i < array_size; ++i) {
261
+ result->table[i] = filter1->table[i] & filter2->table[i];
262
+ }
263
+
264
+ return result;
265
+ }
@@ -0,0 +1,193 @@
1
+ /*
2
+
3
+ Copyright (c) 2005-2008, Simon Howard
4
+
5
+ Permission to use, copy, modify, and/or distribute this software
6
+ for any purpose with or without fee is hereby granted, provided
7
+ that the above copyright notice and this permission notice appear
8
+ in all copies.
9
+
10
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11
+ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12
+ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13
+ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14
+ CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16
+ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17
+ CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+
19
+ */
20
+
21
+ /**
22
+ * @file bloom-filter.h
23
+ *
24
+ * @brief Bloom filter
25
+ *
26
+ * A bloom filter is a space efficient data structure that can be
27
+ * used to test whether a given element is part of a set. Lookups
28
+ * will occasionally generate false positives, but never false
29
+ * negatives.
30
+ *
31
+ * To create a bloom filter, use @ref bloom_filter_new. To destroy a
32
+ * bloom filter, use @ref bloom_filter_free.
33
+ *
34
+ * To insert a value into a bloom filter, use @ref bloom_filter_insert.
35
+ *
36
+ * To query whether a value is part of the set, use
37
+ * @ref bloom_filter_query.
38
+ */
39
+
40
+ #ifndef ALGORITHM_BLOOM_FILTER_H
41
+ #define ALGORITHM_BLOOM_FILTER_H
42
+
43
+ #ifdef __cplusplus
44
+ extern "C" {
45
+ #endif
46
+
47
+ /**
48
+ * A value stored in a @ref BloomFilter.
49
+ */
50
+
51
+ typedef void *BloomFilterValue;
52
+
53
+ /**
54
+ * Hash function used to generate hash values for values inserted into a
55
+ * bloom filter.
56
+ *
57
+ * @param data The value to generate a hash value for.
58
+ * @return The hash value.
59
+ */
60
+
61
+ typedef unsigned long (*BloomFilterHashFunc) (BloomFilterValue data);
62
+
63
+ /**
64
+ * A bloom filter structure.
65
+ */
66
+
67
+ typedef struct BloomFilter {
68
+ BloomFilterHashFunc hash_func;
69
+ unsigned char *table;
70
+ unsigned int table_size;
71
+ unsigned int num_functions;
72
+ } BloomFilter;
73
+
74
+
75
+ /**
76
+ * Create a new bloom filter.
77
+ *
78
+ * @param table_size The size of the bloom filter. The greater
79
+ * the table size, the more elements can be
80
+ * stored, and the lesser the chance of false
81
+ * positives.
82
+ * @param hash_func Hash function to use on values stored in the
83
+ * filter.
84
+ * @param num_functions Number of hash functions to apply to each
85
+ * element on insertion. The running time for
86
+ * insertion and queries is proportional to this
87
+ * value. The more functions applied, the lesser
88
+ * the chance of false positives. The maximum
89
+ * number of functions is 64.
90
+ * @return A new bloom filter structure, or NULL if it
91
+ * was not possible to allocate the new bloom
92
+ * filter.
93
+ */
94
+
95
+ BloomFilter *bloom_filter_new (unsigned int table_size, BloomFilterHashFunc hash_func, unsigned int num_functions);
96
+
97
+ /**
98
+ * Destroy a bloom filter.
99
+ *
100
+ * @param bloomfilter The bloom filter to destroy.
101
+ */
102
+
103
+ void bloom_filter_free (BloomFilter * bloomfilter);
104
+
105
+ /**
106
+ * Insert a value into a bloom filter.
107
+ *
108
+ * @param bloomfilter The bloom filter.
109
+ * @param value The value to insert.
110
+ */
111
+
112
+ void bloom_filter_insert (BloomFilter * bloomfilter, BloomFilterValue value);
113
+
114
+ /**
115
+ * Query a bloom filter for a particular value.
116
+ *
117
+ * @param bloomfilter The bloom filter.
118
+ * @param value The value to look up.
119
+ * @return Zero if the value was definitely not
120
+ * inserted into the filter. Non-zero
121
+ * indicates that it either may or may not
122
+ * have been inserted.
123
+ */
124
+
125
+ int bloom_filter_query (BloomFilter * bloomfilter, BloomFilterValue value);
126
+
127
+ /**
128
+ * Read the contents of a bloom filter into an array.
129
+ *
130
+ * @param bloomfilter The bloom filter.
131
+ * @param array Pointer to the array to read into. This
132
+ * should be (table_size + 7) / 8 bytes in
133
+ * length.
134
+ */
135
+
136
+ void bloom_filter_read (BloomFilter * bloomfilter, unsigned char *array);
137
+
138
+ /**
139
+ * Load the contents of a bloom filter from an array.
140
+ * The data loaded should be the output read from @ref bloom_filter_read,
141
+ * from a bloom filter created using the same arguments used to create
142
+ * the original filter.
143
+ *
144
+ * @param bloomfilter The bloom filter.
145
+ * @param array Pointer to the array to load from. This
146
+ * should be (table_size + 7) / 8 bytes in
147
+ * length.
148
+ */
149
+
150
+ void bloom_filter_load (BloomFilter * bloomfilter, unsigned char *array);
151
+
152
+ /**
153
+ * Find the union of two bloom filters. Values are present in the
154
+ * resulting filter if they are present in either of the original
155
+ * filters.
156
+ *
157
+ * Both of the original filters must have been created using the
158
+ * same parameters to @ref bloom_filter_new.
159
+ *
160
+ * @param filter1 The first filter.
161
+ * @param filter2 The second filter.
162
+ * @return A new filter which is the union of the
163
+ * two filters, or NULL if it was not possible
164
+ * to allocate memory for the new filter, or
165
+ * if the two filters specified were created
166
+ * with different parameters.
167
+ */
168
+
169
+ BloomFilter *bloom_filter_union (BloomFilter * filter1, BloomFilter * filter2);
170
+
171
+ /**
172
+ * Find the intersection of two bloom filters. Values are only ever
173
+ * present in the resulting filter if they are present in both of the
174
+ * original filters.
175
+ *
176
+ * Both of the original filters must have been created using the
177
+ * same parameters to @ref bloom_filter_new.
178
+ *
179
+ * @param filter1 The first filter.
180
+ * @param filter2 The second filter.
181
+ * @return A new filter which is an intersection of the
182
+ * two filters, or NULL if it was not possible
183
+ * to allocate memory for the new filter, or
184
+ * if the two filters specified were created
185
+ * with different parameters.
186
+ */
187
+
188
+ BloomFilter *bloom_filter_intersection (BloomFilter * filter1, BloomFilter * filter2);
189
+
190
+ #ifdef __cplusplus
191
+ }
192
+ #endif
193
+ #endif /* #ifndef ALGORITHM_BLOOM_FILTER_H */
@@ -0,0 +1,150 @@
1
+ #include <sys/types.h>
2
+ #include <sys/stat.h>
3
+ #include <fcntl.h>
4
+ #include <unistd.h>
5
+ #include <errno.h>
6
+ #include <string.h>
7
+ #include <stdlib.h>
8
+
9
+ #include "ruby/ruby.h"
10
+ #include "bloom-filter.h"
11
+ #include "hash-string.h"
12
+ #include "version.h"
13
+
14
+ #define TO_S(v) rb_funcall(v, rb_intern("to_s"), 0)
15
+ #define CSTRING(v) RSTRING_PTR(TO_S(v))
16
+
17
+ static void bloom_free(BloomFilter *filter) {
18
+ if (filter)
19
+ bloom_filter_free(filter);
20
+ }
21
+
22
+ VALUE bloom_allocate(VALUE klass) {
23
+ BloomFilter *filter = 0;
24
+ return Data_Wrap_Struct(klass, 0, bloom_free, filter);
25
+ }
26
+
27
+ BloomFilter* bloom_handle(VALUE self) {
28
+ BloomFilter *filter = 0;
29
+ Data_Get_Struct(self, BloomFilter, filter);
30
+ if (!filter)
31
+ rb_raise(rb_eArgError, "invalid BloomFilter instance");
32
+ return filter;
33
+ }
34
+
35
+ VALUE bloom_initialize(int argc, VALUE *argv, VALUE self) {
36
+ size_t size;
37
+ VALUE table_size;
38
+ BloomFilter *filter = 0;
39
+
40
+ rb_scan_args(argc, argv, "01", &table_size);
41
+ if (NIL_P(table_size))
42
+ size = 1000000;
43
+ else
44
+ size = atol(CSTRING(table_size));
45
+
46
+ filter = bloom_filter_new(size, string_nocase_hash, 4);
47
+
48
+ if (!filter)
49
+ rb_raise(rb_eNoMemError, "unable to allocate memory for BloomFilter");
50
+
51
+ DATA_PTR(self) = filter;
52
+ }
53
+
54
+
55
+ VALUE bloom_insert(VALUE klass, VALUE string) {
56
+ BloomFilter *filter = bloom_handle(klass);
57
+ bloom_filter_insert(filter, (BloomFilterValue)CSTRING(string));
58
+ return Qtrue;
59
+ }
60
+
61
+ VALUE bloom_exists(VALUE klass, VALUE string) {
62
+ BloomFilter *filter = bloom_handle(klass);
63
+ return bloom_filter_query(filter, (BloomFilterValue)CSTRING(string)) ? Qtrue : Qfalse;
64
+ }
65
+
66
+ VALUE bloom_dump(VALUE klass, VALUE file) {
67
+ int fd;
68
+ BloomFilter *filter = bloom_handle(klass);
69
+
70
+ size_t size = (filter->table_size + 7) / 8;
71
+ void *buffer = malloc(size);
72
+
73
+ if (!buffer)
74
+ rb_raise(rb_eNoMemError, "out of memory dumping BloomFilter");
75
+
76
+ bloom_filter_read(filter, buffer);
77
+
78
+ fd = open(CSTRING(file), O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
79
+ if (fd == -1) {
80
+ free(buffer);
81
+ rb_raise(rb_eIOError, "unable to open file. %s", strerror(errno));
82
+ }
83
+
84
+ if (write(fd, buffer, size) != -1) {
85
+ free(buffer);
86
+ close(fd);
87
+ return Qtrue;
88
+ }
89
+ else {
90
+ free(buffer);
91
+ close(fd);
92
+ rb_raise(rb_eIOError, "error dumping BloomFilter: %s\n", strerror(errno));
93
+ return Qfalse; // not reachable
94
+ }
95
+ }
96
+
97
+ VALUE bloom_load(int argc, VALUE *argv, VALUE klass) {
98
+ int fd;
99
+ void *buffer;
100
+ size_t size, bytes;
101
+ BloomFilter *filter;
102
+ VALUE file, table_size, instance;
103
+
104
+ rb_scan_args(argc, argv, "11", &file, &table_size);
105
+ if (NIL_P(table_size)) {
106
+ size = 1000000;
107
+ table_size = INT2NUM(size);
108
+ }
109
+ else
110
+ size = atol(CSTRING(table_size));
111
+
112
+ fd = open(CSTRING(file), O_RDONLY);
113
+ if (fd == -1)
114
+ rb_raise(rb_eIOError, "unable to open file: %s", strerror(errno));
115
+
116
+ size = (size + 7) / 8;
117
+ buffer = malloc(size);
118
+ if (!buffer) {
119
+ close(fd);
120
+ rb_raise(rb_eNoMemError, "out of memory dumping BloomFilter");
121
+ }
122
+
123
+ bytes = read(fd, buffer, size);
124
+ if (bytes != size) {
125
+ free(buffer);
126
+ close(fd);
127
+ rb_raise(rb_eStandardError, "unable to load BloomFilter, expected %ld but got %ld bytes", size, bytes);
128
+ }
129
+
130
+ instance = bloom_allocate(klass);
131
+ bloom_initialize(1, &table_size, instance);
132
+
133
+ bloom_filter_load(bloom_handle(instance), buffer);
134
+ free(buffer);
135
+ close(fd);
136
+ return instance;
137
+ }
138
+
139
+ Init_bloom_filter() {
140
+ VALUE cBloom = rb_define_class("BloomFilter", rb_cObject);
141
+
142
+ rb_define_method(cBloom, "initialize", RUBY_METHOD_FUNC(bloom_initialize), -1);
143
+ rb_define_method(cBloom, "dump", RUBY_METHOD_FUNC(bloom_dump), 1);
144
+ rb_define_method(cBloom, "insert", RUBY_METHOD_FUNC(bloom_insert), 1);
145
+ rb_define_method(cBloom, "exists?", RUBY_METHOD_FUNC(bloom_exists), 1);
146
+
147
+ rb_define_alloc_func(cBloom, bloom_allocate);
148
+ rb_define_singleton_method(cBloom, "load", RUBY_METHOD_FUNC(bloom_load), -1);
149
+ rb_define_const(cBloom, "VERSION", rb_str_new2(RUBY_BLOOM_FILTER_VERSION));
150
+ }
data/ext/extconf.rb ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'mkmf'
4
+ create_makefile 'bloom_filter'
data/ext/hash-string.c ADDED
@@ -0,0 +1,58 @@
1
+ /*
2
+
3
+ Copyright (c) 2005-2008, Simon Howard
4
+
5
+ Permission to use, copy, modify, and/or distribute this software
6
+ for any purpose with or without fee is hereby granted, provided
7
+ that the above copyright notice and this permission notice appear
8
+ in all copies.
9
+
10
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11
+ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12
+ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13
+ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14
+ CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16
+ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17
+ CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+
19
+ */
20
+
21
+ #include <ctype.h>
22
+
23
+ #include "hash-string.h"
24
+
25
+ /* String hash function */
26
+
27
unsigned long string_hash (void *string) {
    /* djb2-style shift-and-XOR hash: start from 5381 and, for each byte
     * up to the terminating NUL, mix it in as (h << 5) ^ h ^ byte. */

    const unsigned char *cursor = (const unsigned char *) string;
    unsigned long hash = 5381;

    for (; *cursor != '\0'; ++cursor) {
        hash = (hash << 5) ^ hash ^ *cursor;
    }

    return hash;
}
42
+
43
+ /* The same function, with a tolower on every character so that
44
+ * case is ignored. This code is duplicated for performance. */
45
+
46
unsigned long string_nocase_hash (void *string) {
    /* Same mixing step as string_hash, applied to the lower-cased form
     * of each byte so that letter case does not affect the result. */

    const unsigned char *cursor = (const unsigned char *) string;
    unsigned long hash = 5381;

    for (; *cursor != '\0'; ++cursor) {
        hash = (hash << 5) ^ hash ^ tolower (*cursor);
    }

    return hash;
}
data/ext/hash-string.h ADDED
@@ -0,0 +1,56 @@
1
+ /*
2
+
3
+ Copyright (c) 2005-2008, Simon Howard
4
+
5
+ Permission to use, copy, modify, and/or distribute this software
6
+ for any purpose with or without fee is hereby granted, provided
7
+ that the above copyright notice and this permission notice appear
8
+ in all copies.
9
+
10
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
11
+ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
12
+ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
13
+ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR
14
+ CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
15
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
16
+ NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
17
+ CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+
19
+ */
20
+
21
+ /**
22
+ * @file hash-string.h
23
+ *
24
+ * Hash functions for text strings. For more information
25
+ * see @ref string_hash or @ref string_nocase_hash.
26
+ */
27
+
28
+ #ifndef ALGORITHM_HASH_STRING_H
29
+ #define ALGORITHM_HASH_STRING_H
30
+
31
+ #ifdef __cplusplus
32
+ extern "C" {
33
+ #endif
34
+
35
+ /**
36
+ * Generate a hash key from a string.
37
+ *
38
+ * @param string The string.
39
+ * @return A hash key for the string.
40
+ */
41
+
42
+ unsigned long string_hash (void *string);
43
+
44
+ /**
45
+ * Generate a hash key from a string, ignoring the case of letters.
46
+ *
47
+ * @param string The string.
48
+ * @return A hash key for the string.
49
+ */
50
+
51
+ unsigned long string_nocase_hash (void *string);
52
+
53
+ #ifdef __cplusplus
54
+ }
55
+ #endif
56
+ #endif /* #ifndef ALGORITHM_HASH_STRING_H */
data/ext/version.h ADDED
@@ -0,0 +1 @@
1
+ #define RUBY_BLOOM_FILTER_VERSION "0.1.0"
@@ -0,0 +1 @@
1
+ require 'bloom_filter'
Binary file
data/test/helper.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'bloom-filter'
2
+ require 'minitest/spec'
3
+
4
+ MiniTest::Unit.autorun
@@ -0,0 +1,23 @@
1
+ require 'helper'
2
+
3
+ describe 'BloomFilter primitives' do
4
+ it 'should create one with default size' do
5
+ assert BloomFilter.new
6
+ end
7
+
8
+ it 'should create one with given size' do
9
+ assert BloomFilter.new(100)
10
+ end
11
+
12
+ it 'should insert' do
13
+ assert filter = BloomFilter.new(100)
14
+ assert filter.insert("foo")
15
+ end
16
+
17
+ it 'should allow membership checks' do
18
+ assert filter = BloomFilter.new(100)
19
+ assert filter.insert("foo")
20
+ assert filter.exists?("foo")
21
+ assert !filter.exists?("bar")
22
+ end
23
+ end
data/test/test_io.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'helper'
2
+ require 'tempfile'
3
+
4
+ describe 'BloomFilter load & dump' do
5
+ it 'should dump and load a filter' do
6
+ file = Tempfile.new("bloom-filter-test")
7
+
8
+ assert filter = BloomFilter.new(100)
9
+ assert filter.insert("foo")
10
+ assert filter.dump(file.path), "dump filter"
11
+ assert filter = BloomFilter.load(file.path, 100)
12
+ assert filter.exists?("foo")
13
+ assert !filter.exists?("bar")
14
+ end
15
+ end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bloom-filter
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Bharanee Rathna
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2012-01-25 00:00:00 +11:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rake
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ version: "0"
31
+ type: :development
32
+ version_requirements: *id001
33
+ - !ruby/object:Gem::Dependency
34
+ name: rake-compiler
35
+ prerelease: false
36
+ requirement: &id002 !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 0
43
+ version: "0"
44
+ type: :development
45
+ version_requirements: *id002
46
+ description: A fast Bloom Filter library for Ruby for unices.
47
+ email:
48
+ - deepfryed@gmail.com
49
+ executables: []
50
+
51
+ extensions:
52
+ - ext/extconf.rb
53
+ extra_rdoc_files: []
54
+
55
+ files:
56
+ - ext/hash-string.c
57
+ - ext/bloom-filter.c
58
+ - ext/bloom_filter.c
59
+ - ext/bloom-filter.h
60
+ - ext/version.h
61
+ - ext/hash-string.h
62
+ - ext/extconf.rb
63
+ - test/test_io.rb
64
+ - test/helper.rb
65
+ - test/test_basic.rb
66
+ - README.md
67
+ - CHANGELOG
68
+ - lib/bloom-filter.rb
69
+ - lib/bloom_filter.so
70
+ has_rdoc: true
71
+ homepage: http://github.com/deepfryed/bloom-filter
72
+ licenses: []
73
+
74
+ post_install_message:
75
+ rdoc_options: []
76
+
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ segments:
85
+ - 0
86
+ version: "0"
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ segments:
93
+ - 0
94
+ version: "0"
95
+ requirements: []
96
+
97
+ rubyforge_project:
98
+ rubygems_version: 1.3.7
99
+ signing_key:
100
+ specification_version: 3
101
+ summary: A fast Bloom Filter library for Ruby.
102
+ test_files: []
103
+