blurrily 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ed39eb955b4d71f3b924a16be4430046ba1d02ab
4
+ data.tar.gz: 1c5a5b42b6877ad3d66928a0fe0520ea73defa9b
5
+ SHA512:
6
+ metadata.gz: 54fdb049c894470cf18afdafe18053607e1b4336b6f7353866ae8d81115e87a97ed6f5273270d930a88c292bf02a361868280997b6dbe5668c894aa456745950
7
+ data.tar.gz: b8c280aa93d062a9a89fbda80cdf3365efcb34ed3e3c28d8dadf6c9b9ee5deba389a3a0233c788e8e182894b1067254ec9a9ef4ae80e7c1676a60edd6cd50e83
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 HouseTrip Ltd.
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+ # Blurrily — Fast fuzzy text search
2
+
3
+ [![Build Status](https://travis-ci.org/mezis/blurrily.png?branch=master)](https://travis-ci.org/mezis/blurrily)
4
+ [![Dependency Status](https://gemnasium.com/mezis/blurrily.png)](https://gemnasium.com/mezis/blurrily)
5
+ [![Code Climate](https://codeclimate.com/github/mezis/blurrily.png)](https://codeclimate.com/github/mezis/blurrily)
6
+
7
+ This will be a C version of [fuzzily](http://github.com/mezis/fuzzily), a
8
+ Ruby gem to perform fuzzy text searching.
9
+
10
+ WORK IN PROGRESS.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ gem 'blurrily'
17
+
18
+ And then execute:
19
+
20
+ $ bundle
21
+
22
+ Or install it yourself as:
23
+
24
+ $ gem install blurrily
25
+
26
+ ## Usage
27
+
28
+ TODO: Write usage instructions here
29
+
30
+ ## Contributing
31
+
32
+ 1. Fork it
33
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
34
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
35
+ 4. Push to the branch (`git push origin my-new-feature`)
36
+ 5. Create new Pull Request
@@ -0,0 +1,2 @@
1
/* Marks a struct declaration as packed (no padding between members). */
#define PACKED_STRUCT __attribute__((__packed__))

/* Wraps a declarator so the compiler does not warn when it is never used. */
#define UNUSED(ident) ident __attribute__((unused))
@@ -0,0 +1,17 @@
1
require 'mkmf'

# Upper-cased output of `uname` (e.g. "LINUX"); passed to the C sources as the
# PLATFORM_<NAME> preprocessor symbol.
PLATFORM = `uname`.strip.upcase

# Assemble the flags in a local so the SHARED_FLAGS constant is assigned
# exactly once (re-assigning a constant with `+=` triggers an
# "already initialized constant" warning).
shared_flags = ["-DPLATFORM_#{PLATFORM}", '--std=c99', '-Wall', '-Wextra', '-Werror']
shared_flags << '-D_XOPEN_SOURCE=500' if PLATFORM == 'LINUX' # for ftruncate to be present

SHARED_FLAGS = shared_flags.join(' ')

# production
$CFLAGS += " #{SHARED_FLAGS} -O3 -fno-fast-math"

# development
# $CFLAGS += " #{SHARED_FLAGS} -O0 -g"

create_makefile('blurrily/map_ext')
@@ -0,0 +1,5 @@
1
/* LOG(...) forwards its printf-style arguments to stderr when DEBUG is
   defined, and expands to nothing otherwise. */
#ifndef DEBUG
#define LOG(...)
#else
#define LOG(...) fprintf(stderr, __VA_ARGS__)
#endif
@@ -0,0 +1,174 @@
1
+ #include <ruby.h>
2
+ #include <assert.h>
3
+ #include "storage.h"
4
+ #include "blurrily.h"
5
+
6
+ /******************************************************************************/
7
+
8
+ static void blurrily_free(void* haystack)
9
+ {
10
+ int res = -1;
11
+
12
+ res = blurrily_storage_close((trigram_map*) &haystack);
13
+ assert(res >= 0);
14
+ }
15
+
16
+ /******************************************************************************/
17
+
18
+ static VALUE blurrily_new(VALUE class) {
19
+ VALUE wrapper = Qnil;
20
+ trigram_map haystack = (trigram_map)NULL;
21
+ int res = -1;
22
+
23
+ res = blurrily_storage_new(&haystack);
24
+ if (res < 0) { rb_sys_fail(NULL); return Qnil; }
25
+
26
+ wrapper = Data_Wrap_Struct(class, 0, blurrily_free, (void*)haystack);
27
+ rb_obj_call_init(wrapper, 0, NULL);
28
+ return wrapper;
29
+ }
30
+
31
+ /******************************************************************************/
32
+
33
+ static VALUE blurrily_load(VALUE class, VALUE rb_path) {
34
+ char* path = StringValuePtr(rb_path);
35
+ VALUE wrapper = Qnil;
36
+ trigram_map haystack = (trigram_map)NULL;
37
+ int res = -1;
38
+
39
+ res = blurrily_storage_load(&haystack, path);
40
+ if (res < 0) { rb_sys_fail(NULL); return Qnil; }
41
+
42
+ wrapper = Data_Wrap_Struct(class, 0, blurrily_free, (void*)haystack);
43
+ rb_obj_call_init(wrapper, 0, NULL);
44
+ return wrapper;
45
+ }
46
+
47
+ /******************************************************************************/
48
+
49
+ static VALUE blurrily_initialize(VALUE UNUSED(self)) {
50
+ return Qtrue;
51
+ }
52
+
53
+ /******************************************************************************/
54
+
55
+ static VALUE blurrily_put(VALUE self, VALUE rb_needle, VALUE rb_reference, VALUE rb_weight) {
56
+ trigram_map haystack = (trigram_map)NULL;
57
+ int res = -1;
58
+ char* needle = StringValuePtr(rb_needle);
59
+ uint32_t reference = NUM2UINT(rb_reference);
60
+ uint32_t weight = NUM2UINT(rb_weight);
61
+
62
+ Data_Get_Struct(self, struct trigram_map_t, haystack);
63
+
64
+ res = blurrily_storage_put(haystack, needle, reference, weight);
65
+ assert(res >= 0);
66
+
67
+ return Qnil;
68
+ }
69
+
70
+ /******************************************************************************/
71
+
72
+ static VALUE blurrily_delete(VALUE self, VALUE rb_reference) {
73
+ trigram_map haystack = (trigram_map)NULL;
74
+ uint32_t reference = NUM2UINT(rb_reference);
75
+ int res = -1;
76
+
77
+ Data_Get_Struct(self, struct trigram_map_t, haystack);
78
+
79
+ res = blurrily_storage_delete(haystack, reference);
80
+ assert(res >= 0);
81
+
82
+ return INT2NUM(res);
83
+ }
84
+
85
+ /******************************************************************************/
86
+
87
+ static VALUE blurrily_save(VALUE self, VALUE rb_path) {
88
+ trigram_map haystack = (trigram_map)NULL;
89
+ int res = -1;
90
+ const char* path = StringValuePtr(rb_path);
91
+
92
+ Data_Get_Struct(self, struct trigram_map_t, haystack);
93
+
94
+ res = blurrily_storage_save(haystack, path);
95
+ assert(res >= 0);
96
+
97
+ return Qnil;
98
+ }
99
+
100
+ /******************************************************************************/
101
+
102
+ static VALUE blurrily_find(VALUE self, VALUE rb_needle, VALUE rb_limit) {
103
+ trigram_map haystack = (trigram_map)NULL;
104
+ int res = -1;
105
+ const char* needle = StringValuePtr(rb_needle);
106
+ int limit = NUM2UINT(rb_limit);
107
+ trigram_match matches = NULL;
108
+ VALUE rb_matches = Qnil;
109
+
110
+ if (limit <= 0) { limit = 10 ; }
111
+ matches = (trigram_match) malloc(limit * sizeof(trigram_match_t));
112
+
113
+ Data_Get_Struct(self, struct trigram_map_t, haystack);
114
+
115
+ res = blurrily_storage_find(haystack, needle, limit, matches);
116
+ assert(res >= 0);
117
+
118
+ /* wrap the matches into a Ruby array */
119
+ rb_matches = rb_ary_new();
120
+ for (int k = 0; k < res; ++k) {
121
+ VALUE rb_match = rb_ary_new();
122
+ rb_ary_push(rb_match, rb_uint_new(matches[k].reference));
123
+ rb_ary_push(rb_match, rb_uint_new(matches[k].matches));
124
+ rb_ary_push(rb_match, rb_uint_new(matches[k].weight));
125
+ rb_ary_push(rb_matches, rb_match);
126
+ }
127
+ return rb_matches;
128
+ }
129
+
130
+
131
+ /******************************************************************************/
132
+
133
+ static VALUE blurrily_stats(VALUE self)
134
+ {
135
+ trigram_map haystack = (trigram_map)NULL;
136
+ trigram_stat_t stats;
137
+ VALUE result = rb_hash_new();
138
+ int res = -1;
139
+
140
+ Data_Get_Struct(self, struct trigram_map_t, haystack);
141
+
142
+ res = blurrily_storage_stats(haystack, &stats);
143
+ assert(res >= 0);
144
+
145
+ (void) rb_hash_aset(result, ID2SYM(rb_intern("references")), UINT2NUM(stats.references));
146
+ (void) rb_hash_aset(result, ID2SYM(rb_intern("trigrams")), UINT2NUM(stats.trigrams));
147
+
148
+ return result;
149
+ }
150
+
151
+ /******************************************************************************/
152
+
153
+ void Init_map_ext(void) {
154
+ VALUE module = Qnil;
155
+ VALUE klass = Qnil;
156
+
157
+ /* assume we haven't yet defined blurrily */
158
+ module = rb_define_module("Blurrily");
159
+ assert(module != Qnil);
160
+
161
+ klass = rb_define_class_under(module, "Map", rb_cObject);
162
+ assert(klass != Qnil);
163
+
164
+ rb_define_singleton_method(klass, "new", blurrily_new, 0);
165
+ rb_define_singleton_method(klass, "load", blurrily_load, 1);
166
+
167
+ rb_define_method(klass, "initialize", blurrily_initialize, 0);
168
+ rb_define_method(klass, "put", blurrily_put, 3);
169
+ rb_define_method(klass, "delete", blurrily_delete, 1);
170
+ rb_define_method(klass, "save", blurrily_save, 1);
171
+ rb_define_method(klass, "find", blurrily_find, 2);
172
+ rb_define_method(klass, "stats", blurrily_stats, 0);
173
+ return;
174
+ }
@@ -0,0 +1,541 @@
1
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
9
+
10
+ #ifdef PLATFORM_LINUX
11
+ #include <linux/limits.h>
12
+ #define MERGESORT fake_mergesort
13
+ #else
14
+ #include <limits.h>
15
+ #define MERGESORT mergesort
16
+ #endif
17
+
18
+ #ifndef PATH_MAX
19
+ /* safe default ... */
20
+ #define PATH_MAX 1024
21
+ #endif
22
+
23
+ #include "storage.h"
24
+
25
+ #include "log.h"
26
+
27
+ /******************************************************************************/
28
+
29
/* Granularity used when laying out the on-disk file. */
#define PAGE_SIZE 4096
/* Number of distinct trigram codes (TRIGRAM_BASE cubed). */
#define TRIGRAM_COUNT (TRIGRAM_BASE * TRIGRAM_BASE * TRIGRAM_BASE)
/* Initial bucket count of a trigram's entry array. Parenthesized so the
   macro stays a single operand wherever it is expanded. */
#define TRIGRAM_ENTRIES_START_SIZE (PAGE_SIZE / 8)
32
+
33
+ /******************************************************************************/
34
+
35
+ /* one trigram entry -- client reference and sorting weight */
36
+ struct PACKED_STRUCT trigram_entry_t
37
+ {
38
+ uint32_t reference;
39
+ uint32_t weight;
40
+ };
41
+ typedef struct trigram_entry_t trigram_entry_t;
42
+
43
+
44
+ /* collection of entries for a given trigram */
45
+ /* <entries> points to an array of <buckets> entries */
46
+ /* of which <used> are filled */
47
+ struct PACKED_STRUCT trigram_entries_t
48
+ {
49
+ uint32_t buckets;
50
+ uint32_t used;
51
+
52
+ trigram_entry_t* entries; /* set when the structure is in memory */
53
+ size_t entries_offset; /* set when the structure is on disk */
54
+
55
+ uint8_t dirty; /* not optimised (presorted) yet */
56
+ };
57
+ typedef struct trigram_entries_t trigram_entries_t;
58
+
59
+
60
+ /* hash map of all possible trigrams to collection of entries */
61
+ /* there are 28^3 = 19,683 possible trigrams */
62
+ struct PACKED_STRUCT trigram_map_t
63
+ {
64
+ char magic[6]; /* the string "trigra" */
65
+ uint8_t big_endian;
66
+ uint8_t pointer_size;
67
+
68
+ uint32_t total_references;
69
+ uint32_t total_trigrams;
70
+ size_t mapped_size; /* when mapped from disk, the number of bytes mapped */
71
+ int mapped_fd; /* when mapped from disk, the file descriptor */
72
+
73
+ trigram_entries_t map[TRIGRAM_COUNT]; /* this whole structure is ~500KB */
74
+ };
75
+ typedef struct trigram_map_t trigram_map_t;
76
+
77
+ /******************************************************************************/
78
+
79
+ #ifdef PLATFORM_LINUX
80
+ /* fake version of mergesort(3) implemented with qsort(3) as Linux lacks */
81
+ /* the specific variants */
82
/* Drop-in stand-in for mergesort(3): delegates to qsort(3) and always
   reports success (0). */
static int fake_mergesort(void *items, size_t count, size_t item_size, int (*order)(const void *, const void *))
{
  qsort(items, count, item_size, order);

  return 0;
}
87
+ #endif
88
+
89
+ /******************************************************************************/
90
+
91
/* Byte-order tag of the running machine: 1 for little endian, 2 for big
   endian. Looks at which byte of a known 32-bit constant is stored first. */
static uint8_t get_big_endian(void)
{
  const uint32_t       probe = 0xAA0000BB;
  const unsigned char* bytes = (const unsigned char*) &probe;

  if (bytes[0] == 0xBB) {
    return 1;
  }
  return 2;
}
99
+
100
+ /******************************************************************************/
101
+
102
/* Width of a data pointer on this platform, in bytes (4 or 8). */
static uint8_t get_pointer_size(void)
{
  const size_t width = sizeof(void*);

  return (uint8_t) width;
}
107
+
108
+ /******************************************************************************/
109
+
110
+ static int compare_entries(const void* left_p, const void* right_p)
111
+ {
112
+ trigram_entry_t* left = (trigram_entry_t*)left_p;
113
+ trigram_entry_t* right = (trigram_entry_t*)right_p;
114
+ return (int)left->reference - (int)right->reference;
115
+ }
116
+
117
+ /* compares matches on #matches (descending) then weight (ascending) */
118
+ static int compare_matches(const void* left_p, const void* right_p)
119
+ {
120
+ trigram_match_t* left = (trigram_match_t*)left_p;
121
+ trigram_match_t* right = (trigram_match_t*)right_p;
122
+ /* int delta = (int)left->matches - (int)right->matches; */
123
+ int delta = (int)right->matches - (int)left->matches;
124
+
125
+ return (delta != 0) ? delta : ((int)left->weight - (int)right->weight);
126
+
127
+ }
128
+
129
+ /******************************************************************************/
130
+
131
+ static void sort_map_if_dirty(trigram_entries_t* map)
132
+ {
133
+ int res = -1;
134
+ if (! map->dirty) return;
135
+
136
+ res = MERGESORT(map->entries, map->used, sizeof(trigram_entry_t), &compare_entries);
137
+ assert(res >= 0);
138
+ map->dirty = 0;
139
+ }
140
+
141
+ /******************************************************************************/
142
+
143
+ static size_t round_to_page(size_t value)
144
+ {
145
+ if (value % PAGE_SIZE == 0) return value;
146
+ return (value / PAGE_SIZE + 1) * PAGE_SIZE;
147
+ }
148
+
149
+ /******************************************************************************/
150
+
151
+ static size_t get_map_size(trigram_map haystack, int index)
152
+ {
153
+ return haystack->map[index].buckets * sizeof(trigram_entry_t);
154
+ }
155
+
156
+ /******************************************************************************/
157
+
158
/* Releases <ptr> with free(3) unless it is NULL. */
static void free_if(void* ptr)
{
  if (ptr != NULL) {
    free(ptr);
  }
}
164
+
165
+ /******************************************************************************/
166
+
167
+ int blurrily_storage_new(trigram_map* haystack_ptr)
168
+ {
169
+ trigram_map haystack = (trigram_map)NULL;
170
+ trigram_entries_t* ptr = NULL;
171
+ int k = 0;
172
+
173
+ LOG("blurrily_storage_new\n");
174
+ haystack = (trigram_map) malloc(sizeof(trigram_map_t));
175
+ if (haystack == NULL) return -1;
176
+
177
+ memset(haystack, 0x00, sizeof(trigram_map_t));
178
+
179
+ memcpy(haystack->magic, "trigra", 6);
180
+ haystack->big_endian = get_big_endian();
181
+ haystack->pointer_size = get_pointer_size();
182
+
183
+ haystack->mapped_size = 0; /* not mapped, as we just created it in memory */
184
+ haystack->mapped_fd = 0;
185
+ haystack->total_references = 0;
186
+ haystack->total_trigrams = 0;
187
+ for(k = 0, ptr = haystack->map ; k < TRIGRAM_COUNT ; ++k, ++ptr) {
188
+ ptr->buckets = 0;
189
+ ptr->used = 0;
190
+ ptr->dirty = 0;
191
+ ptr->entries = (trigram_entry_t*)NULL;
192
+ }
193
+
194
+ *haystack_ptr = haystack;
195
+ return 0;
196
+ }
197
+
198
+ /******************************************************************************/
199
+
200
+ int blurrily_storage_load(trigram_map* haystack, const char* path)
201
+ {
202
+ int fd = -1;
203
+ int res = -1;
204
+ trigram_map header = NULL;
205
+ uint8_t* origin = NULL;
206
+ struct stat metadata;
207
+
208
+ /* open and map file */
209
+ res = fd = open(path, O_RDONLY);
210
+ if (res < 0) goto cleanup;
211
+
212
+ res = fstat(fd, &metadata);
213
+ if (res < 0) goto cleanup;
214
+
215
+ header = (trigram_map) mmap(NULL, metadata.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
216
+ assert(header != NULL);
217
+
218
+ /* check magic */
219
+ /* TODO */
220
+
221
+ /* fix header data */
222
+ header->mapped_size = metadata.st_size;
223
+ header->mapped_fd = fd;
224
+ origin = (uint8_t*)header;
225
+ for (int k = 0; k < TRIGRAM_COUNT; ++k) {
226
+ trigram_entries_t* map = header->map + k;
227
+ if (map->entries_offset == 0) continue;
228
+ map->entries = (trigram_entry_t*) (origin + map->entries_offset);
229
+ map->entries_offset = 0;
230
+ }
231
+ *haystack = header;
232
+
233
+ cleanup:
234
+ return res;
235
+ }
236
+
237
+ /******************************************************************************/
238
+
239
+ int blurrily_storage_close(trigram_map* haystack_ptr)
240
+ {
241
+ trigram_map haystack = *haystack_ptr;
242
+ int res = -1;
243
+
244
+ LOG("blurrily_storage_close\n");
245
+
246
+ if (haystack->mapped_size) {
247
+ int fd = haystack->mapped_fd;
248
+
249
+ res = munmap(haystack, haystack->mapped_size);
250
+ assert(res >= 0);
251
+
252
+ res = close(fd);
253
+ assert(res >= 0);
254
+ } else {
255
+ trigram_entries_t* ptr = haystack->map;
256
+ for(int k = 0 ; k < TRIGRAM_COUNT ; ++k) {
257
+ free(ptr->entries);
258
+ ++ptr;
259
+ }
260
+ free(haystack);
261
+ }
262
+
263
+ *haystack_ptr = NULL;
264
+ return 0;
265
+ }
266
+
267
+ /******************************************************************************/
268
+
269
+ int blurrily_storage_save(trigram_map haystack, const char* path)
270
+ {
271
+ int fd = -1;
272
+ int res = -1;
273
+ uint8_t* ptr = (uint8_t*)NULL;
274
+ size_t total_size = 0;
275
+ size_t offset = 0;
276
+ trigram_map header = NULL;
277
+ char path_tmp[PATH_MAX];
278
+
279
+ /* cleanup maps in memory */
280
+ for (int k = 0; k < TRIGRAM_COUNT; ++k) {
281
+ sort_map_if_dirty(haystack->map + k);
282
+ }
283
+
284
+ /* path for temporary file */
285
+ snprintf(path_tmp, PATH_MAX, "%s.tmp", path);
286
+
287
+ /* compute storage space required */
288
+ total_size += round_to_page(sizeof(trigram_map_t));
289
+
290
+ for (int k = 0; k < TRIGRAM_COUNT; ++k) {
291
+ total_size += round_to_page(get_map_size(haystack, k));
292
+ }
293
+
294
+ /* open and map file */
295
+ fd = open(path_tmp, O_RDWR | O_CREAT | O_TRUNC, 0644);
296
+ assert(fd >= 0);
297
+
298
+ res = ftruncate(fd, total_size);
299
+ assert(res >= 0);
300
+
301
+ ptr = mmap(NULL, total_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
302
+ assert(ptr != NULL);
303
+
304
+ /* flush data */
305
+ memset(ptr, 0x00, total_size);
306
+
307
+ /* copy header & clean copy */
308
+ memcpy(ptr, (void*)haystack, sizeof(trigram_map_t));
309
+ offset += round_to_page(sizeof(trigram_map_t));
310
+ header = (trigram_map)ptr;
311
+
312
+ header->mapped_size = 0;
313
+ header->mapped_fd = 0;
314
+
315
+ /* copy each map, set offset in header */
316
+ for (int k = 0; k < TRIGRAM_COUNT; ++k) {
317
+ size_t block_size = get_map_size(haystack, k);
318
+
319
+ if (block_size > 0) {
320
+ memcpy(ptr+offset, haystack->map[k].entries, block_size);
321
+
322
+ header->map[k].entries = NULL;
323
+ header->map[k].entries_offset = offset;
324
+
325
+ offset += round_to_page(block_size);
326
+ } else {
327
+ header->map[k].entries = NULL;
328
+ header->map[k].entries_offset = 0;
329
+ }
330
+ }
331
+ assert(offset == total_size);
332
+
333
+ res = munmap(ptr, total_size);
334
+ assert(res >= 0);
335
+
336
+ res = close(fd);
337
+ assert(res >= 0);
338
+
339
+ /* commit by renaming the file */
340
+ res = rename(path_tmp, path);
341
+ assert(res >= 0);
342
+
343
+ return 0;
344
+ }
345
+
346
+ /******************************************************************************/
347
+
348
+ int blurrily_storage_put(trigram_map haystack, const char* needle, uint32_t reference, uint32_t weight)
349
+ {
350
+ int nb_trigrams = -1;
351
+ int length = strlen(needle);
352
+ trigram_t* trigrams = (trigram_t*)NULL;
353
+
354
+ trigrams = (trigram_t*)malloc((length+1) * sizeof(trigram_t));
355
+ nb_trigrams = blurrily_tokeniser_parse_string(needle, trigrams);
356
+
357
+ if (weight <= 0) weight = length;
358
+
359
+ for (int k = 0; k < nb_trigrams; ++k) {
360
+ trigram_t t = trigrams[k];
361
+ trigram_entries_t* map = &haystack->map[t];
362
+ trigram_entry_t entry = { reference, weight };
363
+
364
+ assert(t < TRIGRAM_COUNT);
365
+ assert(map-> used <= map-> buckets);
366
+
367
+ /* allocate more space as needed (exponential growth) */
368
+ if (map->buckets == 0) {
369
+ LOG("- alloc for %d\n", t);
370
+
371
+ map->buckets = TRIGRAM_ENTRIES_START_SIZE;
372
+ map->entries = (trigram_entry_t*) calloc(map->buckets, sizeof(trigram_entry_t));
373
+ }
374
+ if (map->used == map->buckets) {
375
+ uint32_t new_buckets = map->buckets * 4/3;
376
+ trigram_entry_t* new_entries = NULL;
377
+ LOG("- realloc for %d\n", t);
378
+
379
+ /* copy old data, free old pointer, zero extra space */
380
+ new_entries = malloc(new_buckets * sizeof(trigram_entry_t));
381
+ assert(new_entries != NULL);
382
+ memcpy(new_entries, map->entries, map->buckets * sizeof(trigram_entry_t));
383
+ free(map->entries);
384
+ memset(new_entries + map->buckets, 0x00, (new_buckets - map->buckets) * sizeof(trigram_entry_t));
385
+ /* swap fields */
386
+ map->buckets = new_buckets;
387
+ map->entries = new_entries;
388
+ }
389
+ map->entries[map->used] = entry;
390
+
391
+ map->used += 1;
392
+ map->dirty = 1;
393
+ }
394
+ haystack->total_trigrams += nb_trigrams;
395
+ haystack->total_references += 1;
396
+
397
+ free((void*)trigrams);
398
+ return 0;
399
+ }
400
+
401
+ /******************************************************************************/
402
+
403
+ int blurrily_storage_find(trigram_map haystack, const char* needle, uint16_t limit, trigram_match results)
404
+ {
405
+ int nb_trigrams = -1;
406
+ int length = strlen(needle);
407
+ trigram_t* trigrams = (trigram_t*)NULL;
408
+ int nb_entries = -1;
409
+ trigram_entry_t* entries = NULL;
410
+ trigram_entry_t* entry_ptr = NULL;
411
+ int nb_matches = -1;
412
+ trigram_match_t* matches = NULL;
413
+ trigram_match_t* match_ptr = NULL;
414
+ uint32_t last_ref = (uint32_t)-1;
415
+ int nb_results = 0;
416
+
417
+ trigrams = (trigram_t*)malloc((length+1) * sizeof(trigram_t));
418
+ nb_trigrams = blurrily_tokeniser_parse_string(needle, trigrams);
419
+ if (nb_trigrams == 0) goto cleanup;
420
+
421
+ LOG("%d trigrams in '%s'\n", nb_trigrams, needle);
422
+
423
+ /* measure size required for sorting */
424
+ nb_entries = 0;
425
+ for (int k = 0; k < nb_trigrams; ++k) {
426
+ trigram_t t = trigrams[k];
427
+ nb_entries += haystack->map[t].used;
428
+ }
429
+ if (nb_entries == 0) goto cleanup;
430
+
431
+ /* allocate sorting memory */
432
+ entries = (trigram_entry_t*) malloc(nb_entries * sizeof(trigram_entry_t));
433
+ assert(entries != NULL);
434
+ LOG("allocated space for %zd trigrams entries\n", nb_entries);
435
+
436
+ /* copy data for sorting */
437
+ entry_ptr = entries;
438
+ for (int k = 0; k < nb_trigrams; ++k) {
439
+ trigram_t t = trigrams[k];
440
+ size_t buckets = haystack->map[t].used;
441
+
442
+ sort_map_if_dirty(haystack->map + t);
443
+ memcpy(entry_ptr, haystack->map[t].entries, buckets * sizeof(trigram_entry_t));
444
+ entry_ptr += buckets;
445
+ }
446
+ assert(entry_ptr == entries + nb_entries);
447
+
448
+ /* sort data */
449
+ MERGESORT(entries, nb_entries, sizeof(trigram_entry_t), &compare_entries);
450
+ LOG("sorting entries\n");
451
+
452
+ /* count distinct matches */
453
+ entry_ptr = entries;
454
+ last_ref = -1;
455
+ nb_matches = 0;
456
+ for (int k = 0; k < nb_entries; ++k) {
457
+ if (entry_ptr->reference != last_ref) {
458
+ last_ref = entry_ptr->reference;
459
+ ++nb_matches;
460
+ }
461
+ ++entry_ptr;
462
+ }
463
+ assert(entry_ptr == entries + nb_entries);
464
+ LOG("total %zd distinct matches\n", nb_matches);
465
+
466
+ /* allocate maches result */
467
+ matches = (trigram_match_t*) calloc(nb_matches, sizeof(trigram_match_t));
468
+ assert(matches != NULL);
469
+
470
+ /* reduction, counting matches per reference */
471
+ entry_ptr = entries;
472
+ match_ptr = matches;
473
+ match_ptr->matches = 0;
474
+ match_ptr->reference = entry_ptr->reference; /* setup the first match to */
475
+ match_ptr->weight = entry_ptr->weight; /* simplify the loop */
476
+ for (int k = 0; k < nb_entries; ++k) {
477
+ if (entry_ptr->reference != match_ptr->reference) {
478
+ ++match_ptr;
479
+ match_ptr->reference = entry_ptr->reference;
480
+ match_ptr->weight = entry_ptr->weight;
481
+ match_ptr->matches = 1;
482
+ } else {
483
+ match_ptr->matches += 1;
484
+ }
485
+ assert((int) match_ptr->matches <= nb_trigrams);
486
+ ++entry_ptr;
487
+ }
488
+ assert(match_ptr == matches + nb_matches - 1);
489
+ assert(entry_ptr == entries + nb_entries);
490
+
491
+ /* sort by weight (qsort) */
492
+ qsort(matches, nb_matches, sizeof(trigram_match_t), &compare_matches);
493
+
494
+ /* output results */
495
+ nb_results = (limit < nb_matches) ? limit : nb_matches;
496
+ for (int k = 0; k < nb_results; ++k) {
497
+ results[k] = matches[k];
498
+ LOG("match %d: reference %d, matchiness %d, weight %d\n", k, matches[k].reference, matches[k].matches, matches[k].weight);
499
+ }
500
+
501
+ cleanup:
502
+ free_if(entries);
503
+ free_if(matches);
504
+ free_if(trigrams);
505
+ return nb_results;
506
+ }
507
+
508
+ /******************************************************************************/
509
+
510
+ int blurrily_storage_delete(trigram_map haystack, uint32_t reference)
511
+ {
512
+ int trigrams_deleted = 0;
513
+
514
+ for (int k = 0; k < TRIGRAM_COUNT; ++k) {
515
+ trigram_entries_t* map = haystack->map + k;
516
+ trigram_entry_t* entry = NULL;
517
+
518
+ for (unsigned int j = 0; j < map->used; ++j) {
519
+ entry = map->entries + j;
520
+ if (entry->reference != reference) continue;
521
+
522
+ *entry = map->entries[map->used - 1];
523
+ map->used -= 1;
524
+
525
+ ++trigrams_deleted;
526
+ --j;
527
+ }
528
+ }
529
+ haystack->total_trigrams -= trigrams_deleted;
530
+ haystack->total_references -= 1;
531
+ return trigrams_deleted;
532
+ }
533
+
534
+ /******************************************************************************/
535
+
536
+ int blurrily_storage_stats(trigram_map haystack, trigram_stat_t* stats)
537
+ {
538
+ stats->references = haystack->total_references;
539
+ stats->trigrams = haystack->total_trigrams;
540
+ return 0;
541
+ }
@@ -0,0 +1,109 @@
1
+ /*
2
+
3
+ storage.h --
4
+
5
+ Trigram map creation, persistence, and querying.
6
+
7
+ */
8
+ #include <inttypes.h>
9
+ #include "tokeniser.h"
10
+ #include "blurrily.h"
11
+
12
+ struct trigram_map_t;
13
+ typedef struct trigram_map_t* trigram_map;
14
+
15
+ struct PACKED_STRUCT trigram_match_t {
16
+ uint32_t reference;
17
+ uint32_t matches;
18
+ uint32_t weight;
19
+ };
20
+ typedef struct trigram_match_t trigram_match_t;
21
+ typedef struct trigram_match_t* trigram_match;
22
+
23
/* Counters filled in by blurrily_storage_stats(). */
struct trigram_stat_t {
  uint32_t references;
  uint32_t trigrams;
};
typedef struct trigram_stat_t trigram_stat_t;
28
+
29
+
30
+ /*
31
+ Create a new trigram map, resident in memory.
32
+ */
33
+ int blurrily_storage_new(trigram_map* haystack);
34
+
35
+ /*
36
+ Load an existing trigram map from disk.
37
+ */
38
+ int blurrily_storage_load(trigram_map* haystack, const char* path);
39
+
40
+ /*
41
+ Release resources claimed by <new> or <load>.
42
+ */
43
+ int blurrily_storage_close(trigram_map* haystack);
44
+
45
+ /*
46
+ Persist to disk what <blurrily_storage_new> or <blurrily_storage_load>
47
+ gave you.
48
+ */
49
+ int blurrily_storage_save(trigram_map haystack, const char* path);
50
+
51
+ /*
52
+ Add a new string to the map. <reference> is your identifier for that
53
+ string, <weight> will be used to discriminate entries that match "as
54
+ well" when searching.
55
+
56
+ If <weight> is zero, it will be replaced by the number of characters in
57
+ the <needle>.
58
+
59
+ Returns positive on success, negative on failure.
60
+ */
61
+ int blurrily_storage_put(trigram_map haystack, const char* needle, uint32_t reference, uint32_t weight);
62
+
63
+ /*
64
+ Check the map for an existing <reference>.
65
+
66
+ Returns < 0 on error, 0 if the reference is not found, the number of trigrams
67
+ for that reference otherwise.
68
+
69
+ If <weight> is not NULL, will be set to the weight value passed to the put
70
+ method on return (if the reference is found).
71
+
72
+ If <trigrams> is not NULL, it should point to an array <nb_trigrams> long,
73
+ and up to <nb_trigrams> will be copied into it matching the <needle>
74
+ originally passed to the put method.
75
+
76
+ Note that this is an O(n) method: the whole map will be read.
77
+ */
78
+ // int blurrily_storage_get(trigram_map haystack, uint32_t reference, uint32_t* weight, int nb_trigrams, trigram_t* trigrams);
79
+
80
+ /*
81
+ Remove a <reference> from the map.
82
+
83
+ Note that this is very inefficient: every trigram bucket is scanned.
84
+
85
+ Returns positive on success, negative on failure.
86
+ */
87
+ int blurrily_storage_delete(trigram_map haystack, uint32_t reference);
88
+
89
+ /*
90
+ Return at most <limit> entries matching <needle> from the <haystack>.
91
+
92
+ Results are written to <results>. The first results are the entries
93
+ sharing the most trigrams with the <needle>. Amongst entries with the same
94
+ number of matches, the lightest ones (lowest <weight>) will be returned
95
+ first.
96
+
97
+ <results> should be allocated by the caller.
98
+
99
+ Returns number of matches on success, negative on failure.
100
+ */
101
+ int blurrily_storage_find(trigram_map haystack, const char* needle, uint16_t limit, trigram_match results);
102
+
103
+ /*
104
+ Copies metadata into <stats>
105
+
106
+ Returns positive on success, negative on failure.
107
+ */
108
+ int blurrily_storage_stats(trigram_map haystack, trigram_stat_t* stats);
109
+
@@ -0,0 +1,127 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <stdio.h>
4
+ #include <math.h>
5
+ #include "tokeniser.h"
6
+ #include "log.h"
7
+ #include "blurrily.h"
8
+
9
+
10
+ /******************************************************************************/
11
+
12
/* Integer power: <a> multiplied by itself <b> times.
   Yields 1 when <b> is zero or negative. */
static int ipow(int a, int b)
{
  int product = 1;

  for (; b > 0; --b) {
    product *= a;
  }
  return product;
}
19
+
20
+ /******************************************************************************/
21
+
22
+ static void string_to_code(const char* input, trigram_t *output)
23
+ {
24
+ trigram_t result = 0;
25
+
26
+ for (int k = 0 ; k < 3; ++k) {
27
+ if (input[k] == '*' || input[k] < 'a' || input[k] > 'z') continue;
28
+ result += ipow(TRIGRAM_BASE, k) * (input[k] - 'a' + 1);
29
+ }
30
+
31
+ *output = result;
32
+ }
33
+
34
+ /******************************************************************************/
35
+
36
+ static void code_to_string(trigram_t input, char* output)
37
+ {
38
+ for (int k = 0 ; k < 3; ++k) {
39
+ uint16_t elem = input / ipow(TRIGRAM_BASE, k) % TRIGRAM_BASE;
40
+ if (elem == 0) {
41
+ output[k] = '*';
42
+ } else {
43
+ output[k] = ('a' + elem - 1);
44
+ }
45
+ }
46
+ output[3] = 0;
47
+ }
48
+
49
+ /******************************************************************************/
50
+
51
+ static int blurrily_compare_trigrams(const void* left_p, const void* right_p)
52
+ {
53
+ trigram_t* left = (trigram_t*)left_p;
54
+ trigram_t* right = (trigram_t*)right_p;
55
+ return (int)*left - (int)*right;
56
+ }
57
+
58
+ /******************************************************************************/
59
+
60
+ int blurrily_tokeniser_parse_string(const char* input, trigram_t* output)
61
+ {
62
+ int length = strlen(input);
63
+ char* normalized = (char*) malloc(length+5);
64
+ int duplicates = 0;
65
+
66
+ snprintf(normalized, length+4, "**%s*", input);
67
+
68
+ /* replace spaces with '*' */
69
+ for (int k = 0; k < length+3; ++k) {
70
+ if (normalized[k] == ' ') normalized[k] = '*';
71
+ }
72
+
73
+ /* compute trigrams */
74
+ for (int k = 0; k <= length; ++k) {
75
+ string_to_code(normalized+k, output+k);
76
+ }
77
+
78
+ /* print results */
79
+ LOG("-- normalization\n");
80
+ LOG("%s -> %s\n", input, normalized);
81
+ LOG("-- tokenisation\n");
82
+ for (int k = 0; k <= length; ++k) {
83
+ char res[4];
84
+
85
+ code_to_string(output[k], res);
86
+
87
+ LOG("%c%c%c -> %d -> %s\n",
88
+ normalized[k], normalized[k+1], normalized[k+2],
89
+ output[k], res
90
+ );
91
+ }
92
+
93
+ /* sort */
94
+ qsort((void*)output, length+1, sizeof(trigram_t), &blurrily_compare_trigrams);
95
+
96
+ /* remove duplicates */
97
+ for (int k = 1; k <= length; ++k) {
98
+ trigram_t* previous = output + k - 1;
99
+ trigram_t* current = output + k;
100
+
101
+ if (*previous == *current) {
102
+ *previous = 32768;
103
+ ++duplicates;
104
+ }
105
+ }
106
+
107
+ /* compact */
108
+ qsort((void*)output, length+1, sizeof(trigram_t), &blurrily_compare_trigrams);
109
+
110
+ /* print again */
111
+ LOG("-- after sort/compact\n");
112
+ for (int k = 0; k <= length-duplicates; ++k) {
113
+ char res[4];
114
+ code_to_string(output[k], res);
115
+ LOG("%d -> %s\n", output[k], res);
116
+ }
117
+
118
+ free((void*)normalized);
119
+ return length+1 - duplicates;
120
+ }
121
+
122
+ /******************************************************************************/
123
+
124
+ int blurrily_tokeniser_trigram(trigram_t UNUSED(input), char* UNUSED(output))
125
+ {
126
+ return 0;
127
+ }
@@ -0,0 +1,41 @@
1
+ /*
2
+
3
+ tokeniser.h --
4
+
5
+ Split a string into an array of trigrams.
6
+
7
+ The input string should be only lowercase latin letters and spaces
8
+ (convert using iconv).
9
+
10
+ Each trigram is a three-symbol tuple consisting of letters and the
11
+ "epsilon" character used to represent spaces and beginning-of-word/end-of-
12
+ word anchors.
13
+
14
+ Each trigram is represented by a 16-bit integer.
15
+
16
+ */
17
+ #include <inttypes.h>
18
+
19
+ #define TRIGRAM_BASE 28
20
+
21
+ typedef uint16_t trigram_t;
22
+
23
+ /*
24
+ Parse the <input> string and store the result in <output>.
25
+ <output> must be allocated by the caller and provide at least as many slots
26
+ as characters in <input>, plus one.
27
+ (not all will necessarily be filled)
28
+
29
+ Returns the number of trigrams on success, a negative number on failure.
30
+ */
31
+ int blurrily_tokeniser_parse_string(const char* input, trigram_t* output);
32
+
33
+
34
+ /*
35
+ Given an <input> returns a string representation of the trigram in <output>.
36
+ <output> must be allocated by caller and will always be exactly 3
37
+ characters plus a terminating NUL.
38
+
39
+ Returns positive on success, negative on failure.
40
+ */
41
+ int blurrily_tokeniser_trigram(trigram_t input, char* output);
data/lib/blurrily.rb ADDED
@@ -0,0 +1,3 @@
1
+ require "blurrily/map_ext"
2
+ require "blurrily/map"
3
+ require "blurrily/version"
@@ -0,0 +1,34 @@
1
+ require 'blurrily/map_ext'
2
+ require 'active_support/all' # fixme: we only need enough to get mb_chars and alias_method_chain in
3
+
4
module Blurrily
  # Reopens Map so that #put and #find normalise their needle before
  # delegating to the original implementations, which remain reachable as
  # #put_without_string_normalize and #find_without_string_normalize.
  Map.class_eval do

    # Normalises +needle+, then stores it through the original #put.
    def put_with_string_normalize(needle, reference, weight=0)
      needle = normalize_string needle
      put_without_string_normalize(needle, reference, weight)
    end

    # Plain alias_method pair rather than ActiveSupport's alias_method_chain,
    # which is no longer available in recent ActiveSupport releases; the
    # resulting method names are the same.
    alias_method :put_without_string_normalize, :put
    alias_method :put, :put_with_string_normalize


    # Normalises +needle+, then searches through the original #find.
    def find_with_string_normalize(needle, limit=10)
      needle = normalize_string needle
      find_without_string_normalize(needle, limit)
    end

    alias_method :find_without_string_normalize, :find
    alias_method :find, :find_with_string_normalize


    private

    # Reduces +needle+ to lowercase ASCII letters separated by single spaces.
    #
    # Strings made only of a-z and spaces take the fast path. Anything else is
    # decomposed (:kd), stripped of non-ASCII characters, and has every
    # remaining character outside a-z replaced by a space. Runs of whitespace
    # are then collapsed and the ends trimmed.
    def normalize_string(needle)
      result = needle.downcase
      # \A and \z anchor the whole string; ^ and $ only anchor a single line,
      # which let multi-line input with one clean line skip normalisation.
      unless result =~ /\A[a-z ]+\z/
        result = result.mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').to_s.gsub(/[^a-z]/,' ')
      end
      result.gsub(/\s+/,' ').strip
    end

  end
end
File without changes
@@ -0,0 +1,3 @@
1
# Version string of the blurrily library.
module Blurrily
  VERSION = '0.0.1'
end
metadata ADDED
@@ -0,0 +1,199 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: blurrily
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Julien Letessier
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-03-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: eventmachine
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: json
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake-compiler
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: pry-nav
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: pry-doc
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: progressbar
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - '>='
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ description: Native fuzzy string search
154
+ email:
155
+ - julien.letessier@gmail.com
156
+ executables: []
157
+ extensions:
158
+ - ext/blurrily/extconf.rb
159
+ extra_rdoc_files: []
160
+ files:
161
+ - lib/blurrily/map.rb
162
+ - lib/blurrily/server.rb
163
+ - lib/blurrily/version.rb
164
+ - lib/blurrily.rb
165
+ - ext/blurrily/map_ext.c
166
+ - ext/blurrily/storage.c
167
+ - ext/blurrily/tokeniser.c
168
+ - ext/blurrily/blurrily.h
169
+ - ext/blurrily/log.h
170
+ - ext/blurrily/storage.h
171
+ - ext/blurrily/tokeniser.h
172
+ - ext/blurrily/extconf.rb
173
+ - README.md
174
+ - LICENSE.txt
175
+ homepage: http://github.com/mezis/blurrily
176
+ licenses: []
177
+ metadata: {}
178
+ post_install_message:
179
+ rdoc_options: []
180
+ require_paths:
181
+ - lib
182
+ required_ruby_version: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - '>='
185
+ - !ruby/object:Gem::Version
186
+ version: '0'
187
+ required_rubygems_version: !ruby/object:Gem::Requirement
188
+ requirements:
189
+ - - '>='
190
+ - !ruby/object:Gem::Version
191
+ version: '0'
192
+ requirements: []
193
+ rubyforge_project:
194
+ rubygems_version: 2.0.0
195
+ signing_key:
196
+ specification_version: 4
197
+ summary: Native fuzzy string search
198
+ test_files: []
199
+ has_rdoc: