blurrily 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ed39eb955b4d71f3b924a16be4430046ba1d02ab
4
+ data.tar.gz: 1c5a5b42b6877ad3d66928a0fe0520ea73defa9b
5
+ SHA512:
6
+ metadata.gz: 54fdb049c894470cf18afdafe18053607e1b4336b6f7353866ae8d81115e87a97ed6f5273270d930a88c292bf02a361868280997b6dbe5668c894aa456745950
7
+ data.tar.gz: b8c280aa93d062a9a89fbda80cdf3365efcb34ed3e3c28d8dadf6c9b9ee5deba389a3a0233c788e8e182894b1067254ec9a9ef4ae80e7c1676a60edd6cd50e83
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 HouseTrip Ltd.
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+ # Blurrily — Fast fuzzy text search
2
+
3
+ [![Build Status](https://travis-ci.org/mezis/blurrily.png?branch=master)](https://travis-ci.org/mezis/blurrily)
4
+ [![Dependency Status](https://gemnasium.com/mezis/blurrily.png)](https://gemnasium.com/mezis/blurrily)
5
+ [![Code Climate](https://codeclimate.com/github/mezis/blurrily.png)](https://codeclimate.com/github/mezis/blurrily)
6
+
7
+ This will be a C version of [fuzzily](http://github.com/mezis/fuzzily), a
8
+ Ruby gem to perform fuzzy text searching.
9
+
10
+ WORK IN PROGRESS.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ gem 'blurrily'
17
+
18
+ And then execute:
19
+
20
+ $ bundle
21
+
22
+ Or install it yourself as:
23
+
24
+ $ gem install blurrily
25
+
26
+ ## Usage
27
+
28
+ TODO: Write usage instructions here
29
+
30
+ ## Contributing
31
+
32
+ 1. Fork it
33
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
34
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
35
+ 4. Push to the branch (`git push origin my-new-feature`)
36
+ 5. Create new Pull Request
@@ -0,0 +1,2 @@
1
+ #define PACKED_STRUCT __attribute__ ((__packed__)) /* struct attribute: lay members out without padding */
2
+ #define UNUSED(_IDENT) _IDENT __attribute__ ((unused)) /* declare _IDENT and mark it as intentionally unused */
@@ -0,0 +1,17 @@
1
require 'mkmf'

# Platform name as reported by uname(1), upper-cased; passed to the C
# sources as -DPLATFORM_<NAME>.
PLATFORM = `uname`.strip.upcase

# Compiler flags shared by the production and development builds.
# Assembled in a single expression so the constant is assigned exactly once
# (appending with += re-assigns it and triggers an "already initialized
# constant" warning).
SHARED_FLAGS = [
  "-DPLATFORM_#{PLATFORM}",
  '--std=c99',
  '-Wall',
  '-Wextra',
  '-Werror',
  # for ftruncate to be present
  (PLATFORM == 'LINUX' ? '-D_XOPEN_SOURCE=500' : nil)
].compact.join(' ')

# production
$CFLAGS += " #{SHARED_FLAGS} -O3 -fno-fast-math"

# development
# $CFLAGS += " #{SHARED_FLAGS} -O0 -g"

create_makefile('blurrily/map_ext')
@@ -0,0 +1,5 @@
1
+ #ifdef DEBUG
2
+ #define LOG(...) fprintf(stderr, __VA_ARGS__) /* DEBUG builds: printf-style trace written to stderr */
3
+ #else
4
+ #define LOG(...) /* otherwise: expands to nothing, so the arguments are never evaluated */
5
+ #endif
@@ -0,0 +1,174 @@
1
+ #include <ruby.h>
2
+ #include <assert.h>
3
+ #include "storage.h"
4
+ #include "blurrily.h"
5
+
6
+ /******************************************************************************/
7
+
8
+ static void blurrily_free(void* haystack)
9
+ {
10
+ int res = -1;
11
+
12
+ res = blurrily_storage_close((trigram_map*) &haystack);
13
+ assert(res >= 0);
14
+ }
15
+
16
+ /******************************************************************************/
17
+
18
+ static VALUE blurrily_new(VALUE class) {
19
+ VALUE wrapper = Qnil;
20
+ trigram_map haystack = (trigram_map)NULL;
21
+ int res = -1;
22
+
23
+ res = blurrily_storage_new(&haystack);
24
+ if (res < 0) { rb_sys_fail(NULL); return Qnil; }
25
+
26
+ wrapper = Data_Wrap_Struct(class, 0, blurrily_free, (void*)haystack);
27
+ rb_obj_call_init(wrapper, 0, NULL);
28
+ return wrapper;
29
+ }
30
+
31
+ /******************************************************************************/
32
+
33
+ static VALUE blurrily_load(VALUE class, VALUE rb_path) {
34
+ char* path = StringValuePtr(rb_path);
35
+ VALUE wrapper = Qnil;
36
+ trigram_map haystack = (trigram_map)NULL;
37
+ int res = -1;
38
+
39
+ res = blurrily_storage_load(&haystack, path);
40
+ if (res < 0) { rb_sys_fail(NULL); return Qnil; }
41
+
42
+ wrapper = Data_Wrap_Struct(class, 0, blurrily_free, (void*)haystack);
43
+ rb_obj_call_init(wrapper, 0, NULL);
44
+ return wrapper;
45
+ }
46
+
47
+ /******************************************************************************/
48
+
49
+ static VALUE blurrily_initialize(VALUE UNUSED(self)) {
50
+ return Qtrue;
51
+ }
52
+
53
+ /******************************************************************************/
54
+
55
+ static VALUE blurrily_put(VALUE self, VALUE rb_needle, VALUE rb_reference, VALUE rb_weight) {
56
+ trigram_map haystack = (trigram_map)NULL;
57
+ int res = -1;
58
+ char* needle = StringValuePtr(rb_needle);
59
+ uint32_t reference = NUM2UINT(rb_reference);
60
+ uint32_t weight = NUM2UINT(rb_weight);
61
+
62
+ Data_Get_Struct(self, struct trigram_map_t, haystack);
63
+
64
+ res = blurrily_storage_put(haystack, needle, reference, weight);
65
+ assert(res >= 0);
66
+
67
+ return Qnil;
68
+ }
69
+
70
+ /******************************************************************************/
71
+
72
+ static VALUE blurrily_delete(VALUE self, VALUE rb_reference) {
73
+ trigram_map haystack = (trigram_map)NULL;
74
+ uint32_t reference = NUM2UINT(rb_reference);
75
+ int res = -1;
76
+
77
+ Data_Get_Struct(self, struct trigram_map_t, haystack);
78
+
79
+ res = blurrily_storage_delete(haystack, reference);
80
+ assert(res >= 0);
81
+
82
+ return INT2NUM(res);
83
+ }
84
+
85
+ /******************************************************************************/
86
+
87
+ static VALUE blurrily_save(VALUE self, VALUE rb_path) {
88
+ trigram_map haystack = (trigram_map)NULL;
89
+ int res = -1;
90
+ const char* path = StringValuePtr(rb_path);
91
+
92
+ Data_Get_Struct(self, struct trigram_map_t, haystack);
93
+
94
+ res = blurrily_storage_save(haystack, path);
95
+ assert(res >= 0);
96
+
97
+ return Qnil;
98
+ }
99
+
100
+ /******************************************************************************/
101
+
102
+ static VALUE blurrily_find(VALUE self, VALUE rb_needle, VALUE rb_limit) {
103
+ trigram_map haystack = (trigram_map)NULL;
104
+ int res = -1;
105
+ const char* needle = StringValuePtr(rb_needle);
106
+ int limit = NUM2UINT(rb_limit);
107
+ trigram_match matches = NULL;
108
+ VALUE rb_matches = Qnil;
109
+
110
+ if (limit <= 0) { limit = 10 ; }
111
+ matches = (trigram_match) malloc(limit * sizeof(trigram_match_t));
112
+
113
+ Data_Get_Struct(self, struct trigram_map_t, haystack);
114
+
115
+ res = blurrily_storage_find(haystack, needle, limit, matches);
116
+ assert(res >= 0);
117
+
118
+ /* wrap the matches into a Ruby array */
119
+ rb_matches = rb_ary_new();
120
+ for (int k = 0; k < res; ++k) {
121
+ VALUE rb_match = rb_ary_new();
122
+ rb_ary_push(rb_match, rb_uint_new(matches[k].reference));
123
+ rb_ary_push(rb_match, rb_uint_new(matches[k].matches));
124
+ rb_ary_push(rb_match, rb_uint_new(matches[k].weight));
125
+ rb_ary_push(rb_matches, rb_match);
126
+ }
127
+ return rb_matches;
128
+ }
129
+
130
+
131
+ /******************************************************************************/
132
+
133
+ static VALUE blurrily_stats(VALUE self)
134
+ {
135
+ trigram_map haystack = (trigram_map)NULL;
136
+ trigram_stat_t stats;
137
+ VALUE result = rb_hash_new();
138
+ int res = -1;
139
+
140
+ Data_Get_Struct(self, struct trigram_map_t, haystack);
141
+
142
+ res = blurrily_storage_stats(haystack, &stats);
143
+ assert(res >= 0);
144
+
145
+ (void) rb_hash_aset(result, ID2SYM(rb_intern("references")), UINT2NUM(stats.references));
146
+ (void) rb_hash_aset(result, ID2SYM(rb_intern("trigrams")), UINT2NUM(stats.trigrams));
147
+
148
+ return result;
149
+ }
150
+
151
+ /******************************************************************************/
152
+
153
+ void Init_map_ext(void) {
154
+ VALUE module = Qnil;
155
+ VALUE klass = Qnil;
156
+
157
+ /* assume we haven't yet defined blurrily */
158
+ module = rb_define_module("Blurrily");
159
+ assert(module != Qnil);
160
+
161
+ klass = rb_define_class_under(module, "Map", rb_cObject);
162
+ assert(klass != Qnil);
163
+
164
+ rb_define_singleton_method(klass, "new", blurrily_new, 0);
165
+ rb_define_singleton_method(klass, "load", blurrily_load, 1);
166
+
167
+ rb_define_method(klass, "initialize", blurrily_initialize, 0);
168
+ rb_define_method(klass, "put", blurrily_put, 3);
169
+ rb_define_method(klass, "delete", blurrily_delete, 1);
170
+ rb_define_method(klass, "save", blurrily_save, 1);
171
+ rb_define_method(klass, "find", blurrily_find, 2);
172
+ rb_define_method(klass, "stats", blurrily_stats, 0);
173
+ return;
174
+ }
@@ -0,0 +1,541 @@
1
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
9
+
10
+ #ifdef PLATFORM_LINUX
11
+ #include <linux/limits.h>
12
+ #define MERGESORT fake_mergesort
13
+ #else
14
+ #include <limits.h>
15
+ #define MERGESORT mergesort
16
+ #endif
17
+
18
+ #ifndef PATH_MAX
19
+ /* safe default ... */
20
+ #define PATH_MAX 1024
21
+ #endif
22
+
23
+ #include "storage.h"
24
+
25
+ #include "log.h"
26
+
27
+ /******************************************************************************/
28
+
29
+ #define PAGE_SIZE 4096
30
+ #define TRIGRAM_COUNT (TRIGRAM_BASE * TRIGRAM_BASE * TRIGRAM_BASE)
31
+ #define TRIGRAM_ENTRIES_START_SIZE PAGE_SIZE/8
32
+
33
+ /******************************************************************************/
34
+
35
+ /* one trigram entry -- client reference and sorting weight */
36
+ struct PACKED_STRUCT trigram_entry_t
37
+ {
38
+ uint32_t reference; /* caller-supplied identifier passed to blurrily_storage_put */
39
+ uint32_t weight; /* sort key: among equal match counts, lower weights come first */
40
+ };
41
+ typedef struct trigram_entry_t trigram_entry_t;
42
+
43
+
44
+ /* collection of entries for a given trigram */
45
+ /* <entries> points to an array of <buckets> entries */
46
+ /* of which <used> are filled */
47
+ struct PACKED_STRUCT trigram_entries_t
48
+ {
49
+ uint32_t buckets; /* capacity of <entries>, in entries */
50
+ uint32_t used; /* number of entries currently filled (<= buckets) */
51
+
52
+ trigram_entry_t* entries; /* set when the structure is in memory */
53
+ size_t entries_offset; /* set when the structure is on disk */
54
+
55
+ uint8_t dirty; /* not optimised (presorted) yet */
56
+ };
57
+ typedef struct trigram_entries_t trigram_entries_t;
58
+
59
+
60
+ /* hash map of all possible trigrams to collection of entries */
61
+ /* there are TRIGRAM_COUNT = TRIGRAM_BASE^3 possible trigrams */
62
+ struct PACKED_STRUCT trigram_map_t
63
+ {
64
+ char magic[6]; /* the string "trigra" */
65
+ uint8_t big_endian; /* value of get_big_endian() on the writer: 1 little endian, 2 big endian */
66
+ uint8_t pointer_size; /* value of get_pointer_size() on the writer: sizeof(void*) */
67
+
68
+ uint32_t total_references; /* incremented by each put, decremented by each delete */
69
+ uint32_t total_trigrams; /* running count of trigram entries added minus entries deleted */
70
+ size_t mapped_size; /* when mapped from disk, the number of bytes mapped */
71
+ int mapped_fd; /* when mapped from disk, the file descriptor */
72
+
73
+ trigram_entries_t map[TRIGRAM_COUNT]; /* this whole structure is ~500KB */
74
+ };
75
+ typedef struct trigram_map_t trigram_map_t;
76
+
77
+ /******************************************************************************/
78
+
79
+ #ifdef PLATFORM_LINUX
80
+ /* fake version of mergesort(3) implemented with qsort(3) as Linux lacks */
81
+ /* the specific variants */
82
/*
 * Drop-in with the signature of mergesort(3): sorts <nel> elements of
 * <width> bytes starting at <base> by delegating to qsort(3).
 * Always returns 0.
 */
static int fake_mergesort(void *base, size_t nel, size_t width, int (*compar)(const void *, const void *))
{
  return (qsort(base, nel, width, compar), 0);
}
87
+ #endif
88
+
89
+ /******************************************************************************/
90
+
91
+ /* 1 -> little endian, 2 -> big endian */
92
/*
 * Byte-order probe: inspects the first byte in memory of a known 32-bit
 * constant. Returns 1 when the least significant byte comes first,
 * 2 otherwise.
 */
static uint8_t get_big_endian()
{
  const uint32_t probe = 0xAA0000BB;
  const uint8_t* bytes = (const uint8_t*) &probe;

  if (bytes[0] == 0xBB) {
    return 1;
  }
  return 2;
}
99
+
100
+ /******************************************************************************/
101
+
102
+ /* 4 or 8 (bytes) */
103
/* Size of a data pointer on this platform, in bytes, narrowed to uint8_t. */
static uint8_t get_pointer_size()
{
  const size_t width = sizeof(void*);

  return (uint8_t) width;
}
107
+
108
+ /******************************************************************************/
109
+
110
+ static int compare_entries(const void* left_p, const void* right_p)
111
+ {
112
+ trigram_entry_t* left = (trigram_entry_t*)left_p;
113
+ trigram_entry_t* right = (trigram_entry_t*)right_p;
114
+ return (int)left->reference - (int)right->reference;
115
+ }
116
+
117
+ /* compares matches on #matches (descending) then weight (ascending) */
118
+ static int compare_matches(const void* left_p, const void* right_p)
119
+ {
120
+ trigram_match_t* left = (trigram_match_t*)left_p;
121
+ trigram_match_t* right = (trigram_match_t*)right_p;
122
+ /* int delta = (int)left->matches - (int)right->matches; */
123
+ int delta = (int)right->matches - (int)left->matches;
124
+
125
+ return (delta != 0) ? delta : ((int)left->weight - (int)right->weight);
126
+
127
+ }
128
+
129
+ /******************************************************************************/
130
+
131
+ static void sort_map_if_dirty(trigram_entries_t* map)
132
+ {
133
+ int res = -1;
134
+ if (! map->dirty) return;
135
+
136
+ res = MERGESORT(map->entries, map->used, sizeof(trigram_entry_t), &compare_entries);
137
+ assert(res >= 0);
138
+ map->dirty = 0;
139
+ }
140
+
141
+ /******************************************************************************/
142
+
143
+ static size_t round_to_page(size_t value)
144
+ {
145
+ if (value % PAGE_SIZE == 0) return value;
146
+ return (value / PAGE_SIZE + 1) * PAGE_SIZE;
147
+ }
148
+
149
+ /******************************************************************************/
150
+
151
+ static size_t get_map_size(trigram_map haystack, int index)
152
+ {
153
+ return haystack->map[index].buckets * sizeof(trigram_entry_t);
154
+ }
155
+
156
+ /******************************************************************************/
157
+
158
/* Releases <ptr> with free() unless it is NULL. */
static void free_if(void* ptr)
{
  if (ptr != NULL) {
    free(ptr);
  }
}
164
+
165
+ /******************************************************************************/
166
+
167
+ int blurrily_storage_new(trigram_map* haystack_ptr)
168
+ {
169
+ trigram_map haystack = (trigram_map)NULL;
170
+ trigram_entries_t* ptr = NULL;
171
+ int k = 0;
172
+
173
+ LOG("blurrily_storage_new\n");
174
+ haystack = (trigram_map) malloc(sizeof(trigram_map_t));
175
+ if (haystack == NULL) return -1;
176
+
177
+ memset(haystack, 0x00, sizeof(trigram_map_t));
178
+
179
+ memcpy(haystack->magic, "trigra", 6);
180
+ haystack->big_endian = get_big_endian();
181
+ haystack->pointer_size = get_pointer_size();
182
+
183
+ haystack->mapped_size = 0; /* not mapped, as we just created it in memory */
184
+ haystack->mapped_fd = 0;
185
+ haystack->total_references = 0;
186
+ haystack->total_trigrams = 0;
187
+ for(k = 0, ptr = haystack->map ; k < TRIGRAM_COUNT ; ++k, ++ptr) {
188
+ ptr->buckets = 0;
189
+ ptr->used = 0;
190
+ ptr->dirty = 0;
191
+ ptr->entries = (trigram_entry_t*)NULL;
192
+ }
193
+
194
+ *haystack_ptr = haystack;
195
+ return 0;
196
+ }
197
+
198
+ /******************************************************************************/
199
+
200
+ int blurrily_storage_load(trigram_map* haystack, const char* path)
201
+ {
202
+ int fd = -1;
203
+ int res = -1;
204
+ trigram_map header = NULL;
205
+ uint8_t* origin = NULL;
206
+ struct stat metadata;
207
+
208
+ /* open and map file */
209
+ res = fd = open(path, O_RDONLY);
210
+ if (res < 0) goto cleanup;
211
+
212
+ res = fstat(fd, &metadata);
213
+ if (res < 0) goto cleanup;
214
+
215
+ header = (trigram_map) mmap(NULL, metadata.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
216
+ assert(header != NULL);
217
+
218
+ /* check magic */
219
+ /* TODO */
220
+
221
+ /* fix header data */
222
+ header->mapped_size = metadata.st_size;
223
+ header->mapped_fd = fd;
224
+ origin = (uint8_t*)header;
225
+ for (int k = 0; k < TRIGRAM_COUNT; ++k) {
226
+ trigram_entries_t* map = header->map + k;
227
+ if (map->entries_offset == 0) continue;
228
+ map->entries = (trigram_entry_t*) (origin + map->entries_offset);
229
+ map->entries_offset = 0;
230
+ }
231
+ *haystack = header;
232
+
233
+ cleanup:
234
+ return res;
235
+ }
236
+
237
+ /******************************************************************************/
238
+
239
+ int blurrily_storage_close(trigram_map* haystack_ptr)
240
+ {
241
+ trigram_map haystack = *haystack_ptr;
242
+ int res = -1;
243
+
244
+ LOG("blurrily_storage_close\n");
245
+
246
+ if (haystack->mapped_size) {
247
+ int fd = haystack->mapped_fd;
248
+
249
+ res = munmap(haystack, haystack->mapped_size);
250
+ assert(res >= 0);
251
+
252
+ res = close(fd);
253
+ assert(res >= 0);
254
+ } else {
255
+ trigram_entries_t* ptr = haystack->map;
256
+ for(int k = 0 ; k < TRIGRAM_COUNT ; ++k) {
257
+ free(ptr->entries);
258
+ ++ptr;
259
+ }
260
+ free(haystack);
261
+ }
262
+
263
+ *haystack_ptr = NULL;
264
+ return 0;
265
+ }
266
+
267
+ /******************************************************************************/
268
+
269
+ int blurrily_storage_save(trigram_map haystack, const char* path)
270
+ {
271
+ int fd = -1;
272
+ int res = -1;
273
+ uint8_t* ptr = (uint8_t*)NULL;
274
+ size_t total_size = 0;
275
+ size_t offset = 0;
276
+ trigram_map header = NULL;
277
+ char path_tmp[PATH_MAX];
278
+
279
+ /* cleanup maps in memory */
280
+ for (int k = 0; k < TRIGRAM_COUNT; ++k) {
281
+ sort_map_if_dirty(haystack->map + k);
282
+ }
283
+
284
+ /* path for temporary file */
285
+ snprintf(path_tmp, PATH_MAX, "%s.tmp", path);
286
+
287
+ /* compute storage space required */
288
+ total_size += round_to_page(sizeof(trigram_map_t));
289
+
290
+ for (int k = 0; k < TRIGRAM_COUNT; ++k) {
291
+ total_size += round_to_page(get_map_size(haystack, k));
292
+ }
293
+
294
+ /* open and map file */
295
+ fd = open(path_tmp, O_RDWR | O_CREAT | O_TRUNC, 0644);
296
+ assert(fd >= 0);
297
+
298
+ res = ftruncate(fd, total_size);
299
+ assert(res >= 0);
300
+
301
+ ptr = mmap(NULL, total_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
302
+ assert(ptr != NULL);
303
+
304
+ /* flush data */
305
+ memset(ptr, 0x00, total_size);
306
+
307
+ /* copy header & clean copy */
308
+ memcpy(ptr, (void*)haystack, sizeof(trigram_map_t));
309
+ offset += round_to_page(sizeof(trigram_map_t));
310
+ header = (trigram_map)ptr;
311
+
312
+ header->mapped_size = 0;
313
+ header->mapped_fd = 0;
314
+
315
+ /* copy each map, set offset in header */
316
+ for (int k = 0; k < TRIGRAM_COUNT; ++k) {
317
+ size_t block_size = get_map_size(haystack, k);
318
+
319
+ if (block_size > 0) {
320
+ memcpy(ptr+offset, haystack->map[k].entries, block_size);
321
+
322
+ header->map[k].entries = NULL;
323
+ header->map[k].entries_offset = offset;
324
+
325
+ offset += round_to_page(block_size);
326
+ } else {
327
+ header->map[k].entries = NULL;
328
+ header->map[k].entries_offset = 0;
329
+ }
330
+ }
331
+ assert(offset == total_size);
332
+
333
+ res = munmap(ptr, total_size);
334
+ assert(res >= 0);
335
+
336
+ res = close(fd);
337
+ assert(res >= 0);
338
+
339
+ /* commit by renaming the file */
340
+ res = rename(path_tmp, path);
341
+ assert(res >= 0);
342
+
343
+ return 0;
344
+ }
345
+
346
+ /******************************************************************************/
347
+
348
+ int blurrily_storage_put(trigram_map haystack, const char* needle, uint32_t reference, uint32_t weight)
349
+ {
350
+ int nb_trigrams = -1;
351
+ int length = strlen(needle);
352
+ trigram_t* trigrams = (trigram_t*)NULL;
353
+
354
+ trigrams = (trigram_t*)malloc((length+1) * sizeof(trigram_t));
355
+ nb_trigrams = blurrily_tokeniser_parse_string(needle, trigrams);
356
+
357
+ if (weight <= 0) weight = length;
358
+
359
+ for (int k = 0; k < nb_trigrams; ++k) {
360
+ trigram_t t = trigrams[k];
361
+ trigram_entries_t* map = &haystack->map[t];
362
+ trigram_entry_t entry = { reference, weight };
363
+
364
+ assert(t < TRIGRAM_COUNT);
365
+ assert(map-> used <= map-> buckets);
366
+
367
+ /* allocate more space as needed (exponential growth) */
368
+ if (map->buckets == 0) {
369
+ LOG("- alloc for %d\n", t);
370
+
371
+ map->buckets = TRIGRAM_ENTRIES_START_SIZE;
372
+ map->entries = (trigram_entry_t*) calloc(map->buckets, sizeof(trigram_entry_t));
373
+ }
374
+ if (map->used == map->buckets) {
375
+ uint32_t new_buckets = map->buckets * 4/3;
376
+ trigram_entry_t* new_entries = NULL;
377
+ LOG("- realloc for %d\n", t);
378
+
379
+ /* copy old data, free old pointer, zero extra space */
380
+ new_entries = malloc(new_buckets * sizeof(trigram_entry_t));
381
+ assert(new_entries != NULL);
382
+ memcpy(new_entries, map->entries, map->buckets * sizeof(trigram_entry_t));
383
+ free(map->entries);
384
+ memset(new_entries + map->buckets, 0x00, (new_buckets - map->buckets) * sizeof(trigram_entry_t));
385
+ /* swap fields */
386
+ map->buckets = new_buckets;
387
+ map->entries = new_entries;
388
+ }
389
+ map->entries[map->used] = entry;
390
+
391
+ map->used += 1;
392
+ map->dirty = 1;
393
+ }
394
+ haystack->total_trigrams += nb_trigrams;
395
+ haystack->total_references += 1;
396
+
397
+ free((void*)trigrams);
398
+ return 0;
399
+ }
400
+
401
+ /******************************************************************************/
402
+
403
+ int blurrily_storage_find(trigram_map haystack, const char* needle, uint16_t limit, trigram_match results)
404
+ {
405
+ int nb_trigrams = -1;
406
+ int length = strlen(needle);
407
+ trigram_t* trigrams = (trigram_t*)NULL;
408
+ int nb_entries = -1;
409
+ trigram_entry_t* entries = NULL;
410
+ trigram_entry_t* entry_ptr = NULL;
411
+ int nb_matches = -1;
412
+ trigram_match_t* matches = NULL;
413
+ trigram_match_t* match_ptr = NULL;
414
+ uint32_t last_ref = (uint32_t)-1;
415
+ int nb_results = 0;
416
+
417
+ trigrams = (trigram_t*)malloc((length+1) * sizeof(trigram_t));
418
+ nb_trigrams = blurrily_tokeniser_parse_string(needle, trigrams);
419
+ if (nb_trigrams == 0) goto cleanup;
420
+
421
+ LOG("%d trigrams in '%s'\n", nb_trigrams, needle);
422
+
423
+ /* measure size required for sorting */
424
+ nb_entries = 0;
425
+ for (int k = 0; k < nb_trigrams; ++k) {
426
+ trigram_t t = trigrams[k];
427
+ nb_entries += haystack->map[t].used;
428
+ }
429
+ if (nb_entries == 0) goto cleanup;
430
+
431
+ /* allocate sorting memory */
432
+ entries = (trigram_entry_t*) malloc(nb_entries * sizeof(trigram_entry_t));
433
+ assert(entries != NULL);
434
+ LOG("allocated space for %zd trigrams entries\n", nb_entries);
435
+
436
+ /* copy data for sorting */
437
+ entry_ptr = entries;
438
+ for (int k = 0; k < nb_trigrams; ++k) {
439
+ trigram_t t = trigrams[k];
440
+ size_t buckets = haystack->map[t].used;
441
+
442
+ sort_map_if_dirty(haystack->map + t);
443
+ memcpy(entry_ptr, haystack->map[t].entries, buckets * sizeof(trigram_entry_t));
444
+ entry_ptr += buckets;
445
+ }
446
+ assert(entry_ptr == entries + nb_entries);
447
+
448
+ /* sort data */
449
+ MERGESORT(entries, nb_entries, sizeof(trigram_entry_t), &compare_entries);
450
+ LOG("sorting entries\n");
451
+
452
+ /* count distinct matches */
453
+ entry_ptr = entries;
454
+ last_ref = -1;
455
+ nb_matches = 0;
456
+ for (int k = 0; k < nb_entries; ++k) {
457
+ if (entry_ptr->reference != last_ref) {
458
+ last_ref = entry_ptr->reference;
459
+ ++nb_matches;
460
+ }
461
+ ++entry_ptr;
462
+ }
463
+ assert(entry_ptr == entries + nb_entries);
464
+ LOG("total %zd distinct matches\n", nb_matches);
465
+
466
+ /* allocate maches result */
467
+ matches = (trigram_match_t*) calloc(nb_matches, sizeof(trigram_match_t));
468
+ assert(matches != NULL);
469
+
470
+ /* reduction, counting matches per reference */
471
+ entry_ptr = entries;
472
+ match_ptr = matches;
473
+ match_ptr->matches = 0;
474
+ match_ptr->reference = entry_ptr->reference; /* setup the first match to */
475
+ match_ptr->weight = entry_ptr->weight; /* simplify the loop */
476
+ for (int k = 0; k < nb_entries; ++k) {
477
+ if (entry_ptr->reference != match_ptr->reference) {
478
+ ++match_ptr;
479
+ match_ptr->reference = entry_ptr->reference;
480
+ match_ptr->weight = entry_ptr->weight;
481
+ match_ptr->matches = 1;
482
+ } else {
483
+ match_ptr->matches += 1;
484
+ }
485
+ assert((int) match_ptr->matches <= nb_trigrams);
486
+ ++entry_ptr;
487
+ }
488
+ assert(match_ptr == matches + nb_matches - 1);
489
+ assert(entry_ptr == entries + nb_entries);
490
+
491
+ /* sort by weight (qsort) */
492
+ qsort(matches, nb_matches, sizeof(trigram_match_t), &compare_matches);
493
+
494
+ /* output results */
495
+ nb_results = (limit < nb_matches) ? limit : nb_matches;
496
+ for (int k = 0; k < nb_results; ++k) {
497
+ results[k] = matches[k];
498
+ LOG("match %d: reference %d, matchiness %d, weight %d\n", k, matches[k].reference, matches[k].matches, matches[k].weight);
499
+ }
500
+
501
+ cleanup:
502
+ free_if(entries);
503
+ free_if(matches);
504
+ free_if(trigrams);
505
+ return nb_results;
506
+ }
507
+
508
+ /******************************************************************************/
509
+
510
+ int blurrily_storage_delete(trigram_map haystack, uint32_t reference)
511
+ {
512
+ int trigrams_deleted = 0;
513
+
514
+ for (int k = 0; k < TRIGRAM_COUNT; ++k) {
515
+ trigram_entries_t* map = haystack->map + k;
516
+ trigram_entry_t* entry = NULL;
517
+
518
+ for (unsigned int j = 0; j < map->used; ++j) {
519
+ entry = map->entries + j;
520
+ if (entry->reference != reference) continue;
521
+
522
+ *entry = map->entries[map->used - 1];
523
+ map->used -= 1;
524
+
525
+ ++trigrams_deleted;
526
+ --j;
527
+ }
528
+ }
529
+ haystack->total_trigrams -= trigrams_deleted;
530
+ haystack->total_references -= 1;
531
+ return trigrams_deleted;
532
+ }
533
+
534
+ /******************************************************************************/
535
+
536
+ int blurrily_storage_stats(trigram_map haystack, trigram_stat_t* stats)
537
+ {
538
+ stats->references = haystack->total_references;
539
+ stats->trigrams = haystack->total_trigrams;
540
+ return 0;
541
+ }
@@ -0,0 +1,109 @@
1
+ /*
2
+
3
+ storage.h --
4
+
5
+ Trigram map creation, persistence, and querying.
6
+
7
+ */
8
+ #include <inttypes.h>
9
+ #include "tokeniser.h"
10
+ #include "blurrily.h"
11
+
12
+ struct trigram_map_t;
13
+ typedef struct trigram_map_t* trigram_map;
14
+
15
+ struct PACKED_STRUCT trigram_match_t {
16
+ uint32_t reference; /* identifier that was passed to blurrily_storage_put */
17
+ uint32_t matches; /* number of gathered trigram entries carrying this reference */
18
+ uint32_t weight; /* weight recorded for the reference; lower sorts first on ties */
19
+ };
20
+ typedef struct trigram_match_t trigram_match_t;
21
+ typedef struct trigram_match_t* trigram_match;
22
+
23
+ typedef struct trigram_stat_t {
24
+ uint32_t references; /* copy of the map's total_references counter */
25
+ uint32_t trigrams; /* copy of the map's total_trigrams counter */
26
+
27
+ } trigram_stat_t;
28
+
29
+
30
+ /*
31
+ Create a new trigram map, resident in memory.
32
+ */
33
+ int blurrily_storage_new(trigram_map* haystack);
34
+
35
+ /*
36
+ Load an existing trigram map from disk.
37
+ */
38
+ int blurrily_storage_load(trigram_map* haystack, const char* path);
39
+
40
+ /*
41
+ Release resources claimed by <blurrily_storage_new> or <blurrily_storage_load>.
42
+ */
43
+ int blurrily_storage_close(trigram_map* haystack);
44
+
45
+ /*
46
+ Persist to disk what <blurrily_storage_new> or <blurrily_storage_load>
47
+ gave you.
48
+ */
49
+ int blurrily_storage_save(trigram_map haystack, const char* path);
50
+
51
+ /*
52
+ Add a new string to the map. <reference> is your identifier for that
53
+ string, <weight> will be used to discriminate entries that match "as
54
+ well" when searching.
55
+
56
+ If <weight> is zero, it will be replaced by the number of characters in
57
+ the <needle>.
58
+
59
+ Returns a non-negative value on success, negative on failure.
60
+ */
61
+ int blurrily_storage_put(trigram_map haystack, const char* needle, uint32_t reference, uint32_t weight);
62
+
63
+ /*
64
+ Check the map for an existing <reference>.
65
+
66
+ Returns < 0 on error, 0 if the reference is not found, the number of trigrams
67
+ for that reference otherwise.
68
+
69
+ If <weight> is not NULL, will be set to the weight value passed to the put
70
+ method on return (if the reference is found).
71
+
72
+ If <trigrams> is not NULL, it should point to an array <nb_trigrams> long,
73
+ and up to <nb_trigrams> will be copied into it matching the <needle>
74
+ originally passed to the put method.
75
+
76
+ Note that this is an O(n) method: the whole map will be read.
77
+ */
78
+ // int blurrily_storage_get(trigram_map haystack, uint32_t reference, uint32_t* weight, int nb_trigrams, trigram_t* trigrams);
79
+
80
+ /*
81
+ Remove a <reference> from the map.
82
+
83
+ Note that this is very inefficient: every trigram bucket is scanned.
84
+
85
+ Returns the number of trigram entries removed (zero if the reference was not found), negative on failure.
86
+ */
87
+ int blurrily_storage_delete(trigram_map haystack, uint32_t reference);
88
+
89
+ /*
90
+ Return at most <limit> entries matching <needle> from the <haystack>.
91
+
92
+ Results are written to <results>. The first results are the entries
93
+ sharing the most trigrams with the <needle>. Amongst entries with the same
94
+ number of matches, the lightest ones (lowest <weight>) will be returned
95
+ first.
96
+
97
+ <results> should be allocated by the caller.
98
+
99
+ Returns number of matches on success, negative on failure.
100
+ */
101
+ int blurrily_storage_find(trigram_map haystack, const char* needle, uint16_t limit, trigram_match results);
102
+
103
+ /*
104
+ Copies metadata into <stats>
105
+
106
+ Returns a non-negative value on success, negative on failure.
107
+ */
108
+ int blurrily_storage_stats(trigram_map haystack, trigram_stat_t* stats);
109
+
@@ -0,0 +1,127 @@
1
+ #include <stdlib.h>
2
+ #include <string.h>
3
+ #include <stdio.h>
4
+ #include <math.h>
5
+ #include "tokeniser.h"
6
+ #include "log.h"
7
+ #include "blurrily.h"
8
+
9
+
10
+ /******************************************************************************/
11
+
12
+ static int ipow(int a, int b) /* Integer power: returns <a> multiplied by itself <b> times. */
13
+ {
14
+ int result = 1;
15
+
16
+ while (b-- > 0) result = result * a; /* runs zero times when b <= 0, so the result stays 1 */
17
+ return result;
18
+ }
19
+
20
+ /******************************************************************************/
21
+
22
+ static void string_to_code(const char* input, trigram_t *output) /* Packs the 3 characters starting at <input> into one base-TRIGRAM_BASE code written to *<output>. */
23
+ {
24
+ trigram_t result = 0;
25
+
26
+ for (int k = 0 ; k < 3; ++k) {
27
+ if (input[k] == '*' || input[k] < 'a' || input[k] > 'z') continue; /* anything outside 'a'..'z' (including '*') contributes digit 0 */
28
+ result += ipow(TRIGRAM_BASE, k) * (input[k] - 'a' + 1); /* 'a'..'z' become digits 1..26; position k is weighted by TRIGRAM_BASE^k */
29
+ }
30
+
31
+ *output = result;
32
+ }
33
+
34
+ /******************************************************************************/
35
+
36
+ static void code_to_string(trigram_t input, char* output) /* Unpacks trigram code <input> into 3 characters plus a terminating 0 in <output> (4 bytes are written). */
37
+ {
38
+ for (int k = 0 ; k < 3; ++k) {
39
+ uint16_t elem = input / ipow(TRIGRAM_BASE, k) % TRIGRAM_BASE; /* k-th base-TRIGRAM_BASE digit of the code */
40
+ if (elem == 0) {
41
+ output[k] = '*'; /* digit 0 is rendered as '*' */
42
+ } else {
43
+ output[k] = ('a' + elem - 1); /* digit 1 maps to 'a', digit 2 to 'b', and so on */
44
+ }
45
+ }
46
+ output[3] = 0;
47
+ }
48
+
49
+ /******************************************************************************/
50
+
51
+ static int blurrily_compare_trigrams(const void* left_p, const void* right_p) /* qsort comparator: orders trigram_t values in ascending order. */
52
+ {
53
+ trigram_t* left = (trigram_t*)left_p;
54
+ trigram_t* right = (trigram_t*)right_p;
55
+ return (int)*left - (int)*right; /* negative, zero or positive when *left is below, equal to or above *right */
56
+ }
57
+
58
+ /******************************************************************************/
59
+
60
+ int blurrily_tokeniser_parse_string(const char* input, trigram_t* output) /* Fills <output> with the sorted, de-duplicated trigram codes of <input>; returns how many unique codes were kept. */
61
+ {
62
+ int length = strlen(input);
63
+ char* normalized = (char*) malloc(length+5); /* NOTE(review): the malloc result is used below without a NULL check */
64
+ int duplicates = 0;
65
+
66
+ snprintf(normalized, length+4, "**%s*", input); /* pad with two leading and one trailing '*': length+3 characters plus the terminator */
67
+
68
+ /* replace spaces with '*' */
69
+ for (int k = 0; k < length+3; ++k) {
70
+ if (normalized[k] == ' ') normalized[k] = '*';
71
+ }
72
+
73
+ /* compute trigrams */
74
+ for (int k = 0; k <= length; ++k) { /* one trigram per start offset 0..length, so length+1 slots of <output> are written */
75
+ string_to_code(normalized+k, output+k);
76
+ }
77
+
78
+ /* print results */
79
+ LOG("-- normalization\n");
80
+ LOG("%s -> %s\n", input, normalized);
81
+ LOG("-- tokenisation\n");
82
+ for (int k = 0; k <= length; ++k) {
83
+ char res[4];
84
+
85
+ code_to_string(output[k], res);
86
+
87
+ LOG("%c%c%c -> %d -> %s\n",
88
+ normalized[k], normalized[k+1], normalized[k+2],
89
+ output[k], res
90
+ );
91
+ }
92
+
93
+ /* sort */
94
+ qsort((void*)output, length+1, sizeof(trigram_t), &blurrily_compare_trigrams); /* equal codes become adjacent */
95
+
96
+ /* remove duplicates */
97
+ for (int k = 1; k <= length; ++k) {
98
+ trigram_t* previous = output + k - 1;
99
+ trigram_t* current = output + k;
100
+
101
+ if (*previous == *current) {
102
+ *previous = 32768; /* sentinel for a duplicate slot; above the largest code string_to_code can produce with base 28 (26 + 26*28 + 26*784 = 21138) */
103
+ ++duplicates;
104
+ }
105
+ }
106
+
107
+ /* compact */
108
+ qsort((void*)output, length+1, sizeof(trigram_t), &blurrily_compare_trigrams); /* re-sorting moves the sentinel slots after the unique codes */
109
+
110
+ /* print again */
111
+ LOG("-- after sort/compact\n");
112
+ for (int k = 0; k <= length-duplicates; ++k) {
113
+ char res[4];
114
+ code_to_string(output[k], res);
115
+ LOG("%d -> %s\n", output[k], res);
116
+ }
117
+
118
+ free((void*)normalized);
119
+ return length+1 - duplicates; /* number of unique trigrams at the front of <output> */
120
+ }
121
+
122
+ /******************************************************************************/
123
+
124
+ int blurrily_tokeniser_trigram(trigram_t UNUSED(input), char* UNUSED(output)) /* Stub: both arguments are marked UNUSED and <output> is never written. */
125
+ {
126
+ return 0; /* NOTE(review): always 0, although tokeniser.h documents "positive on success" for this function */
127
+ }
@@ -0,0 +1,41 @@
1
+ /*
2
+
3
+ tokeniser.h --
4
+
5
+ Split a string into an array of trigrams.
6
+
7
+ The input string should be only lowercase latin letters and spaces
8
+ (convert using iconv).
9
+
10
+ Each trigram is a three-symbol tuple consisting of letters and the
11
+ "epsilon" character used to represent spaces and beginning-of-word/end-of-
12
+ word anchors.
13
+
14
+ Each trigram is represented by a 16-bit integer.
15
+
16
+ */
17
+ #include <inttypes.h>
18
+
19
+ #define TRIGRAM_BASE 28 /* radix used when packing three symbols into one trigram code */
20
+
21
+ typedef uint16_t trigram_t; /* a trigram code, stored as a 16-bit unsigned integer */
22
+
23
+ /*
24
+ Parse the <input> string and store the result in <output>.
25
+ <output> must be allocated by the caller and provide at least as many slots
26
+ as characters in <input>, plus one.
27
+ (not all will necessarily be filled)
28
+
29
+ Returns the number of trigrams on success, a negative number on failure.
30
+ */
31
+ int blurrily_tokeniser_parse_string(const char* input, trigram_t* output);
32
+
33
+
34
+ /*
35
+ Given an <input> returns a string representation of the trigram in <output>.
36
+ <output> must be allocated by caller and will always be exactly 3
37
+ characters plus a terminating NUL.
38
+
39
+ Returns positive on success, negative on failure.
40
+ */
41
+ int blurrily_tokeniser_trigram(trigram_t input, char* output);
data/lib/blurrily.rb ADDED
@@ -0,0 +1,3 @@
1
+ require "blurrily/map_ext"
2
+ require "blurrily/map"
3
+ require "blurrily/version"
@@ -0,0 +1,34 @@
1
+ require 'blurrily/map_ext'
2
+ require 'active_support/all' # fixme: we only need enough to get mb_chars and alias_method_chain in
3
+
4
+ module Blurrily
5
+ Map.class_eval do # reopens Map to wrap put and find with needle normalisation
6
+
7
+ def put_with_string_normalize(needle, reference, weight=0) # normalises <needle>, then delegates to the original put
8
+ needle = normalize_string needle
9
+ put_without_string_normalize(needle, reference, weight)
10
+ end
11
+
12
+ alias_method_chain :put, :string_normalize # put now runs put_with_string_normalize; the original stays reachable as put_without_string_normalize
13
+
14
+
15
+ def find_with_string_normalize(needle, limit=10) # normalises <needle>, then delegates to the original find
16
+ needle = normalize_string needle
17
+ find_without_string_normalize(needle, limit)
18
+ end
19
+
20
+ alias_method_chain :find, :string_normalize # same wrapping as for put
21
+
22
+
23
+ private
24
+
25
+ def normalize_string(needle) # returns <needle> downcased, with runs of whitespace collapsed to one space and the ends stripped
26
+ result = needle.downcase
27
+ unless result =~ /^([a-z ])+$/ # NOTE(review): ^ and $ anchor per line in Ruby, so a multi-line needle skips the cleanup if any single line matches; \A and \z may be intended
28
+ result = result.mb_chars.normalize(:kd).gsub(/[^\x00-\x7F]/,'').to_s.gsub(/[^a-z]/,' ') # normalise with form :kd, drop non-ASCII characters, then turn everything that is not a-z into a space
29
+ end
30
+ result.gsub(/\s+/,' ').strip
31
+ end
32
+
33
+ end
34
+ end
File without changes
@@ -0,0 +1,3 @@
1
+ module Blurrily
2
+ VERSION = "0.0.1" # version string of this release
3
+ end
metadata ADDED
@@ -0,0 +1,199 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: blurrily
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Julien Letessier
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-03-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activesupport
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: eventmachine
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: json
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake-compiler
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: pry-nav
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: pry-doc
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - '>='
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: progressbar
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - '>='
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ description: Native fuzzy string search
154
+ email:
155
+ - julien.letessier@gmail.com
156
+ executables: []
157
+ extensions:
158
+ - ext/blurrily/extconf.rb
159
+ extra_rdoc_files: []
160
+ files:
161
+ - lib/blurrily/map.rb
162
+ - lib/blurrily/server.rb
163
+ - lib/blurrily/version.rb
164
+ - lib/blurrily.rb
165
+ - ext/blurrily/map_ext.c
166
+ - ext/blurrily/storage.c
167
+ - ext/blurrily/tokeniser.c
168
+ - ext/blurrily/blurrily.h
169
+ - ext/blurrily/log.h
170
+ - ext/blurrily/storage.h
171
+ - ext/blurrily/tokeniser.h
172
+ - ext/blurrily/extconf.rb
173
+ - README.md
174
+ - LICENSE.txt
175
+ homepage: http://github.com/mezis/blurrily
176
+ licenses: []
177
+ metadata: {}
178
+ post_install_message:
179
+ rdoc_options: []
180
+ require_paths:
181
+ - lib
182
+ required_ruby_version: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - '>='
185
+ - !ruby/object:Gem::Version
186
+ version: '0'
187
+ required_rubygems_version: !ruby/object:Gem::Requirement
188
+ requirements:
189
+ - - '>='
190
+ - !ruby/object:Gem::Version
191
+ version: '0'
192
+ requirements: []
193
+ rubyforge_project:
194
+ rubygems_version: 2.0.0
195
+ signing_key:
196
+ specification_version: 4
197
+ summary: Native fuzzy string search
198
+ test_files: []
199
+ has_rdoc: