blurrily 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +177 -9
- data/bin/blurrily +43 -0
- data/ext/blurrily/blurrily.h +14 -0
- data/ext/blurrily/extconf.rb +5 -1
- data/ext/blurrily/map_ext.c +67 -11
- data/ext/blurrily/search_tree.c +66 -0
- data/ext/blurrily/search_tree.h +30 -0
- data/ext/blurrily/storage.c +143 -58
- data/ext/blurrily/storage.h +6 -0
- data/ext/blurrily/tokeniser.c +9 -10
- data/lib/blurrily.rb +1 -3
- data/lib/blurrily/client.rb +129 -0
- data/lib/blurrily/command_processor.rb +54 -0
- data/lib/blurrily/defaults.rb +10 -0
- data/lib/blurrily/map.rb +6 -3
- data/lib/blurrily/map_group.rb +39 -0
- data/lib/blurrily/server.rb +49 -0
- data/lib/blurrily/version.rb +1 -1
- metadata +81 -4
- data/ext/blurrily/log.h +0 -5
@@ -0,0 +1,66 @@
|
|
1
|
+
#include <stdlib.h>
|
2
|
+
#include <inttypes.h>
|
3
|
+
#include "blurrily.h"
|
4
|
+
#include "ruby.h"
|
5
|
+
|
6
|
+
/******************************************************************************/
|
7
|
+
|
8
|
+
typedef struct blurrily_refs_t {
|
9
|
+
VALUE hash;
|
10
|
+
} blurrily_refs_t;
|
11
|
+
|
12
|
+
/******************************************************************************/
|
13
|
+
|
14
|
+
/*
 * Allocate an empty reference set.
 *
 * On success the new set is stored in *refs_ptr and 0 is returned.
 * When the structure cannot be allocated, -1 is returned and *refs_ptr is
 * left untouched.
 */
int blurrily_refs_new(blurrily_refs_t** refs_ptr)
{
  blurrily_refs_t* fresh = malloc(sizeof *fresh);

  if (fresh == NULL) {
    return -1;
  }

  fresh->hash = rb_hash_new();
  *refs_ptr   = fresh;
  return 0;
}
|
25
|
+
|
26
|
+
/******************************************************************************/
|
27
|
+
|
28
|
+
/* Report the backing Ruby hash to the garbage collector via rb_gc_mark(). */
void blurrily_refs_mark(blurrily_refs_t* refs)
{
  VALUE backing = refs->hash;

  rb_gc_mark(backing);
}
|
33
|
+
|
34
|
+
/******************************************************************************/
|
35
|
+
|
36
|
+
/*
 * Destroy a reference set and reset the caller's pointer to NULL.
 *
 * Calling this with a NULL refs_ptr, or with *refs_ptr already NULL (for
 * instance on a second call for the same set), is a no-op rather than a
 * NULL dereference.
 */
void blurrily_refs_free(blurrily_refs_t** refs_ptr)
{
  blurrily_refs_t* refs = NULL;

  if (refs_ptr == NULL || *refs_ptr == NULL) return;
  refs = *refs_ptr;

  /* drop our handle on the Ruby hash before releasing the structure */
  refs->hash = Qnil;
  free(refs);
  *refs_ptr = NULL;
}
|
45
|
+
|
46
|
+
/******************************************************************************/
|
47
|
+
|
48
|
+
/* Record `ref` as present: it becomes a key of the hash, mapped to true. */
void blurrily_refs_add(blurrily_refs_t* refs, uint32_t ref)
{
  VALUE key = UINT2NUM(ref);

  rb_hash_aset(refs->hash, key, Qtrue);
}
|
53
|
+
|
54
|
+
/******************************************************************************/
|
55
|
+
|
56
|
+
/*
 * Forget `ref`. Removing a reference that is not in the set is a no-op.
 *
 * The key is deleted from the hash rather than overwritten with nil, so
 * removed references do not linger as dead keys and the hash does not grow
 * without bound as references are added and removed over time.
 * blurrily_refs_test only reports a reference as present when its value is
 * true, so lookups behave the same either way.
 */
void blurrily_refs_remove(blurrily_refs_t* refs, uint32_t ref)
{
  (void) rb_hash_delete(refs->hash, UINT2NUM(ref));
}
|
60
|
+
|
61
|
+
/******************************************************************************/
|
62
|
+
|
63
|
+
/*
 * Membership test.
 *
 * Returns 1 when the value stored under `ref` is true, 0 otherwise.
 */
int blurrily_refs_test(blurrily_refs_t* refs, uint32_t ref)
{
  VALUE stored = rb_hash_aref(refs->hash, UINT2NUM(ref));

  if (stored == Qtrue) return 1;
  return 0;
}
|
@@ -0,0 +1,30 @@
|
|
1
|
+
/*

  search_tree.h --

  List of all references that's fast to query for existence.

*/
#ifndef BLURRILY_SEARCH_TREE_H
#define BLURRILY_SEARCH_TREE_H

#include <inttypes.h>


/* Opaque handle on a set of references; the layout lives in search_tree.c */
typedef struct blurrily_refs_t blurrily_refs_t;


/* Allocate a search tree; returns 0 on success, -1 if allocation fails */
int blurrily_refs_new(blurrily_refs_t** refs_ptr);

/* Destroy a search tree and set *refs_ptr to NULL */
void blurrily_refs_free(blurrily_refs_t** refs_ptr);

/* Mark with Ruby's GC */
void blurrily_refs_mark(blurrily_refs_t* refs);

/* Add a reference */
void blurrily_refs_add(blurrily_refs_t* refs, uint32_t ref);

/* Remove a reference */
void blurrily_refs_remove(blurrily_refs_t* refs, uint32_t ref);

/* Test for a reference (1 = present, 0 = absent) */
int blurrily_refs_test(blurrily_refs_t* refs, uint32_t ref);

#endif /* BLURRILY_SEARCH_TREE_H */
|
data/ext/blurrily/storage.c
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
#include <assert.h>
|
5
5
|
#include <fcntl.h>
|
6
6
|
#include <sys/mman.h>
|
7
|
+
#include <sys/errno.h>
|
7
8
|
#include <unistd.h>
|
8
9
|
#include <sys/stat.h>
|
9
10
|
|
@@ -21,14 +22,13 @@
|
|
21
22
|
#endif
|
22
23
|
|
23
24
|
#include "storage.h"
|
24
|
-
|
25
|
-
#include "log.h"
|
25
|
+
#include "search_tree.h"
|
26
26
|
|
27
27
|
/******************************************************************************/
|
28
28
|
|
29
29
|
#define PAGE_SIZE 4096
|
30
30
|
#define TRIGRAM_COUNT (TRIGRAM_BASE * TRIGRAM_BASE * TRIGRAM_BASE)
|
31
|
-
#define TRIGRAM_ENTRIES_START_SIZE PAGE_SIZE/
|
31
|
+
#define TRIGRAM_ENTRIES_START_SIZE PAGE_SIZE/sizeof(trigram_entry_t)
|
32
32
|
|
33
33
|
/******************************************************************************/
|
34
34
|
|
@@ -50,7 +50,7 @@ struct PACKED_STRUCT trigram_entries_t
|
|
50
50
|
uint32_t used;
|
51
51
|
|
52
52
|
trigram_entry_t* entries; /* set when the structure is in memory */
|
53
|
-
|
53
|
+
off_t entries_offset; /* set when the structure is on disk */
|
54
54
|
|
55
55
|
uint8_t dirty; /* not optimised (presorted) yet */
|
56
56
|
};
|
@@ -68,7 +68,7 @@ struct PACKED_STRUCT trigram_map_t
|
|
68
68
|
uint32_t total_references;
|
69
69
|
uint32_t total_trigrams;
|
70
70
|
size_t mapped_size; /* when mapped from disk, the number of bytes mapped */
|
71
|
-
|
71
|
+
blurrily_refs_t* refs;
|
72
72
|
|
73
73
|
trigram_entries_t map[TRIGRAM_COUNT]; /* this whole structure is ~500KB */
|
74
74
|
};
|
@@ -88,6 +88,17 @@ static int fake_mergesort(void *base, size_t nel, size_t width, int (*compar)(co
|
|
88
88
|
|
89
89
|
/******************************************************************************/
|
90
90
|
|
91
|
+
#define SMALLOC(_NELEM,_TYPE) (_TYPE*) smalloc(_NELEM, sizeof(_TYPE))
|
92
|
+
|
93
|
+
/*
 * Allocate an array of `nelem` elements of `length` bytes each and fill it
 * with the byte pattern 0xAA (presumably so that memory the caller forgot
 * to initialise is easy to recognise).
 *
 * Returns NULL when the allocation fails, or when nelem * length does not
 * fit in a size_t. The result is released with free().
 */
static void* smalloc(size_t nelem, size_t length)
{
  size_t total  = 0;
  void*  result = NULL;

  /* refuse sizes whose product would wrap around and under-allocate */
  if (length != 0 && nelem > ((size_t) -1) / length) return NULL;

  total  = nelem * length;
  result = malloc(total);
  if (result) memset(result, 0xAA, total);
  return result;
}
|
99
|
+
|
100
|
+
/******************************************************************************/
|
101
|
+
|
91
102
|
/* 1 -> little endian, 2 -> big endian */
|
92
103
|
static uint8_t get_big_endian()
|
93
104
|
{
|
@@ -171,24 +182,23 @@ int blurrily_storage_new(trigram_map* haystack_ptr)
|
|
171
182
|
int k = 0;
|
172
183
|
|
173
184
|
LOG("blurrily_storage_new\n");
|
174
|
-
haystack = (
|
185
|
+
haystack = SMALLOC(1, trigram_map_t);
|
175
186
|
if (haystack == NULL) return -1;
|
176
187
|
|
177
|
-
memset(haystack, 0x00, sizeof(trigram_map_t));
|
178
|
-
|
179
188
|
memcpy(haystack->magic, "trigra", 6);
|
180
189
|
haystack->big_endian = get_big_endian();
|
181
190
|
haystack->pointer_size = get_pointer_size();
|
182
191
|
|
183
192
|
haystack->mapped_size = 0; /* not mapped, as we just created it in memory */
|
184
|
-
haystack->mapped_fd = 0;
|
185
193
|
haystack->total_references = 0;
|
186
194
|
haystack->total_trigrams = 0;
|
195
|
+
haystack->refs = NULL;
|
187
196
|
for(k = 0, ptr = haystack->map ; k < TRIGRAM_COUNT ; ++k, ++ptr) {
|
188
197
|
ptr->buckets = 0;
|
189
198
|
ptr->used = 0;
|
190
199
|
ptr->dirty = 0;
|
191
200
|
ptr->entries = (trigram_entry_t*)NULL;
|
201
|
+
ptr->entries_offset = 0;
|
192
202
|
}
|
193
203
|
|
194
204
|
*haystack_ptr = haystack;
|
@@ -212,25 +222,46 @@ int blurrily_storage_load(trigram_map* haystack, const char* path)
|
|
212
222
|
res = fstat(fd, &metadata);
|
213
223
|
if (res < 0) goto cleanup;
|
214
224
|
|
225
|
+
/* check this file is at least long enough to have a header */
|
226
|
+
if (metadata.st_size < (off_t) sizeof(trigram_map_t)) {
|
227
|
+
errno = EPROTO;
|
228
|
+
res = -1;
|
229
|
+
goto cleanup;
|
230
|
+
}
|
231
|
+
|
215
232
|
header = (trigram_map) mmap(NULL, metadata.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
|
216
|
-
|
233
|
+
if (header == MAP_FAILED) {
|
234
|
+
res = -1;
|
235
|
+
header = NULL;
|
236
|
+
goto cleanup;
|
237
|
+
}
|
238
|
+
|
239
|
+
/* fd not needed once mapping established */
|
240
|
+
res = close(fd);
|
241
|
+
if (res < 0) goto cleanup;
|
242
|
+
fd = -1;
|
217
243
|
|
218
244
|
/* check magic */
|
219
|
-
|
245
|
+
res = memcmp(header->magic, "trigra", 6);
|
246
|
+
if (res != 0 || header->big_endian != get_big_endian() || header->pointer_size != get_pointer_size()) {
|
247
|
+
errno = EPROTO;
|
248
|
+
res = -1;
|
249
|
+
goto cleanup;
|
250
|
+
}
|
220
251
|
|
221
252
|
/* fix header data */
|
222
253
|
header->mapped_size = metadata.st_size;
|
223
|
-
header->mapped_fd = fd;
|
224
254
|
origin = (uint8_t*)header;
|
225
255
|
for (int k = 0; k < TRIGRAM_COUNT; ++k) {
|
226
256
|
trigram_entries_t* map = header->map + k;
|
227
257
|
if (map->entries_offset == 0) continue;
|
228
258
|
map->entries = (trigram_entry_t*) (origin + map->entries_offset);
|
229
|
-
map->entries_offset = 0;
|
230
259
|
}
|
231
260
|
*haystack = header;
|
232
261
|
|
233
262
|
cleanup:
|
263
|
+
if (fd > 0) (void) close(fd);
|
264
|
+
if (res < 0 && header != NULL) (void) munmap(header, metadata.st_size);
|
234
265
|
return res;
|
235
266
|
}
|
236
267
|
|
@@ -239,29 +270,28 @@ cleanup:
|
|
239
270
|
int blurrily_storage_close(trigram_map* haystack_ptr)
|
240
271
|
{
|
241
272
|
trigram_map haystack = *haystack_ptr;
|
242
|
-
int res =
|
273
|
+
int res = 0;
|
274
|
+
trigram_entries_t* ptr = haystack->map;
|
243
275
|
|
244
276
|
LOG("blurrily_storage_close\n");
|
245
277
|
|
246
|
-
|
247
|
-
|
278
|
+
for(int k = 0 ; k < TRIGRAM_COUNT ; ++k) {
|
279
|
+
if (ptr->entries_offset == 0) free(ptr->entries);
|
280
|
+
++ptr;
|
281
|
+
}
|
248
282
|
|
249
|
-
|
250
|
-
assert(res >= 0);
|
283
|
+
if (haystack->refs) blurrily_refs_free(&haystack->refs);
|
251
284
|
|
252
|
-
|
253
|
-
|
285
|
+
if (haystack->mapped_size) {
|
286
|
+
res = munmap(haystack, haystack->mapped_size);
|
287
|
+
if (res < 0) goto cleanup;
|
254
288
|
} else {
|
255
|
-
trigram_entries_t* ptr = haystack->map;
|
256
|
-
for(int k = 0 ; k < TRIGRAM_COUNT ; ++k) {
|
257
|
-
free(ptr->entries);
|
258
|
-
++ptr;
|
259
|
-
}
|
260
289
|
free(haystack);
|
261
290
|
}
|
262
291
|
|
292
|
+
cleanup:
|
263
293
|
*haystack_ptr = NULL;
|
264
|
-
return
|
294
|
+
return res;
|
265
295
|
}
|
266
296
|
|
267
297
|
/******************************************************************************/
|
@@ -269,7 +299,7 @@ int blurrily_storage_close(trigram_map* haystack_ptr)
|
|
269
299
|
int blurrily_storage_save(trigram_map haystack, const char* path)
|
270
300
|
{
|
271
301
|
int fd = -1;
|
272
|
-
int res =
|
302
|
+
int res = 0;
|
273
303
|
uint8_t* ptr = (uint8_t*)NULL;
|
274
304
|
size_t total_size = 0;
|
275
305
|
size_t offset = 0;
|
@@ -282,7 +312,7 @@ int blurrily_storage_save(trigram_map haystack, const char* path)
|
|
282
312
|
}
|
283
313
|
|
284
314
|
/* path for temporary file */
|
285
|
-
snprintf(path_tmp, PATH_MAX, "%s.tmp", path);
|
315
|
+
snprintf(path_tmp, PATH_MAX, "%s.tmp.%ld", path, random());
|
286
316
|
|
287
317
|
/* compute storage space required */
|
288
318
|
total_size += round_to_page(sizeof(trigram_map_t));
|
@@ -293,16 +323,19 @@ int blurrily_storage_save(trigram_map haystack, const char* path)
|
|
293
323
|
|
294
324
|
/* open and map file */
|
295
325
|
fd = open(path_tmp, O_RDWR | O_CREAT | O_TRUNC, 0644);
|
296
|
-
|
326
|
+
if (fd < 0) goto cleanup;
|
297
327
|
|
298
328
|
res = ftruncate(fd, total_size);
|
299
|
-
|
329
|
+
if (res < 0) goto cleanup;
|
300
330
|
|
301
331
|
ptr = mmap(NULL, total_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
|
302
|
-
|
332
|
+
if (ptr == MAP_FAILED) { res = -1 ; goto cleanup ; }
|
333
|
+
|
334
|
+
(void) close(fd);
|
335
|
+
fd = -1;
|
303
336
|
|
304
337
|
/* flush data */
|
305
|
-
memset(ptr,
|
338
|
+
memset(ptr, 0xFF, total_size);
|
306
339
|
|
307
340
|
/* copy header & clean copy */
|
308
341
|
memcpy(ptr, (void*)haystack, sizeof(trigram_map_t));
|
@@ -310,7 +343,7 @@ int blurrily_storage_save(trigram_map haystack, const char* path)
|
|
310
343
|
header = (trigram_map)ptr;
|
311
344
|
|
312
345
|
header->mapped_size = 0;
|
313
|
-
header->
|
346
|
+
header->refs = NULL;
|
314
347
|
|
315
348
|
/* copy each map, set offset in header */
|
316
349
|
for (int k = 0; k < TRIGRAM_COUNT; ++k) {
|
@@ -330,17 +363,34 @@ int blurrily_storage_save(trigram_map haystack, const char* path)
|
|
330
363
|
}
|
331
364
|
assert(offset == total_size);
|
332
365
|
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
assert(res >= 0);
|
366
|
+
cleanup:
|
367
|
+
if (ptr != NULL && total_size > 0) {
|
368
|
+
res = munmap(ptr, total_size);
|
369
|
+
}
|
338
370
|
|
339
371
|
/* commit by renaming the file */
|
340
|
-
res
|
341
|
-
|
372
|
+
if (res >= 0 && path) {
|
373
|
+
res = rename(path_tmp, path);
|
374
|
+
}
|
342
375
|
|
343
|
-
return
|
376
|
+
return res;
|
377
|
+
}
|
378
|
+
|
379
|
+
/******************************************************************************/
|
380
|
+
|
381
|
+
/*
 * Feed the reference of every entry currently stored in the map into
 * haystack->refs. The reference set must already exist (asserted).
 */
void add_all_refs(trigram_map haystack)
{
  assert(haystack->refs != NULL);

  for (int t = 0; t < TRIGRAM_COUNT; ++t) {
    trigram_entries_t* bucket = &haystack->map[t];

    assert(bucket->used <= bucket->buckets);
    for (uint32_t e = 0; e < bucket->used; ++e) {
      blurrily_refs_add(haystack->refs, bucket->entries[e].reference);
    }
  }
}
|
345
395
|
|
346
396
|
/******************************************************************************/
|
@@ -348,13 +398,19 @@ int blurrily_storage_save(trigram_map haystack, const char* path)
|
|
348
398
|
int blurrily_storage_put(trigram_map haystack, const char* needle, uint32_t reference, uint32_t weight)
|
349
399
|
{
|
350
400
|
int nb_trigrams = -1;
|
351
|
-
|
401
|
+
size_t length = strlen(needle);
|
352
402
|
trigram_t* trigrams = (trigram_t*)NULL;
|
353
403
|
|
354
|
-
|
404
|
+
if (!haystack->refs) {
|
405
|
+
blurrily_refs_new(&haystack->refs);
|
406
|
+
add_all_refs(haystack);
|
407
|
+
}
|
408
|
+
if (blurrily_refs_test(haystack->refs, reference)) return 0;
|
409
|
+
if (weight <= 0) weight = (uint32_t) length;
|
410
|
+
|
411
|
+
trigrams = SMALLOC(length+1, trigram_t);
|
355
412
|
nb_trigrams = blurrily_tokeniser_parse_string(needle, trigrams);
|
356
413
|
|
357
|
-
if (weight <= 0) weight = length;
|
358
414
|
|
359
415
|
for (int k = 0; k < nb_trigrams; ++k) {
|
360
416
|
trigram_t t = trigrams[k];
|
@@ -369,33 +425,51 @@ int blurrily_storage_put(trigram_map haystack, const char* needle, uint32_t refe
|
|
369
425
|
LOG("- alloc for %d\n", t);
|
370
426
|
|
371
427
|
map->buckets = TRIGRAM_ENTRIES_START_SIZE;
|
372
|
-
map->entries = (
|
428
|
+
map->entries = SMALLOC(map->buckets, trigram_entry_t);
|
373
429
|
}
|
374
|
-
if (map->used == map->buckets) {
|
375
|
-
uint32_t
|
430
|
+
else if (map->used == map->buckets) {
|
431
|
+
uint32_t new_buckets = map->buckets * 4/3;
|
376
432
|
trigram_entry_t* new_entries = NULL;
|
377
433
|
LOG("- realloc for %d\n", t);
|
378
434
|
|
379
435
|
/* copy old data, free old pointer, zero extra space */
|
380
|
-
new_entries =
|
436
|
+
new_entries = SMALLOC(new_buckets, trigram_entry_t);
|
381
437
|
assert(new_entries != NULL);
|
382
438
|
memcpy(new_entries, map->entries, map->buckets * sizeof(trigram_entry_t));
|
383
|
-
|
384
|
-
memset(new_entries + map->buckets,
|
439
|
+
/* scribble the rest of the map*/
|
440
|
+
// memset(new_entries + map->buckets, 0xFF, (new_buckets - map->buckets) * sizeof(trigram_entry_t));
|
441
|
+
|
442
|
+
#ifndef NDEBUG
|
443
|
+
/* scribble old data */
|
444
|
+
memset(map->entries, 0xFF, map->buckets * sizeof(trigram_entry_t));
|
445
|
+
#endif
|
446
|
+
|
447
|
+
if (map->entries_offset) {
|
448
|
+
/* old data was on disk, just mark it as no longer on disk */
|
449
|
+
map->entries_offset = 0;
|
450
|
+
} else {
|
451
|
+
/* free old data */
|
452
|
+
free(map->entries);
|
453
|
+
}
|
454
|
+
|
385
455
|
/* swap fields */
|
386
456
|
map->buckets = new_buckets;
|
387
457
|
map->entries = new_entries;
|
388
458
|
}
|
459
|
+
|
460
|
+
/* insert new entry */
|
461
|
+
assert(map->used < map->buckets);
|
389
462
|
map->entries[map->used] = entry;
|
390
|
-
|
391
463
|
map->used += 1;
|
392
464
|
map->dirty = 1;
|
393
465
|
}
|
394
466
|
haystack->total_trigrams += nb_trigrams;
|
395
467
|
haystack->total_references += 1;
|
396
468
|
|
469
|
+
blurrily_refs_add(haystack->refs, reference);
|
470
|
+
|
397
471
|
free((void*)trigrams);
|
398
|
-
return
|
472
|
+
return nb_trigrams;
|
399
473
|
}
|
400
474
|
|
401
475
|
/******************************************************************************/
|
@@ -403,7 +477,7 @@ int blurrily_storage_put(trigram_map haystack, const char* needle, uint32_t refe
|
|
403
477
|
int blurrily_storage_find(trigram_map haystack, const char* needle, uint16_t limit, trigram_match results)
|
404
478
|
{
|
405
479
|
int nb_trigrams = -1;
|
406
|
-
|
480
|
+
size_t length = strlen(needle);
|
407
481
|
trigram_t* trigrams = (trigram_t*)NULL;
|
408
482
|
int nb_entries = -1;
|
409
483
|
trigram_entry_t* entries = NULL;
|
@@ -414,7 +488,7 @@ int blurrily_storage_find(trigram_map haystack, const char* needle, uint16_t lim
|
|
414
488
|
uint32_t last_ref = (uint32_t)-1;
|
415
489
|
int nb_results = 0;
|
416
490
|
|
417
|
-
trigrams = (
|
491
|
+
trigrams = SMALLOC(length+1, trigram_t);
|
418
492
|
nb_trigrams = blurrily_tokeniser_parse_string(needle, trigrams);
|
419
493
|
if (nb_trigrams == 0) goto cleanup;
|
420
494
|
|
@@ -429,7 +503,7 @@ int blurrily_storage_find(trigram_map haystack, const char* needle, uint16_t lim
|
|
429
503
|
if (nb_entries == 0) goto cleanup;
|
430
504
|
|
431
505
|
/* allocate sorting memory */
|
432
|
-
entries = (
|
506
|
+
entries = SMALLOC(nb_entries, trigram_entry_t);
|
433
507
|
assert(entries != NULL);
|
434
508
|
LOG("allocated space for %zd trigrams entries\n", nb_entries);
|
435
509
|
|
@@ -464,7 +538,7 @@ int blurrily_storage_find(trigram_map haystack, const char* needle, uint16_t lim
|
|
464
538
|
LOG("total %zd distinct matches\n", nb_matches);
|
465
539
|
|
466
540
|
/* allocate matches result */
|
467
|
-
matches = (
|
541
|
+
matches = SMALLOC(nb_matches, trigram_match_t);
|
468
542
|
assert(matches != NULL);
|
469
543
|
|
470
544
|
/* reduction, counting matches per reference */
|
@@ -519,15 +593,18 @@ int blurrily_storage_delete(trigram_map haystack, uint32_t reference)
|
|
519
593
|
entry = map->entries + j;
|
520
594
|
if (entry->reference != reference) continue;
|
521
595
|
|
596
|
+
/* swap with the last entry */
|
522
597
|
*entry = map->entries[map->used - 1];
|
598
|
+
memset(map->entries + map->used - 1, 0xFF, sizeof(trigram_entry_t));
|
599
|
+
|
523
600
|
map->used -= 1;
|
524
601
|
|
525
602
|
++trigrams_deleted;
|
526
603
|
--j;
|
527
604
|
}
|
528
605
|
}
|
529
|
-
haystack->total_trigrams
|
530
|
-
haystack->total_references -= 1;
|
606
|
+
haystack->total_trigrams -= trigrams_deleted;
|
607
|
+
if (trigrams_deleted > 0) haystack->total_references -= 1;
|
531
608
|
return trigrams_deleted;
|
532
609
|
}
|
533
610
|
|
@@ -539,3 +616,11 @@ int blurrily_storage_stats(trigram_map haystack, trigram_stat_t* stats)
|
|
539
616
|
stats->trigrams = haystack->total_trigrams;
|
540
617
|
return 0;
|
541
618
|
}
|
619
|
+
|
620
|
+
/******************************************************************************/
|
621
|
+
|
622
|
+
/* Forward a GC mark request to the reference set, if one has been created. */
void blurrily_storage_mark(trigram_map haystack)
{
  if (haystack->refs == NULL) return;

  blurrily_refs_mark(haystack->refs);
}
|