wordtriez 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/changes ADDED
@@ -0,0 +1,21 @@
1
+ 1.0.4
2
+
3
+ 2014-09-15
4
+ fix returning result of search_with_prefix on trie node. see https://github.com/luikore/triez/issues/3.
5
+
6
+ 1.0.3
7
+
8
+ 2014-09-14
9
+ fix a segfault. see https://github.com/luikore/triez/issues/3. thanks @canadaduane for reporting this.
10
+
11
+ 1.0.2
12
+
13
+ 2013-06-01
14
+ add #walk
15
+ 2013-05-31
16
+ fix rb_gc_mark() stuck for value_type: :object
17
+
18
+ 1.0.1
19
+
20
+ 2013-05-30
21
+ fix extconf.rb for mkmf of ruby-head
data/copying ADDED
@@ -0,0 +1,18 @@
1
+ Copyright (C) 2013 by Zete Lui
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7
+ the Software, and to permit persons to whom the Software is furnished to do so,
8
+ subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/ext/common.h ADDED
@@ -0,0 +1,8 @@
1
+ /* redef value_t */
2
+
3
+ #ifndef HATTRIE_COMMON_H
4
+ #define HATTRIE_COMMON_H
5
+
6
+ typedef long long value_t;
7
+
8
+ #endif
data/ext/extconf.rb ADDED
@@ -0,0 +1,32 @@
1
# Build script for the triez extension: writes the Makefile, then compiles
# the vendored hat-trie sources into build/libtries.a for the linker.
require "mkmf"

$CFLAGS << ' -Ihat-trie'
$CPPFLAGS << ' -Ihat-trie'
$LDFLAGS << ' -Lbuild -ltries'
create_makefile 'triez'

# respect header changes: make every object depend on the local headers
headers = Dir.glob('*.{hpp,h}').join ' '
File.open 'Makefile', 'a' do |f|
  f.puts "\n$(OBJS): #{headers}"
end

# build vendor lib

# Echo a command, run it, and abort the configure step if it fails.
# Kernel#system returns false (non-zero exit) or nil (could not execute);
# ignoring that would defer the failure to a confusing link error later.
def sh *xs
  puts xs.join(' ')
  system(*xs) or abort "extconf: command failed: #{xs.join(' ')}"
end

require "fileutils"
include FileUtils
build_dir = File.dirname(__FILE__) + '/build'
mkdir_p build_dir
cd build_dir
unless File.exist?('libtries.a')
  cc = ENV['CC'] || RbConfig::CONFIG['CC']
  cc = [cc, '-O3', '-std=c99', '-Wall', '-pedantic', '-fPIC', '-c']
  ar = RbConfig::CONFIG['AR']
  # Fall back to plain `ar` when RbConfig has no entry (nil) or names a
  # file that does not exist; File.exist?(nil) would raise TypeError.
  ar = 'ar' if ar.nil? || !File.exist?(ar)
  sh *cc, '-I..', *Dir.glob("../hat-trie/*.c")
  sh ar, '-r', 'libtries.a', *Dir.glob("*.o")
end
@@ -0,0 +1,550 @@
1
+ /*
2
+ * This file is part of hat-trie.
3
+ *
4
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
5
+ *
6
+ */
7
+
8
+ #include "ahtable.h"
9
+ #include "misc.h"
10
+ #include "murmurhash3.h"
11
+ #include <assert.h>
12
+ #include <string.h>
13
+
14
+
15
+
16
+ const double ahtable_max_load_factor = 100000.0; /* arbitrary large number => don't resize */
17
+ const size_t ahtable_initial_size = 4096;
18
+
19
+ static size_t keylen(slot_t s) {
20
+ if (0x1 & *s) {
21
+ return (size_t) (*((uint16_t*) s) >> 1);
22
+ }
23
+ else {
24
+ return (size_t) (*s >> 1);
25
+ }
26
+ }
27
+
28
+
29
+ ahtable_t* ahtable_create()
30
+ {
31
+ return ahtable_create_n(ahtable_initial_size);
32
+ }
33
+
34
+
35
+ ahtable_t* ahtable_create_n(size_t n)
36
+ {
37
+ ahtable_t* T = malloc_or_die(sizeof(ahtable_t));
38
+ T->flag = 0;
39
+ T->c0 = T->c1 = '\0';
40
+
41
+ T->n = n;
42
+ T->m = 0;
43
+ T->max_m = (size_t) (ahtable_max_load_factor * (double) T->n);
44
+ T->slots = malloc_or_die(n * sizeof(slot_t));
45
+ memset(T->slots, 0, n * sizeof(slot_t));
46
+
47
+ T->slot_sizes = malloc_or_die(n * sizeof(size_t));
48
+ memset(T->slot_sizes, 0, n * sizeof(size_t));
49
+
50
+ return T;
51
+ }
52
+
53
+
54
+ void ahtable_free(ahtable_t* T)
55
+ {
56
+ if (T == NULL) return;
57
+ size_t i;
58
+ for (i = 0; i < T->n; ++i) free(T->slots[i]);
59
+ free(T->slots);
60
+ free(T->slot_sizes);
61
+ free(T);
62
+ }
63
+
64
+
65
+ size_t ahtable_size(const ahtable_t* T)
66
+ {
67
+ return T->m;
68
+ }
69
+
70
+
71
+ void ahtable_clear(ahtable_t* T)
72
+ {
73
+ size_t i;
74
+ for (i = 0; i < T->n; ++i) free(T->slots[i]);
75
+ T->n = ahtable_initial_size;
76
+ T->slots = realloc_or_die(T->slots, T->n * sizeof(slot_t));
77
+ memset(T->slots, 0, T->n * sizeof(slot_t));
78
+
79
+ T->slot_sizes = realloc_or_die(T->slot_sizes, T->n * sizeof(size_t));
80
+ memset(T->slot_sizes, 0, T->n * sizeof(size_t));
81
+ }
82
+
83
+
84
+ static slot_t ins_key(slot_t s, const char* key, size_t len, value_t** val)
85
+ {
86
+ // key length
87
+ if (len < 128) {
88
+ s[0] = (unsigned char) (len << 1);
89
+ s += 1;
90
+ }
91
+ else {
92
+ /* The most significant bit is set to indicate that two bytes are
93
+ * being used to store the key length. */
94
+ *((uint16_t*) s) = ((uint16_t) len << 1) | 0x1;
95
+ s += 2;
96
+ }
97
+
98
+ // key
99
+ memcpy(s, key, len * sizeof(unsigned char));
100
+ s += len;
101
+
102
+ // value
103
+ *val = (value_t*) s;
104
+ **val = 0;
105
+ s += sizeof(value_t);
106
+
107
+ return s;
108
+ }
109
+
110
+
111
+ static void ahtable_expand(ahtable_t* T)
112
+ {
113
+ /* Resizing a table is essentially building a brand new one.
114
+ * One little shortcut we can take on the memory allocation front is to
115
+ * figure out how much memory each slot needs in advance.
116
+ */
117
+ assert(T->n > 0);
118
+ size_t new_n = 2 * T->n;
119
+ size_t* slot_sizes = malloc_or_die(new_n * sizeof(size_t));
120
+ memset(slot_sizes, 0, new_n * sizeof(size_t));
121
+
122
+ const char* key;
123
+ size_t len = 0;
124
+ size_t m = 0;
125
+ ahtable_iter_t* i = ahtable_iter_begin(T, false);
126
+ while (!ahtable_iter_finished(i)) {
127
+ key = ahtable_iter_key(i, &len);
128
+ slot_sizes[hash(key, len) % new_n] +=
129
+ len + sizeof(value_t) + (len >= 128 ? 2 : 1);
130
+
131
+ ++m;
132
+ ahtable_iter_next(i);
133
+ }
134
+ assert(m == T->m);
135
+ ahtable_iter_free(i);
136
+
137
+
138
+ /* allocate slots */
139
+ slot_t* slots = malloc_or_die(new_n * sizeof(slot_t));
140
+ size_t j;
141
+ for (j = 0; j < new_n; ++j) {
142
+ if (slot_sizes[j] > 0) {
143
+ slots[j] = malloc_or_die(slot_sizes[j]);
144
+ }
145
+ else slots[j] = NULL;
146
+ }
147
+
148
+ /* rehash values. A few shortcuts can be taken here as well, as we know
149
+ * there will be no collisions. Instead of the regular insertion routine,
150
+ * we keep track of the ends of every slot and simply insert keys.
151
+ * */
152
+ slot_t* slots_next = malloc_or_die(new_n * sizeof(slot_t));
153
+ memcpy(slots_next, slots, new_n * sizeof(slot_t));
154
+ size_t h;
155
+ m = 0;
156
+ value_t* u;
157
+ value_t* v;
158
+ i = ahtable_iter_begin(T, false);
159
+ while (!ahtable_iter_finished(i)) {
160
+
161
+ key = ahtable_iter_key(i, &len);
162
+ h = hash(key, len) % new_n;
163
+
164
+ slots_next[h] = ins_key(slots_next[h], key, len, &u);
165
+ v = ahtable_iter_val(i);
166
+ *u = *v;
167
+
168
+ ++m;
169
+ ahtable_iter_next(i);
170
+ }
171
+ assert(m == T->m);
172
+ ahtable_iter_free(i);
173
+
174
+
175
+ free(slots_next);
176
+ for (j = 0; j < T->n; ++j) free(T->slots[j]);
177
+
178
+ free(T->slots);
179
+ T->slots = slots;
180
+
181
+ free(T->slot_sizes);
182
+ T->slot_sizes = slot_sizes;
183
+
184
+ T->n = new_n;
185
+ T->max_m = (size_t) (ahtable_max_load_factor * (double) T->n);
186
+ }
187
+
188
+
189
+ static value_t* get_key(ahtable_t* T, const char* key, size_t len, bool insert_missing)
190
+ {
191
+ /* if we are at capacity, preemptively resize */
192
+ if (insert_missing && T->m >= T->max_m) {
193
+ ahtable_expand(T);
194
+ }
195
+
196
+
197
+ uint32_t i = hash(key, len) % T->n;
198
+ size_t k;
199
+ slot_t s;
200
+ value_t* val;
201
+
202
+ /* search the array for our key */
203
+ s = T->slots[i];
204
+ while ((size_t) (s - T->slots[i]) < T->slot_sizes[i]) {
205
+ /* get the key length */
206
+ k = keylen(s);
207
+ s += k < 128 ? 1 : 2;
208
+
209
+ /* skip keys that are longer than ours */
210
+ if (k != len) {
211
+ s += k + sizeof(value_t);
212
+ continue;
213
+ }
214
+
215
+ /* key found. */
216
+ if (memcmp(s, key, len) == 0) {
217
+ return (value_t*) (s + len);
218
+ }
219
+ /* key not found. */
220
+ else {
221
+ s += k + sizeof(value_t);
222
+ continue;
223
+ }
224
+ }
225
+
226
+
227
+ if (insert_missing) {
228
+ /* the key was not found, so we must insert it. */
229
+ size_t new_size = T->slot_sizes[i];
230
+ new_size += 1 + (len >= 128 ? 1 : 0); // key length
231
+ new_size += len * sizeof(unsigned char); // key
232
+ new_size += sizeof(value_t); // value
233
+
234
+ T->slots[i] = realloc_or_die(T->slots[i], new_size);
235
+
236
+ ++T->m;
237
+ ins_key(T->slots[i] + T->slot_sizes[i], key, len, &val);
238
+ T->slot_sizes[i] = new_size;
239
+
240
+ return val;
241
+ }
242
+ else return NULL;
243
+ }
244
+
245
+
246
+ value_t* ahtable_get(ahtable_t* T, const char* key, size_t len)
247
+ {
248
+ return get_key(T, key, len, true);
249
+ }
250
+
251
+
252
+ value_t* ahtable_tryget(ahtable_t* T, const char* key, size_t len )
253
+ {
254
+ return get_key(T, key, len, false);
255
+ }
256
+
257
+
258
+ int ahtable_del(ahtable_t* T, const char* key, size_t len)
259
+ {
260
+ uint32_t i = hash(key, len) % T->n;
261
+ size_t k;
262
+ slot_t s;
263
+
264
+ /* search the array for our key */
265
+ s = T->slots[i];
266
+ while ((size_t) (s - T->slots[i]) < T->slot_sizes[i]) {
267
+ /* get the key length */
268
+ k = keylen(s);
269
+ s += k < 128 ? 1 : 2;
270
+
271
+ /* skip keys that are longer than ours */
272
+ if (k != len) {
273
+ s += k + sizeof(value_t);
274
+ continue;
275
+ }
276
+
277
+ /* key found. */
278
+ if (memcmp(s, key, len) == 0) {
279
+ /* move everything over, resize the array */
280
+ unsigned char* t = s + len + sizeof(value_t);
281
+ s -= k < 128 ? 1 : 2;
282
+ memmove(s, t, T->slot_sizes[i] - (size_t) (t - T->slots[i]));
283
+ T->slot_sizes[i] -= (size_t) (t - s);
284
+ --T->m;
285
+ return 0;
286
+ }
287
+ /* key not found. */
288
+ else {
289
+ s += k + sizeof(value_t);
290
+ continue;
291
+ }
292
+ }
293
+
294
+ // Key was not found. Do nothing.
295
+ return -1;
296
+ }
297
+
298
+
299
+
300
+ static int cmpkey(const void* a_, const void* b_)
301
+ {
302
+ slot_t a = *(slot_t*) a_;
303
+ slot_t b = *(slot_t*) b_;
304
+
305
+ size_t ka = keylen(a), kb = keylen(b);
306
+
307
+ a += ka < 128 ? 1 : 2;
308
+ b += kb < 128 ? 1 : 2;
309
+
310
+ int c = memcmp(a, b, ka < kb ? ka : kb);
311
+ return c == 0 ? (int) ka - (int) kb : c;
312
+ }
313
+
314
+
315
+ /* Sorted/unsorted iterators are kept private and exposed by passing the
316
+ sorted flag to ahtable_iter_begin. */
317
+
318
+ typedef struct ahtable_sorted_iter_t_
319
+ {
320
+ const ahtable_t* T; // parent
321
+ slot_t* xs; // pointers to keys
322
+ size_t i; // current key
323
+ } ahtable_sorted_iter_t;
324
+
325
+
326
+ static ahtable_sorted_iter_t* ahtable_sorted_iter_begin(const ahtable_t* T)
327
+ {
328
+ ahtable_sorted_iter_t* i = malloc_or_die(sizeof(ahtable_sorted_iter_t));
329
+ i->T = T;
330
+ i->xs = malloc_or_die(T->m * sizeof(slot_t));
331
+ i->i = 0;
332
+
333
+ slot_t s;
334
+ size_t j, k, u;
335
+ for (j = 0, u = 0; j < T->n; ++j) {
336
+ s = T->slots[j];
337
+ while (s < T->slots[j] + T->slot_sizes[j]) {
338
+ i->xs[u++] = s;
339
+ k = keylen(s);
340
+ s += k < 128 ? 1 : 2;
341
+ s += k + sizeof(value_t);
342
+ }
343
+ }
344
+
345
+ qsort(i->xs, T->m, sizeof(slot_t), cmpkey);
346
+
347
+ return i;
348
+ }
349
+
350
+
351
+ static bool ahtable_sorted_iter_finished(ahtable_sorted_iter_t* i)
352
+ {
353
+ return i->i >= i->T->m;
354
+ }
355
+
356
+
357
+ static void ahtable_sorted_iter_next(ahtable_sorted_iter_t* i)
358
+ {
359
+ if (ahtable_sorted_iter_finished(i)) return;
360
+ ++i->i;
361
+ }
362
+
363
+
364
+ static void ahtable_sorted_iter_free(ahtable_sorted_iter_t* i)
365
+ {
366
+ if (i == NULL) return;
367
+ free(i->xs);
368
+ free(i);
369
+ }
370
+
371
+
372
+ static const char* ahtable_sorted_iter_key(ahtable_sorted_iter_t* i, size_t* len)
373
+ {
374
+ if (ahtable_sorted_iter_finished(i)) return NULL;
375
+
376
+ slot_t s = i->xs[i->i];
377
+ *len = keylen(s);
378
+
379
+ return (const char*) (s + (*len < 128 ? 1 : 2));
380
+ }
381
+
382
+
383
+ static value_t* ahtable_sorted_iter_val(ahtable_sorted_iter_t* i)
384
+ {
385
+ if (ahtable_sorted_iter_finished(i)) return NULL;
386
+
387
+ slot_t s = i->xs[i->i];
388
+ size_t k = keylen(s);
389
+
390
+ s += k < 128 ? 1 : 2;
391
+ s += k;
392
+
393
+ return (value_t*) s;
394
+ }
395
+
396
+
397
+ typedef struct ahtable_unsorted_iter_t_
398
+ {
399
+ const ahtable_t* T; // parent
400
+ size_t i; // slot index
401
+ slot_t s; // slot position
402
+ } ahtable_unsorted_iter_t;
403
+
404
+
405
+ static ahtable_unsorted_iter_t* ahtable_unsorted_iter_begin(const ahtable_t* T)
406
+ {
407
+ ahtable_unsorted_iter_t* i = malloc_or_die(sizeof(ahtable_unsorted_iter_t));
408
+ i->T = T;
409
+
410
+ for (i->i = 0; i->i < i->T->n; ++i->i) {
411
+ i->s = T->slots[i->i];
412
+ if ((size_t) (i->s - T->slots[i->i]) >= T->slot_sizes[i->i]) continue;
413
+ break;
414
+ }
415
+
416
+ return i;
417
+ }
418
+
419
+
420
+ static bool ahtable_unsorted_iter_finished(ahtable_unsorted_iter_t* i)
421
+ {
422
+ return i->i >= i->T->n;
423
+ }
424
+
425
+
426
+ static void ahtable_unsorted_iter_next(ahtable_unsorted_iter_t* i)
427
+ {
428
+ if (ahtable_unsorted_iter_finished(i)) return;
429
+
430
+ /* get the key length */
431
+ size_t k = keylen(i->s);
432
+ i->s += k < 128 ? 1 : 2;
433
+
434
+ /* skip to the next key */
435
+ i->s += k + sizeof(value_t);
436
+
437
+ if ((size_t) (i->s - i->T->slots[i->i]) >= i->T->slot_sizes[i->i]) {
438
+ do {
439
+ ++i->i;
440
+ } while(i->i < i->T->n &&
441
+ i->T->slot_sizes[i->i] == 0);
442
+
443
+ if (i->i < i->T->n) i->s = i->T->slots[i->i];
444
+ else i->s = NULL;
445
+ }
446
+ }
447
+
448
+
449
+ static void ahtable_unsorted_iter_free(ahtable_unsorted_iter_t* i)
450
+ {
451
+ free(i);
452
+ }
453
+
454
+
455
+ static const char* ahtable_unsorted_iter_key(ahtable_unsorted_iter_t* i, size_t* len)
456
+ {
457
+ if (ahtable_unsorted_iter_finished(i)) return NULL;
458
+
459
+ slot_t s = i->s;
460
+ size_t k;
461
+ if (0x1 & *s) {
462
+ k = (size_t) (*((uint16_t*) s)) >> 1;
463
+ s += 2;
464
+ }
465
+ else {
466
+ k = (size_t) (*s >> 1);
467
+ s += 1;
468
+ }
469
+
470
+ *len = k;
471
+ return (const char*) s;
472
+ }
473
+
474
+
475
+ static value_t* ahtable_unsorted_iter_val(ahtable_unsorted_iter_t* i)
476
+ {
477
+ if (ahtable_unsorted_iter_finished(i)) return NULL;
478
+
479
+ slot_t s = i->s;
480
+
481
+ size_t k;
482
+ if (0x1 & *s) {
483
+ k = (size_t) (*((uint16_t*) s)) >> 1;
484
+ s += 2;
485
+ }
486
+ else {
487
+ k = (size_t) (*s >> 1);
488
+ s += 1;
489
+ }
490
+
491
+ s += k;
492
+ return (value_t*) s;
493
+ }
494
+
495
+
496
+ struct ahtable_iter_t_
497
+ {
498
+ bool sorted;
499
+ union {
500
+ ahtable_unsorted_iter_t* unsorted;
501
+ ahtable_sorted_iter_t* sorted;
502
+ } i;
503
+ };
504
+
505
+
506
+ ahtable_iter_t* ahtable_iter_begin(const ahtable_t* T, bool sorted) {
507
+ ahtable_iter_t* i = malloc_or_die(sizeof(ahtable_iter_t));
508
+ i->sorted = sorted;
509
+ if (sorted) i->i.sorted = ahtable_sorted_iter_begin(T);
510
+ else i->i.unsorted = ahtable_unsorted_iter_begin(T);
511
+ return i;
512
+ }
513
+
514
+
515
+ void ahtable_iter_next(ahtable_iter_t* i)
516
+ {
517
+ if (i->sorted) ahtable_sorted_iter_next(i->i.sorted);
518
+ else ahtable_unsorted_iter_next(i->i.unsorted);
519
+ }
520
+
521
+
522
+ bool ahtable_iter_finished(ahtable_iter_t* i)
523
+ {
524
+ if (i->sorted) return ahtable_sorted_iter_finished(i->i.sorted);
525
+ else return ahtable_unsorted_iter_finished(i->i.unsorted);
526
+ }
527
+
528
+
529
+ void ahtable_iter_free(ahtable_iter_t* i)
530
+ {
531
+ if (i == NULL) return;
532
+ if (i->sorted) ahtable_sorted_iter_free(i->i.sorted);
533
+ else ahtable_unsorted_iter_free(i->i.unsorted);
534
+ free(i);
535
+ }
536
+
537
+
538
+ const char* ahtable_iter_key(ahtable_iter_t* i, size_t* len)
539
+ {
540
+ if (i->sorted) return ahtable_sorted_iter_key(i->i.sorted, len);
541
+ else return ahtable_unsorted_iter_key(i->i.unsorted, len);
542
+ }
543
+
544
+
545
+ value_t* ahtable_iter_val(ahtable_iter_t* i)
546
+ {
547
+ if (i->sorted) return ahtable_sorted_iter_val(i->i.sorted);
548
+ else return ahtable_unsorted_iter_val(i->i.unsorted);
549
+ }
550
+