triez 0.2

Sign up to get free protection for your applications and to get access to all the features.
data/copying ADDED
@@ -0,0 +1,18 @@
1
+ Copyright (C) 2013 by Zete Lui
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7
+ the Software, and to permit persons to whom the Software is furnished to do so,
8
+ subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/ext/common.h ADDED
@@ -0,0 +1,8 @@
1
+ /* Defines value_t as long long. NOTE(review): "redef" presumably means this header stands in for hat-trie's own value_t definition via the include path -- confirm against the build flags. */
2
+
3
+ #ifndef HATTRIE_COMMON_H
4
+ #define HATTRIE_COMMON_H
5
+
6
+ typedef long long value_t;
7
+
8
+ #endif
data/ext/extconf.rb ADDED
@@ -0,0 +1,31 @@
1
require "mkmf"
require "fileutils"
require "shellwords"

$CFLAGS << ' -Ihat-trie'
$LDFLAGS << ' -Lbuild -ltries'
create_makefile 'triez'

# Make every object file depend on the local headers so that editing a header
# triggers a rebuild.
headers = Dir.glob('*.{hpp,h}').join ' '
File.open 'Makefile', 'a' do |f|
  f.puts "\n$(OBJS): #{headers}"
end

# Echo a command, run it, and stop the configuration step if it fails.
# Without the check a failed compile would only surface later as a confusing
# link error about the missing libtries archive.
def sh *xs
  puts xs.join(' ')
  system(*xs) or abort "command failed: #{xs.join(' ')}"
end

# Build the vendored hat-trie sources into build/libtries.a.
# The step is skipped when the archive already exists.
include FileUtils
build_dir = File.join(File.dirname(__FILE__), 'build')
mkdir_p build_dir
cd build_dir
unless File.exist?('libtries.a')
  # CC may carry flags (e.g. "gcc -m64"); split it into separate argv words
  # because sh passes its arguments to system without a shell.
  cc = Shellwords.split(ENV['CC'] || RbConfig::CONFIG['CC'])
  cc += ['-O3', '-std=c99', '-Wall', '-pedantic', '-c']
  # Fall back to plain "ar" when the configured archiver is unset or is not
  # an existing file.
  ar = RbConfig::CONFIG['AR']
  ar = 'ar' unless ar && File.exist?(ar)
  sh(*cc, '-I..', *Dir.glob("../hat-trie/*.c"))
  sh ar, '-r', 'libtries.a', *Dir.glob("*.o")
end
data/ext/hat-stub.c ADDED
@@ -0,0 +1,14 @@
1
+ #include <ruby.h>
2
+
3
/*
 * Allocate sz bytes, terminating the process if the allocation fails.
 *
 * Callers such as ahtable_create_n write through the returned pointer without
 * checking it, so returning NULL on failure would lead to a NULL dereference.
 * A NULL result for a zero-byte request is not treated as a failure.
 */
void* malloc_or_die(size_t sz) {
    void* p = malloc(sz);
    if (p == NULL && sz != 0) {
        fprintf(stderr, "Cannot allocate %zu bytes.\n", sz);
        abort();
    }
    return p;
}
6
+
7
/*
 * Resize the allocation p to sz bytes, terminating the process on failure.
 *
 * Callers such as get_key store the result straight back over the old pointer
 * and write into it, so a NULL return would both leak the old buffer and
 * cause a NULL dereference. A NULL result for sz == 0 is not a failure.
 */
void* realloc_or_die(void* p, size_t sz) {
    void* q = realloc(p, sz);
    if (q == NULL && sz != 0) {
        fprintf(stderr, "Cannot reallocate %zu bytes.\n", sz);
        abort();
    }
    return q;
}
10
+
11
/*
 * Open `file` with the stdio mode string `mode`.
 *
 * TODO: raise an error on failure. For now a failed open is passed back to
 * the caller as NULL, exactly as fopen reports it.
 */
FILE* fopen_or_die(const char* file, const char* mode)
{
    FILE* stream = fopen(file, mode);
    return stream;
}
@@ -0,0 +1,19 @@
1
+ Copyright (C) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7
+ the Software, and to permit persons to whom the Software is furnished to do so,
8
+ subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19
+
@@ -0,0 +1,551 @@
1
+ /*
2
+ * This file is part of hat-trie.
3
+ *
4
+ * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
5
+ *
6
+ */
7
+
8
+ #include "ahtable.h"
9
+ #include "misc.h"
10
+ #include "murmurhash3.h"
11
+ #include <assert.h>
12
+ #include <string.h>
13
+
14
+
15
+
16
/* Multiplied by the slot count to get the entry count at which a table grows;
 * an arbitrary large number, so in practice tables do not resize. */
const double ahtable_max_load_factor = 100000.0;
/* Slot count used by ahtable_create() and restored by ahtable_clear(). */
const size_t ahtable_initial_size = 4096;
/* Mask of the low 15 bits; not referenced anywhere else in this file. */
static const uint16_t LONG_KEYLEN_MASK = 0x7fff;
19
+
20
+ static size_t keylen(slot_t s) {
21
+ if (0x1 & *s) {
22
+ return (size_t) (*((uint16_t*) s) >> 1);
23
+ }
24
+ else {
25
+ return (size_t) (*s >> 1);
26
+ }
27
+ }
28
+
29
+
30
+ ahtable_t* ahtable_create()
31
+ {
32
+ return ahtable_create_n(ahtable_initial_size);
33
+ }
34
+
35
+
36
+ ahtable_t* ahtable_create_n(size_t n)
37
+ {
38
+ ahtable_t* T = malloc_or_die(sizeof(ahtable_t));
39
+ T->flag = 0;
40
+ T->c0 = T->c1 = '\0';
41
+
42
+ T->n = n;
43
+ T->m = 0;
44
+ T->max_m = (size_t) (ahtable_max_load_factor * (double) T->n);
45
+ T->slots = malloc_or_die(n * sizeof(slot_t));
46
+ memset(T->slots, 0, n * sizeof(slot_t));
47
+
48
+ T->slot_sizes = malloc_or_die(n * sizeof(size_t));
49
+ memset(T->slot_sizes, 0, n * sizeof(size_t));
50
+
51
+ return T;
52
+ }
53
+
54
+
55
+ void ahtable_free(ahtable_t* T)
56
+ {
57
+ if (T == NULL) return;
58
+ size_t i;
59
+ for (i = 0; i < T->n; ++i) free(T->slots[i]);
60
+ free(T->slots);
61
+ free(T->slot_sizes);
62
+ free(T);
63
+ }
64
+
65
+
66
+ size_t ahtable_size(const ahtable_t* T)
67
+ {
68
+ return T->m;
69
+ }
70
+
71
+
72
+ void ahtable_clear(ahtable_t* T)
73
+ {
74
+ size_t i;
75
+ for (i = 0; i < T->n; ++i) free(T->slots[i]);
76
+ T->n = ahtable_initial_size;
77
+ T->slots = realloc_or_die(T->slots, T->n * sizeof(slot_t));
78
+ memset(T->slots, 0, T->n * sizeof(slot_t));
79
+
80
+ T->slot_sizes = realloc_or_die(T->slot_sizes, T->n * sizeof(size_t));
81
+ memset(T->slot_sizes, 0, T->n * sizeof(size_t));
82
+ }
83
+
84
+
85
+ static slot_t ins_key(slot_t s, const char* key, size_t len, value_t** val)
86
+ {
87
+ // key length
88
+ if (len < 128) {
89
+ s[0] = (unsigned char) (len << 1);
90
+ s += 1;
91
+ }
92
+ else {
93
+ /* The most significant bit is set to indicate that two bytes are
94
+ * being used to store the key length. */
95
+ *((uint16_t*) s) = ((uint16_t) len << 1) | 0x1;
96
+ s += 2;
97
+ }
98
+
99
+ // key
100
+ memcpy(s, key, len * sizeof(unsigned char));
101
+ s += len;
102
+
103
+ // value
104
+ *val = (value_t*) s;
105
+ **val = 0;
106
+ s += sizeof(value_t);
107
+
108
+ return s;
109
+ }
110
+
111
+
112
+ static void ahtable_expand(ahtable_t* T)
113
+ {
114
+ /* Resizing a table is essentially building a brand new one.
115
+ * One little shortcut we can take on the memory allocation front is to
116
+ * figure out how much memory each slot needs in advance.
117
+ */
118
+ assert(T->n > 0);
119
+ size_t new_n = 2 * T->n;
120
+ size_t* slot_sizes = malloc_or_die(new_n * sizeof(size_t));
121
+ memset(slot_sizes, 0, new_n * sizeof(size_t));
122
+
123
+ const char* key;
124
+ size_t len = 0;
125
+ size_t m = 0;
126
+ ahtable_iter_t* i = ahtable_iter_begin(T, false);
127
+ while (!ahtable_iter_finished(i)) {
128
+ key = ahtable_iter_key(i, &len);
129
+ slot_sizes[hash(key, len) % new_n] +=
130
+ len + sizeof(value_t) + (len >= 128 ? 2 : 1);
131
+
132
+ ++m;
133
+ ahtable_iter_next(i);
134
+ }
135
+ assert(m == T->m);
136
+ ahtable_iter_free(i);
137
+
138
+
139
+ /* allocate slots */
140
+ slot_t* slots = malloc_or_die(new_n * sizeof(slot_t));
141
+ size_t j;
142
+ for (j = 0; j < new_n; ++j) {
143
+ if (slot_sizes[j] > 0) {
144
+ slots[j] = malloc_or_die(slot_sizes[j]);
145
+ }
146
+ else slots[j] = NULL;
147
+ }
148
+
149
+ /* rehash values. A few shortcuts can be taken here as well, as we know
150
+ * there will be no collisions. Instead of the regular insertion routine,
151
+ * we keep track of the ends of every slot and simply insert keys.
152
+ * */
153
+ slot_t* slots_next = malloc_or_die(new_n * sizeof(slot_t));
154
+ memcpy(slots_next, slots, new_n * sizeof(slot_t));
155
+ size_t h;
156
+ m = 0;
157
+ value_t* u;
158
+ value_t* v;
159
+ i = ahtable_iter_begin(T, false);
160
+ while (!ahtable_iter_finished(i)) {
161
+
162
+ key = ahtable_iter_key(i, &len);
163
+ h = hash(key, len) % new_n;
164
+
165
+ slots_next[h] = ins_key(slots_next[h], key, len, &u);
166
+ v = ahtable_iter_val(i);
167
+ *u = *v;
168
+
169
+ ++m;
170
+ ahtable_iter_next(i);
171
+ }
172
+ assert(m == T->m);
173
+ ahtable_iter_free(i);
174
+
175
+
176
+ free(slots_next);
177
+ for (j = 0; j < T->n; ++j) free(T->slots[j]);
178
+
179
+ free(T->slots);
180
+ T->slots = slots;
181
+
182
+ free(T->slot_sizes);
183
+ T->slot_sizes = slot_sizes;
184
+
185
+ T->n = new_n;
186
+ T->max_m = (size_t) (ahtable_max_load_factor * (double) T->n);
187
+ }
188
+
189
+
190
+ static value_t* get_key(ahtable_t* T, const char* key, size_t len, bool insert_missing)
191
+ {
192
+ /* if we are at capacity, preemptively resize */
193
+ if (insert_missing && T->m >= T->max_m) {
194
+ ahtable_expand(T);
195
+ }
196
+
197
+
198
+ uint32_t i = hash(key, len) % T->n;
199
+ size_t k;
200
+ slot_t s;
201
+ value_t* val;
202
+
203
+ /* search the array for our key */
204
+ s = T->slots[i];
205
+ while ((size_t) (s - T->slots[i]) < T->slot_sizes[i]) {
206
+ /* get the key length */
207
+ k = keylen(s);
208
+ s += k < 128 ? 1 : 2;
209
+
210
+ /* skip keys that are longer than ours */
211
+ if (k != len) {
212
+ s += k + sizeof(value_t);
213
+ continue;
214
+ }
215
+
216
+ /* key found. */
217
+ if (memcmp(s, key, len) == 0) {
218
+ return (value_t*) (s + len);
219
+ }
220
+ /* key not found. */
221
+ else {
222
+ s += k + sizeof(value_t);
223
+ continue;
224
+ }
225
+ }
226
+
227
+
228
+ if (insert_missing) {
229
+ /* the key was not found, so we must insert it. */
230
+ size_t new_size = T->slot_sizes[i];
231
+ new_size += 1 + (len >= 128 ? 1 : 0); // key length
232
+ new_size += len * sizeof(unsigned char); // key
233
+ new_size += sizeof(value_t); // value
234
+
235
+ T->slots[i] = realloc_or_die(T->slots[i], new_size);
236
+
237
+ ++T->m;
238
+ ins_key(T->slots[i] + T->slot_sizes[i], key, len, &val);
239
+ T->slot_sizes[i] = new_size;
240
+
241
+ return val;
242
+ }
243
+ else return NULL;
244
+ }
245
+
246
+
247
+ value_t* ahtable_get(ahtable_t* T, const char* key, size_t len)
248
+ {
249
+ return get_key(T, key, len, true);
250
+ }
251
+
252
+
253
+ value_t* ahtable_tryget(ahtable_t* T, const char* key, size_t len )
254
+ {
255
+ return get_key(T, key, len, false);
256
+ }
257
+
258
+
259
+ int ahtable_del(ahtable_t* T, const char* key, size_t len)
260
+ {
261
+ uint32_t i = hash(key, len) % T->n;
262
+ size_t k;
263
+ slot_t s;
264
+
265
+ /* search the array for our key */
266
+ s = T->slots[i];
267
+ while ((size_t) (s - T->slots[i]) < T->slot_sizes[i]) {
268
+ /* get the key length */
269
+ k = keylen(s);
270
+ s += k < 128 ? 1 : 2;
271
+
272
+ /* skip keys that are longer than ours */
273
+ if (k != len) {
274
+ s += k + sizeof(value_t);
275
+ continue;
276
+ }
277
+
278
+ /* key found. */
279
+ if (memcmp(s, key, len) == 0) {
280
+ /* move everything over, resize the array */
281
+ unsigned char* t = s + len + sizeof(value_t);
282
+ s -= k < 128 ? 1 : 2;
283
+ memmove(s, t, T->slot_sizes[i] - (size_t) (t - T->slots[i]));
284
+ T->slot_sizes[i] -= (size_t) (t - s);
285
+ --T->m;
286
+ return 0;
287
+ }
288
+ /* key not found. */
289
+ else {
290
+ s += k + sizeof(value_t);
291
+ continue;
292
+ }
293
+ }
294
+
295
+ // Key was not found. Do nothing.
296
+ return -1;
297
+ }
298
+
299
+
300
+
301
+ static int cmpkey(const void* a_, const void* b_)
302
+ {
303
+ slot_t a = *(slot_t*) a_;
304
+ slot_t b = *(slot_t*) b_;
305
+
306
+ size_t ka = keylen(a), kb = keylen(b);
307
+
308
+ a += ka < 128 ? 1 : 2;
309
+ b += kb < 128 ? 1 : 2;
310
+
311
+ int c = memcmp(a, b, ka < kb ? ka : kb);
312
+ return c == 0 ? (int) ka - (int) kb : c;
313
+ }
314
+
315
+
316
+ /* Sorted/unsorted iterators are kept private and exposed by passing the
317
+ sorted flag to ahtable_iter_begin. */
318
+
319
+ typedef struct ahtable_sorted_iter_t_ /* state of an iterator that visits entries in cmpkey order */
320
+ {
321
+ const ahtable_t* T; // table being iterated; not freed by ahtable_sorted_iter_free
322
+ slot_t* xs; // pointers to the start of each entry, sorted with cmpkey
323
+ size_t i; // index into xs of the current entry; finished once it reaches T->m
324
+ } ahtable_sorted_iter_t;
325
+
326
+
327
+ static ahtable_sorted_iter_t* ahtable_sorted_iter_begin(const ahtable_t* T)
328
+ {
329
+ ahtable_sorted_iter_t* i = malloc_or_die(sizeof(ahtable_sorted_iter_t));
330
+ i->T = T;
331
+ i->xs = malloc_or_die(T->m * sizeof(slot_t));
332
+ i->i = 0;
333
+
334
+ slot_t s;
335
+ size_t j, k, u;
336
+ for (j = 0, u = 0; j < T->n; ++j) {
337
+ s = T->slots[j];
338
+ while (s < T->slots[j] + T->slot_sizes[j]) {
339
+ i->xs[u++] = s;
340
+ k = keylen(s);
341
+ s += k < 128 ? 1 : 2;
342
+ s += k + sizeof(value_t);
343
+ }
344
+ }
345
+
346
+ qsort(i->xs, T->m, sizeof(slot_t), cmpkey);
347
+
348
+ return i;
349
+ }
350
+
351
+
352
+ static bool ahtable_sorted_iter_finished(ahtable_sorted_iter_t* i)
353
+ {
354
+ return i->i >= i->T->m;
355
+ }
356
+
357
+
358
+ static void ahtable_sorted_iter_next(ahtable_sorted_iter_t* i)
359
+ {
360
+ if (ahtable_sorted_iter_finished(i)) return;
361
+ ++i->i;
362
+ }
363
+
364
+
365
+ static void ahtable_sorted_iter_free(ahtable_sorted_iter_t* i)
366
+ {
367
+ if (i == NULL) return;
368
+ free(i->xs);
369
+ free(i);
370
+ }
371
+
372
+
373
+ static const char* ahtable_sorted_iter_key(ahtable_sorted_iter_t* i, size_t* len)
374
+ {
375
+ if (ahtable_sorted_iter_finished(i)) return NULL;
376
+
377
+ slot_t s = i->xs[i->i];
378
+ *len = keylen(s);
379
+
380
+ return (const char*) (s + (*len < 128 ? 1 : 2));
381
+ }
382
+
383
+
384
+ static value_t* ahtable_sorted_iter_val(ahtable_sorted_iter_t* i)
385
+ {
386
+ if (ahtable_sorted_iter_finished(i)) return NULL;
387
+
388
+ slot_t s = i->xs[i->i];
389
+ size_t k = keylen(s);
390
+
391
+ s += k < 128 ? 1 : 2;
392
+ s += k;
393
+
394
+ return (value_t*) s;
395
+ }
396
+
397
+
398
+ typedef struct ahtable_unsorted_iter_t_ /* state of an iterator that walks the slots in storage order */
399
+ {
400
+ const ahtable_t* T; // table being iterated; not freed by ahtable_unsorted_iter_free
401
+ size_t i; // index of the slot being read; finished once it reaches T->n
402
+ slot_t s; // start of the current entry inside slot i
403
+ } ahtable_unsorted_iter_t;
404
+
405
+
406
+ static ahtable_unsorted_iter_t* ahtable_unsorted_iter_begin(const ahtable_t* T)
407
+ {
408
+ ahtable_unsorted_iter_t* i = malloc_or_die(sizeof(ahtable_unsorted_iter_t));
409
+ i->T = T;
410
+
411
+ for (i->i = 0; i->i < i->T->n; ++i->i) {
412
+ i->s = T->slots[i->i];
413
+ if ((size_t) (i->s - T->slots[i->i]) >= T->slot_sizes[i->i]) continue;
414
+ break;
415
+ }
416
+
417
+ return i;
418
+ }
419
+
420
+
421
+ static bool ahtable_unsorted_iter_finished(ahtable_unsorted_iter_t* i)
422
+ {
423
+ return i->i >= i->T->n;
424
+ }
425
+
426
+
427
+ static void ahtable_unsorted_iter_next(ahtable_unsorted_iter_t* i)
428
+ {
429
+ if (ahtable_unsorted_iter_finished(i)) return;
430
+
431
+ /* get the key length */
432
+ size_t k = keylen(i->s);
433
+ i->s += k < 128 ? 1 : 2;
434
+
435
+ /* skip to the next key */
436
+ i->s += k + sizeof(value_t);
437
+
438
+ if ((size_t) (i->s - i->T->slots[i->i]) >= i->T->slot_sizes[i->i]) {
439
+ do {
440
+ ++i->i;
441
+ } while(i->i < i->T->n &&
442
+ i->T->slot_sizes[i->i] == 0);
443
+
444
+ if (i->i < i->T->n) i->s = i->T->slots[i->i];
445
+ else i->s = NULL;
446
+ }
447
+ }
448
+
449
+
450
+ static void ahtable_unsorted_iter_free(ahtable_unsorted_iter_t* i)
451
+ {
452
+ free(i);
453
+ }
454
+
455
+
456
+ static const char* ahtable_unsorted_iter_key(ahtable_unsorted_iter_t* i, size_t* len)
457
+ {
458
+ if (ahtable_unsorted_iter_finished(i)) return NULL;
459
+
460
+ slot_t s = i->s;
461
+ size_t k;
462
+ if (0x1 & *s) {
463
+ k = (size_t) (*((uint16_t*) s)) >> 1;
464
+ s += 2;
465
+ }
466
+ else {
467
+ k = (size_t) (*s >> 1);
468
+ s += 1;
469
+ }
470
+
471
+ *len = k;
472
+ return (const char*) s;
473
+ }
474
+
475
+
476
+ static value_t* ahtable_unsorted_iter_val(ahtable_unsorted_iter_t* i)
477
+ {
478
+ if (ahtable_unsorted_iter_finished(i)) return NULL;
479
+
480
+ slot_t s = i->s;
481
+
482
+ size_t k;
483
+ if (0x1 & *s) {
484
+ k = (size_t) (*((uint16_t*) s)) >> 1;
485
+ s += 2;
486
+ }
487
+ else {
488
+ k = (size_t) (*s >> 1);
489
+ s += 1;
490
+ }
491
+
492
+ s += k;
493
+ return (value_t*) s;
494
+ }
495
+
496
+
497
+ struct ahtable_iter_t_ /* iterator handle used by the ahtable_iter_* functions; wraps one of the two private iterators */
498
+ {
499
+ bool sorted; // selects which member of the union below is in use
500
+ union {
501
+ ahtable_unsorted_iter_t* unsorted; // set when sorted is false
502
+ ahtable_sorted_iter_t* sorted; // set when sorted is true
503
+ } i;
504
+ };
505
+
506
+
507
+ ahtable_iter_t* ahtable_iter_begin(const ahtable_t* T, bool sorted) {
508
+ ahtable_iter_t* i = malloc_or_die(sizeof(ahtable_iter_t));
509
+ i->sorted = sorted;
510
+ if (sorted) i->i.sorted = ahtable_sorted_iter_begin(T);
511
+ else i->i.unsorted = ahtable_unsorted_iter_begin(T);
512
+ return i;
513
+ }
514
+
515
+
516
+ void ahtable_iter_next(ahtable_iter_t* i)
517
+ {
518
+ if (i->sorted) ahtable_sorted_iter_next(i->i.sorted);
519
+ else ahtable_unsorted_iter_next(i->i.unsorted);
520
+ }
521
+
522
+
523
+ bool ahtable_iter_finished(ahtable_iter_t* i)
524
+ {
525
+ if (i->sorted) return ahtable_sorted_iter_finished(i->i.sorted);
526
+ else return ahtable_unsorted_iter_finished(i->i.unsorted);
527
+ }
528
+
529
+
530
+ void ahtable_iter_free(ahtable_iter_t* i)
531
+ {
532
+ if (i == NULL) return;
533
+ if (i->sorted) ahtable_sorted_iter_free(i->i.sorted);
534
+ else ahtable_unsorted_iter_free(i->i.unsorted);
535
+ free(i);
536
+ }
537
+
538
+
539
+ const char* ahtable_iter_key(ahtable_iter_t* i, size_t* len)
540
+ {
541
+ if (i->sorted) return ahtable_sorted_iter_key(i->i.sorted, len);
542
+ else return ahtable_unsorted_iter_key(i->i.unsorted, len);
543
+ }
544
+
545
+
546
+ value_t* ahtable_iter_val(ahtable_iter_t* i)
547
+ {
548
+ if (i->sorted) return ahtable_sorted_iter_val(i->i.sorted);
549
+ else return ahtable_unsorted_iter_val(i->i.unsorted);
550
+ }
551
+