tyler-trie 0.2.3 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.textile CHANGED
@@ -1,6 +1,6 @@
1
1
  h1. Trie
2
2
 
3
- This is a Ruby binding for libdatrie, a dual-array trie implemented in C. It is a disk-based trie so the memory usage is minimal, but it's still quite fast.
3
+ This is a trie for Ruby built on libdatrie. It uses a double-array (a.k.a. dual-array) structure, meaning it has best-in-class memory usage and search time.
4
4
 
5
5
 
6
6
  h2. What is a trie?
@@ -22,14 +22,12 @@ It's easy to see how this can have pretty neat implications for things like sear
22
22
 
23
23
  h2. Tutorial
24
24
 
25
- Let's go through building a simple autocompleter using Trie. The very first thing you'll want to do is create a directory for your trie's data to be held in. Remember, this is a disk-based trie so having a place to store the files is important.
25
+ Let's go through building a simple autocompleter using Trie.
26
26
 
27
27
  <pre><code>
28
- Trie.new('your-directory')
28
+ Trie.new
29
29
  </code></pre>
30
30
 
31
- When you call <code>Trie.new</code> for the first time with the given directory as the first argument, it will create three files: 'trie.br', 'trie.tl', and 'trie.sbm'. 'trie.br' and 'trie.tl' are binary files corresponding to the two arrays which represent the trie structure itself and the tails and data for the strings, respectively. You probably don't want to mess with these directly; use the library for that. 'trie.sbm' controls what characters are valid in the trie. Look into the libdatrie documentation for more details.
32
-
33
31
  Anyway. So we've created our blank trie. Now, since we're creating an autocompleter, we'll need to add some words into it. We do that simply with the add method.
34
32
 
35
33
  <pre><code>
@@ -90,13 +88,4 @@ There are, of course, some more interesting and advanced ways to use a trie. Fo
90
88
  By calling <code>root</code> on a Trie object, you get a TrieNode, pointed at the root of the trie. You can then use this node to walk the trie and perceive things about each word.
91
89
 
92
90
 
93
- h2. Limitations
94
-
95
- By default libdatrie supports only 32767 words in a trie, as well as only 16-bit integers for the value that goes along with inserted strings. This certainly makes sense for some purposes on some platforms... but I want to be able to enter bajillions of words with large bits of data associated. So, I've forked the project to switch both indexes and datum to 32-bit. So you can enter... a lot of information now. You can find my fork at http://github.com/tyler/libdatrie.
96
-
97
- h2. Bugs
98
-
99
- Saving to disk doesn't work correctly. Not sure why... maybe related to my libdatrie changes.
100
-
101
-
102
91
  Copyright (c) 2008 Tyler McMullen. See LICENSE for details.
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
- patch: 3
2
+ patch: 1
3
3
  major: 0
4
- minor: 2
4
+ minor: 3
data/ext/trie/Makefile ADDED
@@ -0,0 +1,149 @@
1
+
2
+ SHELL = /bin/sh
3
+
4
+ #### Start of system configuration section. ####
5
+
6
+ srcdir = .
7
+ topdir = /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0
8
+ hdrdir = $(topdir)
9
+ VPATH = $(srcdir):$(topdir):$(hdrdir)
10
+ prefix = $(DESTDIR)/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr
11
+ exec_prefix = $(prefix)
12
+ sitedir = $(DESTDIR)/Library/Ruby/Site
13
+ rubylibdir = $(libdir)/ruby/$(ruby_version)
14
+ docdir = $(datarootdir)/doc/$(PACKAGE)
15
+ dvidir = $(docdir)
16
+ datarootdir = $(prefix)/share
17
+ archdir = $(rubylibdir)/$(arch)
18
+ sbindir = $(exec_prefix)/sbin
19
+ psdir = $(docdir)
20
+ localedir = $(datarootdir)/locale
21
+ htmldir = $(docdir)
22
+ datadir = $(datarootdir)
23
+ includedir = $(prefix)/include
24
+ infodir = $(DESTDIR)/usr/share/info
25
+ sysconfdir = $(prefix)/etc
26
+ mandir = $(DESTDIR)/usr/share/man
27
+ libdir = $(exec_prefix)/lib
28
+ sharedstatedir = $(prefix)/com
29
+ oldincludedir = $(DESTDIR)/usr/include
30
+ pdfdir = $(docdir)
31
+ sitearchdir = $(sitelibdir)/$(sitearch)
32
+ bindir = $(exec_prefix)/bin
33
+ localstatedir = $(prefix)/var
34
+ sitelibdir = $(sitedir)/$(ruby_version)
35
+ libexecdir = $(exec_prefix)/libexec
36
+
37
+ CC = gcc
38
+ LIBRUBY = $(LIBRUBY_SO)
39
+ LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
40
+ LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
41
+ LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)
42
+
43
+ RUBY_EXTCONF_H =
44
+ CFLAGS = -fno-common -arch ppc -arch i386 -Os -pipe -fno-common
45
+ INCFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
46
+ CPPFLAGS =
47
+ CXXFLAGS = $(CFLAGS)
48
+ DLDFLAGS = -L. -arch ppc -arch i386
49
+ LDSHARED = cc -arch ppc -arch i386 -pipe -bundle -undefined dynamic_lookup
50
+ AR = ar
51
+ EXEEXT =
52
+
53
+ RUBY_INSTALL_NAME = ruby
54
+ RUBY_SO_NAME = ruby
55
+ arch = universal-darwin9.0
56
+ sitearch = universal-darwin9.0
57
+ ruby_version = 1.8
58
+ ruby = /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/bin/ruby
59
+ RUBY = $(ruby)
60
+ RM = rm -f
61
+ MAKEDIRS = mkdir -p
62
+ INSTALL = /usr/bin/install -c
63
+ INSTALL_PROG = $(INSTALL) -m 0755
64
+ INSTALL_DATA = $(INSTALL) -m 644
65
+ COPY = cp
66
+
67
+ #### End of system configuration section. ####
68
+
69
+ preload =
70
+
71
+ libpath = . $(libdir)
72
+ LIBPATH = -L"." -L"$(libdir)"
73
+ DEFFILE =
74
+
75
+ CLEANFILES = mkmf.log
76
+ DISTCLEANFILES =
77
+
78
+ extout =
79
+ extout_prefix =
80
+ target_prefix =
81
+ LOCAL_LIBS =
82
+ LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lm
83
+ SRCS = darray.c fileutils.c tail.c trie-private.c trie.c
84
+ OBJS = darray.o fileutils.o tail.o trie-private.o trie.o
85
+ TARGET = trie
86
+ DLLIB = $(TARGET).bundle
87
+ EXTSTATIC =
88
+ STATIC_LIB =
89
+
90
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
91
+ RUBYLIBDIR = $(sitelibdir)$(target_prefix)
92
+ RUBYARCHDIR = $(sitearchdir)$(target_prefix)
93
+
94
+ TARGET_SO = $(DLLIB)
95
+ CLEANLIBS = $(TARGET).bundle $(TARGET).il? $(TARGET).tds $(TARGET).map
96
+ CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
97
+
98
+ all: $(DLLIB)
99
+ static: $(STATIC_LIB)
100
+
101
+ clean:
102
+ @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
103
+
104
+ distclean: clean
105
+ @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
106
+ @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
107
+
108
+ realclean: distclean
109
+ install: install-so install-rb
110
+
111
+ install-so: $(RUBYARCHDIR)
112
+ install-so: $(RUBYARCHDIR)/$(DLLIB)
113
+ $(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
114
+ $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
115
+ install-rb: pre-install-rb install-rb-default
116
+ install-rb-default: pre-install-rb-default
117
+ pre-install-rb: Makefile
118
+ pre-install-rb-default: Makefile
119
+ $(RUBYARCHDIR):
120
+ $(MAKEDIRS) $@
121
+
122
+ site-install: site-install-so site-install-rb
123
+ site-install-so: install-so
124
+ site-install-rb: install-rb
125
+
126
+ .SUFFIXES: .c .m .cc .cxx .cpp .C .o
127
+
128
+ .cc.o:
129
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
130
+
131
+ .cxx.o:
132
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
133
+
134
+ .cpp.o:
135
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
136
+
137
+ .C.o:
138
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
139
+
140
+ .c.o:
141
+ $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
142
+
143
+ $(DLLIB): $(OBJS)
144
+ @-$(RM) $@
145
+ $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
146
+
147
+
148
+
149
+ $(OBJS): ruby.h defines.h
data/ext/trie/darray.c ADDED
@@ -0,0 +1,673 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
+ /*
3
+ * darray.c - Double-array trie structure
4
+ * Created: 2006-08-13
5
+ * Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
6
+ */
7
+
8
+ #include <string.h>
9
+ #include <stdlib.h>
10
+ #include <stdio.h>
11
+
12
+ #include "trie-private.h"
13
+ #include "darray.h"
14
+ #include "fileutils.h"
15
+
16
+ /*----------------------------------*
17
+ * INTERNAL TYPES DECLARATIONS *
18
+ *----------------------------------*/
19
+
20
+ typedef struct _Symbols Symbols;
21
+
22
+ struct _Symbols {
23
+ short num_symbols;
24
+ TrieChar symbols[256];
25
+ };
26
+
27
+ static Symbols * symbols_new ();
28
+ static void symbols_free (Symbols *syms);
29
+ static void symbols_add (Symbols *syms, TrieChar c);
30
+
31
+ #define symbols_num(s) ((s)->num_symbols)
32
+ #define symbols_get(s,i) ((s)->symbols[i])
33
+ #define symbols_add_fast(s,c) ((s)->symbols[(s)->num_symbols++] = c)
34
+
35
+ /*-----------------------------------*
36
+ * PRIVATE METHODS DECLARATIONS *
37
+ *-----------------------------------*/
38
+
39
+ #define da_get_free_list(d) (1)
40
+
41
+ static Bool da_check_free_cell (DArray *d,
42
+ TrieIndex s);
43
+
44
+ static Bool da_has_children (DArray *d,
45
+ TrieIndex s);
46
+
47
+ static Symbols * da_output_symbols (const DArray *d,
48
+ TrieIndex s);
49
+
50
+ static TrieChar * da_get_state_key (const DArray *d,
51
+ TrieIndex state);
52
+
53
+ static TrieIndex da_find_free_base (DArray *d,
54
+ const Symbols *symbols);
55
+
56
+ static Bool da_fit_symbols (DArray *d,
57
+ TrieIndex base,
58
+ const Symbols *symbols);
59
+
60
+ static void da_relocate_base (DArray *d,
61
+ TrieIndex s,
62
+ TrieIndex new_base);
63
+
64
+ static Bool da_extend_pool (DArray *d,
65
+ TrieIndex to_index);
66
+
67
+ static void da_alloc_cell (DArray *d,
68
+ TrieIndex cell);
69
+
70
+ static void da_free_cell (DArray *d,
71
+ TrieIndex cell);
72
+
73
+ static Bool da_enumerate_recursive (const DArray *d,
74
+ TrieIndex state,
75
+ DAEnumFunc enum_func,
76
+ void *user_data);
77
+
78
+ /* ==================== BEGIN IMPLEMENTATION PART ==================== */
79
+
80
+ /*------------------------------------*
81
+ * INTERNAL TYPES IMPLEMENTATIONS *
82
+ *------------------------------------*/
83
+
84
+ static Symbols *
85
+ symbols_new ()
86
+ {
87
+ Symbols *syms;
88
+
89
+ syms = (Symbols *) malloc (sizeof (Symbols));
90
+
91
+ if (!syms)
92
+ return NULL;
93
+
94
+ syms->num_symbols = 0;
95
+
96
+ return syms;
97
+ }
98
+
99
+ static void
100
+ symbols_free (Symbols *syms)
101
+ {
102
+ free (syms);
103
+ }
104
+
105
+ static void
106
+ symbols_add (Symbols *syms, TrieChar c)
107
+ {
108
+ short lower, upper;
109
+
110
+ lower = 0;
111
+ upper = syms->num_symbols;
112
+ while (lower < upper) {
113
+ short middle;
114
+
115
+ middle = (lower + upper)/2;
116
+ if (c > syms->symbols[middle])
117
+ lower = middle + 1;
118
+ else if (c < syms->symbols[middle])
119
+ upper = middle;
120
+ else
121
+ return;
122
+ }
123
+ if (lower < syms->num_symbols) {
124
+ memmove (syms->symbols + lower + 1, syms->symbols + lower,
125
+ syms->num_symbols - lower);
126
+ }
127
+ syms->symbols[lower] = c;
128
+ syms->num_symbols++;
129
+ }
130
+
131
+ /*------------------------------*
132
+ * PRIVATE DATA DEFINITIONS *
133
+ *------------------------------*/
134
+
135
+ typedef struct {
136
+ TrieIndex base;
137
+ TrieIndex check;
138
+ } DACell;
139
+
140
+ struct _DArray {
141
+ TrieIndex num_cells;
142
+ DACell *cells;
143
+ };
144
+
145
+ /*-----------------------------*
146
+ * METHODS IMPLEMENTATIONS *
147
+ *-----------------------------*/
148
+
149
+ #define DA_SIGNATURE 0xDAFCDAFC
150
+
151
+ /* DA Header:
152
+ * - Cell 0: SIGNATURE, number of cells
153
+ * - Cell 1: free circular-list pointers
154
+ * - Cell 2: root node
155
+ * - Cell 3: DA pool begin
156
+ */
157
+ #define DA_POOL_BEGIN 3
158
+
159
+ DArray *
160
+ da_new ()
161
+ {
162
+ DArray *d;
163
+
164
+ d = (DArray *) malloc (sizeof (DArray));
165
+ if (!d)
166
+ return NULL;
167
+
168
+ d->num_cells = DA_POOL_BEGIN;
169
+ d->cells = (DACell *) malloc (d->num_cells * sizeof (DACell));
170
+ if (!d->cells)
171
+ goto exit_da_created;
172
+ d->cells[0].base = DA_SIGNATURE;
173
+ d->cells[0].check = d->num_cells;
174
+ d->cells[1].base = -1;
175
+ d->cells[1].check = -1;
176
+ d->cells[2].base = DA_POOL_BEGIN;
177
+ d->cells[2].check = 0;
178
+
179
+ return d;
180
+
181
+ exit_da_created:
182
+ free (d);
183
+ return NULL;
184
+ }
185
+
186
+ DArray *
187
+ da_read (FILE *file)
188
+ {
189
+ long save_pos;
190
+ DArray *d = NULL;
191
+ TrieIndex n;
192
+
193
+ /* check signature */
194
+ save_pos = ftell (file);
195
+ if (!file_read_int32 (file, &n) || DA_SIGNATURE != (uint32) n) {
196
+ fseek (file, save_pos, SEEK_SET);
197
+ return NULL;
198
+ }
199
+
200
+ d = (DArray *) malloc (sizeof (DArray));
201
+ if (!d)
202
+ return NULL;
203
+
204
+ /* read number of cells */
205
+ file_read_int32 (file, &d->num_cells);
206
+ d->cells = (DACell *) malloc (d->num_cells * sizeof (DACell));
207
+ if (!d->cells)
208
+ goto exit_da_created;
209
+ d->cells[0].base = DA_SIGNATURE;
210
+ d->cells[0].check= d->num_cells;
211
+ for (n = 1; n < d->num_cells; n++) {
212
+ file_read_int32 (file, &d->cells[n].base);
213
+ file_read_int32 (file, &d->cells[n].check);
214
+ }
215
+
216
+ return d;
217
+
218
+ exit_da_created:
219
+ free (d);
220
+ return NULL;
221
+ }
222
+
223
+ void
224
+ da_free (DArray *d)
225
+ {
226
+ free (d->cells);
227
+ free (d);
228
+ }
229
+
230
+ int
231
+ da_write (const DArray *d, FILE *file)
232
+ {
233
+ TrieIndex i;
234
+
235
+ for (i = 0; i < d->num_cells; i++) {
236
+ if (!file_write_int32 (file, d->cells[i].base) ||
237
+ !file_write_int32 (file, d->cells[i].check))
238
+ {
239
+ return -1;
240
+ }
241
+ }
242
+
243
+ return 0;
244
+ }
245
+
246
+
247
+ TrieIndex
248
+ da_get_root (const DArray *d)
249
+ {
250
+ /* can be calculated value for multi-index trie */
251
+ return 2;
252
+ }
253
+
254
+
255
+ TrieIndex
256
+ da_get_base (const DArray *d, TrieIndex s)
257
+ {
258
+ return (0 <= s && s < d->num_cells) ? d->cells[s].base : TRIE_INDEX_ERROR;
259
+ }
260
+
261
+ TrieIndex
262
+ da_get_check (const DArray *d, TrieIndex s)
263
+ {
264
+ return (0 <= s && s < d->num_cells) ? d->cells[s].check : TRIE_INDEX_ERROR;
265
+ }
266
+
267
+
268
+ void
269
+ da_set_base (DArray *d, TrieIndex s, TrieIndex val)
270
+ {
271
+ if (0 <= s && s < d->num_cells) {
272
+ d->cells[s].base = val;
273
+ }
274
+ }
275
+
276
+ void
277
+ da_set_check (DArray *d, TrieIndex s, TrieIndex val)
278
+ {
279
+ if (0 <= s && s < d->num_cells) {
280
+ d->cells[s].check = val;
281
+ }
282
+ }
283
+
284
+ Bool
285
+ da_walk (const DArray *d, TrieIndex *s, TrieChar c)
286
+ {
287
+ TrieIndex next;
288
+
289
+ next = da_get_base (d, *s) + c;
290
+ if (da_get_check (d, next) == *s) {
291
+ *s = next;
292
+ return TRUE;
293
+ }
294
+ return FALSE;
295
+ }
296
+
297
+ TrieIndex
298
+ da_insert_branch (DArray *d, TrieIndex s, TrieChar c)
299
+ {
300
+ TrieIndex base, next;
301
+
302
+ base = da_get_base (d, s);
303
+
304
+ if (base > 0) {
305
+ next = base + c;
306
+
307
+ /* if already there, do not actually insert */
308
+ if (da_get_check (d, next) == s)
309
+ return next;
310
+
311
+ /* if (base + c) > TRIE_INDEX_MAX which means 'next' is overflow,
312
+ * or cell [next] is not free, relocate to a free slot
313
+ */
314
+ if (base > TRIE_INDEX_MAX - c || !da_check_free_cell (d, next)) {
315
+ Symbols *symbols;
316
+ TrieIndex new_base;
317
+
318
+ /* relocate BASE[s] */
319
+ symbols = da_output_symbols (d, s);
320
+ symbols_add (symbols, c);
321
+ new_base = da_find_free_base (d, symbols);
322
+ symbols_free (symbols);
323
+
324
+ if (TRIE_INDEX_ERROR == new_base)
325
+ return TRIE_INDEX_ERROR;
326
+
327
+ da_relocate_base (d, s, new_base);
328
+ next = new_base + c;
329
+ }
330
+ } else {
331
+ Symbols *symbols;
332
+ TrieIndex new_base;
333
+
334
+ symbols = symbols_new ();
335
+ symbols_add (symbols, c);
336
+ new_base = da_find_free_base (d, symbols);
337
+ symbols_free (symbols);
338
+
339
+ if (TRIE_INDEX_ERROR == new_base)
340
+ return TRIE_INDEX_ERROR;
341
+
342
+ da_set_base (d, s, new_base);
343
+ next = new_base + c;
344
+ }
345
+ da_alloc_cell (d, next);
346
+ da_set_check (d, next, s);
347
+
348
+ return next;
349
+ }
350
+
351
+ static Bool
352
+ da_check_free_cell (DArray *d,
353
+ TrieIndex s)
354
+ {
355
+ return da_extend_pool (d, s) && da_get_check (d, s) < 0;
356
+ }
357
+
358
+ static Bool
359
+ da_has_children (DArray *d,
360
+ TrieIndex s)
361
+ {
362
+ TrieIndex base;
363
+ TrieIndex c, max_c;
364
+
365
+ base = da_get_base (d, s);
366
+ if (TRIE_INDEX_ERROR == base || base < 0)
367
+ return FALSE;
368
+
369
+ max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - base);
370
+ for (c = 0; c < max_c; c++) {
371
+ if (da_get_check (d, base + c) == s)
372
+ return TRUE;
373
+ }
374
+
375
+ return FALSE;
376
+ }
377
+
378
+ static Symbols *
379
+ da_output_symbols (const DArray *d,
380
+ TrieIndex s)
381
+ {
382
+ Symbols *syms;
383
+ TrieIndex base;
384
+ TrieIndex c, max_c;
385
+
386
+ syms = symbols_new ();
387
+
388
+ base = da_get_base (d, s);
389
+ max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - base);
390
+ for (c = 0; c < max_c; c++) {
391
+ if (da_get_check (d, base + c) == s)
392
+ symbols_add_fast (syms, (TrieChar) c);
393
+ }
394
+
395
+ return syms;
396
+ }
397
+
398
+ static TrieChar *
399
+ da_get_state_key (const DArray *d,
400
+ TrieIndex state)
401
+ {
402
+ TrieChar *key;
403
+ int key_size, key_length;
404
+ int i;
405
+
406
+ key_size = 20;
407
+ key_length = 0;
408
+ key = (TrieChar *) malloc (key_size);
409
+
410
+ /* trace back to root */
411
+ while (da_get_root (d) != state) {
412
+ TrieIndex parent;
413
+
414
+ if (key_length + 1 >= key_size) {
415
+ key_size += 20;
416
+ key = (TrieChar *) realloc (key, key_size);
417
+ }
418
+ parent = da_get_check (d, state);
419
+ key[key_length++] = (TrieChar) (state - da_get_base (d, parent));
420
+ state = parent;
421
+ }
422
+ key[key_length] = '\0';
423
+
424
+ /* reverse the string */
425
+ for (i = 0; i < --key_length; i++) {
426
+ TrieChar temp;
427
+
428
+ temp = key[i];
429
+ key[i] = key[key_length];
430
+ key[key_length] = temp;
431
+ }
432
+
433
+ return key;
434
+ }
435
+
436
+ static TrieIndex
437
+ da_find_free_base (DArray *d,
438
+ const Symbols *symbols)
439
+ {
440
+ TrieChar first_sym;
441
+ TrieIndex s;
442
+
443
+ /* find first free cell that is beyond the first symbol */
444
+ first_sym = symbols_get (symbols, 0);
445
+ s = -da_get_check (d, da_get_free_list (d));
446
+ while (s != da_get_free_list (d)
447
+ && s < (TrieIndex) first_sym + DA_POOL_BEGIN)
448
+ {
449
+ s = -da_get_check (d, s);
450
+ }
451
+ if (s == da_get_free_list (d)) {
452
+ for (s = first_sym + DA_POOL_BEGIN; ; ++s) {
453
+ if (!da_extend_pool (d, s))
454
+ return TRIE_INDEX_ERROR;
455
+ if (da_get_check (d, s) < 0)
456
+ break;
457
+ }
458
+ }
459
+
460
+ /* search for next free cell that fits the symbols set */
461
+ while (!da_fit_symbols (d, s - first_sym, symbols)) {
462
+ /* extend pool before getting exhausted */
463
+ if (-da_get_check (d, s) == da_get_free_list (d)) {
464
+ if (!da_extend_pool (d, d->num_cells))
465
+ return TRIE_INDEX_ERROR;
466
+ }
467
+
468
+ s = -da_get_check (d, s);
469
+ }
470
+
471
+ return s - first_sym;
472
+ }
473
+
474
+ static Bool
475
+ da_fit_symbols (DArray *d,
476
+ TrieIndex base,
477
+ const Symbols *symbols)
478
+ {
479
+ int i;
480
+
481
+ for (i = 0; i < symbols_num (symbols); i++) {
482
+ TrieChar sym = symbols_get (symbols, i);
483
+
484
+ /* if (base + sym) > TRIE_INDEX_MAX which means it's overflow,
485
+ * or cell [base + sym] is not free, the symbol is not fit.
486
+ */
487
+ if (base > TRIE_INDEX_MAX - sym || !da_check_free_cell (d, base + sym))
488
+ return FALSE;
489
+ }
490
+ return TRUE;
491
+ }
492
+
493
+ static void
494
+ da_relocate_base (DArray *d,
495
+ TrieIndex s,
496
+ TrieIndex new_base)
497
+ {
498
+ TrieIndex old_base;
499
+ Symbols *symbols;
500
+ int i;
501
+
502
+ old_base = da_get_base (d, s);
503
+ symbols = da_output_symbols (d, s);
504
+
505
+ for (i = 0; i < symbols_num (symbols); i++) {
506
+ TrieIndex old_next, new_next, old_next_base;
507
+
508
+ old_next = old_base + symbols_get (symbols, i);
509
+ new_next = new_base + symbols_get (symbols, i);
510
+ old_next_base = da_get_base (d, old_next);
511
+
512
+ /* allocate new next node and copy BASE value */
513
+ da_alloc_cell (d, new_next);
514
+ da_set_check (d, new_next, s);
515
+ da_set_base (d, new_next, old_next_base);
516
+
517
+ /* old_next node is now moved to new_next
518
+ * so, all cells belonging to old_next
519
+ * must be given to new_next
520
+ */
521
+ /* preventing the case of TAIL pointer */
522
+ if (old_next_base > 0) {
523
+ TrieIndex c, max_c;
524
+
525
+ max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - old_next_base);
526
+ for (c = 0; c < max_c; c++) {
527
+ if (da_get_check (d, old_next_base + c) == old_next)
528
+ da_set_check (d, old_next_base + c, new_next);
529
+ }
530
+ }
531
+
532
+ /* free old_next node */
533
+ da_free_cell (d, old_next);
534
+ }
535
+
536
+ symbols_free (symbols);
537
+
538
+ /* finally, make BASE[s] point to new_base */
539
+ da_set_base (d, s, new_base);
540
+ }
541
+
542
+ static Bool
543
+ da_extend_pool (DArray *d,
544
+ TrieIndex to_index)
545
+ {
546
+ TrieIndex new_begin;
547
+ TrieIndex i;
548
+ TrieIndex free_tail;
549
+
550
+ if (to_index <= 0 || TRIE_INDEX_MAX <= to_index)
551
+ return FALSE;
552
+
553
+ if (to_index < d->num_cells)
554
+ return TRUE;
555
+
556
+ d->cells = (DACell *) realloc (d->cells, (to_index + 1) * sizeof (DACell));
557
+ new_begin = d->num_cells;
558
+ d->num_cells = to_index + 1;
559
+
560
+ /* initialize new free list */
561
+ for (i = new_begin; i < to_index; i++) {
562
+ da_set_check (d, i, -(i + 1));
563
+ da_set_base (d, i + 1, -i);
564
+ }
565
+
566
+ /* merge the new circular list to the old */
567
+ free_tail = -da_get_base (d, da_get_free_list (d));
568
+ da_set_check (d, free_tail, -new_begin);
569
+ da_set_base (d, new_begin, -free_tail);
570
+ da_set_check (d, to_index, -da_get_free_list (d));
571
+ da_set_base (d, da_get_free_list (d), -to_index);
572
+
573
+ /* update header cell */
574
+ d->cells[0].check = d->num_cells;
575
+
576
+ return TRUE;
577
+ }
578
+
579
+ void
580
+ da_prune (DArray *d, TrieIndex s)
581
+ {
582
+ da_prune_upto (d, da_get_root (d), s);
583
+ }
584
+
585
+ void
586
+ da_prune_upto (DArray *d, TrieIndex p, TrieIndex s)
587
+ {
588
+ while (p != s && !da_has_children (d, s)) {
589
+ TrieIndex parent;
590
+
591
+ parent = da_get_check (d, s);
592
+ da_free_cell (d, s);
593
+ s = parent;
594
+ }
595
+ }
596
+
597
+ static void
598
+ da_alloc_cell (DArray *d,
599
+ TrieIndex cell)
600
+ {
601
+ TrieIndex prev, next;
602
+
603
+ prev = -da_get_base (d, cell);
604
+ next = -da_get_check (d, cell);
605
+
606
+ /* remove the cell from free list */
607
+ da_set_check (d, prev, -next);
608
+ da_set_base (d, next, -prev);
609
+ }
610
+
611
+ static void
612
+ da_free_cell (DArray *d,
613
+ TrieIndex cell)
614
+ {
615
+ TrieIndex i, prev;
616
+
617
+ /* find insertion point */
618
+ i = -da_get_check (d, da_get_free_list (d));
619
+ while (i != da_get_free_list (d) && i < cell)
620
+ i = -da_get_check (d, i);
621
+
622
+ prev = -da_get_base (d, i);
623
+
624
+ /* insert cell before i */
625
+ da_set_check (d, cell, -i);
626
+ da_set_base (d, cell, -prev);
627
+ da_set_check (d, prev, -cell);
628
+ da_set_base (d, i, -cell);
629
+ }
630
+
631
+ Bool
632
+ da_enumerate (const DArray *d, DAEnumFunc enum_func, void *user_data)
633
+ {
634
+ return da_enumerate_recursive (d, da_get_root (d), enum_func, user_data);
635
+ }
636
+
637
+ static Bool
638
+ da_enumerate_recursive (const DArray *d,
639
+ TrieIndex state,
640
+ DAEnumFunc enum_func,
641
+ void *user_data)
642
+ {
643
+ Bool ret;
644
+ TrieIndex base;
645
+
646
+ base = da_get_base (d, state);
647
+
648
+ if (base < 0) {
649
+ TrieChar *key;
650
+
651
+ key = da_get_state_key (d, state);
652
+ ret = (*enum_func) (key, state, user_data);
653
+ free (key);
654
+ } else {
655
+ Symbols *symbols;
656
+ int i;
657
+
658
+ ret = TRUE;
659
+ symbols = da_output_symbols (d, state);
660
+ for (i = 0; ret && i < symbols_num (symbols); i++) {
661
+ ret = da_enumerate_recursive (d, base + symbols_get (symbols, i),
662
+ enum_func, user_data);
663
+ }
664
+
665
+ symbols_free (symbols);
666
+ }
667
+
668
+ return ret;
669
+ }
670
+
671
+ /*
672
+ vi:ts=4:ai:expandtab
673
+ */