tyler-trie 0.2.3 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +3 -14
- data/VERSION.yml +2 -2
- data/ext/trie/Makefile +149 -0
- data/ext/trie/darray.c +673 -0
- data/ext/trie/darray.h +233 -0
- data/ext/trie/extconf.rb +0 -7
- data/ext/trie/fileutils.c +151 -0
- data/ext/trie/fileutils.h +36 -0
- data/ext/trie/tail.c +340 -0
- data/ext/trie/tail.h +207 -0
- data/ext/trie/trie-private.c +271 -0
- data/ext/trie/trie-private.h +31 -0
- data/ext/trie/trie.c +204 -301
- data/ext/trie/trie.h +40 -0
- data/ext/trie/triedefs.h +73 -0
- data/lib/trie.rb +1 -1
- data/spec/trie_spec.rb +31 -47
- metadata +14 -7
- data/spec/test-trie/README +0 -1
data/README.textile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
h1. Trie
|
2
2
|
|
3
|
-
This is a Ruby
|
3
|
+
This is a trie for Ruby using libdatrie. It uses a double-array system, meaning it has best-in-class memory usage and search time.
|
4
4
|
|
5
5
|
|
6
6
|
h2. What is a trie?
|
@@ -22,14 +22,12 @@ It's easy to see how this can have pretty neat implications for things like sear
|
|
22
22
|
|
23
23
|
h2. Tutorial
|
24
24
|
|
25
|
-
Let's go through building a simple autocompleter using Trie.
|
25
|
+
Let's go through building a simple autocompleter using Trie.
|
26
26
|
|
27
27
|
<pre><code>
|
28
|
-
Trie.new
|
28
|
+
Trie.new
|
29
29
|
</code></pre>
|
30
30
|
|
31
|
-
When you call <code>Trie.new</code> for the first time with the given directory as the first argument, it will create three files: 'trie.br', 'trie.tl', and 'trie.sbm'. 'trie.br' and 'trie.tl' are binary files corresponding to the two arrays which represent the trie structure itself and the tails and data for the strings, respectively. You probably don't want to mess with these directly; use the library for that. 'trie.sbm' controls what characters are valid in the trie. Look into the libdatrie documentation for more details.
|
32
|
-
|
33
31
|
Anyway. So we've created our blank trie. Now, since we're creating an autocompleter, we'll need to add some words into it. We do that simply with the add method.
|
34
32
|
|
35
33
|
<pre><code>
|
@@ -90,13 +88,4 @@ There are, of course, some more interesting and advanced ways to use a trie. Fo
|
|
90
88
|
By calling <code>root</code> on a Trie object, you get a TrieNode, pointed at the root of the trie. You can then use this node to walk the trie and perceive things about each word.
|
91
89
|
|
92
90
|
|
93
|
-
h2. Limitations
|
94
|
-
|
95
|
-
By default libdatrie supports only 32767 words in a trie, as well as only 16-bit integers for the value that goes along with inserted strings. This certainly makes sense for some purposes on some platforms... but I want to be able to enter bajillions of words with large bits of data associated. So, I've forked the project to switch both indexes and datum to 32-bit. So you can enter... a lot of information now. You can find my fork at http://github.com/tyler/libdatrie.
|
96
|
-
|
97
|
-
h2. Bugs
|
98
|
-
|
99
|
-
Saving to disk doesn't work correctly. Not sure why... maybe related to my libdatrie changes.
|
100
|
-
|
101
|
-
|
102
91
|
Copyright (c) 2008 Tyler McMullen. See LICENSE for details.
|
data/VERSION.yml
CHANGED
data/ext/trie/Makefile
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
#### Start of system configuration section. ####
|
5
|
+
|
6
|
+
srcdir = .
|
7
|
+
topdir = /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/universal-darwin9.0
|
8
|
+
hdrdir = $(topdir)
|
9
|
+
VPATH = $(srcdir):$(topdir):$(hdrdir)
|
10
|
+
prefix = $(DESTDIR)/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr
|
11
|
+
exec_prefix = $(prefix)
|
12
|
+
sitedir = $(DESTDIR)/Library/Ruby/Site
|
13
|
+
rubylibdir = $(libdir)/ruby/$(ruby_version)
|
14
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
15
|
+
dvidir = $(docdir)
|
16
|
+
datarootdir = $(prefix)/share
|
17
|
+
archdir = $(rubylibdir)/$(arch)
|
18
|
+
sbindir = $(exec_prefix)/sbin
|
19
|
+
psdir = $(docdir)
|
20
|
+
localedir = $(datarootdir)/locale
|
21
|
+
htmldir = $(docdir)
|
22
|
+
datadir = $(datarootdir)
|
23
|
+
includedir = $(prefix)/include
|
24
|
+
infodir = $(DESTDIR)/usr/share/info
|
25
|
+
sysconfdir = $(prefix)/etc
|
26
|
+
mandir = $(DESTDIR)/usr/share/man
|
27
|
+
libdir = $(exec_prefix)/lib
|
28
|
+
sharedstatedir = $(prefix)/com
|
29
|
+
oldincludedir = $(DESTDIR)/usr/include
|
30
|
+
pdfdir = $(docdir)
|
31
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
32
|
+
bindir = $(exec_prefix)/bin
|
33
|
+
localstatedir = $(prefix)/var
|
34
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
35
|
+
libexecdir = $(exec_prefix)/libexec
|
36
|
+
|
37
|
+
CC = gcc
|
38
|
+
LIBRUBY = $(LIBRUBY_SO)
|
39
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
40
|
+
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
|
41
|
+
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)
|
42
|
+
|
43
|
+
RUBY_EXTCONF_H =
|
44
|
+
CFLAGS = -fno-common -arch ppc -arch i386 -Os -pipe -fno-common
|
45
|
+
INCFLAGS = -I. -I$(topdir) -I$(hdrdir) -I$(srcdir)
|
46
|
+
CPPFLAGS =
|
47
|
+
CXXFLAGS = $(CFLAGS)
|
48
|
+
DLDFLAGS = -L. -arch ppc -arch i386
|
49
|
+
LDSHARED = cc -arch ppc -arch i386 -pipe -bundle -undefined dynamic_lookup
|
50
|
+
AR = ar
|
51
|
+
EXEEXT =
|
52
|
+
|
53
|
+
RUBY_INSTALL_NAME = ruby
|
54
|
+
RUBY_SO_NAME = ruby
|
55
|
+
arch = universal-darwin9.0
|
56
|
+
sitearch = universal-darwin9.0
|
57
|
+
ruby_version = 1.8
|
58
|
+
ruby = /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/bin/ruby
|
59
|
+
RUBY = $(ruby)
|
60
|
+
RM = rm -f
|
61
|
+
MAKEDIRS = mkdir -p
|
62
|
+
INSTALL = /usr/bin/install -c
|
63
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
64
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
65
|
+
COPY = cp
|
66
|
+
|
67
|
+
#### End of system configuration section. ####
|
68
|
+
|
69
|
+
preload =
|
70
|
+
|
71
|
+
libpath = . $(libdir)
|
72
|
+
LIBPATH = -L"." -L"$(libdir)"
|
73
|
+
DEFFILE =
|
74
|
+
|
75
|
+
CLEANFILES = mkmf.log
|
76
|
+
DISTCLEANFILES =
|
77
|
+
|
78
|
+
extout =
|
79
|
+
extout_prefix =
|
80
|
+
target_prefix =
|
81
|
+
LOCAL_LIBS =
|
82
|
+
LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lm
|
83
|
+
SRCS = darray.c fileutils.c tail.c trie-private.c trie.c
|
84
|
+
OBJS = darray.o fileutils.o tail.o trie-private.o trie.o
|
85
|
+
TARGET = trie
|
86
|
+
DLLIB = $(TARGET).bundle
|
87
|
+
EXTSTATIC =
|
88
|
+
STATIC_LIB =
|
89
|
+
|
90
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
91
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
92
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
93
|
+
|
94
|
+
TARGET_SO = $(DLLIB)
|
95
|
+
CLEANLIBS = $(TARGET).bundle $(TARGET).il? $(TARGET).tds $(TARGET).map
|
96
|
+
CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
|
97
|
+
|
98
|
+
all: $(DLLIB)
|
99
|
+
static: $(STATIC_LIB)
|
100
|
+
|
101
|
+
clean:
|
102
|
+
@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
|
103
|
+
|
104
|
+
distclean: clean
|
105
|
+
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
106
|
+
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
107
|
+
|
108
|
+
realclean: distclean
|
109
|
+
install: install-so install-rb
|
110
|
+
|
111
|
+
install-so: $(RUBYARCHDIR)
|
112
|
+
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
113
|
+
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
114
|
+
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
115
|
+
install-rb: pre-install-rb install-rb-default
|
116
|
+
install-rb-default: pre-install-rb-default
|
117
|
+
pre-install-rb: Makefile
|
118
|
+
pre-install-rb-default: Makefile
|
119
|
+
$(RUBYARCHDIR):
|
120
|
+
$(MAKEDIRS) $@
|
121
|
+
|
122
|
+
site-install: site-install-so site-install-rb
|
123
|
+
site-install-so: install-so
|
124
|
+
site-install-rb: install-rb
|
125
|
+
|
126
|
+
.SUFFIXES: .c .m .cc .cxx .cpp .C .o
|
127
|
+
|
128
|
+
.cc.o:
|
129
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
130
|
+
|
131
|
+
.cxx.o:
|
132
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
133
|
+
|
134
|
+
.cpp.o:
|
135
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
136
|
+
|
137
|
+
.C.o:
|
138
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
139
|
+
|
140
|
+
.c.o:
|
141
|
+
$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
|
142
|
+
|
143
|
+
$(DLLIB): $(OBJS)
|
144
|
+
@-$(RM) $@
|
145
|
+
$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
$(OBJS): ruby.h defines.h
|
data/ext/trie/darray.c
ADDED
@@ -0,0 +1,673 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* darray.c - Double-array trie structure
|
4
|
+
* Created: 2006-08-13
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <string.h>
|
9
|
+
#include <stdlib.h>
|
10
|
+
#include <stdio.h>
|
11
|
+
|
12
|
+
#include "trie-private.h"
|
13
|
+
#include "darray.h"
|
14
|
+
#include "fileutils.h"
|
15
|
+
|
16
|
+
/*----------------------------------*
|
17
|
+
* INTERNAL TYPES DECLARATIONS *
|
18
|
+
*----------------------------------*/
|
19
|
+
|
20
|
+
typedef struct _Symbols Symbols;
|
21
|
+
|
22
|
+
struct _Symbols {
|
23
|
+
short num_symbols;
|
24
|
+
TrieChar symbols[256];
|
25
|
+
};
|
26
|
+
|
27
|
+
static Symbols * symbols_new ();
|
28
|
+
static void symbols_free (Symbols *syms);
|
29
|
+
static void symbols_add (Symbols *syms, TrieChar c);
|
30
|
+
|
31
|
+
#define symbols_num(s) ((s)->num_symbols)
|
32
|
+
#define symbols_get(s,i) ((s)->symbols[i])
|
33
|
+
#define symbols_add_fast(s,c) ((s)->symbols[(s)->num_symbols++] = c)
|
34
|
+
|
35
|
+
/*-----------------------------------*
|
36
|
+
* PRIVATE METHODS DECLARATIONS *
|
37
|
+
*-----------------------------------*/
|
38
|
+
|
39
|
+
#define da_get_free_list(d) (1)
|
40
|
+
|
41
|
+
static Bool da_check_free_cell (DArray *d,
|
42
|
+
TrieIndex s);
|
43
|
+
|
44
|
+
static Bool da_has_children (DArray *d,
|
45
|
+
TrieIndex s);
|
46
|
+
|
47
|
+
static Symbols * da_output_symbols (const DArray *d,
|
48
|
+
TrieIndex s);
|
49
|
+
|
50
|
+
static TrieChar * da_get_state_key (const DArray *d,
|
51
|
+
TrieIndex state);
|
52
|
+
|
53
|
+
static TrieIndex da_find_free_base (DArray *d,
|
54
|
+
const Symbols *symbols);
|
55
|
+
|
56
|
+
static Bool da_fit_symbols (DArray *d,
|
57
|
+
TrieIndex base,
|
58
|
+
const Symbols *symbols);
|
59
|
+
|
60
|
+
static void da_relocate_base (DArray *d,
|
61
|
+
TrieIndex s,
|
62
|
+
TrieIndex new_base);
|
63
|
+
|
64
|
+
static Bool da_extend_pool (DArray *d,
|
65
|
+
TrieIndex to_index);
|
66
|
+
|
67
|
+
static void da_alloc_cell (DArray *d,
|
68
|
+
TrieIndex cell);
|
69
|
+
|
70
|
+
static void da_free_cell (DArray *d,
|
71
|
+
TrieIndex cell);
|
72
|
+
|
73
|
+
static Bool da_enumerate_recursive (const DArray *d,
|
74
|
+
TrieIndex state,
|
75
|
+
DAEnumFunc enum_func,
|
76
|
+
void *user_data);
|
77
|
+
|
78
|
+
/* ==================== BEGIN IMPLEMENTATION PART ==================== */
|
79
|
+
|
80
|
+
/*------------------------------------*
|
81
|
+
* INTERNAL TYPES IMPLEMENTATIONS *
|
82
|
+
*------------------------------------*/
|
83
|
+
|
84
|
+
static Symbols *
|
85
|
+
symbols_new ()
|
86
|
+
{
|
87
|
+
Symbols *syms;
|
88
|
+
|
89
|
+
syms = (Symbols *) malloc (sizeof (Symbols));
|
90
|
+
|
91
|
+
if (!syms)
|
92
|
+
return NULL;
|
93
|
+
|
94
|
+
syms->num_symbols = 0;
|
95
|
+
|
96
|
+
return syms;
|
97
|
+
}
|
98
|
+
|
99
|
+
static void
|
100
|
+
symbols_free (Symbols *syms)
|
101
|
+
{
|
102
|
+
free (syms);
|
103
|
+
}
|
104
|
+
|
105
|
+
static void
|
106
|
+
symbols_add (Symbols *syms, TrieChar c)
|
107
|
+
{
|
108
|
+
short lower, upper;
|
109
|
+
|
110
|
+
lower = 0;
|
111
|
+
upper = syms->num_symbols;
|
112
|
+
while (lower < upper) {
|
113
|
+
short middle;
|
114
|
+
|
115
|
+
middle = (lower + upper)/2;
|
116
|
+
if (c > syms->symbols[middle])
|
117
|
+
lower = middle + 1;
|
118
|
+
else if (c < syms->symbols[middle])
|
119
|
+
upper = middle;
|
120
|
+
else
|
121
|
+
return;
|
122
|
+
}
|
123
|
+
if (lower < syms->num_symbols) {
|
124
|
+
memmove (syms->symbols + lower + 1, syms->symbols + lower,
|
125
|
+
syms->num_symbols - lower);
|
126
|
+
}
|
127
|
+
syms->symbols[lower] = c;
|
128
|
+
syms->num_symbols++;
|
129
|
+
}
|
130
|
+
|
131
|
+
/*------------------------------*
|
132
|
+
* PRIVATE DATA DEFINITIONS *
|
133
|
+
*------------------------------*/
|
134
|
+
|
135
|
+
typedef struct {
|
136
|
+
TrieIndex base;
|
137
|
+
TrieIndex check;
|
138
|
+
} DACell;
|
139
|
+
|
140
|
+
struct _DArray {
|
141
|
+
TrieIndex num_cells;
|
142
|
+
DACell *cells;
|
143
|
+
};
|
144
|
+
|
145
|
+
/*-----------------------------*
|
146
|
+
* METHODS IMPLEMENTATIONS *
|
147
|
+
*-----------------------------*/
|
148
|
+
|
149
|
+
#define DA_SIGNATURE 0xDAFCDAFC
|
150
|
+
|
151
|
+
/* DA Header:
|
152
|
+
* - Cell 0: SIGNATURE, number of cells
|
153
|
+
* - Cell 1: free circular-list pointers
|
154
|
+
* - Cell 2: root node
|
155
|
+
* - Cell 3: DA pool begin
|
156
|
+
*/
|
157
|
+
#define DA_POOL_BEGIN 3
|
158
|
+
|
159
|
+
DArray *
|
160
|
+
da_new ()
|
161
|
+
{
|
162
|
+
DArray *d;
|
163
|
+
|
164
|
+
d = (DArray *) malloc (sizeof (DArray));
|
165
|
+
if (!d)
|
166
|
+
return NULL;
|
167
|
+
|
168
|
+
d->num_cells = DA_POOL_BEGIN;
|
169
|
+
d->cells = (DACell *) malloc (d->num_cells * sizeof (DACell));
|
170
|
+
if (!d->cells)
|
171
|
+
goto exit_da_created;
|
172
|
+
d->cells[0].base = DA_SIGNATURE;
|
173
|
+
d->cells[0].check = d->num_cells;
|
174
|
+
d->cells[1].base = -1;
|
175
|
+
d->cells[1].check = -1;
|
176
|
+
d->cells[2].base = DA_POOL_BEGIN;
|
177
|
+
d->cells[2].check = 0;
|
178
|
+
|
179
|
+
return d;
|
180
|
+
|
181
|
+
exit_da_created:
|
182
|
+
free (d);
|
183
|
+
return NULL;
|
184
|
+
}
|
185
|
+
|
186
|
+
DArray *
|
187
|
+
da_read (FILE *file)
|
188
|
+
{
|
189
|
+
long save_pos;
|
190
|
+
DArray *d = NULL;
|
191
|
+
TrieIndex n;
|
192
|
+
|
193
|
+
/* check signature */
|
194
|
+
save_pos = ftell (file);
|
195
|
+
if (!file_read_int32 (file, &n) || DA_SIGNATURE != (uint32) n) {
|
196
|
+
fseek (file, save_pos, SEEK_SET);
|
197
|
+
return NULL;
|
198
|
+
}
|
199
|
+
|
200
|
+
d = (DArray *) malloc (sizeof (DArray));
|
201
|
+
if (!d)
|
202
|
+
return NULL;
|
203
|
+
|
204
|
+
/* read number of cells */
|
205
|
+
file_read_int32 (file, &d->num_cells);
|
206
|
+
d->cells = (DACell *) malloc (d->num_cells * sizeof (DACell));
|
207
|
+
if (!d->cells)
|
208
|
+
goto exit_da_created;
|
209
|
+
d->cells[0].base = DA_SIGNATURE;
|
210
|
+
d->cells[0].check= d->num_cells;
|
211
|
+
for (n = 1; n < d->num_cells; n++) {
|
212
|
+
file_read_int32 (file, &d->cells[n].base);
|
213
|
+
file_read_int32 (file, &d->cells[n].check);
|
214
|
+
}
|
215
|
+
|
216
|
+
return d;
|
217
|
+
|
218
|
+
exit_da_created:
|
219
|
+
free (d);
|
220
|
+
return NULL;
|
221
|
+
}
|
222
|
+
|
223
|
+
void
|
224
|
+
da_free (DArray *d)
|
225
|
+
{
|
226
|
+
free (d->cells);
|
227
|
+
free (d);
|
228
|
+
}
|
229
|
+
|
230
|
+
int
|
231
|
+
da_write (const DArray *d, FILE *file)
|
232
|
+
{
|
233
|
+
TrieIndex i;
|
234
|
+
|
235
|
+
for (i = 0; i < d->num_cells; i++) {
|
236
|
+
if (!file_write_int32 (file, d->cells[i].base) ||
|
237
|
+
!file_write_int32 (file, d->cells[i].check))
|
238
|
+
{
|
239
|
+
return -1;
|
240
|
+
}
|
241
|
+
}
|
242
|
+
|
243
|
+
return 0;
|
244
|
+
}
|
245
|
+
|
246
|
+
|
247
|
+
TrieIndex
|
248
|
+
da_get_root (const DArray *d)
|
249
|
+
{
|
250
|
+
/* can be calculated value for multi-index trie */
|
251
|
+
return 2;
|
252
|
+
}
|
253
|
+
|
254
|
+
|
255
|
+
TrieIndex
|
256
|
+
da_get_base (const DArray *d, TrieIndex s)
|
257
|
+
{
|
258
|
+
return (0 <= s && s < d->num_cells) ? d->cells[s].base : TRIE_INDEX_ERROR;
|
259
|
+
}
|
260
|
+
|
261
|
+
TrieIndex
|
262
|
+
da_get_check (const DArray *d, TrieIndex s)
|
263
|
+
{
|
264
|
+
return (0 <= s && s < d->num_cells) ? d->cells[s].check : TRIE_INDEX_ERROR;
|
265
|
+
}
|
266
|
+
|
267
|
+
|
268
|
+
void
|
269
|
+
da_set_base (DArray *d, TrieIndex s, TrieIndex val)
|
270
|
+
{
|
271
|
+
if (0 <= s && s < d->num_cells) {
|
272
|
+
d->cells[s].base = val;
|
273
|
+
}
|
274
|
+
}
|
275
|
+
|
276
|
+
void
|
277
|
+
da_set_check (DArray *d, TrieIndex s, TrieIndex val)
|
278
|
+
{
|
279
|
+
if (0 <= s && s < d->num_cells) {
|
280
|
+
d->cells[s].check = val;
|
281
|
+
}
|
282
|
+
}
|
283
|
+
|
284
|
+
Bool
|
285
|
+
da_walk (const DArray *d, TrieIndex *s, TrieChar c)
|
286
|
+
{
|
287
|
+
TrieIndex next;
|
288
|
+
|
289
|
+
next = da_get_base (d, *s) + c;
|
290
|
+
if (da_get_check (d, next) == *s) {
|
291
|
+
*s = next;
|
292
|
+
return TRUE;
|
293
|
+
}
|
294
|
+
return FALSE;
|
295
|
+
}
|
296
|
+
|
297
|
+
TrieIndex
|
298
|
+
da_insert_branch (DArray *d, TrieIndex s, TrieChar c)
|
299
|
+
{
|
300
|
+
TrieIndex base, next;
|
301
|
+
|
302
|
+
base = da_get_base (d, s);
|
303
|
+
|
304
|
+
if (base > 0) {
|
305
|
+
next = base + c;
|
306
|
+
|
307
|
+
/* if already there, do not actually insert */
|
308
|
+
if (da_get_check (d, next) == s)
|
309
|
+
return next;
|
310
|
+
|
311
|
+
/* if (base + c) > TRIE_INDEX_MAX which means 'next' is overflow,
|
312
|
+
* or cell [next] is not free, relocate to a free slot
|
313
|
+
*/
|
314
|
+
if (base > TRIE_INDEX_MAX - c || !da_check_free_cell (d, next)) {
|
315
|
+
Symbols *symbols;
|
316
|
+
TrieIndex new_base;
|
317
|
+
|
318
|
+
/* relocate BASE[s] */
|
319
|
+
symbols = da_output_symbols (d, s);
|
320
|
+
symbols_add (symbols, c);
|
321
|
+
new_base = da_find_free_base (d, symbols);
|
322
|
+
symbols_free (symbols);
|
323
|
+
|
324
|
+
if (TRIE_INDEX_ERROR == new_base)
|
325
|
+
return TRIE_INDEX_ERROR;
|
326
|
+
|
327
|
+
da_relocate_base (d, s, new_base);
|
328
|
+
next = new_base + c;
|
329
|
+
}
|
330
|
+
} else {
|
331
|
+
Symbols *symbols;
|
332
|
+
TrieIndex new_base;
|
333
|
+
|
334
|
+
symbols = symbols_new ();
|
335
|
+
symbols_add (symbols, c);
|
336
|
+
new_base = da_find_free_base (d, symbols);
|
337
|
+
symbols_free (symbols);
|
338
|
+
|
339
|
+
if (TRIE_INDEX_ERROR == new_base)
|
340
|
+
return TRIE_INDEX_ERROR;
|
341
|
+
|
342
|
+
da_set_base (d, s, new_base);
|
343
|
+
next = new_base + c;
|
344
|
+
}
|
345
|
+
da_alloc_cell (d, next);
|
346
|
+
da_set_check (d, next, s);
|
347
|
+
|
348
|
+
return next;
|
349
|
+
}
|
350
|
+
|
351
|
+
static Bool
|
352
|
+
da_check_free_cell (DArray *d,
|
353
|
+
TrieIndex s)
|
354
|
+
{
|
355
|
+
return da_extend_pool (d, s) && da_get_check (d, s) < 0;
|
356
|
+
}
|
357
|
+
|
358
|
+
static Bool
|
359
|
+
da_has_children (DArray *d,
|
360
|
+
TrieIndex s)
|
361
|
+
{
|
362
|
+
TrieIndex base;
|
363
|
+
TrieIndex c, max_c;
|
364
|
+
|
365
|
+
base = da_get_base (d, s);
|
366
|
+
if (TRIE_INDEX_ERROR == base || base < 0)
|
367
|
+
return FALSE;
|
368
|
+
|
369
|
+
max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - base);
|
370
|
+
for (c = 0; c < max_c; c++) {
|
371
|
+
if (da_get_check (d, base + c) == s)
|
372
|
+
return TRUE;
|
373
|
+
}
|
374
|
+
|
375
|
+
return FALSE;
|
376
|
+
}
|
377
|
+
|
378
|
+
static Symbols *
|
379
|
+
da_output_symbols (const DArray *d,
|
380
|
+
TrieIndex s)
|
381
|
+
{
|
382
|
+
Symbols *syms;
|
383
|
+
TrieIndex base;
|
384
|
+
TrieIndex c, max_c;
|
385
|
+
|
386
|
+
syms = symbols_new ();
|
387
|
+
|
388
|
+
base = da_get_base (d, s);
|
389
|
+
max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - base);
|
390
|
+
for (c = 0; c < max_c; c++) {
|
391
|
+
if (da_get_check (d, base + c) == s)
|
392
|
+
symbols_add_fast (syms, (TrieChar) c);
|
393
|
+
}
|
394
|
+
|
395
|
+
return syms;
|
396
|
+
}
|
397
|
+
|
398
|
+
static TrieChar *
|
399
|
+
da_get_state_key (const DArray *d,
|
400
|
+
TrieIndex state)
|
401
|
+
{
|
402
|
+
TrieChar *key;
|
403
|
+
int key_size, key_length;
|
404
|
+
int i;
|
405
|
+
|
406
|
+
key_size = 20;
|
407
|
+
key_length = 0;
|
408
|
+
key = (TrieChar *) malloc (key_size);
|
409
|
+
|
410
|
+
/* trace back to root */
|
411
|
+
while (da_get_root (d) != state) {
|
412
|
+
TrieIndex parent;
|
413
|
+
|
414
|
+
if (key_length + 1 >= key_size) {
|
415
|
+
key_size += 20;
|
416
|
+
key = (TrieChar *) realloc (key, key_size);
|
417
|
+
}
|
418
|
+
parent = da_get_check (d, state);
|
419
|
+
key[key_length++] = (TrieChar) (state - da_get_base (d, parent));
|
420
|
+
state = parent;
|
421
|
+
}
|
422
|
+
key[key_length] = '\0';
|
423
|
+
|
424
|
+
/* reverse the string */
|
425
|
+
for (i = 0; i < --key_length; i++) {
|
426
|
+
TrieChar temp;
|
427
|
+
|
428
|
+
temp = key[i];
|
429
|
+
key[i] = key[key_length];
|
430
|
+
key[key_length] = temp;
|
431
|
+
}
|
432
|
+
|
433
|
+
return key;
|
434
|
+
}
|
435
|
+
|
436
|
+
static TrieIndex
|
437
|
+
da_find_free_base (DArray *d,
|
438
|
+
const Symbols *symbols)
|
439
|
+
{
|
440
|
+
TrieChar first_sym;
|
441
|
+
TrieIndex s;
|
442
|
+
|
443
|
+
/* find first free cell that is beyond the first symbol */
|
444
|
+
first_sym = symbols_get (symbols, 0);
|
445
|
+
s = -da_get_check (d, da_get_free_list (d));
|
446
|
+
while (s != da_get_free_list (d)
|
447
|
+
&& s < (TrieIndex) first_sym + DA_POOL_BEGIN)
|
448
|
+
{
|
449
|
+
s = -da_get_check (d, s);
|
450
|
+
}
|
451
|
+
if (s == da_get_free_list (d)) {
|
452
|
+
for (s = first_sym + DA_POOL_BEGIN; ; ++s) {
|
453
|
+
if (!da_extend_pool (d, s))
|
454
|
+
return TRIE_INDEX_ERROR;
|
455
|
+
if (da_get_check (d, s) < 0)
|
456
|
+
break;
|
457
|
+
}
|
458
|
+
}
|
459
|
+
|
460
|
+
/* search for next free cell that fits the symbols set */
|
461
|
+
while (!da_fit_symbols (d, s - first_sym, symbols)) {
|
462
|
+
/* extend pool before getting exhausted */
|
463
|
+
if (-da_get_check (d, s) == da_get_free_list (d)) {
|
464
|
+
if (!da_extend_pool (d, d->num_cells))
|
465
|
+
return TRIE_INDEX_ERROR;
|
466
|
+
}
|
467
|
+
|
468
|
+
s = -da_get_check (d, s);
|
469
|
+
}
|
470
|
+
|
471
|
+
return s - first_sym;
|
472
|
+
}
|
473
|
+
|
474
|
+
static Bool
|
475
|
+
da_fit_symbols (DArray *d,
|
476
|
+
TrieIndex base,
|
477
|
+
const Symbols *symbols)
|
478
|
+
{
|
479
|
+
int i;
|
480
|
+
|
481
|
+
for (i = 0; i < symbols_num (symbols); i++) {
|
482
|
+
TrieChar sym = symbols_get (symbols, i);
|
483
|
+
|
484
|
+
/* if (base + sym) > TRIE_INDEX_MAX which means it's overflow,
|
485
|
+
* or cell [base + sym] is not free, the symbol is not fit.
|
486
|
+
*/
|
487
|
+
if (base > TRIE_INDEX_MAX - sym || !da_check_free_cell (d, base + sym))
|
488
|
+
return FALSE;
|
489
|
+
}
|
490
|
+
return TRUE;
|
491
|
+
}
|
492
|
+
|
493
|
+
static void
|
494
|
+
da_relocate_base (DArray *d,
|
495
|
+
TrieIndex s,
|
496
|
+
TrieIndex new_base)
|
497
|
+
{
|
498
|
+
TrieIndex old_base;
|
499
|
+
Symbols *symbols;
|
500
|
+
int i;
|
501
|
+
|
502
|
+
old_base = da_get_base (d, s);
|
503
|
+
symbols = da_output_symbols (d, s);
|
504
|
+
|
505
|
+
for (i = 0; i < symbols_num (symbols); i++) {
|
506
|
+
TrieIndex old_next, new_next, old_next_base;
|
507
|
+
|
508
|
+
old_next = old_base + symbols_get (symbols, i);
|
509
|
+
new_next = new_base + symbols_get (symbols, i);
|
510
|
+
old_next_base = da_get_base (d, old_next);
|
511
|
+
|
512
|
+
/* allocate new next node and copy BASE value */
|
513
|
+
da_alloc_cell (d, new_next);
|
514
|
+
da_set_check (d, new_next, s);
|
515
|
+
da_set_base (d, new_next, old_next_base);
|
516
|
+
|
517
|
+
/* old_next node is now moved to new_next
|
518
|
+
* so, all cells belonging to old_next
|
519
|
+
* must be given to new_next
|
520
|
+
*/
|
521
|
+
/* preventing the case of TAIL pointer */
|
522
|
+
if (old_next_base > 0) {
|
523
|
+
TrieIndex c, max_c;
|
524
|
+
|
525
|
+
max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - old_next_base);
|
526
|
+
for (c = 0; c < max_c; c++) {
|
527
|
+
if (da_get_check (d, old_next_base + c) == old_next)
|
528
|
+
da_set_check (d, old_next_base + c, new_next);
|
529
|
+
}
|
530
|
+
}
|
531
|
+
|
532
|
+
/* free old_next node */
|
533
|
+
da_free_cell (d, old_next);
|
534
|
+
}
|
535
|
+
|
536
|
+
symbols_free (symbols);
|
537
|
+
|
538
|
+
/* finally, make BASE[s] point to new_base */
|
539
|
+
da_set_base (d, s, new_base);
|
540
|
+
}
|
541
|
+
|
542
|
+
static Bool
|
543
|
+
da_extend_pool (DArray *d,
|
544
|
+
TrieIndex to_index)
|
545
|
+
{
|
546
|
+
TrieIndex new_begin;
|
547
|
+
TrieIndex i;
|
548
|
+
TrieIndex free_tail;
|
549
|
+
|
550
|
+
if (to_index <= 0 || TRIE_INDEX_MAX <= to_index)
|
551
|
+
return FALSE;
|
552
|
+
|
553
|
+
if (to_index < d->num_cells)
|
554
|
+
return TRUE;
|
555
|
+
|
556
|
+
d->cells = (DACell *) realloc (d->cells, (to_index + 1) * sizeof (DACell));
|
557
|
+
new_begin = d->num_cells;
|
558
|
+
d->num_cells = to_index + 1;
|
559
|
+
|
560
|
+
/* initialize new free list */
|
561
|
+
for (i = new_begin; i < to_index; i++) {
|
562
|
+
da_set_check (d, i, -(i + 1));
|
563
|
+
da_set_base (d, i + 1, -i);
|
564
|
+
}
|
565
|
+
|
566
|
+
/* merge the new circular list to the old */
|
567
|
+
free_tail = -da_get_base (d, da_get_free_list (d));
|
568
|
+
da_set_check (d, free_tail, -new_begin);
|
569
|
+
da_set_base (d, new_begin, -free_tail);
|
570
|
+
da_set_check (d, to_index, -da_get_free_list (d));
|
571
|
+
da_set_base (d, da_get_free_list (d), -to_index);
|
572
|
+
|
573
|
+
/* update header cell */
|
574
|
+
d->cells[0].check = d->num_cells;
|
575
|
+
|
576
|
+
return TRUE;
|
577
|
+
}
|
578
|
+
|
579
|
+
void
|
580
|
+
da_prune (DArray *d, TrieIndex s)
|
581
|
+
{
|
582
|
+
da_prune_upto (d, da_get_root (d), s);
|
583
|
+
}
|
584
|
+
|
585
|
+
void
|
586
|
+
da_prune_upto (DArray *d, TrieIndex p, TrieIndex s)
|
587
|
+
{
|
588
|
+
while (p != s && !da_has_children (d, s)) {
|
589
|
+
TrieIndex parent;
|
590
|
+
|
591
|
+
parent = da_get_check (d, s);
|
592
|
+
da_free_cell (d, s);
|
593
|
+
s = parent;
|
594
|
+
}
|
595
|
+
}
|
596
|
+
|
597
|
+
static void
|
598
|
+
da_alloc_cell (DArray *d,
|
599
|
+
TrieIndex cell)
|
600
|
+
{
|
601
|
+
TrieIndex prev, next;
|
602
|
+
|
603
|
+
prev = -da_get_base (d, cell);
|
604
|
+
next = -da_get_check (d, cell);
|
605
|
+
|
606
|
+
/* remove the cell from free list */
|
607
|
+
da_set_check (d, prev, -next);
|
608
|
+
da_set_base (d, next, -prev);
|
609
|
+
}
|
610
|
+
|
611
|
+
static void
|
612
|
+
da_free_cell (DArray *d,
|
613
|
+
TrieIndex cell)
|
614
|
+
{
|
615
|
+
TrieIndex i, prev;
|
616
|
+
|
617
|
+
/* find insertion point */
|
618
|
+
i = -da_get_check (d, da_get_free_list (d));
|
619
|
+
while (i != da_get_free_list (d) && i < cell)
|
620
|
+
i = -da_get_check (d, i);
|
621
|
+
|
622
|
+
prev = -da_get_base (d, i);
|
623
|
+
|
624
|
+
/* insert cell before i */
|
625
|
+
da_set_check (d, cell, -i);
|
626
|
+
da_set_base (d, cell, -prev);
|
627
|
+
da_set_check (d, prev, -cell);
|
628
|
+
da_set_base (d, i, -cell);
|
629
|
+
}
|
630
|
+
|
631
|
+
Bool
|
632
|
+
da_enumerate (const DArray *d, DAEnumFunc enum_func, void *user_data)
|
633
|
+
{
|
634
|
+
return da_enumerate_recursive (d, da_get_root (d), enum_func, user_data);
|
635
|
+
}
|
636
|
+
|
637
|
+
static Bool
|
638
|
+
da_enumerate_recursive (const DArray *d,
|
639
|
+
TrieIndex state,
|
640
|
+
DAEnumFunc enum_func,
|
641
|
+
void *user_data)
|
642
|
+
{
|
643
|
+
Bool ret;
|
644
|
+
TrieIndex base;
|
645
|
+
|
646
|
+
base = da_get_base (d, state);
|
647
|
+
|
648
|
+
if (base < 0) {
|
649
|
+
TrieChar *key;
|
650
|
+
|
651
|
+
key = da_get_state_key (d, state);
|
652
|
+
ret = (*enum_func) (key, state, user_data);
|
653
|
+
free (key);
|
654
|
+
} else {
|
655
|
+
Symbols *symbols;
|
656
|
+
int i;
|
657
|
+
|
658
|
+
ret = TRUE;
|
659
|
+
symbols = da_output_symbols (d, state);
|
660
|
+
for (i = 0; ret && i < symbols_num (symbols); i++) {
|
661
|
+
ret = da_enumerate_recursive (d, base + symbols_get (symbols, i),
|
662
|
+
enum_func, user_data);
|
663
|
+
}
|
664
|
+
|
665
|
+
symbols_free (symbols);
|
666
|
+
}
|
667
|
+
|
668
|
+
return ret;
|
669
|
+
}
|
670
|
+
|
671
|
+
/*
|
672
|
+
vi:ts=4:ai:expandtab
|
673
|
+
*/
|