tyler-trie 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.yml +4 -0
- data/ext/libdatrie/AUTHORS +1 -0
- data/ext/libdatrie/COPYING +510 -0
- data/ext/libdatrie/ChangeLog +410 -0
- data/ext/libdatrie/INSTALL +236 -0
- data/ext/libdatrie/Makefile.am +5 -0
- data/ext/libdatrie/Makefile.in +661 -0
- data/ext/libdatrie/NEWS +27 -0
- data/ext/libdatrie/README +32 -0
- data/ext/libdatrie/aclocal.m4 +7431 -0
- data/ext/libdatrie/config.guess +1516 -0
- data/ext/libdatrie/config.h.in +74 -0
- data/ext/libdatrie/config.sub +1626 -0
- data/ext/libdatrie/configure +22008 -0
- data/ext/libdatrie/configure.ac +71 -0
- data/ext/libdatrie/datrie.pc.in +11 -0
- data/ext/libdatrie/datrie/Makefile.am +35 -0
- data/ext/libdatrie/datrie/Makefile.in +522 -0
- data/ext/libdatrie/datrie/alpha-map.c +170 -0
- data/ext/libdatrie/datrie/alpha-map.h +36 -0
- data/ext/libdatrie/datrie/darray.c +674 -0
- data/ext/libdatrie/datrie/darray.h +229 -0
- data/ext/libdatrie/datrie/fileutils.c +151 -0
- data/ext/libdatrie/datrie/fileutils.h +36 -0
- data/ext/libdatrie/datrie/libdatrie.def +31 -0
- data/ext/libdatrie/datrie/sb-trie.c +331 -0
- data/ext/libdatrie/datrie/sb-trie.h +279 -0
- data/ext/libdatrie/datrie/tail.c +344 -0
- data/ext/libdatrie/datrie/tail.h +200 -0
- data/ext/libdatrie/datrie/trie-private.h +31 -0
- data/ext/libdatrie/datrie/trie.c +413 -0
- data/ext/libdatrie/datrie/trie.h +270 -0
- data/ext/libdatrie/datrie/triedefs.h +63 -0
- data/ext/libdatrie/datrie/typedefs.h +113 -0
- data/ext/libdatrie/depcomp +530 -0
- data/ext/libdatrie/doc/Doxyfile.in +244 -0
- data/ext/libdatrie/doc/Makefile.am +29 -0
- data/ext/libdatrie/doc/Makefile.in +352 -0
- data/ext/libdatrie/install-sh +323 -0
- data/ext/libdatrie/ltmain.sh +6938 -0
- data/ext/libdatrie/man/Makefile.am +4 -0
- data/ext/libdatrie/man/Makefile.in +381 -0
- data/ext/libdatrie/man/trietool.1 +107 -0
- data/ext/libdatrie/missing +360 -0
- data/ext/libdatrie/tools/Makefile.am +7 -0
- data/ext/libdatrie/tools/Makefile.in +460 -0
- data/ext/libdatrie/tools/trietool.c +308 -0
- data/ext/trie/extconf.rb +12 -0
- data/ext/trie/trie.c +174 -0
- data/lib/trie.rb +1 -0
- data/spec/test-trie/README +1 -0
- data/spec/trie_spec.rb +79 -0
- metadata +139 -0
@@ -0,0 +1,170 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* alpha-map.c - map between character codes and trie alphabet
|
4
|
+
* Created: 2006-08-19
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <ctype.h>
|
9
|
+
#include <string.h>
|
10
|
+
#include <stdlib.h>
|
11
|
+
#include <stdio.h>
|
12
|
+
|
13
|
+
#include "alpha-map.h"
|
14
|
+
#include "fileutils.h"
|
15
|
+
|
16
|
+
/*-----------------------------------*
|
17
|
+
* PRIVATE METHODS DECLARATIONS *
|
18
|
+
*-----------------------------------*/
|
19
|
+
/* Allocate an AlphaMap that holds no ranges yet; takes no arguments. */
static AlphaMap *   alpha_map_new (void);
|
20
|
+
|
21
|
+
/*------------------------------*
|
22
|
+
* PRIVATE DATA DEFINITIONS *
|
23
|
+
*------------------------------*/
|
24
|
+
|
25
|
+
typedef struct _AlphaRange {
|
26
|
+
struct _AlphaRange *next;
|
27
|
+
|
28
|
+
UniChar begin;
|
29
|
+
UniChar end;
|
30
|
+
} AlphaRange;
|
31
|
+
|
32
|
+
struct _AlphaMap {
|
33
|
+
AlphaRange *first_range;
|
34
|
+
AlphaRange *last_range;
|
35
|
+
};
|
36
|
+
|
37
|
+
/*-----------------------------*
|
38
|
+
* METHODS IMPLEMENTATIONS *
|
39
|
+
*-----------------------------*/
|
40
|
+
|
41
|
+
AlphaMap *
|
42
|
+
alpha_map_open (const char *path, const char *name, const char *ext)
|
43
|
+
{
|
44
|
+
FILE *file;
|
45
|
+
char line[256];
|
46
|
+
AlphaMap *alpha_map;
|
47
|
+
|
48
|
+
file = file_open (path, name, ext, TRIE_IO_READ);
|
49
|
+
if (!file)
|
50
|
+
return NULL;
|
51
|
+
|
52
|
+
/* prepare data */
|
53
|
+
alpha_map = alpha_map_new ();
|
54
|
+
if (!alpha_map)
|
55
|
+
goto exit1;
|
56
|
+
|
57
|
+
/* read character ranges */
|
58
|
+
while (fgets (line, sizeof line, file)) {
|
59
|
+
AlphaRange *range;
|
60
|
+
int b, e;
|
61
|
+
|
62
|
+
range = (AlphaRange *) malloc (sizeof (AlphaRange));
|
63
|
+
|
64
|
+
/* read the range
|
65
|
+
* format: [b,e]
|
66
|
+
* where: b = begin char, e = end char; both in hex values
|
67
|
+
*/
|
68
|
+
if (sscanf (line, " [ %x , %x ] ", &b, &e) != 2)
|
69
|
+
continue;
|
70
|
+
if (b > e) {
|
71
|
+
fprintf (stderr, "Range begin (%x) > range end (%x)\n", b, e);
|
72
|
+
free (range);
|
73
|
+
continue;
|
74
|
+
}
|
75
|
+
range->begin = b;
|
76
|
+
range->end = e;
|
77
|
+
|
78
|
+
/* append it to list of ranges */
|
79
|
+
range->next = NULL;
|
80
|
+
if (alpha_map->last_range)
|
81
|
+
alpha_map->last_range->next = range;
|
82
|
+
else
|
83
|
+
alpha_map->first_range = range;
|
84
|
+
alpha_map->last_range = range;
|
85
|
+
}
|
86
|
+
|
87
|
+
fclose (file);
|
88
|
+
return alpha_map;
|
89
|
+
|
90
|
+
exit1:
|
91
|
+
fclose (file);
|
92
|
+
return NULL;
|
93
|
+
}
|
94
|
+
|
95
|
+
static AlphaMap *
|
96
|
+
alpha_map_new ()
|
97
|
+
{
|
98
|
+
AlphaMap *alpha_map;
|
99
|
+
|
100
|
+
alpha_map = (AlphaMap *) malloc (sizeof (AlphaMap));
|
101
|
+
if (!alpha_map)
|
102
|
+
return NULL;
|
103
|
+
|
104
|
+
alpha_map->first_range = alpha_map->last_range = NULL;
|
105
|
+
|
106
|
+
return alpha_map;
|
107
|
+
}
|
108
|
+
|
109
|
+
void
|
110
|
+
alpha_map_free (AlphaMap *alpha_map)
|
111
|
+
{
|
112
|
+
AlphaRange *p, *q;
|
113
|
+
|
114
|
+
p = alpha_map->first_range;
|
115
|
+
while (p) {
|
116
|
+
q = p->next;
|
117
|
+
free (p);
|
118
|
+
p = q;
|
119
|
+
}
|
120
|
+
|
121
|
+
free (alpha_map);
|
122
|
+
}
|
123
|
+
|
124
|
+
TrieChar
|
125
|
+
alpha_map_char_to_alphabet (const AlphaMap *alpha_map, UniChar uc)
|
126
|
+
{
|
127
|
+
TrieChar alpha_begin;
|
128
|
+
AlphaRange *range;
|
129
|
+
|
130
|
+
if (uc == 0)
|
131
|
+
return 0;
|
132
|
+
|
133
|
+
alpha_begin = 1;
|
134
|
+
for (range = alpha_map->first_range;
|
135
|
+
range && (uc < range->begin || range->end < uc);
|
136
|
+
range = range->next)
|
137
|
+
{
|
138
|
+
alpha_begin += range->end - range->begin + 1;
|
139
|
+
}
|
140
|
+
if (range)
|
141
|
+
return alpha_begin + (uc - range->begin);
|
142
|
+
|
143
|
+
return TRIE_CHAR_MAX;
|
144
|
+
}
|
145
|
+
|
146
|
+
UniChar
|
147
|
+
alpha_map_alphabet_to_char (const AlphaMap *alpha_map, TrieChar tc)
|
148
|
+
{
|
149
|
+
TrieChar alpha_begin;
|
150
|
+
AlphaRange *range;
|
151
|
+
|
152
|
+
if (tc == 0)
|
153
|
+
return 0;
|
154
|
+
|
155
|
+
alpha_begin = 1;
|
156
|
+
for (range = alpha_map->first_range;
|
157
|
+
range && alpha_begin + (range->end - range->begin) < tc;
|
158
|
+
range = range->next)
|
159
|
+
{
|
160
|
+
alpha_begin += range->end - range->begin + 1;
|
161
|
+
}
|
162
|
+
if (range)
|
163
|
+
return range->begin + (tc - alpha_begin);
|
164
|
+
|
165
|
+
return UNI_CHAR_ERROR;
|
166
|
+
}
|
167
|
+
|
168
|
+
/*
|
169
|
+
vi:ts=4:ai:expandtab
|
170
|
+
*/
|
@@ -0,0 +1,36 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* alpha-map.h - map between character codes and trie alphabet
|
4
|
+
* Created: 2006-08-19
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef __ALPHA_MAP_H
|
9
|
+
#define __ALPHA_MAP_H
|
10
|
+
|
11
|
+
#include "typedefs.h"
|
12
|
+
#include "triedefs.h"
|
13
|
+
|
14
|
+
/* Character code type that the alphabet map translates to and from TrieChar. */
typedef uint16  UniChar;
|
15
|
+
|
16
|
+
/*
 * Error value for UniChar: all bits set.
 * The outer cast matters: ~ promotes its operand to int, so without it the
 * macro would be the int -1 and never compare equal to a UniChar value.
 */
#define UNI_CHAR_ERROR   ((UniChar) ~(UniChar) 0)
|
17
|
+
|
18
|
+
typedef struct _AlphaMap AlphaMap;
|
19
|
+
|
20
|
+
AlphaMap * alpha_map_open (const char *path,
|
21
|
+
const char *name,
|
22
|
+
const char *ext);
|
23
|
+
|
24
|
+
void alpha_map_free (AlphaMap *alpha_map);
|
25
|
+
|
26
|
+
TrieChar alpha_map_char_to_alphabet (const AlphaMap *alpha_map, UniChar uc);
|
27
|
+
|
28
|
+
UniChar alpha_map_alphabet_to_char (const AlphaMap *alpha_map, TrieChar tc);
|
29
|
+
|
30
|
+
|
31
|
+
#endif /* __ALPHA_MAP_H */
|
32
|
+
|
33
|
+
|
34
|
+
/*
|
35
|
+
vi:ts=4:ai:expandtab
|
36
|
+
*/
|
@@ -0,0 +1,674 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* darray.c - Double-array trie structure
|
4
|
+
* Created: 2006-08-13
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <string.h>
|
9
|
+
#include <stdlib.h>
|
10
|
+
#include <stdio.h>
|
11
|
+
|
12
|
+
#include "trie-private.h"
|
13
|
+
#include "darray.h"
|
14
|
+
#include "fileutils.h"
|
15
|
+
|
16
|
+
/*----------------------------------*
|
17
|
+
* INTERNAL TYPES DECLARATIONS *
|
18
|
+
*----------------------------------*/
|
19
|
+
|
20
|
+
/*
|
21
|
+
* Type for keeping intermediate values of TrieIndex.
|
22
|
+
* Must be bigger than TrieIndex, so that overflow can be easily detected.
|
23
|
+
*/
|
24
|
+
typedef int32 TrieIndexInt;
|
25
|
+
|
26
|
+
typedef struct _Symbols Symbols;
|
27
|
+
|
28
|
+
struct _Symbols {
|
29
|
+
short num_symbols;
|
30
|
+
TrieChar symbols[256];
|
31
|
+
};
|
32
|
+
|
33
|
+
static Symbols * symbols_new ();
|
34
|
+
static void symbols_free (Symbols *syms);
|
35
|
+
static void symbols_add (Symbols *syms, TrieChar c);
|
36
|
+
|
37
|
+
/* Number of symbols currently stored in set s. */
#define symbols_num(s)          ((s)->num_symbols)

/* The i-th symbol of set s (no bounds checking). */
#define symbols_get(s,i)        ((s)->symbols[(i)])

/*
 * Append c to set s without searching for its sorted position.
 * Only suitable when the caller adds symbols in ascending order.
 */
#define symbols_add_fast(s,c)   ((s)->symbols[(s)->num_symbols++] = (c))
|
40
|
+
|
41
|
+
/*-----------------------------------*
|
42
|
+
* PRIVATE METHODS DECLARATIONS *
|
43
|
+
*-----------------------------------*/
|
44
|
+
|
45
|
+
#define da_get_free_list(d) (1)
|
46
|
+
|
47
|
+
static Bool da_check_free_cell (DArray *d,
|
48
|
+
TrieIndexInt s);
|
49
|
+
|
50
|
+
static Bool da_has_children (DArray *d,
|
51
|
+
TrieIndex s);
|
52
|
+
|
53
|
+
static Symbols * da_output_symbols (DArray *d,
|
54
|
+
TrieIndex s);
|
55
|
+
|
56
|
+
static TrieChar * da_get_state_key (DArray *d,
|
57
|
+
TrieIndex state);
|
58
|
+
|
59
|
+
static TrieIndex da_find_free_base (DArray *d,
|
60
|
+
const Symbols *symbols);
|
61
|
+
|
62
|
+
static Bool da_fit_symbols (DArray *d,
|
63
|
+
TrieIndex base,
|
64
|
+
const Symbols *symbols);
|
65
|
+
|
66
|
+
static void da_relocate_base (DArray *d,
|
67
|
+
TrieIndex s,
|
68
|
+
TrieIndex new_base);
|
69
|
+
|
70
|
+
static Bool da_extend_pool (DArray *d,
|
71
|
+
TrieIndexInt to_index);
|
72
|
+
|
73
|
+
static void da_alloc_cell (DArray *d,
|
74
|
+
TrieIndex cell);
|
75
|
+
|
76
|
+
static void da_free_cell (DArray *d,
|
77
|
+
TrieIndex cell);
|
78
|
+
|
79
|
+
static Bool da_enumerate_recursive (DArray *d,
|
80
|
+
TrieIndex state,
|
81
|
+
DAEnumFunc enum_func,
|
82
|
+
void *user_data);
|
83
|
+
|
84
|
+
/* ==================== BEGIN IMPLEMENTATION PART ==================== */
|
85
|
+
|
86
|
+
/*------------------------------------*
|
87
|
+
* INTERNAL TYPES IMPLEMENTATIONS *
|
88
|
+
*------------------------------------*/
|
89
|
+
|
90
|
+
static Symbols *
|
91
|
+
symbols_new ()
|
92
|
+
{
|
93
|
+
Symbols *syms;
|
94
|
+
|
95
|
+
syms = (Symbols *) malloc (sizeof (Symbols));
|
96
|
+
|
97
|
+
if (!syms)
|
98
|
+
return NULL;
|
99
|
+
|
100
|
+
syms->num_symbols = 0;
|
101
|
+
|
102
|
+
return syms;
|
103
|
+
}
|
104
|
+
|
105
|
+
static void
|
106
|
+
symbols_free (Symbols *syms)
|
107
|
+
{
|
108
|
+
free (syms);
|
109
|
+
}
|
110
|
+
|
111
|
+
static void
|
112
|
+
symbols_add (Symbols *syms, TrieChar c)
|
113
|
+
{
|
114
|
+
short lower, upper;
|
115
|
+
|
116
|
+
lower = 0;
|
117
|
+
upper = syms->num_symbols;
|
118
|
+
while (lower < upper) {
|
119
|
+
short middle;
|
120
|
+
|
121
|
+
middle = (lower + upper)/2;
|
122
|
+
if (c > syms->symbols[middle])
|
123
|
+
lower = middle + 1;
|
124
|
+
else if (c < syms->symbols[middle])
|
125
|
+
upper = middle;
|
126
|
+
else
|
127
|
+
return;
|
128
|
+
}
|
129
|
+
if (lower < syms->num_symbols) {
|
130
|
+
memmove (syms->symbols + lower + 1, syms->symbols + lower,
|
131
|
+
syms->num_symbols - lower);
|
132
|
+
}
|
133
|
+
syms->symbols[lower] = c;
|
134
|
+
syms->num_symbols++;
|
135
|
+
}
|
136
|
+
|
137
|
+
/*------------------------------*
|
138
|
+
* PRIVATE DATA DEFINITIONS *
|
139
|
+
*------------------------------*/
|
140
|
+
|
141
|
+
typedef struct {
|
142
|
+
TrieIndex base;
|
143
|
+
TrieIndex check;
|
144
|
+
} DACell;
|
145
|
+
|
146
|
+
struct _DArray {
|
147
|
+
TrieIndex num_cells;
|
148
|
+
DACell *cells;
|
149
|
+
|
150
|
+
FILE *file;
|
151
|
+
Bool is_dirty;
|
152
|
+
};
|
153
|
+
|
154
|
+
/*-----------------------------*
|
155
|
+
* METHODS IMPLEMENTATIONS *
|
156
|
+
*-----------------------------*/
|
157
|
+
|
158
|
+
#define DA_SIGNATURE 0xDAFD
|
159
|
+
|
160
|
+
/* DA Header:
|
161
|
+
* - Cell 0: SIGNATURE, 1
|
162
|
+
* - Cell 1: free circular-list pointers
|
163
|
+
* - Cell 2: root node
|
164
|
+
* - Cell 3: DA pool begin
|
165
|
+
*/
|
166
|
+
#define DA_POOL_BEGIN 3
|
167
|
+
|
168
|
+
DArray *
|
169
|
+
da_open (const char *path, const char *name, TrieIOMode mode)
|
170
|
+
{
|
171
|
+
DArray *d;
|
172
|
+
TrieIndex i;
|
173
|
+
|
174
|
+
d = (DArray *) malloc (sizeof (DArray));
|
175
|
+
|
176
|
+
d->file = file_open (path, name, ".br", mode);
|
177
|
+
if (!d->file)
|
178
|
+
goto exit1;
|
179
|
+
|
180
|
+
/* init cells data */
|
181
|
+
d->num_cells = file_length (d->file) / 4;
|
182
|
+
if (0 == d->num_cells) {
|
183
|
+
d->num_cells = DA_POOL_BEGIN;
|
184
|
+
d->cells = (DACell *) malloc (d->num_cells * sizeof (DACell));
|
185
|
+
if (!d->cells)
|
186
|
+
goto exit2;
|
187
|
+
d->cells[0].base = DA_SIGNATURE;
|
188
|
+
d->cells[0].check = 1;
|
189
|
+
d->cells[1].base = -1;
|
190
|
+
d->cells[1].check = -1;
|
191
|
+
d->cells[2].base = DA_POOL_BEGIN;
|
192
|
+
d->cells[2].check = 0;
|
193
|
+
d->is_dirty = TRUE;
|
194
|
+
} else {
|
195
|
+
d->cells = (DACell *) malloc (d->num_cells * sizeof (DACell));
|
196
|
+
if (!d->cells)
|
197
|
+
goto exit2;
|
198
|
+
file_read_int16 (d->file, &d->cells[0].base);
|
199
|
+
file_read_int16 (d->file, &d->cells[0].check);
|
200
|
+
if (DA_SIGNATURE != (uint16) d->cells[0].base)
|
201
|
+
goto exit3;
|
202
|
+
for (i = 1; i < d->num_cells; i++) {
|
203
|
+
file_read_int16 (d->file, &d->cells[i].base);
|
204
|
+
file_read_int16 (d->file, &d->cells[i].check);
|
205
|
+
}
|
206
|
+
d->is_dirty = FALSE;
|
207
|
+
}
|
208
|
+
|
209
|
+
return d;
|
210
|
+
|
211
|
+
exit3:
|
212
|
+
free (d->cells);
|
213
|
+
exit2:
|
214
|
+
fclose (d->file);
|
215
|
+
exit1:
|
216
|
+
free (d);
|
217
|
+
return NULL;
|
218
|
+
}
|
219
|
+
|
220
|
+
int
|
221
|
+
da_close (DArray *d)
|
222
|
+
{
|
223
|
+
int ret;
|
224
|
+
|
225
|
+
if (0 != (ret = da_save (d)))
|
226
|
+
return ret;
|
227
|
+
if (0 != (ret = fclose (d->file)))
|
228
|
+
return ret;
|
229
|
+
free (d->cells);
|
230
|
+
free (d);
|
231
|
+
|
232
|
+
return 0;
|
233
|
+
}
|
234
|
+
|
235
|
+
int
|
236
|
+
da_save (DArray *d)
|
237
|
+
{
|
238
|
+
TrieIndex i;
|
239
|
+
|
240
|
+
if (!d->is_dirty)
|
241
|
+
return 0;
|
242
|
+
|
243
|
+
rewind (d->file);
|
244
|
+
for (i = 0; i < d->num_cells; i++) {
|
245
|
+
if (!file_write_int16 (d->file, d->cells[i].base) ||
|
246
|
+
!file_write_int16 (d->file, d->cells[i].check))
|
247
|
+
{
|
248
|
+
return -1;
|
249
|
+
}
|
250
|
+
}
|
251
|
+
d->is_dirty = FALSE;
|
252
|
+
|
253
|
+
return 0;
|
254
|
+
}
|
255
|
+
|
256
|
+
|
257
|
+
TrieIndex
|
258
|
+
da_get_root (const DArray *d)
|
259
|
+
{
|
260
|
+
/* can be calculated value for multi-index trie */
|
261
|
+
return 2;
|
262
|
+
}
|
263
|
+
|
264
|
+
|
265
|
+
TrieIndex
|
266
|
+
da_get_base (const DArray *d, TrieIndex s)
|
267
|
+
{
|
268
|
+
return (s < d->num_cells) ? d->cells[s].base : TRIE_INDEX_ERROR;
|
269
|
+
}
|
270
|
+
|
271
|
+
TrieIndex
|
272
|
+
da_get_check (const DArray *d, TrieIndex s)
|
273
|
+
{
|
274
|
+
return (s < d->num_cells) ? d->cells[s].check : TRIE_INDEX_ERROR;
|
275
|
+
}
|
276
|
+
|
277
|
+
|
278
|
+
void
|
279
|
+
da_set_base (DArray *d, TrieIndex s, TrieIndex val)
|
280
|
+
{
|
281
|
+
if (s < d->num_cells) {
|
282
|
+
d->cells[s].base = val;
|
283
|
+
d->is_dirty = TRUE;
|
284
|
+
}
|
285
|
+
}
|
286
|
+
|
287
|
+
void
|
288
|
+
da_set_check (DArray *d, TrieIndex s, TrieIndex val)
|
289
|
+
{
|
290
|
+
if (s < d->num_cells) {
|
291
|
+
d->cells[s].check = val;
|
292
|
+
d->is_dirty = TRUE;
|
293
|
+
}
|
294
|
+
}
|
295
|
+
|
296
|
+
Bool
|
297
|
+
da_walk (DArray *d, TrieIndex *s, TrieChar c)
|
298
|
+
{
|
299
|
+
TrieIndex next;
|
300
|
+
|
301
|
+
next = da_get_base (d, *s) + c;
|
302
|
+
if (da_get_check (d, next) == *s) {
|
303
|
+
*s = next;
|
304
|
+
return TRUE;
|
305
|
+
}
|
306
|
+
return FALSE;
|
307
|
+
}
|
308
|
+
|
309
|
+
TrieIndex
|
310
|
+
da_insert_branch (DArray *d, TrieIndex s, TrieChar c)
|
311
|
+
{
|
312
|
+
TrieIndexInt base, next;
|
313
|
+
|
314
|
+
base = da_get_base (d, s);
|
315
|
+
|
316
|
+
if (base > 0) {
|
317
|
+
next = da_get_base (d, s) + c;
|
318
|
+
|
319
|
+
/* if already there, do not actually insert */
|
320
|
+
if (da_get_check (d, next) == s)
|
321
|
+
return next;
|
322
|
+
|
323
|
+
if (!da_check_free_cell (d, next)) {
|
324
|
+
Symbols *symbols;
|
325
|
+
TrieIndex new_base;
|
326
|
+
|
327
|
+
/* relocate BASE[s] */
|
328
|
+
symbols = da_output_symbols (d, s);
|
329
|
+
symbols_add (symbols, c);
|
330
|
+
new_base = da_find_free_base (d, symbols);
|
331
|
+
symbols_free (symbols);
|
332
|
+
|
333
|
+
if (TRIE_INDEX_ERROR == new_base)
|
334
|
+
return TRIE_INDEX_ERROR;
|
335
|
+
|
336
|
+
da_relocate_base (d, s, new_base);
|
337
|
+
next = new_base + c;
|
338
|
+
}
|
339
|
+
} else {
|
340
|
+
Symbols *symbols;
|
341
|
+
TrieIndex new_base;
|
342
|
+
|
343
|
+
symbols = symbols_new ();
|
344
|
+
symbols_add (symbols, c);
|
345
|
+
new_base = da_find_free_base (d, symbols);
|
346
|
+
symbols_free (symbols);
|
347
|
+
|
348
|
+
if (TRIE_INDEX_ERROR == new_base)
|
349
|
+
return TRIE_INDEX_ERROR;
|
350
|
+
|
351
|
+
da_set_base (d, s, new_base);
|
352
|
+
next = new_base + c;
|
353
|
+
}
|
354
|
+
da_alloc_cell (d, next);
|
355
|
+
da_set_check (d, next, s);
|
356
|
+
|
357
|
+
return next;
|
358
|
+
}
|
359
|
+
|
360
|
+
static Bool
|
361
|
+
da_check_free_cell (DArray *d,
|
362
|
+
TrieIndexInt s)
|
363
|
+
{
|
364
|
+
return da_extend_pool (d, s) && da_get_check (d, s) < 0;
|
365
|
+
}
|
366
|
+
|
367
|
+
static Bool
|
368
|
+
da_has_children (DArray *d,
|
369
|
+
TrieIndex s)
|
370
|
+
{
|
371
|
+
TrieIndex base;
|
372
|
+
uint16 c, max_c;
|
373
|
+
|
374
|
+
base = da_get_base (d, s);
|
375
|
+
if (TRIE_INDEX_ERROR == base || base < 0)
|
376
|
+
return FALSE;
|
377
|
+
|
378
|
+
max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - base);
|
379
|
+
for (c = 0; c < max_c; c++) {
|
380
|
+
if (da_get_check (d, base + c) == s)
|
381
|
+
return TRUE;
|
382
|
+
}
|
383
|
+
|
384
|
+
return FALSE;
|
385
|
+
}
|
386
|
+
|
387
|
+
static Symbols *
|
388
|
+
da_output_symbols (DArray *d,
|
389
|
+
TrieIndex s)
|
390
|
+
{
|
391
|
+
Symbols *syms;
|
392
|
+
TrieIndex base;
|
393
|
+
uint16 c, max_c;
|
394
|
+
|
395
|
+
syms = symbols_new ();
|
396
|
+
|
397
|
+
base = da_get_base (d, s);
|
398
|
+
max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - base);
|
399
|
+
for (c = 0; c < max_c; c++) {
|
400
|
+
if (da_get_check (d, base + c) == s)
|
401
|
+
symbols_add_fast (syms, (TrieChar) c);
|
402
|
+
}
|
403
|
+
|
404
|
+
return syms;
|
405
|
+
}
|
406
|
+
|
407
|
+
static TrieChar *
|
408
|
+
da_get_state_key (DArray *d,
|
409
|
+
TrieIndex state)
|
410
|
+
{
|
411
|
+
TrieChar *key;
|
412
|
+
int key_size, key_length;
|
413
|
+
int i;
|
414
|
+
|
415
|
+
key_size = 20;
|
416
|
+
key_length = 0;
|
417
|
+
key = (TrieChar *) malloc (key_size);
|
418
|
+
|
419
|
+
/* trace back to root */
|
420
|
+
while (da_get_root (d) != state) {
|
421
|
+
TrieIndex parent;
|
422
|
+
|
423
|
+
if (key_length + 1 >= key_size) {
|
424
|
+
key_size += 20;
|
425
|
+
key = (TrieChar *) realloc (key, key_size);
|
426
|
+
}
|
427
|
+
parent = da_get_check (d, state);
|
428
|
+
key[key_length++] = (TrieChar) (state - da_get_base (d, parent));
|
429
|
+
state = parent;
|
430
|
+
}
|
431
|
+
key[key_length] = '\0';
|
432
|
+
|
433
|
+
/* reverse the string */
|
434
|
+
for (i = 0; i < --key_length; i++) {
|
435
|
+
TrieChar temp;
|
436
|
+
|
437
|
+
temp = key[i];
|
438
|
+
key[i] = key[key_length];
|
439
|
+
key[key_length] = temp;
|
440
|
+
}
|
441
|
+
|
442
|
+
return key;
|
443
|
+
}
|
444
|
+
|
445
|
+
static TrieIndex
|
446
|
+
da_find_free_base (DArray *d,
|
447
|
+
const Symbols *symbols)
|
448
|
+
{
|
449
|
+
TrieChar first_sym;
|
450
|
+
TrieIndexInt s;
|
451
|
+
|
452
|
+
/* find first free cell that is beyond the first symbol */
|
453
|
+
first_sym = symbols_get (symbols, 0);
|
454
|
+
s = -da_get_check (d, da_get_free_list (d));
|
455
|
+
while (s != da_get_free_list (d)
|
456
|
+
&& s < (TrieIndex) first_sym + DA_POOL_BEGIN)
|
457
|
+
{
|
458
|
+
s = -da_get_check (d, s);
|
459
|
+
}
|
460
|
+
if (s == da_get_free_list (d)) {
|
461
|
+
for (s = first_sym + DA_POOL_BEGIN; ; ++s) {
|
462
|
+
if (!da_extend_pool (d, s))
|
463
|
+
return TRIE_INDEX_ERROR;
|
464
|
+
if (da_get_check (d, s) < 0)
|
465
|
+
break;
|
466
|
+
}
|
467
|
+
}
|
468
|
+
|
469
|
+
/* search for next free cell that fits the symbols set */
|
470
|
+
while (!da_fit_symbols (d, s - first_sym, symbols)) {
|
471
|
+
/* extend pool before getting exhausted */
|
472
|
+
if (-da_get_check (d, s) == da_get_free_list (d)) {
|
473
|
+
if (!da_extend_pool (d, d->num_cells))
|
474
|
+
return TRIE_INDEX_ERROR;
|
475
|
+
}
|
476
|
+
|
477
|
+
s = -da_get_check (d, s);
|
478
|
+
}
|
479
|
+
|
480
|
+
return s - first_sym;
|
481
|
+
}
|
482
|
+
|
483
|
+
static Bool
|
484
|
+
da_fit_symbols (DArray *d,
|
485
|
+
TrieIndex base,
|
486
|
+
const Symbols *symbols)
|
487
|
+
{
|
488
|
+
int i;
|
489
|
+
|
490
|
+
for (i = 0; i < symbols_num (symbols); i++) {
|
491
|
+
if (!da_check_free_cell (d, base + symbols_get (symbols, i)))
|
492
|
+
return FALSE;
|
493
|
+
}
|
494
|
+
return TRUE;
|
495
|
+
}
|
496
|
+
|
497
|
+
static void
|
498
|
+
da_relocate_base (DArray *d,
|
499
|
+
TrieIndex s,
|
500
|
+
TrieIndex new_base)
|
501
|
+
{
|
502
|
+
TrieIndex old_base;
|
503
|
+
Symbols *symbols;
|
504
|
+
int i;
|
505
|
+
|
506
|
+
old_base = da_get_base (d, s);
|
507
|
+
symbols = da_output_symbols (d, s);
|
508
|
+
|
509
|
+
for (i = 0; i < symbols_num (symbols); i++) {
|
510
|
+
TrieIndex old_next, new_next, old_next_base;
|
511
|
+
|
512
|
+
old_next = old_base + symbols_get (symbols, i);
|
513
|
+
new_next = new_base + symbols_get (symbols, i);
|
514
|
+
old_next_base = da_get_base (d, old_next);
|
515
|
+
|
516
|
+
/* allocate new next node and copy BASE value */
|
517
|
+
da_alloc_cell (d, new_next);
|
518
|
+
da_set_check (d, new_next, s);
|
519
|
+
da_set_base (d, new_next, old_next_base);
|
520
|
+
|
521
|
+
/* old_next node is now moved to new_next
|
522
|
+
* so, all cells belonging to old_next
|
523
|
+
* must be given to new_next
|
524
|
+
*/
|
525
|
+
/* preventing the case of TAIL pointer */
|
526
|
+
if (old_next_base > 0) {
|
527
|
+
uint16 c, max_c;
|
528
|
+
|
529
|
+
max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - old_next_base);
|
530
|
+
for (c = 0; c < max_c; c++) {
|
531
|
+
if (da_get_check (d, old_next_base + c) == old_next)
|
532
|
+
da_set_check (d, old_next_base + c, new_next);
|
533
|
+
}
|
534
|
+
}
|
535
|
+
|
536
|
+
/* free old_next node */
|
537
|
+
da_free_cell (d, old_next);
|
538
|
+
}
|
539
|
+
|
540
|
+
symbols_free (symbols);
|
541
|
+
|
542
|
+
/* finally, make BASE[s] point to new_base */
|
543
|
+
da_set_base (d, s, new_base);
|
544
|
+
}
|
545
|
+
|
546
|
+
static Bool
|
547
|
+
da_extend_pool (DArray *d,
|
548
|
+
TrieIndexInt to_index)
|
549
|
+
{
|
550
|
+
TrieIndex new_begin;
|
551
|
+
TrieIndex i;
|
552
|
+
TrieIndex free_tail;
|
553
|
+
|
554
|
+
if (to_index <= 0 || TRIE_INDEX_MAX <= to_index)
|
555
|
+
return FALSE;
|
556
|
+
|
557
|
+
if (to_index < d->num_cells)
|
558
|
+
return TRUE;
|
559
|
+
|
560
|
+
d->cells = (DACell *) realloc (d->cells, (to_index + 1) * sizeof (DACell));
|
561
|
+
new_begin = d->num_cells;
|
562
|
+
d->num_cells = to_index + 1;
|
563
|
+
|
564
|
+
/* initialize new free list */
|
565
|
+
for (i = new_begin; i < to_index; i++) {
|
566
|
+
da_set_check (d, i, -(i + 1));
|
567
|
+
da_set_base (d, i + 1, -i);
|
568
|
+
}
|
569
|
+
|
570
|
+
/* merge the new circular list to the old */
|
571
|
+
free_tail = -da_get_base (d, da_get_free_list (d));
|
572
|
+
da_set_check (d, free_tail, -new_begin);
|
573
|
+
da_set_base (d, new_begin, -free_tail);
|
574
|
+
da_set_check (d, to_index, -da_get_free_list (d));
|
575
|
+
da_set_base (d, da_get_free_list (d), -to_index);
|
576
|
+
|
577
|
+
return TRUE;
|
578
|
+
}
|
579
|
+
|
580
|
+
void
|
581
|
+
da_prune (DArray *d, TrieIndex s)
|
582
|
+
{
|
583
|
+
da_prune_upto (d, da_get_root (d), s);
|
584
|
+
}
|
585
|
+
|
586
|
+
void
|
587
|
+
da_prune_upto (DArray *d, TrieIndex p, TrieIndex s)
|
588
|
+
{
|
589
|
+
while (p != s && !da_has_children (d, s)) {
|
590
|
+
TrieIndex parent;
|
591
|
+
|
592
|
+
parent = da_get_check (d, s);
|
593
|
+
da_free_cell (d, s);
|
594
|
+
s = parent;
|
595
|
+
}
|
596
|
+
}
|
597
|
+
|
598
|
+
static void
|
599
|
+
da_alloc_cell (DArray *d,
|
600
|
+
TrieIndex cell)
|
601
|
+
{
|
602
|
+
TrieIndex prev, next;
|
603
|
+
|
604
|
+
prev = -da_get_base (d, cell);
|
605
|
+
next = -da_get_check (d, cell);
|
606
|
+
|
607
|
+
/* remove the cell from free list */
|
608
|
+
da_set_check (d, prev, -next);
|
609
|
+
da_set_base (d, next, -prev);
|
610
|
+
}
|
611
|
+
|
612
|
+
static void
|
613
|
+
da_free_cell (DArray *d,
|
614
|
+
TrieIndex cell)
|
615
|
+
{
|
616
|
+
TrieIndex i, prev;
|
617
|
+
|
618
|
+
/* find insertion point */
|
619
|
+
i = -da_get_check (d, da_get_free_list (d));
|
620
|
+
while (i != da_get_free_list (d) && i < cell)
|
621
|
+
i = -da_get_check (d, i);
|
622
|
+
|
623
|
+
prev = -da_get_base (d, i);
|
624
|
+
|
625
|
+
/* insert cell before i */
|
626
|
+
da_set_check (d, cell, -i);
|
627
|
+
da_set_base (d, cell, -prev);
|
628
|
+
da_set_check (d, prev, -cell);
|
629
|
+
da_set_base (d, i, -cell);
|
630
|
+
}
|
631
|
+
|
632
|
+
Bool
|
633
|
+
da_enumerate (DArray *d, DAEnumFunc enum_func, void *user_data)
|
634
|
+
{
|
635
|
+
return da_enumerate_recursive (d, da_get_root (d), enum_func, user_data);
|
636
|
+
}
|
637
|
+
|
638
|
+
static Bool
|
639
|
+
da_enumerate_recursive (DArray *d,
|
640
|
+
TrieIndex state,
|
641
|
+
DAEnumFunc enum_func,
|
642
|
+
void *user_data)
|
643
|
+
{
|
644
|
+
Bool ret;
|
645
|
+
TrieIndex base;
|
646
|
+
|
647
|
+
base = da_get_base (d, state);
|
648
|
+
|
649
|
+
if (base < 0) {
|
650
|
+
TrieChar *key;
|
651
|
+
|
652
|
+
key = da_get_state_key (d, state);
|
653
|
+
ret = (*enum_func) (key, state, user_data);
|
654
|
+
free (key);
|
655
|
+
} else {
|
656
|
+
Symbols *symbols;
|
657
|
+
int i;
|
658
|
+
|
659
|
+
ret = TRUE;
|
660
|
+
symbols = da_output_symbols (d, state);
|
661
|
+
for (i = 0; ret && i < symbols_num (symbols); i++) {
|
662
|
+
ret = da_enumerate_recursive (d, base + symbols_get (symbols, i),
|
663
|
+
enum_func, user_data);
|
664
|
+
}
|
665
|
+
|
666
|
+
symbols_free (symbols);
|
667
|
+
}
|
668
|
+
|
669
|
+
return ret;
|
670
|
+
}
|
671
|
+
|
672
|
+
/*
|
673
|
+
vi:ts=4:ai:expandtab
|
674
|
+
*/
|