tyler-trie 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +4 -0
- data/ext/libdatrie/AUTHORS +1 -0
- data/ext/libdatrie/COPYING +510 -0
- data/ext/libdatrie/ChangeLog +410 -0
- data/ext/libdatrie/INSTALL +236 -0
- data/ext/libdatrie/Makefile.am +5 -0
- data/ext/libdatrie/Makefile.in +661 -0
- data/ext/libdatrie/NEWS +27 -0
- data/ext/libdatrie/README +32 -0
- data/ext/libdatrie/aclocal.m4 +7431 -0
- data/ext/libdatrie/config.guess +1516 -0
- data/ext/libdatrie/config.h.in +74 -0
- data/ext/libdatrie/config.sub +1626 -0
- data/ext/libdatrie/configure +22008 -0
- data/ext/libdatrie/configure.ac +71 -0
- data/ext/libdatrie/datrie.pc.in +11 -0
- data/ext/libdatrie/datrie/Makefile.am +35 -0
- data/ext/libdatrie/datrie/Makefile.in +522 -0
- data/ext/libdatrie/datrie/alpha-map.c +170 -0
- data/ext/libdatrie/datrie/alpha-map.h +36 -0
- data/ext/libdatrie/datrie/darray.c +674 -0
- data/ext/libdatrie/datrie/darray.h +229 -0
- data/ext/libdatrie/datrie/fileutils.c +151 -0
- data/ext/libdatrie/datrie/fileutils.h +36 -0
- data/ext/libdatrie/datrie/libdatrie.def +31 -0
- data/ext/libdatrie/datrie/sb-trie.c +331 -0
- data/ext/libdatrie/datrie/sb-trie.h +279 -0
- data/ext/libdatrie/datrie/tail.c +344 -0
- data/ext/libdatrie/datrie/tail.h +200 -0
- data/ext/libdatrie/datrie/trie-private.h +31 -0
- data/ext/libdatrie/datrie/trie.c +413 -0
- data/ext/libdatrie/datrie/trie.h +270 -0
- data/ext/libdatrie/datrie/triedefs.h +63 -0
- data/ext/libdatrie/datrie/typedefs.h +113 -0
- data/ext/libdatrie/depcomp +530 -0
- data/ext/libdatrie/doc/Doxyfile.in +244 -0
- data/ext/libdatrie/doc/Makefile.am +29 -0
- data/ext/libdatrie/doc/Makefile.in +352 -0
- data/ext/libdatrie/install-sh +323 -0
- data/ext/libdatrie/ltmain.sh +6938 -0
- data/ext/libdatrie/man/Makefile.am +4 -0
- data/ext/libdatrie/man/Makefile.in +381 -0
- data/ext/libdatrie/man/trietool.1 +107 -0
- data/ext/libdatrie/missing +360 -0
- data/ext/libdatrie/tools/Makefile.am +7 -0
- data/ext/libdatrie/tools/Makefile.in +460 -0
- data/ext/libdatrie/tools/trietool.c +308 -0
- data/ext/trie/extconf.rb +12 -0
- data/ext/trie/trie.c +174 -0
- data/lib/trie.rb +1 -0
- data/spec/test-trie/README +1 -0
- data/spec/trie_spec.rb +79 -0
- metadata +139 -0
@@ -0,0 +1,170 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* alpha-map.c - map between character codes and trie alphabet
|
4
|
+
* Created: 2006-08-19
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <ctype.h>
|
9
|
+
#include <string.h>
|
10
|
+
#include <stdlib.h>
|
11
|
+
#include <stdio.h>
|
12
|
+
|
13
|
+
#include "alpha-map.h"
|
14
|
+
#include "fileutils.h"
|
15
|
+
|
16
|
+
/*-----------------------------------*
|
17
|
+
* PRIVATE METHODS DECLARATIONS *
|
18
|
+
*-----------------------------------*/
|
19
|
+
static AlphaMap * alpha_map_new ();
|
20
|
+
|
21
|
+
/*------------------------------*
|
22
|
+
 *   PRIVATE DATA DEFINITIONS   *
|
23
|
+
*------------------------------*/
|
24
|
+
|
25
|
+
typedef struct _AlphaRange {
|
26
|
+
struct _AlphaRange *next;
|
27
|
+
|
28
|
+
UniChar begin;
|
29
|
+
UniChar end;
|
30
|
+
} AlphaRange;
|
31
|
+
|
32
|
+
struct _AlphaMap {
|
33
|
+
AlphaRange *first_range;
|
34
|
+
AlphaRange *last_range;
|
35
|
+
};
|
36
|
+
|
37
|
+
/*-----------------------------*
|
38
|
+
 *   METHODS IMPLEMENTATIONS   *
|
39
|
+
*-----------------------------*/
|
40
|
+
|
41
|
+
AlphaMap *
|
42
|
+
alpha_map_open (const char *path, const char *name, const char *ext)
|
43
|
+
{
|
44
|
+
FILE *file;
|
45
|
+
char line[256];
|
46
|
+
AlphaMap *alpha_map;
|
47
|
+
|
48
|
+
file = file_open (path, name, ext, TRIE_IO_READ);
|
49
|
+
if (!file)
|
50
|
+
return NULL;
|
51
|
+
|
52
|
+
/* prepare data */
|
53
|
+
alpha_map = alpha_map_new ();
|
54
|
+
if (!alpha_map)
|
55
|
+
goto exit1;
|
56
|
+
|
57
|
+
/* read character ranges */
|
58
|
+
while (fgets (line, sizeof line, file)) {
|
59
|
+
AlphaRange *range;
|
60
|
+
int b, e;
|
61
|
+
|
62
|
+
range = (AlphaRange *) malloc (sizeof (AlphaRange));
|
63
|
+
|
64
|
+
/* read the range
|
65
|
+
* format: [b,e]
|
66
|
+
* where: b = begin char, e = end char; both in hex values
|
67
|
+
*/
|
68
|
+
if (sscanf (line, " [ %x , %x ] ", &b, &e) != 2)
|
69
|
+
continue;
|
70
|
+
if (b > e) {
|
71
|
+
fprintf (stderr, "Range begin (%x) > range end (%x)\n", b, e);
|
72
|
+
free (range);
|
73
|
+
continue;
|
74
|
+
}
|
75
|
+
range->begin = b;
|
76
|
+
range->end = e;
|
77
|
+
|
78
|
+
/* append it to list of ranges */
|
79
|
+
range->next = NULL;
|
80
|
+
if (alpha_map->last_range)
|
81
|
+
alpha_map->last_range->next = range;
|
82
|
+
else
|
83
|
+
alpha_map->first_range = range;
|
84
|
+
alpha_map->last_range = range;
|
85
|
+
}
|
86
|
+
|
87
|
+
fclose (file);
|
88
|
+
return alpha_map;
|
89
|
+
|
90
|
+
exit1:
|
91
|
+
fclose (file);
|
92
|
+
return NULL;
|
93
|
+
}
|
94
|
+
|
95
|
+
static AlphaMap *
|
96
|
+
alpha_map_new ()
|
97
|
+
{
|
98
|
+
AlphaMap *alpha_map;
|
99
|
+
|
100
|
+
alpha_map = (AlphaMap *) malloc (sizeof (AlphaMap));
|
101
|
+
if (!alpha_map)
|
102
|
+
return NULL;
|
103
|
+
|
104
|
+
alpha_map->first_range = alpha_map->last_range = NULL;
|
105
|
+
|
106
|
+
return alpha_map;
|
107
|
+
}
|
108
|
+
|
109
|
+
void
|
110
|
+
alpha_map_free (AlphaMap *alpha_map)
|
111
|
+
{
|
112
|
+
AlphaRange *p, *q;
|
113
|
+
|
114
|
+
p = alpha_map->first_range;
|
115
|
+
while (p) {
|
116
|
+
q = p->next;
|
117
|
+
free (p);
|
118
|
+
p = q;
|
119
|
+
}
|
120
|
+
|
121
|
+
free (alpha_map);
|
122
|
+
}
|
123
|
+
|
124
|
+
TrieChar
|
125
|
+
alpha_map_char_to_alphabet (const AlphaMap *alpha_map, UniChar uc)
|
126
|
+
{
|
127
|
+
TrieChar alpha_begin;
|
128
|
+
AlphaRange *range;
|
129
|
+
|
130
|
+
if (uc == 0)
|
131
|
+
return 0;
|
132
|
+
|
133
|
+
alpha_begin = 1;
|
134
|
+
for (range = alpha_map->first_range;
|
135
|
+
range && (uc < range->begin || range->end < uc);
|
136
|
+
range = range->next)
|
137
|
+
{
|
138
|
+
alpha_begin += range->end - range->begin + 1;
|
139
|
+
}
|
140
|
+
if (range)
|
141
|
+
return alpha_begin + (uc - range->begin);
|
142
|
+
|
143
|
+
return TRIE_CHAR_MAX;
|
144
|
+
}
|
145
|
+
|
146
|
+
UniChar
|
147
|
+
alpha_map_alphabet_to_char (const AlphaMap *alpha_map, TrieChar tc)
|
148
|
+
{
|
149
|
+
TrieChar alpha_begin;
|
150
|
+
AlphaRange *range;
|
151
|
+
|
152
|
+
if (tc == 0)
|
153
|
+
return 0;
|
154
|
+
|
155
|
+
alpha_begin = 1;
|
156
|
+
for (range = alpha_map->first_range;
|
157
|
+
range && alpha_begin + (range->end - range->begin) < tc;
|
158
|
+
range = range->next)
|
159
|
+
{
|
160
|
+
alpha_begin += range->end - range->begin + 1;
|
161
|
+
}
|
162
|
+
if (range)
|
163
|
+
return range->begin + (tc - alpha_begin);
|
164
|
+
|
165
|
+
return UNI_CHAR_ERROR;
|
166
|
+
}
|
167
|
+
|
168
|
+
/*
|
169
|
+
vi:ts=4:ai:expandtab
|
170
|
+
*/
|
@@ -0,0 +1,36 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* alpha-map.h - map between character codes and trie alphabet
|
4
|
+
* Created: 2006-08-19
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef __ALPHA_MAP_H
|
9
|
+
#define __ALPHA_MAP_H
|
10
|
+
|
11
|
+
#include "typedefs.h"
|
12
|
+
#include "triedefs.h"
|
13
|
+
|
14
|
+
typedef uint16 UniChar;
|
15
|
+
|
16
|
+
/* Error value returned by alpha_map_alphabet_to_char().
 * The operand of ~ is promoted to int, so the result must be cast back to
 * UniChar; otherwise the macro expands to the int value -1 and a UniChar
 * variable never compares equal to it. */
#define UNI_CHAR_ERROR   ((UniChar) ~(UniChar) 0)
|
17
|
+
|
18
|
+
typedef struct _AlphaMap AlphaMap;
|
19
|
+
|
20
|
+
AlphaMap * alpha_map_open (const char *path,
|
21
|
+
const char *name,
|
22
|
+
const char *ext);
|
23
|
+
|
24
|
+
void alpha_map_free (AlphaMap *alpha_map);
|
25
|
+
|
26
|
+
TrieChar alpha_map_char_to_alphabet (const AlphaMap *alpha_map, UniChar uc);
|
27
|
+
|
28
|
+
UniChar alpha_map_alphabet_to_char (const AlphaMap *alpha_map, TrieChar tc);
|
29
|
+
|
30
|
+
|
31
|
+
#endif /* __ALPHA_MAP_H */
|
32
|
+
|
33
|
+
|
34
|
+
/*
|
35
|
+
vi:ts=4:ai:expandtab
|
36
|
+
*/
|
@@ -0,0 +1,674 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* darray.c - Double-array trie structure
|
4
|
+
* Created: 2006-08-13
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <string.h>
|
9
|
+
#include <stdlib.h>
|
10
|
+
#include <stdio.h>
|
11
|
+
|
12
|
+
#include "trie-private.h"
|
13
|
+
#include "darray.h"
|
14
|
+
#include "fileutils.h"
|
15
|
+
|
16
|
+
/*----------------------------------*
|
17
|
+
* INTERNAL TYPES DECLARATIONS *
|
18
|
+
*----------------------------------*/
|
19
|
+
|
20
|
+
/*
|
21
|
+
* Type for keeping intermediate values of TrieIndex.
|
22
|
+
* Must be bigger than TrieIndex, so that overflow can be easily detected.
|
23
|
+
*/
|
24
|
+
typedef int32 TrieIndexInt;
|
25
|
+
|
26
|
+
typedef struct _Symbols Symbols;
|
27
|
+
|
28
|
+
struct _Symbols {
|
29
|
+
short num_symbols;
|
30
|
+
TrieChar symbols[256];
|
31
|
+
};
|
32
|
+
|
33
|
+
static Symbols * symbols_new ();
|
34
|
+
static void symbols_free (Symbols *syms);
|
35
|
+
static void symbols_add (Symbols *syms, TrieChar c);
|
36
|
+
|
37
|
+
/* Number of symbols currently stored in the set. */
#define symbols_num(set)            ((set)->num_symbols)
/* Symbol stored at position idx (no bounds check). */
#define symbols_get(set,idx)        ((set)->symbols[idx])
/* Append sym at the end without searching or bounds checking; the caller
 * is responsible for adding symbols in ascending order. */
#define symbols_add_fast(set,sym)   ((set)->symbols[(set)->num_symbols++] = sym)
|
40
|
+
|
41
|
+
/*-----------------------------------*
|
42
|
+
* PRIVATE METHODS DECLARATIONS *
|
43
|
+
*-----------------------------------*/
|
44
|
+
|
45
|
+
#define da_get_free_list(d) (1)
|
46
|
+
|
47
|
+
static Bool da_check_free_cell (DArray *d,
|
48
|
+
TrieIndexInt s);
|
49
|
+
|
50
|
+
static Bool da_has_children (DArray *d,
|
51
|
+
TrieIndex s);
|
52
|
+
|
53
|
+
static Symbols * da_output_symbols (DArray *d,
|
54
|
+
TrieIndex s);
|
55
|
+
|
56
|
+
static TrieChar * da_get_state_key (DArray *d,
|
57
|
+
TrieIndex state);
|
58
|
+
|
59
|
+
static TrieIndex da_find_free_base (DArray *d,
|
60
|
+
const Symbols *symbols);
|
61
|
+
|
62
|
+
static Bool da_fit_symbols (DArray *d,
|
63
|
+
TrieIndex base,
|
64
|
+
const Symbols *symbols);
|
65
|
+
|
66
|
+
static void da_relocate_base (DArray *d,
|
67
|
+
TrieIndex s,
|
68
|
+
TrieIndex new_base);
|
69
|
+
|
70
|
+
static Bool da_extend_pool (DArray *d,
|
71
|
+
TrieIndexInt to_index);
|
72
|
+
|
73
|
+
static void da_alloc_cell (DArray *d,
|
74
|
+
TrieIndex cell);
|
75
|
+
|
76
|
+
static void da_free_cell (DArray *d,
|
77
|
+
TrieIndex cell);
|
78
|
+
|
79
|
+
static Bool da_enumerate_recursive (DArray *d,
|
80
|
+
TrieIndex state,
|
81
|
+
DAEnumFunc enum_func,
|
82
|
+
void *user_data);
|
83
|
+
|
84
|
+
/* ==================== BEGIN IMPLEMENTATION PART ==================== */
|
85
|
+
|
86
|
+
/*------------------------------------*
|
87
|
+
* INTERNAL TYPES IMPLEMENTATIONS *
|
88
|
+
*------------------------------------*/
|
89
|
+
|
90
|
+
static Symbols *
|
91
|
+
symbols_new ()
|
92
|
+
{
|
93
|
+
Symbols *syms;
|
94
|
+
|
95
|
+
syms = (Symbols *) malloc (sizeof (Symbols));
|
96
|
+
|
97
|
+
if (!syms)
|
98
|
+
return NULL;
|
99
|
+
|
100
|
+
syms->num_symbols = 0;
|
101
|
+
|
102
|
+
return syms;
|
103
|
+
}
|
104
|
+
|
105
|
+
static void
|
106
|
+
symbols_free (Symbols *syms)
|
107
|
+
{
|
108
|
+
free (syms);
|
109
|
+
}
|
110
|
+
|
111
|
+
static void
|
112
|
+
symbols_add (Symbols *syms, TrieChar c)
|
113
|
+
{
|
114
|
+
short lower, upper;
|
115
|
+
|
116
|
+
lower = 0;
|
117
|
+
upper = syms->num_symbols;
|
118
|
+
while (lower < upper) {
|
119
|
+
short middle;
|
120
|
+
|
121
|
+
middle = (lower + upper)/2;
|
122
|
+
if (c > syms->symbols[middle])
|
123
|
+
lower = middle + 1;
|
124
|
+
else if (c < syms->symbols[middle])
|
125
|
+
upper = middle;
|
126
|
+
else
|
127
|
+
return;
|
128
|
+
}
|
129
|
+
if (lower < syms->num_symbols) {
|
130
|
+
memmove (syms->symbols + lower + 1, syms->symbols + lower,
|
131
|
+
syms->num_symbols - lower);
|
132
|
+
}
|
133
|
+
syms->symbols[lower] = c;
|
134
|
+
syms->num_symbols++;
|
135
|
+
}
|
136
|
+
|
137
|
+
/*------------------------------*
|
138
|
+
 *   PRIVATE DATA DEFINITIONS   *
|
139
|
+
*------------------------------*/
|
140
|
+
|
141
|
+
typedef struct {
|
142
|
+
TrieIndex base;
|
143
|
+
TrieIndex check;
|
144
|
+
} DACell;
|
145
|
+
|
146
|
+
struct _DArray {
|
147
|
+
TrieIndex num_cells;
|
148
|
+
DACell *cells;
|
149
|
+
|
150
|
+
FILE *file;
|
151
|
+
Bool is_dirty;
|
152
|
+
};
|
153
|
+
|
154
|
+
/*-----------------------------*
|
155
|
+
 *   METHODS IMPLEMENTATIONS   *
|
156
|
+
*-----------------------------*/
|
157
|
+
|
158
|
+
#define DA_SIGNATURE 0xDAFD
|
159
|
+
|
160
|
+
/* DA Header:
|
161
|
+
* - Cell 0: SIGNATURE, 1
|
162
|
+
* - Cell 1: free circular-list pointers
|
163
|
+
* - Cell 2: root node
|
164
|
+
* - Cell 3: DA pool begin
|
165
|
+
*/
|
166
|
+
#define DA_POOL_BEGIN 3
|
167
|
+
|
168
|
+
DArray *
|
169
|
+
da_open (const char *path, const char *name, TrieIOMode mode)
|
170
|
+
{
|
171
|
+
DArray *d;
|
172
|
+
TrieIndex i;
|
173
|
+
|
174
|
+
d = (DArray *) malloc (sizeof (DArray));
|
175
|
+
|
176
|
+
d->file = file_open (path, name, ".br", mode);
|
177
|
+
if (!d->file)
|
178
|
+
goto exit1;
|
179
|
+
|
180
|
+
/* init cells data */
|
181
|
+
d->num_cells = file_length (d->file) / 4;
|
182
|
+
if (0 == d->num_cells) {
|
183
|
+
d->num_cells = DA_POOL_BEGIN;
|
184
|
+
d->cells = (DACell *) malloc (d->num_cells * sizeof (DACell));
|
185
|
+
if (!d->cells)
|
186
|
+
goto exit2;
|
187
|
+
d->cells[0].base = DA_SIGNATURE;
|
188
|
+
d->cells[0].check = 1;
|
189
|
+
d->cells[1].base = -1;
|
190
|
+
d->cells[1].check = -1;
|
191
|
+
d->cells[2].base = DA_POOL_BEGIN;
|
192
|
+
d->cells[2].check = 0;
|
193
|
+
d->is_dirty = TRUE;
|
194
|
+
} else {
|
195
|
+
d->cells = (DACell *) malloc (d->num_cells * sizeof (DACell));
|
196
|
+
if (!d->cells)
|
197
|
+
goto exit2;
|
198
|
+
file_read_int16 (d->file, &d->cells[0].base);
|
199
|
+
file_read_int16 (d->file, &d->cells[0].check);
|
200
|
+
if (DA_SIGNATURE != (uint16) d->cells[0].base)
|
201
|
+
goto exit3;
|
202
|
+
for (i = 1; i < d->num_cells; i++) {
|
203
|
+
file_read_int16 (d->file, &d->cells[i].base);
|
204
|
+
file_read_int16 (d->file, &d->cells[i].check);
|
205
|
+
}
|
206
|
+
d->is_dirty = FALSE;
|
207
|
+
}
|
208
|
+
|
209
|
+
return d;
|
210
|
+
|
211
|
+
exit3:
|
212
|
+
free (d->cells);
|
213
|
+
exit2:
|
214
|
+
fclose (d->file);
|
215
|
+
exit1:
|
216
|
+
free (d);
|
217
|
+
return NULL;
|
218
|
+
}
|
219
|
+
|
220
|
+
int
|
221
|
+
da_close (DArray *d)
|
222
|
+
{
|
223
|
+
int ret;
|
224
|
+
|
225
|
+
if (0 != (ret = da_save (d)))
|
226
|
+
return ret;
|
227
|
+
if (0 != (ret = fclose (d->file)))
|
228
|
+
return ret;
|
229
|
+
free (d->cells);
|
230
|
+
free (d);
|
231
|
+
|
232
|
+
return 0;
|
233
|
+
}
|
234
|
+
|
235
|
+
int
|
236
|
+
da_save (DArray *d)
|
237
|
+
{
|
238
|
+
TrieIndex i;
|
239
|
+
|
240
|
+
if (!d->is_dirty)
|
241
|
+
return 0;
|
242
|
+
|
243
|
+
rewind (d->file);
|
244
|
+
for (i = 0; i < d->num_cells; i++) {
|
245
|
+
if (!file_write_int16 (d->file, d->cells[i].base) ||
|
246
|
+
!file_write_int16 (d->file, d->cells[i].check))
|
247
|
+
{
|
248
|
+
return -1;
|
249
|
+
}
|
250
|
+
}
|
251
|
+
d->is_dirty = FALSE;
|
252
|
+
|
253
|
+
return 0;
|
254
|
+
}
|
255
|
+
|
256
|
+
|
257
|
+
TrieIndex
|
258
|
+
da_get_root (const DArray *d)
|
259
|
+
{
|
260
|
+
/* can be calculated value for multi-index trie */
|
261
|
+
return 2;
|
262
|
+
}
|
263
|
+
|
264
|
+
|
265
|
+
TrieIndex
|
266
|
+
da_get_base (const DArray *d, TrieIndex s)
|
267
|
+
{
|
268
|
+
return (s < d->num_cells) ? d->cells[s].base : TRIE_INDEX_ERROR;
|
269
|
+
}
|
270
|
+
|
271
|
+
TrieIndex
|
272
|
+
da_get_check (const DArray *d, TrieIndex s)
|
273
|
+
{
|
274
|
+
return (s < d->num_cells) ? d->cells[s].check : TRIE_INDEX_ERROR;
|
275
|
+
}
|
276
|
+
|
277
|
+
|
278
|
+
void
|
279
|
+
da_set_base (DArray *d, TrieIndex s, TrieIndex val)
|
280
|
+
{
|
281
|
+
if (s < d->num_cells) {
|
282
|
+
d->cells[s].base = val;
|
283
|
+
d->is_dirty = TRUE;
|
284
|
+
}
|
285
|
+
}
|
286
|
+
|
287
|
+
void
|
288
|
+
da_set_check (DArray *d, TrieIndex s, TrieIndex val)
|
289
|
+
{
|
290
|
+
if (s < d->num_cells) {
|
291
|
+
d->cells[s].check = val;
|
292
|
+
d->is_dirty = TRUE;
|
293
|
+
}
|
294
|
+
}
|
295
|
+
|
296
|
+
Bool
|
297
|
+
da_walk (DArray *d, TrieIndex *s, TrieChar c)
|
298
|
+
{
|
299
|
+
TrieIndex next;
|
300
|
+
|
301
|
+
next = da_get_base (d, *s) + c;
|
302
|
+
if (da_get_check (d, next) == *s) {
|
303
|
+
*s = next;
|
304
|
+
return TRUE;
|
305
|
+
}
|
306
|
+
return FALSE;
|
307
|
+
}
|
308
|
+
|
309
|
+
TrieIndex
|
310
|
+
da_insert_branch (DArray *d, TrieIndex s, TrieChar c)
|
311
|
+
{
|
312
|
+
TrieIndexInt base, next;
|
313
|
+
|
314
|
+
base = da_get_base (d, s);
|
315
|
+
|
316
|
+
if (base > 0) {
|
317
|
+
next = da_get_base (d, s) + c;
|
318
|
+
|
319
|
+
/* if already there, do not actually insert */
|
320
|
+
if (da_get_check (d, next) == s)
|
321
|
+
return next;
|
322
|
+
|
323
|
+
if (!da_check_free_cell (d, next)) {
|
324
|
+
Symbols *symbols;
|
325
|
+
TrieIndex new_base;
|
326
|
+
|
327
|
+
/* relocate BASE[s] */
|
328
|
+
symbols = da_output_symbols (d, s);
|
329
|
+
symbols_add (symbols, c);
|
330
|
+
new_base = da_find_free_base (d, symbols);
|
331
|
+
symbols_free (symbols);
|
332
|
+
|
333
|
+
if (TRIE_INDEX_ERROR == new_base)
|
334
|
+
return TRIE_INDEX_ERROR;
|
335
|
+
|
336
|
+
da_relocate_base (d, s, new_base);
|
337
|
+
next = new_base + c;
|
338
|
+
}
|
339
|
+
} else {
|
340
|
+
Symbols *symbols;
|
341
|
+
TrieIndex new_base;
|
342
|
+
|
343
|
+
symbols = symbols_new ();
|
344
|
+
symbols_add (symbols, c);
|
345
|
+
new_base = da_find_free_base (d, symbols);
|
346
|
+
symbols_free (symbols);
|
347
|
+
|
348
|
+
if (TRIE_INDEX_ERROR == new_base)
|
349
|
+
return TRIE_INDEX_ERROR;
|
350
|
+
|
351
|
+
da_set_base (d, s, new_base);
|
352
|
+
next = new_base + c;
|
353
|
+
}
|
354
|
+
da_alloc_cell (d, next);
|
355
|
+
da_set_check (d, next, s);
|
356
|
+
|
357
|
+
return next;
|
358
|
+
}
|
359
|
+
|
360
|
+
static Bool
|
361
|
+
da_check_free_cell (DArray *d,
|
362
|
+
TrieIndexInt s)
|
363
|
+
{
|
364
|
+
return da_extend_pool (d, s) && da_get_check (d, s) < 0;
|
365
|
+
}
|
366
|
+
|
367
|
+
static Bool
|
368
|
+
da_has_children (DArray *d,
|
369
|
+
TrieIndex s)
|
370
|
+
{
|
371
|
+
TrieIndex base;
|
372
|
+
uint16 c, max_c;
|
373
|
+
|
374
|
+
base = da_get_base (d, s);
|
375
|
+
if (TRIE_INDEX_ERROR == base || base < 0)
|
376
|
+
return FALSE;
|
377
|
+
|
378
|
+
max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - base);
|
379
|
+
for (c = 0; c < max_c; c++) {
|
380
|
+
if (da_get_check (d, base + c) == s)
|
381
|
+
return TRUE;
|
382
|
+
}
|
383
|
+
|
384
|
+
return FALSE;
|
385
|
+
}
|
386
|
+
|
387
|
+
static Symbols *
|
388
|
+
da_output_symbols (DArray *d,
|
389
|
+
TrieIndex s)
|
390
|
+
{
|
391
|
+
Symbols *syms;
|
392
|
+
TrieIndex base;
|
393
|
+
uint16 c, max_c;
|
394
|
+
|
395
|
+
syms = symbols_new ();
|
396
|
+
|
397
|
+
base = da_get_base (d, s);
|
398
|
+
max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - base);
|
399
|
+
for (c = 0; c < max_c; c++) {
|
400
|
+
if (da_get_check (d, base + c) == s)
|
401
|
+
symbols_add_fast (syms, (TrieChar) c);
|
402
|
+
}
|
403
|
+
|
404
|
+
return syms;
|
405
|
+
}
|
406
|
+
|
407
|
+
static TrieChar *
|
408
|
+
da_get_state_key (DArray *d,
|
409
|
+
TrieIndex state)
|
410
|
+
{
|
411
|
+
TrieChar *key;
|
412
|
+
int key_size, key_length;
|
413
|
+
int i;
|
414
|
+
|
415
|
+
key_size = 20;
|
416
|
+
key_length = 0;
|
417
|
+
key = (TrieChar *) malloc (key_size);
|
418
|
+
|
419
|
+
/* trace back to root */
|
420
|
+
while (da_get_root (d) != state) {
|
421
|
+
TrieIndex parent;
|
422
|
+
|
423
|
+
if (key_length + 1 >= key_size) {
|
424
|
+
key_size += 20;
|
425
|
+
key = (TrieChar *) realloc (key, key_size);
|
426
|
+
}
|
427
|
+
parent = da_get_check (d, state);
|
428
|
+
key[key_length++] = (TrieChar) (state - da_get_base (d, parent));
|
429
|
+
state = parent;
|
430
|
+
}
|
431
|
+
key[key_length] = '\0';
|
432
|
+
|
433
|
+
/* reverse the string */
|
434
|
+
for (i = 0; i < --key_length; i++) {
|
435
|
+
TrieChar temp;
|
436
|
+
|
437
|
+
temp = key[i];
|
438
|
+
key[i] = key[key_length];
|
439
|
+
key[key_length] = temp;
|
440
|
+
}
|
441
|
+
|
442
|
+
return key;
|
443
|
+
}
|
444
|
+
|
445
|
+
static TrieIndex
|
446
|
+
da_find_free_base (DArray *d,
|
447
|
+
const Symbols *symbols)
|
448
|
+
{
|
449
|
+
TrieChar first_sym;
|
450
|
+
TrieIndexInt s;
|
451
|
+
|
452
|
+
/* find first free cell that is beyond the first symbol */
|
453
|
+
first_sym = symbols_get (symbols, 0);
|
454
|
+
s = -da_get_check (d, da_get_free_list (d));
|
455
|
+
while (s != da_get_free_list (d)
|
456
|
+
&& s < (TrieIndex) first_sym + DA_POOL_BEGIN)
|
457
|
+
{
|
458
|
+
s = -da_get_check (d, s);
|
459
|
+
}
|
460
|
+
if (s == da_get_free_list (d)) {
|
461
|
+
for (s = first_sym + DA_POOL_BEGIN; ; ++s) {
|
462
|
+
if (!da_extend_pool (d, s))
|
463
|
+
return TRIE_INDEX_ERROR;
|
464
|
+
if (da_get_check (d, s) < 0)
|
465
|
+
break;
|
466
|
+
}
|
467
|
+
}
|
468
|
+
|
469
|
+
/* search for next free cell that fits the symbols set */
|
470
|
+
while (!da_fit_symbols (d, s - first_sym, symbols)) {
|
471
|
+
/* extend pool before getting exhausted */
|
472
|
+
if (-da_get_check (d, s) == da_get_free_list (d)) {
|
473
|
+
if (!da_extend_pool (d, d->num_cells))
|
474
|
+
return TRIE_INDEX_ERROR;
|
475
|
+
}
|
476
|
+
|
477
|
+
s = -da_get_check (d, s);
|
478
|
+
}
|
479
|
+
|
480
|
+
return s - first_sym;
|
481
|
+
}
|
482
|
+
|
483
|
+
static Bool
|
484
|
+
da_fit_symbols (DArray *d,
|
485
|
+
TrieIndex base,
|
486
|
+
const Symbols *symbols)
|
487
|
+
{
|
488
|
+
int i;
|
489
|
+
|
490
|
+
for (i = 0; i < symbols_num (symbols); i++) {
|
491
|
+
if (!da_check_free_cell (d, base + symbols_get (symbols, i)))
|
492
|
+
return FALSE;
|
493
|
+
}
|
494
|
+
return TRUE;
|
495
|
+
}
|
496
|
+
|
497
|
+
static void
|
498
|
+
da_relocate_base (DArray *d,
|
499
|
+
TrieIndex s,
|
500
|
+
TrieIndex new_base)
|
501
|
+
{
|
502
|
+
TrieIndex old_base;
|
503
|
+
Symbols *symbols;
|
504
|
+
int i;
|
505
|
+
|
506
|
+
old_base = da_get_base (d, s);
|
507
|
+
symbols = da_output_symbols (d, s);
|
508
|
+
|
509
|
+
for (i = 0; i < symbols_num (symbols); i++) {
|
510
|
+
TrieIndex old_next, new_next, old_next_base;
|
511
|
+
|
512
|
+
old_next = old_base + symbols_get (symbols, i);
|
513
|
+
new_next = new_base + symbols_get (symbols, i);
|
514
|
+
old_next_base = da_get_base (d, old_next);
|
515
|
+
|
516
|
+
/* allocate new next node and copy BASE value */
|
517
|
+
da_alloc_cell (d, new_next);
|
518
|
+
da_set_check (d, new_next, s);
|
519
|
+
da_set_base (d, new_next, old_next_base);
|
520
|
+
|
521
|
+
/* old_next node is now moved to new_next
|
522
|
+
* so, all cells belonging to old_next
|
523
|
+
* must be given to new_next
|
524
|
+
*/
|
525
|
+
/* preventing the case of TAIL pointer */
|
526
|
+
if (old_next_base > 0) {
|
527
|
+
uint16 c, max_c;
|
528
|
+
|
529
|
+
max_c = MIN_VAL (TRIE_CHAR_MAX, TRIE_INDEX_MAX - old_next_base);
|
530
|
+
for (c = 0; c < max_c; c++) {
|
531
|
+
if (da_get_check (d, old_next_base + c) == old_next)
|
532
|
+
da_set_check (d, old_next_base + c, new_next);
|
533
|
+
}
|
534
|
+
}
|
535
|
+
|
536
|
+
/* free old_next node */
|
537
|
+
da_free_cell (d, old_next);
|
538
|
+
}
|
539
|
+
|
540
|
+
symbols_free (symbols);
|
541
|
+
|
542
|
+
/* finally, make BASE[s] point to new_base */
|
543
|
+
da_set_base (d, s, new_base);
|
544
|
+
}
|
545
|
+
|
546
|
+
static Bool
|
547
|
+
da_extend_pool (DArray *d,
|
548
|
+
TrieIndexInt to_index)
|
549
|
+
{
|
550
|
+
TrieIndex new_begin;
|
551
|
+
TrieIndex i;
|
552
|
+
TrieIndex free_tail;
|
553
|
+
|
554
|
+
if (to_index <= 0 || TRIE_INDEX_MAX <= to_index)
|
555
|
+
return FALSE;
|
556
|
+
|
557
|
+
if (to_index < d->num_cells)
|
558
|
+
return TRUE;
|
559
|
+
|
560
|
+
d->cells = (DACell *) realloc (d->cells, (to_index + 1) * sizeof (DACell));
|
561
|
+
new_begin = d->num_cells;
|
562
|
+
d->num_cells = to_index + 1;
|
563
|
+
|
564
|
+
/* initialize new free list */
|
565
|
+
for (i = new_begin; i < to_index; i++) {
|
566
|
+
da_set_check (d, i, -(i + 1));
|
567
|
+
da_set_base (d, i + 1, -i);
|
568
|
+
}
|
569
|
+
|
570
|
+
/* merge the new circular list to the old */
|
571
|
+
free_tail = -da_get_base (d, da_get_free_list (d));
|
572
|
+
da_set_check (d, free_tail, -new_begin);
|
573
|
+
da_set_base (d, new_begin, -free_tail);
|
574
|
+
da_set_check (d, to_index, -da_get_free_list (d));
|
575
|
+
da_set_base (d, da_get_free_list (d), -to_index);
|
576
|
+
|
577
|
+
return TRUE;
|
578
|
+
}
|
579
|
+
|
580
|
+
void
|
581
|
+
da_prune (DArray *d, TrieIndex s)
|
582
|
+
{
|
583
|
+
da_prune_upto (d, da_get_root (d), s);
|
584
|
+
}
|
585
|
+
|
586
|
+
void
|
587
|
+
da_prune_upto (DArray *d, TrieIndex p, TrieIndex s)
|
588
|
+
{
|
589
|
+
while (p != s && !da_has_children (d, s)) {
|
590
|
+
TrieIndex parent;
|
591
|
+
|
592
|
+
parent = da_get_check (d, s);
|
593
|
+
da_free_cell (d, s);
|
594
|
+
s = parent;
|
595
|
+
}
|
596
|
+
}
|
597
|
+
|
598
|
+
static void
|
599
|
+
da_alloc_cell (DArray *d,
|
600
|
+
TrieIndex cell)
|
601
|
+
{
|
602
|
+
TrieIndex prev, next;
|
603
|
+
|
604
|
+
prev = -da_get_base (d, cell);
|
605
|
+
next = -da_get_check (d, cell);
|
606
|
+
|
607
|
+
/* remove the cell from free list */
|
608
|
+
da_set_check (d, prev, -next);
|
609
|
+
da_set_base (d, next, -prev);
|
610
|
+
}
|
611
|
+
|
612
|
+
static void
|
613
|
+
da_free_cell (DArray *d,
|
614
|
+
TrieIndex cell)
|
615
|
+
{
|
616
|
+
TrieIndex i, prev;
|
617
|
+
|
618
|
+
/* find insertion point */
|
619
|
+
i = -da_get_check (d, da_get_free_list (d));
|
620
|
+
while (i != da_get_free_list (d) && i < cell)
|
621
|
+
i = -da_get_check (d, i);
|
622
|
+
|
623
|
+
prev = -da_get_base (d, i);
|
624
|
+
|
625
|
+
/* insert cell before i */
|
626
|
+
da_set_check (d, cell, -i);
|
627
|
+
da_set_base (d, cell, -prev);
|
628
|
+
da_set_check (d, prev, -cell);
|
629
|
+
da_set_base (d, i, -cell);
|
630
|
+
}
|
631
|
+
|
632
|
+
Bool
|
633
|
+
da_enumerate (DArray *d, DAEnumFunc enum_func, void *user_data)
|
634
|
+
{
|
635
|
+
return da_enumerate_recursive (d, da_get_root (d), enum_func, user_data);
|
636
|
+
}
|
637
|
+
|
638
|
+
static Bool
|
639
|
+
da_enumerate_recursive (DArray *d,
|
640
|
+
TrieIndex state,
|
641
|
+
DAEnumFunc enum_func,
|
642
|
+
void *user_data)
|
643
|
+
{
|
644
|
+
Bool ret;
|
645
|
+
TrieIndex base;
|
646
|
+
|
647
|
+
base = da_get_base (d, state);
|
648
|
+
|
649
|
+
if (base < 0) {
|
650
|
+
TrieChar *key;
|
651
|
+
|
652
|
+
key = da_get_state_key (d, state);
|
653
|
+
ret = (*enum_func) (key, state, user_data);
|
654
|
+
free (key);
|
655
|
+
} else {
|
656
|
+
Symbols *symbols;
|
657
|
+
int i;
|
658
|
+
|
659
|
+
ret = TRUE;
|
660
|
+
symbols = da_output_symbols (d, state);
|
661
|
+
for (i = 0; ret && i < symbols_num (symbols); i++) {
|
662
|
+
ret = da_enumerate_recursive (d, base + symbols_get (symbols, i),
|
663
|
+
enum_func, user_data);
|
664
|
+
}
|
665
|
+
|
666
|
+
symbols_free (symbols);
|
667
|
+
}
|
668
|
+
|
669
|
+
return ret;
|
670
|
+
}
|
671
|
+
|
672
|
+
/*
|
673
|
+
vi:ts=4:ai:expandtab
|
674
|
+
*/
|