tyler-trie 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/VERSION.yml +4 -0
  2. data/ext/libdatrie/AUTHORS +1 -0
  3. data/ext/libdatrie/COPYING +510 -0
  4. data/ext/libdatrie/ChangeLog +410 -0
  5. data/ext/libdatrie/INSTALL +236 -0
  6. data/ext/libdatrie/Makefile.am +5 -0
  7. data/ext/libdatrie/Makefile.in +661 -0
  8. data/ext/libdatrie/NEWS +27 -0
  9. data/ext/libdatrie/README +32 -0
  10. data/ext/libdatrie/aclocal.m4 +7431 -0
  11. data/ext/libdatrie/config.guess +1516 -0
  12. data/ext/libdatrie/config.h.in +74 -0
  13. data/ext/libdatrie/config.sub +1626 -0
  14. data/ext/libdatrie/configure +22008 -0
  15. data/ext/libdatrie/configure.ac +71 -0
  16. data/ext/libdatrie/datrie.pc.in +11 -0
  17. data/ext/libdatrie/datrie/Makefile.am +35 -0
  18. data/ext/libdatrie/datrie/Makefile.in +522 -0
  19. data/ext/libdatrie/datrie/alpha-map.c +170 -0
  20. data/ext/libdatrie/datrie/alpha-map.h +36 -0
  21. data/ext/libdatrie/datrie/darray.c +674 -0
  22. data/ext/libdatrie/datrie/darray.h +229 -0
  23. data/ext/libdatrie/datrie/fileutils.c +151 -0
  24. data/ext/libdatrie/datrie/fileutils.h +36 -0
  25. data/ext/libdatrie/datrie/libdatrie.def +31 -0
  26. data/ext/libdatrie/datrie/sb-trie.c +331 -0
  27. data/ext/libdatrie/datrie/sb-trie.h +279 -0
  28. data/ext/libdatrie/datrie/tail.c +344 -0
  29. data/ext/libdatrie/datrie/tail.h +200 -0
  30. data/ext/libdatrie/datrie/trie-private.h +31 -0
  31. data/ext/libdatrie/datrie/trie.c +413 -0
  32. data/ext/libdatrie/datrie/trie.h +270 -0
  33. data/ext/libdatrie/datrie/triedefs.h +63 -0
  34. data/ext/libdatrie/datrie/typedefs.h +113 -0
  35. data/ext/libdatrie/depcomp +530 -0
  36. data/ext/libdatrie/doc/Doxyfile.in +244 -0
  37. data/ext/libdatrie/doc/Makefile.am +29 -0
  38. data/ext/libdatrie/doc/Makefile.in +352 -0
  39. data/ext/libdatrie/install-sh +323 -0
  40. data/ext/libdatrie/ltmain.sh +6938 -0
  41. data/ext/libdatrie/man/Makefile.am +4 -0
  42. data/ext/libdatrie/man/Makefile.in +381 -0
  43. data/ext/libdatrie/man/trietool.1 +107 -0
  44. data/ext/libdatrie/missing +360 -0
  45. data/ext/libdatrie/tools/Makefile.am +7 -0
  46. data/ext/libdatrie/tools/Makefile.in +460 -0
  47. data/ext/libdatrie/tools/trietool.c +308 -0
  48. data/ext/trie/extconf.rb +12 -0
  49. data/ext/trie/trie.c +174 -0
  50. data/lib/trie.rb +1 -0
  51. data/spec/test-trie/README +1 -0
  52. data/spec/trie_spec.rb +79 -0
  53. metadata +139 -0
@@ -0,0 +1,344 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
+ /*
3
+ * tail.c - trie tail for keeping suffixes
4
+ * Created: 2006-08-15
5
+ * Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
6
+ */
7
+
8
+ #include <string.h>
9
+ #include <stdlib.h>
10
+ #include <stdio.h>
11
+
12
+ #include "tail.h"
13
+ #include "fileutils.h"
14
+
15
+ /*----------------------------------*
16
+ * INTERNAL TYPES DECLARATIONS *
17
+ *----------------------------------*/
18
+
19
+ /*-----------------------------------*
20
+ * PRIVATE METHODS DECLARATIONS *
21
+ *-----------------------------------*/
22
+
23
+ static TrieIndex tail_alloc_block (Tail *t);
24
+ static void tail_free_block (Tail *t, TrieIndex block);
25
+
26
+ /* ==================== BEGIN IMPLEMENTATION PART ==================== */
27
+
28
+ /*------------------------------------*
29
+ * INTERNAL TYPES IMPLEMENTATIONS *
30
+ *------------------------------------*/
31
+
32
+ /*------------------------------*
33
+ * PRIVATE DATA DEFINITIONS *
34
+ *------------------------------*/
35
+
36
+ typedef struct {
37
+ TrieIndex next_free;
38
+ TrieData data;
39
+ TrieChar *suffix;
40
+ } TailBlock;
41
+
42
+ struct _Tail {
43
+ TrieIndex num_tails;
44
+ TailBlock *tails;
45
+ TrieIndex first_free;
46
+
47
+ FILE *file;
48
+ Bool is_dirty;
49
+ };
50
+
51
+ /*-----------------------------*
52
+ * METHODS IMPLEMENTATIONS *
53
+ *-----------------------------*/
54
+
55
/* 16-bit magic value written first in a tail file and checked on open. */
#define TAIL_SIGNATURE      0xDFFD
/* Offset between public tail indices and 0-based positions in tails[]. */
#define TAIL_START_BLOCKNO  1
57
+
58
+ Tail *
59
+ tail_open (const char *path, const char *name, TrieIOMode mode)
60
+ {
61
+ Tail *t;
62
+ TrieIndex i;
63
+ uint16 sig;
64
+ long file_size;
65
+
66
+ t = (Tail *) malloc (sizeof (Tail));
67
+
68
+ t->file = file_open (path, name, ".tl", mode);
69
+ if (!t->file)
70
+ goto exit1;
71
+
72
+ file_size = file_length (t->file);
73
+ if (file_size != 0 && file_read_int16 (t->file, (int16 *) &sig)
74
+ && sig != TAIL_SIGNATURE)
75
+ {
76
+ goto exit2;
77
+ }
78
+
79
+ /* init tails data */
80
+ if (file_size == 0) {
81
+ t->first_free = 0;
82
+ t->num_tails = 0;
83
+ t->tails = NULL;
84
+ t->is_dirty = TRUE;
85
+ } else {
86
+ file_read_int16 (t->file, &t->first_free);
87
+ file_read_int16 (t->file, &t->num_tails);
88
+ t->tails = (TailBlock *) malloc (t->num_tails * sizeof (TailBlock));
89
+ if (!t->tails)
90
+ goto exit2;
91
+ for (i = 0; i < t->num_tails; i++) {
92
+ int8 length;
93
+
94
+ file_read_int16 (t->file, &t->tails[i].next_free);
95
+ file_read_int16 (t->file, &t->tails[i].data);
96
+
97
+ file_read_int8 (t->file, &length);
98
+ t->tails[i].suffix = (TrieChar *) malloc (length + 1);
99
+ if (length > 0)
100
+ file_read_chars (t->file, (char *)t->tails[i].suffix, length);
101
+ t->tails[i].suffix[length] = '\0';
102
+ }
103
+ t->is_dirty = FALSE;
104
+ }
105
+
106
+ return t;
107
+
108
+ exit2:
109
+ fclose (t->file);
110
+ exit1:
111
+ free (t);
112
+ return NULL;
113
+ }
114
+
115
+ int
116
+ tail_close (Tail *t)
117
+ {
118
+ int ret;
119
+ TrieIndex i;
120
+
121
+ if (0 != (ret = tail_save (t)))
122
+ return ret;
123
+ if (0 != (ret = fclose (t->file)))
124
+ return ret;
125
+ if (t->tails) {
126
+ for (i = 0; i < t->num_tails; i++)
127
+ if (t->tails[i].suffix)
128
+ free (t->tails[i].suffix);
129
+ free (t->tails);
130
+ }
131
+ free (t);
132
+
133
+ return 0;
134
+ }
135
+
136
+ int
137
+ tail_save (Tail *t)
138
+ {
139
+ TrieIndex i;
140
+
141
+ if (!t->is_dirty)
142
+ return 0;
143
+
144
+ rewind (t->file);
145
+ if (!file_write_int16 (t->file, TAIL_SIGNATURE) ||
146
+ !file_write_int16 (t->file, t->first_free) ||
147
+ !file_write_int16 (t->file, t->num_tails))
148
+ {
149
+ return -1;
150
+ }
151
+ for (i = 0; i < t->num_tails; i++) {
152
+ int8 length;
153
+
154
+ if (!file_write_int16 (t->file, t->tails[i].next_free) ||
155
+ !file_write_int16 (t->file, t->tails[i].data))
156
+ {
157
+ return -1;
158
+ }
159
+
160
+ length = t->tails[i].suffix ? strlen ((const char *)t->tails[i].suffix)
161
+ : 0;
162
+ if (!file_write_int8 (t->file, length))
163
+ return -1;
164
+ if (length > 0 &&
165
+ !file_write_chars (t->file, (char *)t->tails[i].suffix, length))
166
+ {
167
+ return -1;
168
+ }
169
+ }
170
+ t->is_dirty = FALSE;
171
+
172
+ return 0;
173
+ }
174
+
175
+
176
+ const TrieChar *
177
+ tail_get_suffix (const Tail *t, TrieIndex index)
178
+ {
179
+ index -= TAIL_START_BLOCKNO;
180
+ return (index < t->num_tails) ? t->tails[index].suffix : NULL;
181
+ }
182
+
183
+ Bool
184
+ tail_set_suffix (Tail *t, TrieIndex index, const TrieChar *suffix)
185
+ {
186
+ index -= TAIL_START_BLOCKNO;
187
+ if (index < t->num_tails) {
188
+ /* suffix and t->tails[index].suffix may overlap;
189
+ * so, dup it before it's overwritten
190
+ */
191
+ TrieChar *tmp = NULL;
192
+ if (suffix)
193
+ tmp = strdup (suffix);
194
+ if (t->tails[index].suffix)
195
+ free (t->tails[index].suffix);
196
+ t->tails[index].suffix = tmp;
197
+
198
+ t->is_dirty = TRUE;
199
+ return TRUE;
200
+ }
201
+ return FALSE;
202
+ }
203
+
204
+ TrieIndex
205
+ tail_add_suffix (Tail *t, const TrieChar *suffix)
206
+ {
207
+ TrieIndex new_block;
208
+
209
+ new_block = tail_alloc_block (t);
210
+ tail_set_suffix (t, new_block, suffix);
211
+
212
+ return new_block;
213
+ }
214
+
215
+ static TrieIndex
216
+ tail_alloc_block (Tail *t)
217
+ {
218
+ TrieIndex block;
219
+
220
+ if (0 != t->first_free) {
221
+ block = t->first_free;
222
+ t->first_free = t->tails[block].next_free;
223
+ } else {
224
+ block = t->num_tails;
225
+ t->tails = (TailBlock *) realloc (t->tails,
226
+ ++t->num_tails * sizeof (TailBlock));
227
+ }
228
+ t->tails[block].next_free = -1;
229
+ t->tails[block].data = TRIE_DATA_ERROR;
230
+ t->tails[block].suffix = NULL;
231
+
232
+ return block + TAIL_START_BLOCKNO;
233
+ }
234
+
235
+ static void
236
+ tail_free_block (Tail *t, TrieIndex block)
237
+ {
238
+ TrieIndex i, j;
239
+
240
+ block -= TAIL_START_BLOCKNO;
241
+
242
+ if (block >= t->num_tails)
243
+ return;
244
+
245
+ t->tails[block].data = TRIE_DATA_ERROR;
246
+ if (NULL != t->tails[block].suffix) {
247
+ free (t->tails[block].suffix);
248
+ t->tails[block].suffix = NULL;
249
+ }
250
+
251
+ /* find insertion point */
252
+ j = 0;
253
+ for (i = t->first_free; i != 0 && i < block; i = t->tails[i].next_free)
254
+ j = i;
255
+
256
+ /* insert free block between j and i */
257
+ t->tails[block].next_free = i;
258
+ if (0 != j)
259
+ t->tails[j].next_free = block;
260
+ else
261
+ t->first_free = block;
262
+
263
+ t->is_dirty = TRUE;
264
+ }
265
+
266
+ TrieData
267
+ tail_get_data (Tail *t, TrieIndex index)
268
+ {
269
+ index -= TAIL_START_BLOCKNO;
270
+ return (index < t->num_tails) ? t->tails[index].data : TRIE_DATA_ERROR;
271
+ }
272
+
273
+ Bool
274
+ tail_set_data (Tail *t, TrieIndex index, TrieData data)
275
+ {
276
+ index -= TAIL_START_BLOCKNO;
277
+ if (index < t->num_tails) {
278
+ t->tails[index].data = data;
279
+ t->is_dirty = TRUE;
280
+ return TRUE;
281
+ }
282
+ return FALSE;
283
+ }
284
+
285
+ void
286
+ tail_delete (Tail *t, TrieIndex index)
287
+ {
288
+ tail_free_block (t, index);
289
+ }
290
+
291
+ int
292
+ tail_walk_str (Tail *t,
293
+ TrieIndex s,
294
+ short *suffix_idx,
295
+ const TrieChar *str,
296
+ int len)
297
+ {
298
+ const TrieChar *suffix;
299
+ int i;
300
+ short j;
301
+
302
+ suffix = tail_get_suffix (t, s);
303
+ if (!suffix)
304
+ return FALSE;
305
+
306
+ i = 0; j = *suffix_idx;
307
+ while (i < len) {
308
+ if (str[i] != suffix[j])
309
+ break;
310
+ ++i;
311
+ /* stop and stay at null-terminator */
312
+ if (0 == suffix[j])
313
+ break;
314
+ ++j;
315
+ }
316
+ *suffix_idx = j;
317
+ return i;
318
+ }
319
+
320
+ Bool
321
+ tail_walk_char (Tail *t,
322
+ TrieIndex s,
323
+ short *suffix_idx,
324
+ TrieChar c)
325
+ {
326
+ const TrieChar *suffix;
327
+ TrieChar suffix_char;
328
+
329
+ suffix = tail_get_suffix (t, s);
330
+ if (!suffix)
331
+ return FALSE;
332
+
333
+ suffix_char = suffix[*suffix_idx];
334
+ if (suffix_char == c) {
335
+ if (0 != suffix_char)
336
+ ++*suffix_idx;
337
+ return TRUE;
338
+ }
339
+ return FALSE;
340
+ }
341
+
342
+ /*
343
+ vi:ts=4:ai:expandtab
344
+ */
@@ -0,0 +1,200 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
+ /*
3
+ * tail.h - trie tail for keeping suffixes
4
+ * Created: 2006-08-12
5
+ * Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
6
+ */
7
+
8
+ #ifndef __TAIL_H
9
+ #define __TAIL_H
10
+
11
+ #include "triedefs.h"
12
+
13
+ /**
14
+ * @file tail.h
15
+ * @brief trie tail for keeping suffixes
16
+ */
17
+
18
+ /**
19
+ * @brief Tail structure type
20
+ */
21
+ typedef struct _Tail Tail;
22
+
23
+ /**
24
+ * @brief Open tail data from file
25
+ *
26
+ * @param path : the path that stores the tail files
27
+ * @param name : the name of the tail data (not actual file name)
28
+ * @param mode : opening mode, read or write
29
+ *
30
+ * @return a pointer to the opened tail data, NULL on failure
31
+ *
32
+ * Open a tail data of given name. Note that @a name here does not mean the
33
+ * actual file name. Rather, the file name will be inferred by the name.
34
+ */
35
+ Tail * tail_open (const char *path, const char *name, TrieIOMode mode);
36
+
37
+ /**
38
+ * @brief Close tail data
39
+ *
40
+ * @param t : the tail data
41
+ *
42
+ * @return 0 on success, non-zero on failure
43
+ *
44
+ * Close the given tail data. If @a t was opened for writing, all pending
45
+ * changes will be saved to file.
46
+ */
47
+ int tail_close (Tail *t);
48
+
49
+ /**
50
+ * @brief Save tail data
51
+ *
52
+ * @param t : the tail data
53
+ *
54
+ * @return 0 on success, non-zero on failure
55
+ *
56
+ * If @a t data was opened for writing, save all pending changes to file.
57
+ */
58
+ int tail_save (Tail *t);
59
+
60
+
61
+ /**
62
+ * @brief Get suffix
63
+ *
64
+ * @param t : the tail data
65
+ * @param index : the index of the suffix
66
+ *
67
+ * @return the indexed suffix, or NULL if @a index is out of range.
68
+ *
69
+ * Get suffix from tail with given @a index. The returned string is owned by
70
+ * the tail data; the caller must not modify or free it.
71
+ */
72
+ const TrieChar * tail_get_suffix (const Tail *t, TrieIndex index);
73
+
74
+ /**
75
+ * @brief Set suffix of existing entry
76
+ *
77
+ * @param t : the tail data
78
+ * @param index : the index of the suffix
79
+ * @param suffix : the new suffix
80
+ *
81
+ * Set suffix of existing entry of given @a index in tail.
82
+ */
83
+ Bool tail_set_suffix (Tail *t, TrieIndex index, const TrieChar *suffix);
84
+
85
+ /**
86
+ * @brief Add a new suffix
87
+ *
88
+ * @param t : the tail data
89
+ * @param suffix : the new suffix
90
+ *
91
+ * @return the index of the newly added suffix.
92
+ *
93
+ * Add a new suffix entry to tail.
94
+ */
95
+ TrieIndex tail_add_suffix (Tail *t, const TrieChar *suffix);
96
+
97
+ /**
98
+ * @brief Get data associated to suffix entry
99
+ *
100
+ * @param t : the tail data
101
+ * @param index : the index of the suffix
102
+ *
103
+ * @return the data associated to the suffix entry
104
+ *
105
+ * Get data associated to suffix entry @a index in tail data.
106
+ */
107
+ TrieData tail_get_data (Tail *t, TrieIndex index);
108
+
109
+ /**
110
+ * @brief Set data associated to suffix entry
111
+ *
112
+ * @param t : the tail data
113
+ * @param index : the index of the suffix
114
+ * @param data : the data to set
115
+ *
116
+ * @return boolean indicating success
117
+ *
118
+ * Set data associated to suffix entry @a index in tail data.
119
+ */
120
+ Bool tail_set_data (Tail *t, TrieIndex index, TrieData data);
121
+
122
+ /**
123
+ * @brief Delete suffix entry
124
+ *
125
+ * @param t : the tail data
126
+ * @param index : the index of the suffix to delete
127
+ *
128
+ * Delete suffix entry from the tail data.
129
+ */
130
+ void tail_delete (Tail *t, TrieIndex index);
131
+
132
+ /**
133
+ * @brief Walk in tail with a string
134
+ *
135
+ * @param t : the tail data
136
+ * @param s : the tail data index
137
+ * @param suffix_idx : pointer to current character index in suffix
138
+ * @param str : the string to use in walking
139
+ * @param len : total characters in @a str to walk
140
+ *
141
+ * @return total number of characters successfully walked
142
+ *
143
+ * Walk in the tail data @a t at entry @a s, from given character position
144
+ * @a *suffix_idx, using @a len characters of given string @a str. On return,
145
+ * @a *suffix_idx is updated to the position after the last successful walk,
146
+ * and the function returns the total number of characters successfully walked.
147
+ */
148
+ int tail_walk_str (Tail *t,
149
+ TrieIndex s,
150
+ short *suffix_idx,
151
+ const TrieChar *str,
152
+ int len);
153
+
154
+ /**
155
+ * @brief Walk in tail with a character
156
+ *
157
+ * @param t : the tail data
158
+ * @param s : the tail data index
159
+ * @param suffix_idx : pointer to current character index in suffix
160
+ * @param c : the character to use in walking
161
+ *
162
+ * @return boolean indicating success
163
+ *
164
+ * Walk in the tail data @a t at entry @a s, from given character position
165
+ * @a *suffix_idx, using given character @a c. If the walk is successful,
166
+ * it returns TRUE, and @a *suffix_idx is updated to the next character.
167
+ * Otherwise, it returns FALSE, and @a *suffix_idx is left unchanged.
168
+ */
169
+ Bool tail_walk_char (Tail *t,
170
+ TrieIndex s,
171
+ short *suffix_idx,
172
+ TrieChar c);
173
+
174
+ /**
175
+ * @brief Test walkability in tail with a character
176
+ *
177
+ * @param t : the tail data
178
+ * @param s : the tail data index
179
+ * @param suffix_idx : current character index in suffix
180
+ * @param c : the character to test walkability
181
+ *
182
+ * @return boolean indicating walkability
183
+ *
184
+ * Test if the character @a c can be used to walk from given character
185
+ * position @a suffix_idx of entry @a s of the tail data @a t.
186
+ */
187
+ /*
188
+ Bool tail_is_walkable_char (Tail *t,
189
+ TrieIndex s,
190
+ short suffix_idx,
191
+ const TrieChar c);
192
+ */
193
/* Implemented as a macro.  The suffix returned by tail_get_suffix() is
 * indexed without a NULL check, so @a s must refer to an entry that has a
 * suffix.
 */
#define tail_is_walkable_char(t,s,suffix_idx,c) \
    (tail_get_suffix ((t), (s)) [(suffix_idx)] == (c))
195
+
196
+ #endif /* __TAIL_H */
197
+
198
+ /*
199
+ vi:ts=4:ai:expandtab
200
+ */