tyler-trie 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/VERSION.yml +4 -0
  2. data/ext/libdatrie/AUTHORS +1 -0
  3. data/ext/libdatrie/COPYING +510 -0
  4. data/ext/libdatrie/ChangeLog +410 -0
  5. data/ext/libdatrie/INSTALL +236 -0
  6. data/ext/libdatrie/Makefile.am +5 -0
  7. data/ext/libdatrie/Makefile.in +661 -0
  8. data/ext/libdatrie/NEWS +27 -0
  9. data/ext/libdatrie/README +32 -0
  10. data/ext/libdatrie/aclocal.m4 +7431 -0
  11. data/ext/libdatrie/config.guess +1516 -0
  12. data/ext/libdatrie/config.h.in +74 -0
  13. data/ext/libdatrie/config.sub +1626 -0
  14. data/ext/libdatrie/configure +22008 -0
  15. data/ext/libdatrie/configure.ac +71 -0
  16. data/ext/libdatrie/datrie.pc.in +11 -0
  17. data/ext/libdatrie/datrie/Makefile.am +35 -0
  18. data/ext/libdatrie/datrie/Makefile.in +522 -0
  19. data/ext/libdatrie/datrie/alpha-map.c +170 -0
  20. data/ext/libdatrie/datrie/alpha-map.h +36 -0
  21. data/ext/libdatrie/datrie/darray.c +674 -0
  22. data/ext/libdatrie/datrie/darray.h +229 -0
  23. data/ext/libdatrie/datrie/fileutils.c +151 -0
  24. data/ext/libdatrie/datrie/fileutils.h +36 -0
  25. data/ext/libdatrie/datrie/libdatrie.def +31 -0
  26. data/ext/libdatrie/datrie/sb-trie.c +331 -0
  27. data/ext/libdatrie/datrie/sb-trie.h +279 -0
  28. data/ext/libdatrie/datrie/tail.c +344 -0
  29. data/ext/libdatrie/datrie/tail.h +200 -0
  30. data/ext/libdatrie/datrie/trie-private.h +31 -0
  31. data/ext/libdatrie/datrie/trie.c +413 -0
  32. data/ext/libdatrie/datrie/trie.h +270 -0
  33. data/ext/libdatrie/datrie/triedefs.h +63 -0
  34. data/ext/libdatrie/datrie/typedefs.h +113 -0
  35. data/ext/libdatrie/depcomp +530 -0
  36. data/ext/libdatrie/doc/Doxyfile.in +244 -0
  37. data/ext/libdatrie/doc/Makefile.am +29 -0
  38. data/ext/libdatrie/doc/Makefile.in +352 -0
  39. data/ext/libdatrie/install-sh +323 -0
  40. data/ext/libdatrie/ltmain.sh +6938 -0
  41. data/ext/libdatrie/man/Makefile.am +4 -0
  42. data/ext/libdatrie/man/Makefile.in +381 -0
  43. data/ext/libdatrie/man/trietool.1 +107 -0
  44. data/ext/libdatrie/missing +360 -0
  45. data/ext/libdatrie/tools/Makefile.am +7 -0
  46. data/ext/libdatrie/tools/Makefile.in +460 -0
  47. data/ext/libdatrie/tools/trietool.c +308 -0
  48. data/ext/trie/extconf.rb +12 -0
  49. data/ext/trie/trie.c +174 -0
  50. data/lib/trie.rb +1 -0
  51. data/spec/test-trie/README +1 -0
  52. data/spec/trie_spec.rb +79 -0
  53. metadata +139 -0
@@ -0,0 +1,344 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
+ /*
3
+ * tail.c - trie tail for keeping suffixes
4
+ * Created: 2006-08-15
5
+ * Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
6
+ */
7
+
8
+ #include <string.h>
9
+ #include <stdlib.h>
10
+ #include <stdio.h>
11
+
12
+ #include "tail.h"
13
+ #include "fileutils.h"
14
+
15
+ /*----------------------------------*
16
+ * INTERNAL TYPES DECLARATIONS *
17
+ *----------------------------------*/
18
+
19
+ /*-----------------------------------*
20
+ * PRIVATE METHODS DECLARATIONS *
21
+ *-----------------------------------*/
22
+
23
+ static TrieIndex tail_alloc_block (Tail *t);
24
+ static void tail_free_block (Tail *t, TrieIndex block);
25
+
26
+ /* ==================== BEGIN IMPLEMENTATION PART ==================== */
27
+
28
+ /*------------------------------------*
29
+ * INTERNAL TYPES IMPLEMENTATIONS *
30
+ *------------------------------------*/
31
+
32
+ /*------------------------------*
33
+ * PRIVATE DATA DEFINITIONS *
34
+ *------------------------------*/
35
+
36
+ typedef struct {
37
+ TrieIndex next_free;
38
+ TrieData data;
39
+ TrieChar *suffix;
40
+ } TailBlock;
41
+
42
+ struct _Tail {
43
+ TrieIndex num_tails;
44
+ TailBlock *tails;
45
+ TrieIndex first_free;
46
+
47
+ FILE *file;
48
+ Bool is_dirty;
49
+ };
50
+
51
+ /*-----------------------------*
52
+ * METHODS IMPLEMENTATIONS *
53
+ *-----------------------------*/
54
+
55
+ #define TAIL_SIGNATURE 0xDFFD
56
+ #define TAIL_START_BLOCKNO 1
57
+
58
+ Tail *
59
+ tail_open (const char *path, const char *name, TrieIOMode mode)
60
+ {
61
+ Tail *t;
62
+ TrieIndex i;
63
+ uint16 sig;
64
+ long file_size;
65
+
66
+ t = (Tail *) malloc (sizeof (Tail));
67
+
68
+ t->file = file_open (path, name, ".tl", mode);
69
+ if (!t->file)
70
+ goto exit1;
71
+
72
+ file_size = file_length (t->file);
73
+ if (file_size != 0 && file_read_int16 (t->file, (int16 *) &sig)
74
+ && sig != TAIL_SIGNATURE)
75
+ {
76
+ goto exit2;
77
+ }
78
+
79
+ /* init tails data */
80
+ if (file_size == 0) {
81
+ t->first_free = 0;
82
+ t->num_tails = 0;
83
+ t->tails = NULL;
84
+ t->is_dirty = TRUE;
85
+ } else {
86
+ file_read_int16 (t->file, &t->first_free);
87
+ file_read_int16 (t->file, &t->num_tails);
88
+ t->tails = (TailBlock *) malloc (t->num_tails * sizeof (TailBlock));
89
+ if (!t->tails)
90
+ goto exit2;
91
+ for (i = 0; i < t->num_tails; i++) {
92
+ int8 length;
93
+
94
+ file_read_int16 (t->file, &t->tails[i].next_free);
95
+ file_read_int16 (t->file, &t->tails[i].data);
96
+
97
+ file_read_int8 (t->file, &length);
98
+ t->tails[i].suffix = (TrieChar *) malloc (length + 1);
99
+ if (length > 0)
100
+ file_read_chars (t->file, (char *)t->tails[i].suffix, length);
101
+ t->tails[i].suffix[length] = '\0';
102
+ }
103
+ t->is_dirty = FALSE;
104
+ }
105
+
106
+ return t;
107
+
108
+ exit2:
109
+ fclose (t->file);
110
+ exit1:
111
+ free (t);
112
+ return NULL;
113
+ }
114
+
115
+ int
116
+ tail_close (Tail *t)
117
+ {
118
+ int ret;
119
+ TrieIndex i;
120
+
121
+ if (0 != (ret = tail_save (t)))
122
+ return ret;
123
+ if (0 != (ret = fclose (t->file)))
124
+ return ret;
125
+ if (t->tails) {
126
+ for (i = 0; i < t->num_tails; i++)
127
+ if (t->tails[i].suffix)
128
+ free (t->tails[i].suffix);
129
+ free (t->tails);
130
+ }
131
+ free (t);
132
+
133
+ return 0;
134
+ }
135
+
136
+ int
137
+ tail_save (Tail *t)
138
+ {
139
+ TrieIndex i;
140
+
141
+ if (!t->is_dirty)
142
+ return 0;
143
+
144
+ rewind (t->file);
145
+ if (!file_write_int16 (t->file, TAIL_SIGNATURE) ||
146
+ !file_write_int16 (t->file, t->first_free) ||
147
+ !file_write_int16 (t->file, t->num_tails))
148
+ {
149
+ return -1;
150
+ }
151
+ for (i = 0; i < t->num_tails; i++) {
152
+ int8 length;
153
+
154
+ if (!file_write_int16 (t->file, t->tails[i].next_free) ||
155
+ !file_write_int16 (t->file, t->tails[i].data))
156
+ {
157
+ return -1;
158
+ }
159
+
160
+ length = t->tails[i].suffix ? strlen ((const char *)t->tails[i].suffix)
161
+ : 0;
162
+ if (!file_write_int8 (t->file, length))
163
+ return -1;
164
+ if (length > 0 &&
165
+ !file_write_chars (t->file, (char *)t->tails[i].suffix, length))
166
+ {
167
+ return -1;
168
+ }
169
+ }
170
+ t->is_dirty = FALSE;
171
+
172
+ return 0;
173
+ }
174
+
175
+
176
+ const TrieChar *
177
+ tail_get_suffix (const Tail *t, TrieIndex index)
178
+ {
179
+ index -= TAIL_START_BLOCKNO;
180
+ return (index < t->num_tails) ? t->tails[index].suffix : NULL;
181
+ }
182
+
183
+ Bool
184
+ tail_set_suffix (Tail *t, TrieIndex index, const TrieChar *suffix)
185
+ {
186
+ index -= TAIL_START_BLOCKNO;
187
+ if (index < t->num_tails) {
188
+ /* suffix and t->tails[index].suffix may overlap;
189
+ * so, dup it before it's overwritten
190
+ */
191
+ TrieChar *tmp = NULL;
192
+ if (suffix)
193
+ tmp = strdup (suffix);
194
+ if (t->tails[index].suffix)
195
+ free (t->tails[index].suffix);
196
+ t->tails[index].suffix = tmp;
197
+
198
+ t->is_dirty = TRUE;
199
+ return TRUE;
200
+ }
201
+ return FALSE;
202
+ }
203
+
204
+ TrieIndex
205
+ tail_add_suffix (Tail *t, const TrieChar *suffix)
206
+ {
207
+ TrieIndex new_block;
208
+
209
+ new_block = tail_alloc_block (t);
210
+ tail_set_suffix (t, new_block, suffix);
211
+
212
+ return new_block;
213
+ }
214
+
215
+ static TrieIndex
216
+ tail_alloc_block (Tail *t)
217
+ {
218
+ TrieIndex block;
219
+
220
+ if (0 != t->first_free) {
221
+ block = t->first_free;
222
+ t->first_free = t->tails[block].next_free;
223
+ } else {
224
+ block = t->num_tails;
225
+ t->tails = (TailBlock *) realloc (t->tails,
226
+ ++t->num_tails * sizeof (TailBlock));
227
+ }
228
+ t->tails[block].next_free = -1;
229
+ t->tails[block].data = TRIE_DATA_ERROR;
230
+ t->tails[block].suffix = NULL;
231
+
232
+ return block + TAIL_START_BLOCKNO;
233
+ }
234
+
235
+ static void
236
+ tail_free_block (Tail *t, TrieIndex block)
237
+ {
238
+ TrieIndex i, j;
239
+
240
+ block -= TAIL_START_BLOCKNO;
241
+
242
+ if (block >= t->num_tails)
243
+ return;
244
+
245
+ t->tails[block].data = TRIE_DATA_ERROR;
246
+ if (NULL != t->tails[block].suffix) {
247
+ free (t->tails[block].suffix);
248
+ t->tails[block].suffix = NULL;
249
+ }
250
+
251
+ /* find insertion point */
252
+ j = 0;
253
+ for (i = t->first_free; i != 0 && i < block; i = t->tails[i].next_free)
254
+ j = i;
255
+
256
+ /* insert free block between j and i */
257
+ t->tails[block].next_free = i;
258
+ if (0 != j)
259
+ t->tails[j].next_free = block;
260
+ else
261
+ t->first_free = block;
262
+
263
+ t->is_dirty = TRUE;
264
+ }
265
+
266
+ TrieData
267
+ tail_get_data (Tail *t, TrieIndex index)
268
+ {
269
+ index -= TAIL_START_BLOCKNO;
270
+ return (index < t->num_tails) ? t->tails[index].data : TRIE_DATA_ERROR;
271
+ }
272
+
273
+ Bool
274
+ tail_set_data (Tail *t, TrieIndex index, TrieData data)
275
+ {
276
+ index -= TAIL_START_BLOCKNO;
277
+ if (index < t->num_tails) {
278
+ t->tails[index].data = data;
279
+ t->is_dirty = TRUE;
280
+ return TRUE;
281
+ }
282
+ return FALSE;
283
+ }
284
+
285
+ void
286
+ tail_delete (Tail *t, TrieIndex index)
287
+ {
288
+ tail_free_block (t, index);
289
+ }
290
+
291
+ int
292
+ tail_walk_str (Tail *t,
293
+ TrieIndex s,
294
+ short *suffix_idx,
295
+ const TrieChar *str,
296
+ int len)
297
+ {
298
+ const TrieChar *suffix;
299
+ int i;
300
+ short j;
301
+
302
+ suffix = tail_get_suffix (t, s);
303
+ if (!suffix)
304
+ return FALSE;
305
+
306
+ i = 0; j = *suffix_idx;
307
+ while (i < len) {
308
+ if (str[i] != suffix[j])
309
+ break;
310
+ ++i;
311
+ /* stop and stay at null-terminator */
312
+ if (0 == suffix[j])
313
+ break;
314
+ ++j;
315
+ }
316
+ *suffix_idx = j;
317
+ return i;
318
+ }
319
+
320
+ Bool
321
+ tail_walk_char (Tail *t,
322
+ TrieIndex s,
323
+ short *suffix_idx,
324
+ TrieChar c)
325
+ {
326
+ const TrieChar *suffix;
327
+ TrieChar suffix_char;
328
+
329
+ suffix = tail_get_suffix (t, s);
330
+ if (!suffix)
331
+ return FALSE;
332
+
333
+ suffix_char = suffix[*suffix_idx];
334
+ if (suffix_char == c) {
335
+ if (0 != suffix_char)
336
+ ++*suffix_idx;
337
+ return TRUE;
338
+ }
339
+ return FALSE;
340
+ }
341
+
342
+ /*
343
+ vi:ts=4:ai:expandtab
344
+ */
@@ -0,0 +1,200 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
+ /*
3
+ * tail.h - trie tail for keeping suffixes
4
+ * Created: 2006-08-12
5
+ * Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
6
+ */
7
+
8
+ #ifndef __TAIL_H
9
+ #define __TAIL_H
10
+
11
+ #include "triedefs.h"
12
+
13
+ /**
14
+ * @file tail.h
15
+ * @brief trie tail for keeping suffixes
16
+ */
17
+
18
+ /**
19
+ * @brief Trie tail structure type
20
+ */
21
+ typedef struct _Tail Tail;
22
+
23
+ /**
24
+ * @brief Open tail data from file
25
+ *
26
+ * @param path : the path that stores the tail files
27
+ * @param name : the name of the tail data (not actual file name)
28
+ * @param mode : opening mode, read or write
29
+ *
30
+ * @return a pointer to the opened tail data, NULL on failure
31
+ *
32
+ * Open a tail data of given name. Note that @a name here does not mean the
33
+ * actual file name. Rather, the file name will be inferred by the name.
34
+ */
35
+ Tail * tail_open (const char *path, const char *name, TrieIOMode mode);
36
+
37
+ /**
38
+ * @brief Close tail data
39
+ *
40
+ * @param t : the tail data
41
+ *
42
+ * @return 0 on success, non-zero on failure
43
+ *
44
+ * Close the given tail data. If @a t was opened for writing, all pending
45
+ * changes will be saved to file.
46
+ */
47
+ int tail_close (Tail *t);
48
+
49
+ /**
50
+ * @brief Save tail data
51
+ *
52
+ * @param t : the tail data
53
+ *
54
+ * @return 0 on success, non-zero on failure
55
+ *
56
+ * If @a t data was opened for writing, save all pending changes to file.
57
+ */
58
+ int tail_save (Tail *t);
59
+
60
+
61
+ /**
62
+ * @brief Get suffix
63
+ *
64
+ * @param t : the tail data
65
+ * @param index : the index of the suffix
66
+ *
67
+ * @return the indexed suffix, or NULL if @a index is out of range.
68
+ *
69
+ * Get suffix from tail with given @a index. The returned string is owned by
69
+ * the tail data; the caller must not modify or free it.
71
+ */
72
+ const TrieChar * tail_get_suffix (const Tail *t, TrieIndex index);
73
+
74
+ /**
75
+ * @brief Set suffix of existing entry
76
+ *
77
+ * @param t : the tail data
78
+ * @param index : the index of the suffix
79
+ * @param suffix : the new suffix
80
+ *
81
+ * Set suffix of existing entry of given @a index in tail.
82
+ */
83
+ Bool tail_set_suffix (Tail *t, TrieIndex index, const TrieChar *suffix);
84
+
85
+ /**
86
+ * @brief Add a new suffix
87
+ *
88
+ * @param t : the tail data
89
+ * @param suffix : the new suffix
90
+ *
91
+ * @return the index of the newly added suffix.
92
+ *
93
+ * Add a new suffix entry to tail.
94
+ */
95
+ TrieIndex tail_add_suffix (Tail *t, const TrieChar *suffix);
96
+
97
+ /**
98
+ * @brief Get data associated to suffix entry
99
+ *
100
+ * @param t : the tail data
101
+ * @param index : the index of the suffix
102
+ *
103
+ * @return the data associated to the suffix entry
104
+ *
105
+ * Get data associated to suffix entry @a index in tail data.
106
+ */
107
+ TrieData tail_get_data (Tail *t, TrieIndex index);
108
+
109
+ /**
110
+ * @brief Set data associated to suffix entry
111
+ *
112
+ * @param t : the tail data
113
+ * @param index : the index of the suffix
114
+ * @param data : the data to set
115
+ *
116
+ * @return boolean indicating success
117
+ *
118
+ * Set data associated to suffix entry @a index in tail data.
119
+ */
120
+ Bool tail_set_data (Tail *t, TrieIndex index, TrieData data);
121
+
122
+ /**
123
+ * @brief Delete suffix entry
124
+ *
125
+ * @param t : the tail data
126
+ * @param index : the index of the suffix to delete
127
+ *
128
+ * Delete suffix entry from the tail data.
129
+ */
130
+ void tail_delete (Tail *t, TrieIndex index);
131
+
132
+ /**
133
+ * @brief Walk in tail with a string
134
+ *
135
+ * @param t : the tail data
136
+ * @param s : the tail data index
137
+ * @param suffix_idx : pointer to current character index in suffix
138
+ * @param str : the string to use in walking
139
+ * @param len : total characters in @a str to walk
140
+ *
141
+ * @return total number of characters successfully walked
142
+ *
143
+ * Walk in the tail data @a t at entry @a s, from given character position
144
+ * @a *suffix_idx, using @a len characters of given string @a str. On return,
145
+ * @a *suffix_idx is updated to the position after the last successful walk,
146
+ * and the function returns the total number of characters successfully walked.
147
+ */
148
+ int tail_walk_str (Tail *t,
149
+ TrieIndex s,
150
+ short *suffix_idx,
151
+ const TrieChar *str,
152
+ int len);
153
+
154
+ /**
155
+ * @brief Walk in tail with a character
156
+ *
157
+ * @param t : the tail data
158
+ * @param s : the tail data index
159
+ * @param suffix_idx : pointer to current character index in suffix
160
+ * @param c : the character to use in walking
161
+ *
162
+ * @return boolean indicating success
163
+ *
164
+ * Walk in the tail data @a t at entry @a s, from given character position
165
+ * @a *suffix_idx, using given character @a c. If the walk is successful,
166
+ * it returns TRUE, and @a *suffix_idx is updated to the next character.
167
+ * Otherwise, it returns FALSE, and @a *suffix_idx is left unchanged.
168
+ */
169
+ Bool tail_walk_char (Tail *t,
170
+ TrieIndex s,
171
+ short *suffix_idx,
172
+ TrieChar c);
173
+
174
+ /**
175
+ * @brief Test walkability in tail with a character
176
+ *
177
+ * @param t : the tail data
178
+ * @param s : the tail data index
179
+ * @param suffix_idx : current character index in suffix
180
+ * @param c : the character to test walkability
181
+ *
182
+ * @return boolean indicating walkability
183
+ *
184
+ * Test if the character @a c can be used to walk from given character
185
+ * position @a suffix_idx of entry @a s of the tail data @a t.
186
+ */
187
+ /*
188
+ Bool tail_is_walkable_char (Tail *t,
189
+ TrieIndex s,
190
+ short suffix_idx,
191
+ const TrieChar c);
192
+ */
193
+ #define tail_is_walkable_char(t,s,suffix_idx,c) \
194
+ (tail_get_suffix ((t), (s)) [suffix_idx] == (c))
195
+
196
+ #endif /* __TAIL_H */
197
+
198
+ /*
199
+ vi:ts=4:ai:expandtab
200
+ */