middlemac 3.1.0 → 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/ext/trie/tail.h ADDED
@@ -0,0 +1,207 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
+ /*
3
+ * tail.h - trie tail for keeping suffixes
4
+ * Created: 2006-08-12
5
+ * Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
6
+ */
7
+
8
+ #ifndef __TAIL_H
9
+ #define __TAIL_H
10
+
11
+ #include "triedefs.h"
12
+
13
+ /**
14
+ * @file tail.h
15
+ * @brief trie tail for keeping suffixes
16
+ */
17
+
18
+ /**
19
+ * @brief Tail structure type
20
+ */
21
+ typedef struct _Tail Tail;
22
+
23
+ /**
24
+ * @brief Create a new tail object
25
+ *
26
+ * Create a new empty tail object.
27
+ */
28
+ Tail * tail_new ();
29
+
30
+ /**
31
+ * @brief Read tail data from file
32
+ *
33
+ * @param file : the file to read
34
+ *
35
+ * @return a pointer to the opened tail data, NULL on failure
36
+ *
37
+ * Read tail data from the opened file, starting from the current
38
+ * file pointer until the end of tail data block. On return, the
39
+ * file pointer is left at the position after the read block.
40
+ */
41
+ Tail * tail_read (FILE *file);
42
+
43
+ /**
44
+ * @brief Free tail data
45
+ *
46
+ * @param t : the tail data
47
+ *
48
+ * This function returns no value.
49
+ *
50
+ * Free the given tail data.
51
+ */
52
+ void tail_free (Tail *t);
53
+
54
+ /**
55
+ * @brief Write tail data
56
+ *
57
+ * @param t : the tail data
58
+ * @param file : the file to write to
59
+ *
60
+ * @return 0 on success, non-zero on failure
61
+ *
62
+ * Write tail data to the given @a file, starting from the current file
63
+ * pointer. On return, the file pointer is left after the tail data block.
64
+ */
65
+ int tail_write (const Tail *t, FILE *file);
66
+
67
+
68
+ /**
69
+ * @brief Get suffix
70
+ *
71
+ * @param t : the tail data
72
+ * @param index : the index of the suffix
73
+ *
74
+ * @return an allocated string of the indexed suffix.
75
+ *
76
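+ * Get suffix from tail with given @a index. The returned string is documented
77
+ * as allocated and to be freed by the caller with free(). NOTE(review): the return type is const and callers in this code use the result without freeing it -- confirm ownership before calling free().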
78
+ */
79
+ const TrieChar * tail_get_suffix (const Tail *t, TrieIndex index);
80
+
81
+ /**
82
+ * @brief Set suffix of existing entry
83
+ *
84
+ * @param t : the tail data
85
+ * @param index : the index of the suffix
86
+ * @param suffix : the new suffix
87
+ *
88
+ * Set suffix of existing entry of given @a index in tail.
89
+ */
90
+ Bool tail_set_suffix (Tail *t, TrieIndex index, const TrieChar *suffix);
91
+
92
+ /**
93
+ * @brief Add a new suffix
94
+ *
95
+ * @param t : the tail data
96
+ * @param suffix : the new suffix
97
+ *
98
+ * @return the index of the newly added suffix.
99
+ *
100
+ * Add a new suffix entry to tail.
101
+ */
102
+ TrieIndex tail_add_suffix (Tail *t, const TrieChar *suffix);
103
+
104
+ /**
105
+ * @brief Get data associated to suffix entry
106
+ *
107
+ * @param t : the tail data
108
+ * @param index : the index of the suffix
109
+ *
110
+ * @return the data associated to the suffix entry
111
+ *
112
+ * Get data associated to suffix entry @a index in tail data.
113
+ */
114
+ TrieData tail_get_data (const Tail *t, TrieIndex index);
115
+
116
+ /**
117
+ * @brief Set data associated to suffix entry
118
+ *
119
+ * @param t : the tail data
120
+ * @param index : the index of the suffix
121
+ * @param data : the data to set
122
+ *
123
+ * @return boolean indicating success
124
+ *
125
+ * Set data associated to suffix entry @a index in tail data.
126
+ */
127
+ Bool tail_set_data (Tail *t, TrieIndex index, TrieData data);
128
+
129
+ /**
130
+ * @brief Delete suffix entry
131
+ *
132
+ * @param t : the tail data
133
+ * @param index : the index of the suffix to delete
134
+ *
135
+ * Delete suffix entry from the tail data.
136
+ */
137
+ void tail_delete (Tail *t, TrieIndex index);
138
+
139
+ /**
140
+ * @brief Walk in tail with a string
141
+ *
142
+ * @param t : the tail data
143
+ * @param s : the tail data index
144
+ * @param suffix_idx : pointer to current character index in suffix
145
+ * @param str : the string to use in walking
146
+ * @param len : total characters in @a str to walk
147
+ *
148
+ * @return total number of characters successfully walked
149
+ *
150
+ * Walk in the tail data @a t at entry @a s, from given character position
151
+ * @a *suffix_idx, using @a len characters of given string @a str. On return,
152
+ * @a *suffix_idx is updated to the position after the last successful walk,
153
+ * and the function returns the total number of characters successfully walked.
154
+ */
155
+ int tail_walk_str (const Tail *t,
156
+ TrieIndex s,
157
+ short *suffix_idx,
158
+ const TrieChar *str,
159
+ int len);
160
+
161
+ /**
162
+ * @brief Walk in tail with a character
163
+ *
164
+ * @param t : the tail data
165
+ * @param s : the tail data index
166
+ * @param suffix_idx : pointer to current character index in suffix
167
+ * @param c : the character to use in walking
168
+ *
169
+ * @return boolean indicating success
170
+ *
171
+ * Walk in the tail data @a t at entry @a s, from given character position
172
+ * @a *suffix_idx, using given character @a c. If the walk is successful,
173
+ * it returns TRUE, and @a *suffix_idx is updated to the next character.
174
+ * Otherwise, it returns FALSE, and @a *suffix_idx is left unchanged.
175
+ */
176
+ Bool tail_walk_char (const Tail *t,
177
+ TrieIndex s,
178
+ short *suffix_idx,
179
+ TrieChar c);
180
+
181
+ /**
182
+ * @brief Test walkability in tail with a character
183
+ *
184
+ * @param t : the tail data
185
+ * @param s : the tail data index
186
+ * @param suffix_idx : current character index in suffix
187
+ * @param c : the character to test walkability
188
+ *
189
+ * @return boolean indicating walkability
190
+ *
191
+ * Test if the character @a c can be used to walk from given character
192
+ * position @a suffix_idx of entry @a s of the tail data @a t.
193
+ */
194
+ /*
195
+ Bool tail_is_walkable_char (Tail *t,
196
+ TrieIndex s,
197
+ short suffix_idx,
198
+ const TrieChar c);
199
+ */
200
/*
 * Expands to a comparison of character number suffix_idx of the suffix
 * returned by tail_get_suffix (t, s) against c.
 * NOTE(review): the result of tail_get_suffix() is indexed without a NULL
 * check -- confirm callers only pass valid entries.
 */
#define tail_is_walkable_char(t, s, suffix_idx, c) \
    (tail_get_suffix ((t), (s)) [(suffix_idx)] == (c))
202
+
203
+ #endif /* __TAIL_H */
204
+
205
+ /*
206
+ vi:ts=4:ai:expandtab
207
+ */
@@ -0,0 +1,299 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include "darray.h"
5
+ #include "tail.h"
6
+ #include "trie.h"
7
+
8
+ Trie* trie_new() {
9
+ Trie *trie = (Trie*) malloc(sizeof(Trie));
10
+ trie->da = da_new();
11
+ trie->tail = tail_new();
12
+ return trie;
13
+ }
14
+
15
+ void trie_free(Trie *trie) {
16
+ da_free(trie->da);
17
+ tail_free(trie->tail);
18
+ free(trie);
19
+ }
20
+
21
+ static Bool trie_branch_in_branch (Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data) {
22
+ TrieIndex new_da, new_tail;
23
+
24
+ new_da = da_insert_branch (trie->da, sep_node, *suffix);
25
+ if (TRIE_INDEX_ERROR == new_da)
26
+ return FALSE;
27
+
28
+ if ('\0' != *suffix)
29
+ ++suffix;
30
+
31
+ new_tail = tail_add_suffix (trie->tail, suffix);
32
+ tail_set_data (trie->tail, new_tail, data);
33
+ trie_da_set_tail_index (trie->da, new_da, new_tail);
34
+
35
+ // trie->is_dirty = TRUE;
36
+ return TRUE;
37
+ }
38
+
39
+ static Bool trie_branch_in_tail(Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data) {
40
+ TrieIndex old_tail, old_da, s;
41
+ const TrieChar *old_suffix, *p;
42
+
43
+ /* adjust separate point in old path */
44
+ old_tail = trie_da_get_tail_index (trie->da, sep_node);
45
+ old_suffix = tail_get_suffix (trie->tail, old_tail);
46
+ if (!old_suffix)
47
+ return FALSE;
48
+
49
+ for (p = old_suffix, s = sep_node; *p == *suffix; p++, suffix++) {
50
+ TrieIndex t = da_insert_branch (trie->da, s, *p);
51
+ if (TRIE_INDEX_ERROR == t)
52
+ goto fail;
53
+ s = t;
54
+ }
55
+
56
+ old_da = da_insert_branch (trie->da, s, *p);
57
+ if (TRIE_INDEX_ERROR == old_da)
58
+ goto fail;
59
+
60
+ if ('\0' != *p)
61
+ ++p;
62
+ tail_set_suffix (trie->tail, old_tail, p);
63
+ trie_da_set_tail_index (trie->da, old_da, old_tail);
64
+
65
+ /* insert the new branch at the new separate point */
66
+ return trie_branch_in_branch (trie, s, suffix, data);
67
+
68
+ fail:
69
+ /* failed, undo previous insertions and return error */
70
+ da_prune_upto (trie->da, sep_node, s);
71
+ trie_da_set_tail_index (trie->da, sep_node, old_tail);
72
+ return FALSE;
73
+ }
74
+
75
+ Bool trie_store (Trie *trie, const TrieChar *key, TrieData data) {
76
+ TrieIndex s, t;
77
+ short suffix_idx;
78
+ const TrieChar *p, *sep;
79
+ size_t len;
80
+
81
+ /* walk through branches */
82
+ s = da_get_root (trie->da);
83
+ for (p = key; !trie_da_is_separate (trie->da, s); p++) {
84
+ if (!da_walk (trie->da, &s, *p))
85
+ return trie_branch_in_branch (trie, s, p, data);
86
+ if (0 == *p)
87
+ break;
88
+ }
89
+
90
+ /* walk through tail */
91
+ sep = p;
92
+ t = trie_da_get_tail_index (trie->da, s);
93
+ suffix_idx = 0;
94
+ len = strlen ((const char *) p) + 1; /* including null-terminator */
95
+ if (tail_walk_str (trie->tail, t, &suffix_idx, p, len) != len)
96
+ return trie_branch_in_tail (trie, s, p, data);
97
+
98
+ /* duplicated key, overwrite val */
99
+ tail_set_data (trie->tail, t, data);
100
+ // trie->is_dirty = TRUE;
101
+ return TRUE;
102
+ }
103
+
104
+
105
+ Bool trie_has_key (const Trie *trie, const TrieChar *key) {
106
+ TrieIndex s;
107
+ short suffix_idx;
108
+ const TrieChar *p;
109
+
110
+ /* walk through branches */
111
+ s = da_get_root (trie->da);
112
+ for (p = key; !trie_da_is_separate (trie->da, s); p++) {
113
+ if (!da_walk (trie->da, &s, *p))
114
+ return FALSE;
115
+ if (0 == *p)
116
+ break;
117
+ }
118
+
119
+ /* walk through tail */
120
+ s = trie_da_get_tail_index (trie->da, s);
121
+ suffix_idx = 0;
122
+ for ( ; ; p++) {
123
+ if (!tail_walk_char (trie->tail, s, &suffix_idx, *p))
124
+ return FALSE;
125
+ if (0 == *p)
126
+ break;
127
+ }
128
+
129
+ return TRUE;
130
+ }
131
+
132
+
133
+ Bool trie_retrieve (const Trie *trie, const TrieChar *key, TrieData *o_data) {
134
+ TrieIndex s;
135
+ short suffix_idx;
136
+ const TrieChar *p;
137
+
138
+ /* walk through branches */
139
+ s = da_get_root (trie->da);
140
+ for (p = key; !trie_da_is_separate (trie->da, s); p++) {
141
+ if (!da_walk (trie->da, &s, *p))
142
+ return FALSE;
143
+ if (0 == *p)
144
+ break;
145
+ }
146
+
147
+ /* walk through tail */
148
+ s = trie_da_get_tail_index (trie->da, s);
149
+ suffix_idx = 0;
150
+ for ( ; ; p++) {
151
+ if (!tail_walk_char (trie->tail, s, &suffix_idx, *p))
152
+ return FALSE;
153
+ if (0 == *p)
154
+ break;
155
+ }
156
+
157
+ /* found, set the val and return */
158
+ if (o_data)
159
+ *o_data = tail_get_data (trie->tail, s);
160
+ return TRUE;
161
+ }
162
+
163
+ Bool trie_delete (Trie *trie, const TrieChar *key) {
164
+ TrieIndex s, t;
165
+ short suffix_idx;
166
+ const TrieChar *p;
167
+
168
+ /* walk through branches */
169
+ s = da_get_root (trie->da);
170
+ for (p = key; !trie_da_is_separate (trie->da, s); p++) {
171
+ if (!da_walk (trie->da, &s, *p))
172
+ return FALSE;
173
+ if (0 == *p)
174
+ break;
175
+ }
176
+
177
+ /* walk through tail */
178
+ t = trie_da_get_tail_index (trie->da, s);
179
+ suffix_idx = 0;
180
+ for ( ; ; p++) {
181
+ if (!tail_walk_char (trie->tail, t, &suffix_idx, *p))
182
+ return FALSE;
183
+ if (0 == *p)
184
+ break;
185
+ }
186
+
187
+ tail_delete (trie->tail, t);
188
+ da_set_base (trie->da, s, TRIE_INDEX_ERROR);
189
+ da_prune (trie->da, s);
190
+
191
+ //trie->is_dirty = TRUE;
192
+ return TRUE;
193
+ }
194
+
195
+ /*-------------------------------*
196
+ * STEPWISE QUERY OPERATIONS *
197
+ *-------------------------------*/
198
+
199
+ TrieState * trie_root (const Trie *trie) {
200
+ return trie_state_new (trie, da_get_root (trie->da), 0, FALSE);
201
+ }
202
+
203
+ /*----------------*
204
+ * TRIE STATE *
205
+ *----------------*/
206
+
207
+ static TrieState * trie_state_new (const Trie *trie, TrieIndex index, short suffix_idx, short is_suffix) {
208
+ TrieState *s;
209
+
210
+ s = (TrieState *) malloc (sizeof (TrieState));
211
+ if (!s)
212
+ return NULL;
213
+
214
+ s->trie = trie;
215
+ s->index = index;
216
+ s->suffix_idx = suffix_idx;
217
+ s->is_suffix = is_suffix;
218
+
219
+ return s;
220
+ }
221
+
222
+ TrieState * trie_state_clone (const TrieState *s) {
223
+ return trie_state_new (s->trie, s->index, s->suffix_idx, s->is_suffix);
224
+ }
225
+
226
+ void trie_state_free (TrieState *s) {
227
+ free (s);
228
+ }
229
+
230
+ void trie_state_rewind (TrieState *s) {
231
+ s->index = da_get_root (s->trie->da);
232
+ s->is_suffix = FALSE;
233
+ }
234
+
235
+ Bool trie_state_walk (TrieState *s, TrieChar c) {
236
+ if (!s->is_suffix) {
237
+ Bool ret;
238
+
239
+ ret = da_walk (s->trie->da, &s->index, c);
240
+
241
+ if (ret && trie_da_is_separate (s->trie->da, s->index)) {
242
+ s->index = trie_da_get_tail_index (s->trie->da, s->index);
243
+ s->suffix_idx = 0;
244
+ s->is_suffix = TRUE;
245
+ }
246
+
247
+ return ret;
248
+ } else {
249
+ return tail_walk_char (s->trie->tail, s->index, &s->suffix_idx, c);
250
+ }
251
+ }
252
+
253
+ Bool trie_state_is_walkable (const TrieState *s, TrieChar c) {
254
+ if (!s->is_suffix)
255
+ return da_is_walkable (s->trie->da, s->index, c);
256
+ else
257
+ return tail_is_walkable_char (s->trie->tail, s->index, s->suffix_idx, c);
258
+ }
259
+
260
+ Bool trie_state_is_leaf (const TrieState *s) {
261
+ return s->is_suffix && trie_state_is_terminal (s);
262
+ }
263
+
264
+ TrieData trie_state_get_data (const TrieState *s) {
265
+ return s->is_suffix ? tail_get_data (s->trie->tail, s->index) : TRIE_DATA_ERROR;
266
+ }
267
+
268
+ int main(void) {
269
+ Bool res;
270
+ TrieData *data = (TrieData*)malloc(sizeof(TrieData));
271
+ Trie *trie = trie_new();
272
+
273
+
274
+ trie_store(trie, (const TrieChar*)"hello", 1);
275
+ trie_store(trie, (const TrieChar*)"he", 4);
276
+ trie_store(trie, (const TrieChar*)"hel", 3);
277
+ trie_store(trie, (const TrieChar*)"h", 5);
278
+ trie_store(trie, (const TrieChar*)"hell", 2);
279
+
280
+
281
+ res = trie_retrieve(trie, (const TrieChar*)"hello", data);
282
+ printf(res ? "Win!\n" : "Fail!\n");
283
+
284
+ res = trie_retrieve(trie, (const TrieChar*)"hell", data);
285
+ printf(res ? "Win!\n" : "Fail!\n");
286
+
287
+ res = trie_retrieve(trie, (const TrieChar*)"hel", data);
288
+ printf(res ? "Win!\n" : "Fail!\n");
289
+
290
+ res = trie_retrieve(trie, (const TrieChar*)"he", data);
291
+ printf(res ? "Win!\n" : "Fail!\n");
292
+
293
+ res = trie_retrieve(trie, (const TrieChar*)"h", data);
294
+ printf(res ? "Win!\n" : "Fail!\n");
295
+
296
+
297
+ trie_free(trie);
298
+ return 0;
299
+ }