middlemac 3.1.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/trie/tail.h ADDED
@@ -0,0 +1,207 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
+ /*
3
+ * tail.h - trie tail for keeping suffixes
4
+ * Created: 2006-08-12
5
+ * Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
6
+ */
7
+
8
+ #ifndef __TAIL_H
9
+ #define __TAIL_H
10
+
11
+ #include "triedefs.h"
12
+
13
+ /**
14
+ * @file tail.h
15
+ * @brief trie tail for keeping suffixes
16
+ */
17
+
18
+ /**
19
+ * @brief Trie tail structure type
20
+ */
21
+ typedef struct _Tail Tail;
22
+
23
+ /**
24
+ * @brief Create a new tail object
25
+ *
26
+ * Create a new empty tail object.
27
+ */
28
+ Tail * tail_new ();
29
+
30
+ /**
31
+ * @brief Read tail data from file
32
+ *
33
+ * @param file : the file to read
34
+ *
35
+ * @return a pointer to the opened tail data, NULL on failure
36
+ *
37
+ * Read tail data from the opened file, starting from the current
38
+ * file pointer until the end of tail data block. On return, the
39
+ * file pointer is left at the position after the read block.
40
+ */
41
+ Tail * tail_read (FILE *file);
42
+
43
+ /**
44
+ * @brief Free tail data
45
+ *
46
+ * @param t : the tail data
47
+ *
48
+ * This function does not return a value.
49
+ *
50
+ * Free the given tail data.
51
+ */
52
+ void tail_free (Tail *t);
53
+
54
+ /**
55
+ * @brief Write tail data
56
+ *
57
+ * @param t : the tail data
58
+ * @param file : the file to write to
59
+ *
60
+ * @return 0 on success, non-zero on failure
61
+ *
62
+ * Write tail data to the given @a file, starting from the current file
63
+ * pointer. On return, the file pointer is left after the tail data block.
64
+ */
65
+ int tail_write (const Tail *t, FILE *file);
66
+
67
+
68
+ /**
69
+ * @brief Get suffix
70
+ *
71
+ * @param t : the tail data
72
+ * @param index : the index of the suffix
73
+ *
74
+ * @return an allocated string of the indexed suffix.
75
+ *
76
+ * Get suffix from tail with given @a index. The returned string is allocated.
77
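+ * The caller should free it with free(). NOTE(review): the const-qualified return type suggests the string may instead be internal storage that must not be freed; confirm against the implementation.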
78
+ */
79
+ const TrieChar * tail_get_suffix (const Tail *t, TrieIndex index);
80
+
81
+ /**
82
+ * @brief Set suffix of existing entry
83
+ *
84
+ * @param t : the tail data
85
+ * @param index : the index of the suffix
86
+ * @param suffix : the new suffix
87
+ *
88
+ * Set suffix of existing entry of given @a index in tail.
89
+ */
90
+ Bool tail_set_suffix (Tail *t, TrieIndex index, const TrieChar *suffix);
91
+
92
+ /**
93
+ * @brief Add a new suffix
94
+ *
95
+ * @param t : the tail data
96
+ * @param suffix : the new suffix
97
+ *
98
+ * @return the index of the newly added suffix.
99
+ *
100
+ * Add a new suffix entry to tail.
101
+ */
102
+ TrieIndex tail_add_suffix (Tail *t, const TrieChar *suffix);
103
+
104
+ /**
105
+ * @brief Get data associated to suffix entry
106
+ *
107
+ * @param t : the tail data
108
+ * @param index : the index of the suffix
109
+ *
110
+ * @return the data associated to the suffix entry
111
+ *
112
+ * Get data associated to suffix entry @a index in tail data.
113
+ */
114
+ TrieData tail_get_data (const Tail *t, TrieIndex index);
115
+
116
+ /**
117
+ * @brief Set data associated to suffix entry
118
+ *
119
+ * @param t : the tail data
120
+ * @param index : the index of the suffix
121
+ * @param data : the data to set
122
+ *
123
+ * @return boolean indicating success
124
+ *
125
+ * Set data associated to suffix entry @a index in tail data.
126
+ */
127
+ Bool tail_set_data (Tail *t, TrieIndex index, TrieData data);
128
+
129
+ /**
130
+ * @brief Delete suffix entry
131
+ *
132
+ * @param t : the tail data
133
+ * @param index : the index of the suffix to delete
134
+ *
135
+ * Delete suffix entry from the tail data.
136
+ */
137
+ void tail_delete (Tail *t, TrieIndex index);
138
+
139
+ /**
140
+ * @brief Walk in tail with a string
141
+ *
142
+ * @param t : the tail data
143
+ * @param s : the tail data index
144
+ * @param suffix_idx : pointer to current character index in suffix
145
+ * @param str : the string to use in walking
146
+ * @param len : total characters in @a str to walk
147
+ *
148
+ * @return total number of characters successfully walked
149
+ *
150
+ * Walk in the tail data @a t at entry @a s, from given character position
151
+ * @a *suffix_idx, using @a len characters of given string @a str. On return,
152
+ * @a *suffix_idx is updated to the position after the last successful walk,
153
+ * and the function returns the total number of characters successfully walked.
154
+ */
155
+ int tail_walk_str (const Tail *t,
156
+ TrieIndex s,
157
+ short *suffix_idx,
158
+ const TrieChar *str,
159
+ int len);
160
+
161
+ /**
162
+ * @brief Walk in tail with a character
163
+ *
164
+ * @param t : the tail data
165
+ * @param s : the tail data index
166
+ * @param suffix_idx : pointer to current character index in suffix
167
+ * @param c : the character to use in walking
168
+ *
169
+ * @return boolean indicating success
170
+ *
171
+ * Walk in the tail data @a t at entry @a s, from given character position
172
+ * @a *suffix_idx, using given character @a c. If the walk is successful,
173
+ * it returns TRUE, and @a *suffix_idx is updated to the next character.
174
+ * Otherwise, it returns FALSE, and @a *suffix_idx is left unchanged.
175
+ */
176
+ Bool tail_walk_char (const Tail *t,
177
+ TrieIndex s,
178
+ short *suffix_idx,
179
+ TrieChar c);
180
+
181
+ /**
182
+ * @brief Test walkability in tail with a character
183
+ *
184
+ * @param t : the tail data
185
+ * @param s : the tail data index
186
+ * @param suffix_idx : current character index in suffix
187
+ * @param c : the character to test walkability
188
+ *
189
+ * @return boolean indicating walkability
190
+ *
191
+ * Test if the character @a c can be used to walk from given character
192
+ * position @a suffix_idx of entry @a s of the tail data @a t.
193
+ */
194
+ /*
195
+ Bool tail_is_walkable_char (Tail *t,
196
+ TrieIndex s,
197
+ short suffix_idx,
198
+ const TrieChar c);
199
+ */
200
/* Non-zero when character (c) is found at position (suffix_idx) of the
 * suffix stored for tail entry (s) of tail (t); replaces the function
 * form kept in the comment above. */
#define tail_is_walkable_char(t,s,suffix_idx,c)                   \
    ((c) == tail_get_suffix ((t), (s)) [(suffix_idx)])
202
+
203
+ #endif /* __TAIL_H */
204
+
205
+ /*
206
+ vi:ts=4:ai:expandtab
207
+ */
@@ -0,0 +1,299 @@
1
+ #include <stdio.h>
2
+ #include <stdlib.h>
3
+ #include <string.h>
4
+ #include "darray.h"
5
+ #include "tail.h"
6
+ #include "trie.h"
7
+
8
+ Trie* trie_new() {
9
+ Trie *trie = (Trie*) malloc(sizeof(Trie));
10
+ trie->da = da_new();
11
+ trie->tail = tail_new();
12
+ return trie;
13
+ }
14
+
15
+ void trie_free(Trie *trie) {
16
+ da_free(trie->da);
17
+ tail_free(trie->tail);
18
+ free(trie);
19
+ }
20
+
21
+ static Bool trie_branch_in_branch (Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data) {
22
+ TrieIndex new_da, new_tail;
23
+
24
+ new_da = da_insert_branch (trie->da, sep_node, *suffix);
25
+ if (TRIE_INDEX_ERROR == new_da)
26
+ return FALSE;
27
+
28
+ if ('\0' != *suffix)
29
+ ++suffix;
30
+
31
+ new_tail = tail_add_suffix (trie->tail, suffix);
32
+ tail_set_data (trie->tail, new_tail, data);
33
+ trie_da_set_tail_index (trie->da, new_da, new_tail);
34
+
35
+ // trie->is_dirty = TRUE;
36
+ return TRUE;
37
+ }
38
+
39
+ static Bool trie_branch_in_tail(Trie *trie, TrieIndex sep_node, const TrieChar *suffix, TrieData data) {
40
+ TrieIndex old_tail, old_da, s;
41
+ const TrieChar *old_suffix, *p;
42
+
43
+ /* adjust separate point in old path */
44
+ old_tail = trie_da_get_tail_index (trie->da, sep_node);
45
+ old_suffix = tail_get_suffix (trie->tail, old_tail);
46
+ if (!old_suffix)
47
+ return FALSE;
48
+
49
+ for (p = old_suffix, s = sep_node; *p == *suffix; p++, suffix++) {
50
+ TrieIndex t = da_insert_branch (trie->da, s, *p);
51
+ if (TRIE_INDEX_ERROR == t)
52
+ goto fail;
53
+ s = t;
54
+ }
55
+
56
+ old_da = da_insert_branch (trie->da, s, *p);
57
+ if (TRIE_INDEX_ERROR == old_da)
58
+ goto fail;
59
+
60
+ if ('\0' != *p)
61
+ ++p;
62
+ tail_set_suffix (trie->tail, old_tail, p);
63
+ trie_da_set_tail_index (trie->da, old_da, old_tail);
64
+
65
+ /* insert the new branch at the new separate point */
66
+ return trie_branch_in_branch (trie, s, suffix, data);
67
+
68
+ fail:
69
+ /* failed, undo previous insertions and return error */
70
+ da_prune_upto (trie->da, sep_node, s);
71
+ trie_da_set_tail_index (trie->da, sep_node, old_tail);
72
+ return FALSE;
73
+ }
74
+
75
+ Bool trie_store (Trie *trie, const TrieChar *key, TrieData data) {
76
+ TrieIndex s, t;
77
+ short suffix_idx;
78
+ const TrieChar *p, *sep;
79
+ size_t len;
80
+
81
+ /* walk through branches */
82
+ s = da_get_root (trie->da);
83
+ for (p = key; !trie_da_is_separate (trie->da, s); p++) {
84
+ if (!da_walk (trie->da, &s, *p))
85
+ return trie_branch_in_branch (trie, s, p, data);
86
+ if (0 == *p)
87
+ break;
88
+ }
89
+
90
+ /* walk through tail */
91
+ sep = p;
92
+ t = trie_da_get_tail_index (trie->da, s);
93
+ suffix_idx = 0;
94
+ len = strlen ((const char *) p) + 1; /* including null-terminator */
95
+ if (tail_walk_str (trie->tail, t, &suffix_idx, p, len) != len)
96
+ return trie_branch_in_tail (trie, s, p, data);
97
+
98
+ /* duplicated key, overwrite val */
99
+ tail_set_data (trie->tail, t, data);
100
+ // trie->is_dirty = TRUE;
101
+ return TRUE;
102
+ }
103
+
104
+
105
+ Bool trie_has_key (const Trie *trie, const TrieChar *key) {
106
+ TrieIndex s;
107
+ short suffix_idx;
108
+ const TrieChar *p;
109
+
110
+ /* walk through branches */
111
+ s = da_get_root (trie->da);
112
+ for (p = key; !trie_da_is_separate (trie->da, s); p++) {
113
+ if (!da_walk (trie->da, &s, *p))
114
+ return FALSE;
115
+ if (0 == *p)
116
+ break;
117
+ }
118
+
119
+ /* walk through tail */
120
+ s = trie_da_get_tail_index (trie->da, s);
121
+ suffix_idx = 0;
122
+ for ( ; ; p++) {
123
+ if (!tail_walk_char (trie->tail, s, &suffix_idx, *p))
124
+ return FALSE;
125
+ if (0 == *p)
126
+ break;
127
+ }
128
+
129
+ return TRUE;
130
+ }
131
+
132
+
133
+ Bool trie_retrieve (const Trie *trie, const TrieChar *key, TrieData *o_data) {
134
+ TrieIndex s;
135
+ short suffix_idx;
136
+ const TrieChar *p;
137
+
138
+ /* walk through branches */
139
+ s = da_get_root (trie->da);
140
+ for (p = key; !trie_da_is_separate (trie->da, s); p++) {
141
+ if (!da_walk (trie->da, &s, *p))
142
+ return FALSE;
143
+ if (0 == *p)
144
+ break;
145
+ }
146
+
147
+ /* walk through tail */
148
+ s = trie_da_get_tail_index (trie->da, s);
149
+ suffix_idx = 0;
150
+ for ( ; ; p++) {
151
+ if (!tail_walk_char (trie->tail, s, &suffix_idx, *p))
152
+ return FALSE;
153
+ if (0 == *p)
154
+ break;
155
+ }
156
+
157
+ /* found, set the val and return */
158
+ if (o_data)
159
+ *o_data = tail_get_data (trie->tail, s);
160
+ return TRUE;
161
+ }
162
+
163
+ Bool trie_delete (Trie *trie, const TrieChar *key) {
164
+ TrieIndex s, t;
165
+ short suffix_idx;
166
+ const TrieChar *p;
167
+
168
+ /* walk through branches */
169
+ s = da_get_root (trie->da);
170
+ for (p = key; !trie_da_is_separate (trie->da, s); p++) {
171
+ if (!da_walk (trie->da, &s, *p))
172
+ return FALSE;
173
+ if (0 == *p)
174
+ break;
175
+ }
176
+
177
+ /* walk through tail */
178
+ t = trie_da_get_tail_index (trie->da, s);
179
+ suffix_idx = 0;
180
+ for ( ; ; p++) {
181
+ if (!tail_walk_char (trie->tail, t, &suffix_idx, *p))
182
+ return FALSE;
183
+ if (0 == *p)
184
+ break;
185
+ }
186
+
187
+ tail_delete (trie->tail, t);
188
+ da_set_base (trie->da, s, TRIE_INDEX_ERROR);
189
+ da_prune (trie->da, s);
190
+
191
+ //trie->is_dirty = TRUE;
192
+ return TRUE;
193
+ }
194
+
195
+ /*-------------------------------*
196
+ * STEPWISE QUERY OPERATIONS *
197
+ *-------------------------------*/
198
+
199
+ TrieState * trie_root (const Trie *trie) {
200
+ return trie_state_new (trie, da_get_root (trie->da), 0, FALSE);
201
+ }
202
+
203
+ /*----------------*
204
+ * TRIE STATE *
205
+ *----------------*/
206
+
207
+ static TrieState * trie_state_new (const Trie *trie, TrieIndex index, short suffix_idx, short is_suffix) {
208
+ TrieState *s;
209
+
210
+ s = (TrieState *) malloc (sizeof (TrieState));
211
+ if (!s)
212
+ return NULL;
213
+
214
+ s->trie = trie;
215
+ s->index = index;
216
+ s->suffix_idx = suffix_idx;
217
+ s->is_suffix = is_suffix;
218
+
219
+ return s;
220
+ }
221
+
222
+ TrieState * trie_state_clone (const TrieState *s) {
223
+ return trie_state_new (s->trie, s->index, s->suffix_idx, s->is_suffix);
224
+ }
225
+
226
+ void trie_state_free (TrieState *s) {
227
+ free (s);
228
+ }
229
+
230
+ void trie_state_rewind (TrieState *s) {
231
+ s->index = da_get_root (s->trie->da);
232
+ s->is_suffix = FALSE;
233
+ }
234
+
235
+ Bool trie_state_walk (TrieState *s, TrieChar c) {
236
+ if (!s->is_suffix) {
237
+ Bool ret;
238
+
239
+ ret = da_walk (s->trie->da, &s->index, c);
240
+
241
+ if (ret && trie_da_is_separate (s->trie->da, s->index)) {
242
+ s->index = trie_da_get_tail_index (s->trie->da, s->index);
243
+ s->suffix_idx = 0;
244
+ s->is_suffix = TRUE;
245
+ }
246
+
247
+ return ret;
248
+ } else {
249
+ return tail_walk_char (s->trie->tail, s->index, &s->suffix_idx, c);
250
+ }
251
+ }
252
+
253
+ Bool trie_state_is_walkable (const TrieState *s, TrieChar c) {
254
+ if (!s->is_suffix)
255
+ return da_is_walkable (s->trie->da, s->index, c);
256
+ else
257
+ return tail_is_walkable_char (s->trie->tail, s->index, s->suffix_idx, c);
258
+ }
259
+
260
+ Bool trie_state_is_leaf (const TrieState *s) {
261
+ return s->is_suffix && trie_state_is_terminal (s);
262
+ }
263
+
264
+ TrieData trie_state_get_data (const TrieState *s) {
265
+ return s->is_suffix ? tail_get_data (s->trie->tail, s->index) : TRIE_DATA_ERROR;
266
+ }
267
+
268
+ int main(void) {
269
+ Bool res;
270
+ TrieData *data = (TrieData*)malloc(sizeof(TrieData));
271
+ Trie *trie = trie_new();
272
+
273
+
274
+ trie_store(trie, (const TrieChar*)"hello", 1);
275
+ trie_store(trie, (const TrieChar*)"he", 4);
276
+ trie_store(trie, (const TrieChar*)"hel", 3);
277
+ trie_store(trie, (const TrieChar*)"h", 5);
278
+ trie_store(trie, (const TrieChar*)"hell", 2);
279
+
280
+
281
+ res = trie_retrieve(trie, (const TrieChar*)"hello", data);
282
+ printf(res ? "Win!\n" : "Fail!\n");
283
+
284
+ res = trie_retrieve(trie, (const TrieChar*)"hell", data);
285
+ printf(res ? "Win!\n" : "Fail!\n");
286
+
287
+ res = trie_retrieve(trie, (const TrieChar*)"hel", data);
288
+ printf(res ? "Win!\n" : "Fail!\n");
289
+
290
+ res = trie_retrieve(trie, (const TrieChar*)"he", data);
291
+ printf(res ? "Win!\n" : "Fail!\n");
292
+
293
+ res = trie_retrieve(trie, (const TrieChar*)"h", data);
294
+ printf(res ? "Win!\n" : "Fail!\n");
295
+
296
+
297
+ trie_free(trie);
298
+ return 0;
299
+ }