tyler-trie 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.yml +4 -0
- data/ext/libdatrie/AUTHORS +1 -0
- data/ext/libdatrie/COPYING +510 -0
- data/ext/libdatrie/ChangeLog +410 -0
- data/ext/libdatrie/INSTALL +236 -0
- data/ext/libdatrie/Makefile.am +5 -0
- data/ext/libdatrie/Makefile.in +661 -0
- data/ext/libdatrie/NEWS +27 -0
- data/ext/libdatrie/README +32 -0
- data/ext/libdatrie/aclocal.m4 +7431 -0
- data/ext/libdatrie/config.guess +1516 -0
- data/ext/libdatrie/config.h.in +74 -0
- data/ext/libdatrie/config.sub +1626 -0
- data/ext/libdatrie/configure +22008 -0
- data/ext/libdatrie/configure.ac +71 -0
- data/ext/libdatrie/datrie.pc.in +11 -0
- data/ext/libdatrie/datrie/Makefile.am +35 -0
- data/ext/libdatrie/datrie/Makefile.in +522 -0
- data/ext/libdatrie/datrie/alpha-map.c +170 -0
- data/ext/libdatrie/datrie/alpha-map.h +36 -0
- data/ext/libdatrie/datrie/darray.c +674 -0
- data/ext/libdatrie/datrie/darray.h +229 -0
- data/ext/libdatrie/datrie/fileutils.c +151 -0
- data/ext/libdatrie/datrie/fileutils.h +36 -0
- data/ext/libdatrie/datrie/libdatrie.def +31 -0
- data/ext/libdatrie/datrie/sb-trie.c +331 -0
- data/ext/libdatrie/datrie/sb-trie.h +279 -0
- data/ext/libdatrie/datrie/tail.c +344 -0
- data/ext/libdatrie/datrie/tail.h +200 -0
- data/ext/libdatrie/datrie/trie-private.h +31 -0
- data/ext/libdatrie/datrie/trie.c +413 -0
- data/ext/libdatrie/datrie/trie.h +270 -0
- data/ext/libdatrie/datrie/triedefs.h +63 -0
- data/ext/libdatrie/datrie/typedefs.h +113 -0
- data/ext/libdatrie/depcomp +530 -0
- data/ext/libdatrie/doc/Doxyfile.in +244 -0
- data/ext/libdatrie/doc/Makefile.am +29 -0
- data/ext/libdatrie/doc/Makefile.in +352 -0
- data/ext/libdatrie/install-sh +323 -0
- data/ext/libdatrie/ltmain.sh +6938 -0
- data/ext/libdatrie/man/Makefile.am +4 -0
- data/ext/libdatrie/man/Makefile.in +381 -0
- data/ext/libdatrie/man/trietool.1 +107 -0
- data/ext/libdatrie/missing +360 -0
- data/ext/libdatrie/tools/Makefile.am +7 -0
- data/ext/libdatrie/tools/Makefile.in +460 -0
- data/ext/libdatrie/tools/trietool.c +308 -0
- data/ext/trie/extconf.rb +12 -0
- data/ext/trie/trie.c +174 -0
- data/lib/trie.rb +1 -0
- data/spec/test-trie/README +1 -0
- data/spec/trie_spec.rb +79 -0
- metadata +139 -0
@@ -0,0 +1,229 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* darray.h - Double-array trie structure
|
4
|
+
* Created: 2006-08-11
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef __DARRAY_H
|
9
|
+
#define __DARRAY_H
|
10
|
+
|
11
|
+
#include "triedefs.h"
|
12
|
+
|
13
|
+
/**
|
14
|
+
* @file darray.h
|
15
|
+
* @brief Double-array trie structure
|
16
|
+
*/
|
17
|
+
|
18
|
+
/**
|
19
|
+
* @brief Double-array structure type
|
20
|
+
*/
|
21
|
+
typedef struct _DArray DArray;
|
22
|
+
|
23
|
+
/**
|
24
|
+
* @brief Double-array entry enumeration function
|
25
|
+
*
|
26
|
+
* @param key : the key of the entry, up to @a sep_node
|
27
|
+
* @param sep_node : the separate node of the entry
|
28
|
+
* @param user_data : user-supplied data
|
29
|
+
*
|
30
|
+
* @return TRUE to continue enumeration, FALSE to stop
|
31
|
+
*/
|
32
|
+
typedef Bool (*DAEnumFunc) (const TrieChar *key,
|
33
|
+
TrieIndex sep_node,
|
34
|
+
void *user_data);
|
35
|
+
|
36
|
+
|
37
|
+
/**
|
38
|
+
* @brief Open double-array from file
|
39
|
+
*
|
40
|
+
* @param path : the path that stores the double-array files
|
41
|
+
* @param name : the name of the double-array (not actual file name)
|
42
|
+
* @param mode : opening mode, read or write
|
43
|
+
*
|
44
|
+
* @return a pointer to the opened double-array, NULL on failure
|
45
|
+
*
|
46
|
+
* Open a double-array structure of given name. Note that @a name here does
|
47
|
+
* not mean the actual file name. Rather, the file name will be inferred by
|
48
|
+
* the name.
|
49
|
+
*/
|
50
|
+
DArray * da_open (const char *path, const char *name, TrieIOMode mode);
|
51
|
+
|
52
|
+
/**
|
53
|
+
* @brief Close double-array data
|
54
|
+
*
|
55
|
+
* @param d : the double-array data
|
56
|
+
*
|
57
|
+
* @return 0 on success, non-zero on failure
|
58
|
+
*
|
59
|
+
* Close the given double-array data. If @a d was opened for writing, all
|
60
|
+
* pending changes will be saved to file.
|
61
|
+
*/
|
62
|
+
int da_close (DArray *d);
|
63
|
+
|
64
|
+
/**
|
65
|
+
* @brief Save double-array data
|
66
|
+
*
|
67
|
+
* @param d : the double-array data
|
68
|
+
*
|
69
|
+
* @return 0 on success, non-zero on failure
|
70
|
+
*
|
71
|
+
* If @a d was opened for writing, save all pending changes
|
72
|
+
* to file.
|
73
|
+
*/
|
74
|
+
int da_save (DArray *d);
|
75
|
+
|
76
|
+
|
77
|
+
/**
|
78
|
+
* @brief Get root state
|
79
|
+
*
|
80
|
+
* @param d : the double-array data
|
81
|
+
*
|
82
|
+
* @return root state of the double-array @a d, or TRIE_INDEX_ERROR on failure
|
83
|
+
*
|
84
|
+
* Get root state for stepwise walking.
|
85
|
+
*/
|
86
|
+
TrieIndex da_get_root (const DArray *d);
|
87
|
+
|
88
|
+
|
89
|
+
/**
|
90
|
+
* @brief Get BASE cell
|
91
|
+
*
|
92
|
+
* @param d : the double-array data
|
93
|
+
* @param s : the double-array state to get data
|
94
|
+
*
|
95
|
+
* @return the BASE cell value for the given state
|
96
|
+
*
|
97
|
+
* Get BASE cell value for the given state.
|
98
|
+
*/
|
99
|
+
TrieIndex da_get_base (const DArray *d, TrieIndex s);
|
100
|
+
|
101
|
+
/**
|
102
|
+
* @brief Get CHECK cell
|
103
|
+
*
|
104
|
+
* @param d : the double-array data
|
105
|
+
* @param s : the double-array state to get data
|
106
|
+
*
|
107
|
+
* @return the CHECK cell value for the given state
|
108
|
+
*
|
109
|
+
* Get CHECK cell value for the given state.
|
110
|
+
*/
|
111
|
+
TrieIndex da_get_check (const DArray *d, TrieIndex s);
|
112
|
+
|
113
|
+
|
114
|
+
/**
|
115
|
+
* @brief Set BASE cell
|
116
|
+
*
|
117
|
+
* @param d : the double-array data
|
118
|
+
* @param s : the double-array state to set data
|
119
|
+
* @param val : the value to set
|
120
|
+
*
|
121
|
+
* Set BASE cell for the given state to the given value.
|
122
|
+
*/
|
123
|
+
void da_set_base (DArray *d, TrieIndex s, TrieIndex val);
|
124
|
+
|
125
|
+
/**
|
126
|
+
* @brief Set CHECK cell
|
127
|
+
*
|
128
|
+
* @param d : the double-array data
|
129
|
+
* @param s : the double-array state to set data
|
130
|
+
* @param val : the value to set
|
131
|
+
*
|
132
|
+
* Set CHECK cell for the given state to the given value.
|
133
|
+
*/
|
134
|
+
void da_set_check (DArray *d, TrieIndex s, TrieIndex val);
|
135
|
+
|
136
|
+
/**
|
137
|
+
* @brief Walk in double-array structure
|
138
|
+
*
|
139
|
+
* @param d : the double-array structure
|
140
|
+
* @param s : current state
|
141
|
+
* @param c : the input character
|
142
|
+
*
|
143
|
+
* @return boolean indicating success
|
144
|
+
*
|
145
|
+
* Walk the double-array trie from state @a *s, using input character @a c.
|
146
|
+
* If there exists an edge from @a *s with arc labeled @a c, this function
|
147
|
+
* returns TRUE and @a *s is updated to the new state. Otherwise, it returns
|
148
|
+
* FALSE and @a *s is left unchanged.
|
149
|
+
*/
|
150
|
+
Bool da_walk (DArray *d, TrieIndex *s, TrieChar c);
|
151
|
+
|
152
|
+
/**
|
153
|
+
* @brief Test walkability in double-array structure
|
154
|
+
*
|
155
|
+
* @param d : the double-array structure
|
156
|
+
* @param s : current state
|
157
|
+
* @param c : the input character
|
158
|
+
*
|
159
|
+
* @return boolean indicating walkability
|
160
|
+
*
|
161
|
+
* Test if there is a transition from state @a s with input character @a c.
|
162
|
+
*/
|
163
|
+
/*
|
164
|
+
Bool da_is_walkable (DArray *d, TrieIndex s, TrieChar c);
|
165
|
+
*/
|
166
|
+
/* NOTE: function-like macro -- (d) and (s) each appear twice in the expansion, so the arguments must be free of side effects. */
#define da_is_walkable(d,s,c) \
|
167
|
+
(da_get_check ((d), da_get_base ((d), (s)) + (c)) == (s))
|
168
|
+
|
169
|
+
/**
|
170
|
+
* @brief Insert a branch from trie node
|
171
|
+
*
|
172
|
+
* @param d : the double-array structure
|
173
|
+
* @param s : the state to add branch to
|
174
|
+
* @param c : the character for the branch label
|
175
|
+
*
|
176
|
+
* @return the index of the new node
|
177
|
+
*
|
178
|
+
* Insert a new arc labelled with character @a c from the trie node
|
179
|
+
* represented by index @a s in double-array structure @a d.
|
180
|
+
* Note that it assumes that no such arc exists before inserting.
|
181
|
+
*/
|
182
|
+
TrieIndex da_insert_branch (DArray *d, TrieIndex s, TrieChar c);
|
183
|
+
|
184
|
+
/**
|
185
|
+
* @brief Prune the single branch
|
186
|
+
*
|
187
|
+
* @param d : the double-array structure
|
188
|
+
* @param s : the dangling state to prune off
|
189
|
+
*
|
190
|
+
* Prune off a non-separate path up from the final state @a s.
|
191
|
+
* If @a s still has some children states, it does nothing. Otherwise,
|
192
|
+
* it deletes the node and all its parents which become non-separate.
|
193
|
+
*/
|
194
|
+
void da_prune (DArray *d, TrieIndex s);
|
195
|
+
|
196
|
+
/**
|
197
|
+
* @brief Prune the single branch up to given parent
|
198
|
+
*
|
199
|
+
* @param d : the double-array structure
|
200
|
+
* @param p : the parent up to which to be pruned
|
201
|
+
* @param s : the dangling state to prune off
|
202
|
+
*
|
203
|
+
* Prune off a non-separate path up from the final state @a s to the
|
204
|
+
* given parent @a p. The pruning stops when either the parent @a p
|
205
|
+
* is met, or a first non-separate node is found.
|
206
|
+
*/
|
207
|
+
void da_prune_upto (DArray *d, TrieIndex p, TrieIndex s);
|
208
|
+
|
209
|
+
/**
|
210
|
+
* @brief Enumerate entries stored in double-array structure
|
211
|
+
*
|
212
|
+
* @param d : the double-array structure
|
213
|
+
* @param enum_func : the callback function to be called on each separate node
|
214
|
+
* @param user_data : user-supplied data to send as an argument to @a enum_func
|
215
|
+
*
|
216
|
+
* @return boolean value indicating whether all the keys are visited
|
217
|
+
*
|
218
|
+
* Enumerate all keys stored in double-array structure. For each entry, the
|
219
|
+
* user-supplied @a enum_func callback function is called, with the entry key,
|
220
|
+
* the separate node, and user-supplied data. Returning FALSE from such
|
221
|
+
* callback will stop enumeration and return FALSE.
|
222
|
+
*/
|
223
|
+
Bool da_enumerate (DArray *d, DAEnumFunc enum_func, void *user_data);
|
224
|
+
|
225
|
+
#endif /* __DARRAY_H */
|
226
|
+
|
227
|
+
/*
|
228
|
+
vi:ts=4:ai:expandtab
|
229
|
+
*/
|
@@ -0,0 +1,151 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* fileutils.c - File utility functions
|
4
|
+
* Created: 2006-08-15
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#include <string.h>
|
9
|
+
#include <stdlib.h>
|
10
|
+
|
11
|
+
#include "fileutils.h"
|
12
|
+
|
13
|
+
/*--------------------------------------*
|
14
|
+
* INTERNAL FUNCTIONS DECLARATIONS *
|
15
|
+
*--------------------------------------*/
|
16
|
+
|
17
|
+
static char * make_full_path (const char *dir,
|
18
|
+
const char *name,
|
19
|
+
const char *ext);
|
20
|
+
|
21
|
+
/* ==================== BEGIN IMPLEMENTATION PART ==================== */
|
22
|
+
|
23
|
+
/*--------------------------------*
|
24
|
+
* FUNCTIONS IMPLEMENTATIONS *
|
25
|
+
*--------------------------------*/
|
26
|
+
|
27
|
+
/*
 * Build the path "<dir>/<name><ext>" in a freshly allocated buffer.
 *
 * @param dir  : directory part of the path
 * @param name : base name of the file
 * @param ext  : extension, appended verbatim right after the name
 *
 * @return a malloc()ed, NUL-terminated string that the caller must free(),
 *         or NULL if the allocation fails
 */
static char *
make_full_path (const char *dir, const char *name, const char *ext)
{
    size_t  len;
    char   *path;

    /* +2: one byte for the '/' separator, one for the terminating NUL */
    len = strlen (dir) + strlen (name) + strlen (ext) + 2;

    path = (char *) malloc (len);
    if (!path)
        return NULL;    /* never hand a NULL buffer to sprintf() */

    sprintf (path, "%s/%s%s", dir, name, ext);

    return path;
}
|
37
|
+
|
38
|
+
FILE *
|
39
|
+
file_open (const char *dir, const char *name, const char *ext, TrieIOMode mode)
|
40
|
+
{
|
41
|
+
const char *std_mode;
|
42
|
+
char *full_path;
|
43
|
+
FILE *file;
|
44
|
+
|
45
|
+
if (mode & TRIE_IO_WRITE)
|
46
|
+
std_mode = "r+";
|
47
|
+
else
|
48
|
+
std_mode = "r";
|
49
|
+
|
50
|
+
full_path = make_full_path (dir, name, ext);
|
51
|
+
file = fopen (full_path, std_mode);
|
52
|
+
if (!file && mode & TRIE_IO_CREATE)
|
53
|
+
file = fopen (full_path, "w+");
|
54
|
+
free (full_path);
|
55
|
+
|
56
|
+
return file;
|
57
|
+
}
|
58
|
+
|
59
|
+
/*
 * Get the length of an open file while preserving the current position.
 *
 * @param file : the stream to measure
 *
 * @return the offset reported by ftell() at end-of-file, or -1 if the
 *         stream position could not be queried or changed
 */
long
file_length (FILE *file)
{
    long    cur_pos;
    long    size;

    cur_pos = ftell (file);
    if (cur_pos < 0)
        return -1L;

    if (fseek (file, 0L, SEEK_END) != 0)
        return -1L;

    size = ftell (file);

    /* restore the caller's position even when the ftell() above failed */
    if (fseek (file, cur_pos, SEEK_SET) != 0)
        return -1L;

    return size;
}
|
74
|
+
|
75
|
+
Bool
|
76
|
+
file_read_int32 (FILE *file, int32 *o_val)
|
77
|
+
{
|
78
|
+
unsigned char buff[4];
|
79
|
+
|
80
|
+
if (fread (buff, 4, 1, file) == 1) {
|
81
|
+
*o_val = (buff[0] << 24) | (buff[1] << 16) | (buff[2] << 8) | buff[3];
|
82
|
+
return TRUE;
|
83
|
+
}
|
84
|
+
|
85
|
+
return FALSE;
|
86
|
+
}
|
87
|
+
|
88
|
+
Bool
|
89
|
+
file_write_int32 (FILE *file, int32 val)
|
90
|
+
{
|
91
|
+
unsigned char buff[4];
|
92
|
+
|
93
|
+
buff[0] = (val >> 24) & 0xff;
|
94
|
+
buff[1] = (val >> 16) & 0xff;
|
95
|
+
buff[2] = (val >> 8) & 0xff;
|
96
|
+
buff[3] = val & 0xff;
|
97
|
+
|
98
|
+
return (fwrite (buff, 4, 1, file) == 1);
|
99
|
+
}
|
100
|
+
|
101
|
+
Bool
|
102
|
+
file_read_int16 (FILE *file, int16 *o_val)
|
103
|
+
{
|
104
|
+
unsigned char buff[2];
|
105
|
+
|
106
|
+
if (fread (buff, 2, 1, file) == 1) {
|
107
|
+
*o_val = (buff[0] << 8) | buff[1];
|
108
|
+
return TRUE;
|
109
|
+
}
|
110
|
+
|
111
|
+
return FALSE;
|
112
|
+
}
|
113
|
+
|
114
|
+
Bool
|
115
|
+
file_write_int16 (FILE *file, int16 val)
|
116
|
+
{
|
117
|
+
unsigned char buff[2];
|
118
|
+
|
119
|
+
buff[0] = val >> 8;
|
120
|
+
buff[1] = val & 0xff;
|
121
|
+
|
122
|
+
return (fwrite (buff, 2, 1, file) == 1);
|
123
|
+
}
|
124
|
+
|
125
|
+
Bool
|
126
|
+
file_read_int8 (FILE *file, int8 *o_val)
|
127
|
+
{
|
128
|
+
return (fread (o_val, sizeof (int8), 1, file) == 1);
|
129
|
+
}
|
130
|
+
|
131
|
+
Bool
|
132
|
+
file_write_int8 (FILE *file, int8 val)
|
133
|
+
{
|
134
|
+
return (fwrite (&val, sizeof (int8), 1, file) == 1);
|
135
|
+
}
|
136
|
+
|
137
|
+
Bool
|
138
|
+
file_read_chars (FILE *file, char *buff, int len)
|
139
|
+
{
|
140
|
+
return (fread (buff, sizeof (char), len, file) == len);
|
141
|
+
}
|
142
|
+
|
143
|
+
Bool
|
144
|
+
file_write_chars (FILE *file, const char *buff, int len)
|
145
|
+
{
|
146
|
+
return (fwrite (buff, sizeof (char), len, file) == len);
|
147
|
+
}
|
148
|
+
|
149
|
+
/*
|
150
|
+
vi:ts=4:ai:expandtab
|
151
|
+
*/
|
@@ -0,0 +1,36 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* fileutils.h - File utility functions
|
4
|
+
* Created: 2006-08-14
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef __FILEUTILS_H
|
9
|
+
#define __FILEUTILS_H
|
10
|
+
|
11
|
+
#include <stdio.h>
|
12
|
+
|
13
|
+
#include "triedefs.h"
|
14
|
+
|
15
|
+
FILE * file_open (const char *dir, const char *name, const char *ext,
|
16
|
+
TrieIOMode mode);
|
17
|
+
|
18
|
+
long file_length (FILE *file);
|
19
|
+
|
20
|
+
Bool file_read_int32 (FILE *file, int32 *o_val);
|
21
|
+
Bool file_write_int32 (FILE *file, int32 val);
|
22
|
+
|
23
|
+
Bool file_read_int16 (FILE *file, int16 *o_val);
|
24
|
+
Bool file_write_int16 (FILE *file, int16 val);
|
25
|
+
|
26
|
+
Bool file_read_int8 (FILE *file, int8 *o_val);
|
27
|
+
Bool file_write_int8 (FILE *file, int8 val);
|
28
|
+
|
29
|
+
Bool file_read_chars (FILE *file, char *buff, int len);
|
30
|
+
Bool file_write_chars (FILE *file, const char *buff, int len);
|
31
|
+
|
32
|
+
#endif /* __FILEUTILS_H */
|
33
|
+
|
34
|
+
/*
|
35
|
+
vi:ts=4:ai:expandtab
|
36
|
+
*/
|
@@ -0,0 +1,31 @@
|
|
1
|
+
sb_trie_open
|
2
|
+
sb_trie_close
|
3
|
+
sb_trie_save
|
4
|
+
sb_trie_retrieve
|
5
|
+
sb_trie_store
|
6
|
+
sb_trie_delete
|
7
|
+
sb_trie_enumerate
|
8
|
+
sb_trie_root
|
9
|
+
sb_trie_state_clone
|
10
|
+
sb_trie_state_free
|
11
|
+
sb_trie_state_rewind
|
12
|
+
sb_trie_state_walk
|
13
|
+
sb_trie_state_is_walkable
|
14
|
+
sb_trie_state_is_terminal
|
15
|
+
sb_trie_state_is_leaf
|
16
|
+
sb_trie_state_get_data
|
17
|
+
trie_open
|
18
|
+
trie_close
|
19
|
+
trie_save
|
20
|
+
trie_retrieve
|
21
|
+
trie_store
|
22
|
+
trie_delete
|
23
|
+
trie_enumerate
|
24
|
+
trie_root
|
25
|
+
trie_state_clone
|
26
|
+
trie_state_free
|
27
|
+
trie_state_rewind
|
28
|
+
trie_state_walk
|
29
|
+
trie_state_is_walkable
|
30
|
+
trie_state_is_leaf
|
31
|
+
trie_state_get_data
|