tyler-trie 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/VERSION.yml +4 -0
  2. data/ext/libdatrie/AUTHORS +1 -0
  3. data/ext/libdatrie/COPYING +510 -0
  4. data/ext/libdatrie/ChangeLog +410 -0
  5. data/ext/libdatrie/INSTALL +236 -0
  6. data/ext/libdatrie/Makefile.am +5 -0
  7. data/ext/libdatrie/Makefile.in +661 -0
  8. data/ext/libdatrie/NEWS +27 -0
  9. data/ext/libdatrie/README +32 -0
  10. data/ext/libdatrie/aclocal.m4 +7431 -0
  11. data/ext/libdatrie/config.guess +1516 -0
  12. data/ext/libdatrie/config.h.in +74 -0
  13. data/ext/libdatrie/config.sub +1626 -0
  14. data/ext/libdatrie/configure +22008 -0
  15. data/ext/libdatrie/configure.ac +71 -0
  16. data/ext/libdatrie/datrie.pc.in +11 -0
  17. data/ext/libdatrie/datrie/Makefile.am +35 -0
  18. data/ext/libdatrie/datrie/Makefile.in +522 -0
  19. data/ext/libdatrie/datrie/alpha-map.c +170 -0
  20. data/ext/libdatrie/datrie/alpha-map.h +36 -0
  21. data/ext/libdatrie/datrie/darray.c +674 -0
  22. data/ext/libdatrie/datrie/darray.h +229 -0
  23. data/ext/libdatrie/datrie/fileutils.c +151 -0
  24. data/ext/libdatrie/datrie/fileutils.h +36 -0
  25. data/ext/libdatrie/datrie/libdatrie.def +31 -0
  26. data/ext/libdatrie/datrie/sb-trie.c +331 -0
  27. data/ext/libdatrie/datrie/sb-trie.h +279 -0
  28. data/ext/libdatrie/datrie/tail.c +344 -0
  29. data/ext/libdatrie/datrie/tail.h +200 -0
  30. data/ext/libdatrie/datrie/trie-private.h +31 -0
  31. data/ext/libdatrie/datrie/trie.c +413 -0
  32. data/ext/libdatrie/datrie/trie.h +270 -0
  33. data/ext/libdatrie/datrie/triedefs.h +63 -0
  34. data/ext/libdatrie/datrie/typedefs.h +113 -0
  35. data/ext/libdatrie/depcomp +530 -0
  36. data/ext/libdatrie/doc/Doxyfile.in +244 -0
  37. data/ext/libdatrie/doc/Makefile.am +29 -0
  38. data/ext/libdatrie/doc/Makefile.in +352 -0
  39. data/ext/libdatrie/install-sh +323 -0
  40. data/ext/libdatrie/ltmain.sh +6938 -0
  41. data/ext/libdatrie/man/Makefile.am +4 -0
  42. data/ext/libdatrie/man/Makefile.in +381 -0
  43. data/ext/libdatrie/man/trietool.1 +107 -0
  44. data/ext/libdatrie/missing +360 -0
  45. data/ext/libdatrie/tools/Makefile.am +7 -0
  46. data/ext/libdatrie/tools/Makefile.in +460 -0
  47. data/ext/libdatrie/tools/trietool.c +308 -0
  48. data/ext/trie/extconf.rb +12 -0
  49. data/ext/trie/trie.c +174 -0
  50. data/lib/trie.rb +1 -0
  51. data/spec/test-trie/README +1 -0
  52. data/spec/trie_spec.rb +79 -0
  53. metadata +139 -0
@@ -0,0 +1,308 @@
1
+ /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
+ /*
3
+ * trietool.c - Trie manipulation tool
4
+ * Created: 2006-08-15
5
+ * Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
6
+ */
7
+
8
+ #include <string.h>
9
+ #include <stdlib.h>
10
+ #include <stdio.h>
11
+ #include <ctype.h>
12
+
13
+ #include <config.h>
14
+ #include <datrie/sb-trie.h>
15
+
16
+ typedef struct {
17
+ const char *path;
18
+ const char *trie_name;
19
+ SBTrie *sb_trie;
20
+ } ProgEnv;
21
+
22
+ static int decode_switch (int argc, char *argv[], ProgEnv *env);
23
+ static int decode_command (int argc, char *argv[], ProgEnv *env);
24
+
25
+ static int command_add (int argc, char *argv[], ProgEnv *env);
26
+ static int command_add_list (int argc, char *argv[], ProgEnv *env);
27
+ static int command_delete (int argc, char *argv[], ProgEnv *env);
28
+ static int command_delete_list (int argc, char *argv[], ProgEnv *env);
29
+ static int command_query (int argc, char *argv[], ProgEnv *env);
30
+ static int command_list (int argc, char *argv[], ProgEnv *env);
31
+
32
+ static void usage (const char *prog_name, int exit_status);
33
+
34
+ static char *string_trim (char *s);
35
+
36
+ int
37
+ main (int argc, char *argv[])
38
+ {
39
+ int i;
40
+ ProgEnv env;
41
+ int ret;
42
+
43
+ env.path = ".";
44
+
45
+ i = decode_switch (argc, argv, &env);
46
+ if (i == argc)
47
+ usage (argv[0], EXIT_FAILURE);
48
+
49
+ env.trie_name = argv[i++];
50
+ env.sb_trie = sb_trie_open (env.path, env.trie_name,
51
+ TRIE_IO_READ | TRIE_IO_WRITE | TRIE_IO_CREATE);
52
+ if (!env.sb_trie) {
53
+ fprintf (stderr, "Cannot open trie '%s' at '%s'\n",
54
+ env.trie_name, env.path);
55
+ exit (EXIT_FAILURE);
56
+ }
57
+
58
+ ret = decode_command (argc - i, argv + i, &env);
59
+
60
+ sb_trie_close (env.sb_trie);
61
+
62
+ return ret;
63
+ }
64
+
65
+ static int
66
+ decode_switch (int argc, char *argv[], ProgEnv *env)
67
+ {
68
+ int opt_idx;
69
+
70
+ for (opt_idx = 1; opt_idx < argc && *argv[opt_idx] == '-'; opt_idx++) {
71
+ if (strcmp (argv[opt_idx], "-h") == 0 ||
72
+ strcmp (argv[opt_idx], "--help") == 0)
73
+ {
74
+ usage (argv[0], EXIT_FAILURE);
75
+ } else if (strcmp (argv[opt_idx], "-V") == 0 ||
76
+ strcmp (argv[opt_idx], "--version") == 0)
77
+ {
78
+ printf ("%s\n", VERSION);
79
+ exit (EXIT_FAILURE);
80
+ } else if (strcmp (argv[opt_idx], "-p") == 0 ||
81
+ strcmp (argv[opt_idx], "--path") == 0)
82
+ {
83
+ env->path = argv[++opt_idx];
84
+ } else if (strcmp (argv[opt_idx], "--") == 0) {
85
+ ++opt_idx;
86
+ break;
87
+ } else {
88
+ fprintf (stderr, "Unknown option: %s\n", argv[opt_idx]);
89
+ exit (EXIT_FAILURE);
90
+ }
91
+ }
92
+
93
+ return opt_idx;
94
+ }
95
+
96
+ static int
97
+ decode_command (int argc, char *argv[], ProgEnv *env)
98
+ {
99
+ int opt_idx;
100
+
101
+ for (opt_idx = 0; opt_idx < argc; opt_idx++) {
102
+ if (strcmp (argv[opt_idx], "add") == 0) {
103
+ ++opt_idx;
104
+ opt_idx += command_add (argc - opt_idx, argv + opt_idx, env);
105
+ } else if (strcmp (argv[opt_idx], "add-list") == 0) {
106
+ ++opt_idx;
107
+ opt_idx += command_add_list (argc - opt_idx, argv + opt_idx, env);
108
+ } else if (strcmp (argv[opt_idx], "delete") == 0) {
109
+ ++opt_idx;
110
+ opt_idx += command_delete (argc - opt_idx, argv + opt_idx, env);
111
+ } else if (strcmp (argv[opt_idx], "delete-list") == 0) {
112
+ ++opt_idx;
113
+ opt_idx += command_delete_list (argc - opt_idx, argv + opt_idx, env);
114
+ } else if (strcmp (argv[opt_idx], "query") == 0) {
115
+ ++opt_idx;
116
+ opt_idx += command_query (argc - opt_idx, argv + opt_idx, env);
117
+ } else if (strcmp (argv[opt_idx], "list") == 0) {
118
+ ++opt_idx;
119
+ opt_idx += command_list (argc - opt_idx, argv + opt_idx, env);
120
+ } else {
121
+ fprintf (stderr, "Unknown command: %s\n", argv[opt_idx]);
122
+ return EXIT_FAILURE;
123
+ }
124
+ }
125
+
126
+ return EXIT_SUCCESS;
127
+ }
128
+
129
+ static int
130
+ command_add (int argc, char *argv[], ProgEnv *env)
131
+ {
132
+ int opt_idx;
133
+
134
+ opt_idx = 0;
135
+ while (opt_idx < argc) {
136
+ const TrieChar *key;
137
+ TrieData data;
138
+
139
+ key = (const TrieChar *) argv[opt_idx++];
140
+ data = (opt_idx < argc) ? atoi (argv[opt_idx++]) : TRIE_DATA_ERROR;
141
+
142
+ if (!sb_trie_store (env->sb_trie, key, data)) {
143
+ fprintf (stderr, "Failed to add entry '%s' with data %d\n",
144
+ key, data);
145
+ }
146
+ }
147
+
148
+ return opt_idx;
149
+ }
150
+
151
+ static int
152
+ command_add_list (int argc, char *argv[], ProgEnv *env)
153
+ {
154
+ FILE *input;
155
+ char line[256];
156
+
157
+ input = fopen (argv[0], "r");
158
+ if (!input) {
159
+ fprintf (stderr, "add-list: Cannot open input file '%s'\n", argv[0]);
160
+ return 1;
161
+ }
162
+
163
+ while (fgets (line, sizeof line, input)) {
164
+ char *key, *data;
165
+ TrieData data_val;
166
+
167
+ key = string_trim (line);
168
+ if ('\0' != *key) {
169
+ /* find key boundary */
170
+ for (data = key; *data && !strchr ("\t,", *data); ++data)
171
+ ;
172
+ /* mark key ending and find data begin */
173
+ if ('\0' != *data) {
174
+ *data++ = '\0';
175
+ while (isspace (*data))
176
+ ++data;
177
+ }
178
+ /* decode data */
179
+ data_val = ('\0' != *data) ? atoi (data) : TRIE_DATA_ERROR;
180
+
181
+ /* store the key */
182
+ if (!sb_trie_store (env->sb_trie, (const TrieChar *) key, data_val))
183
+ fprintf (stderr, "Failed to add key '%s' with data %d.\n",
184
+ key, data_val);
185
+ }
186
+ }
187
+
188
+ fclose (input);
189
+
190
+ return 1;
191
+ }
192
+
193
+ static int
194
+ command_delete (int argc, char *argv[], ProgEnv *env)
195
+ {
196
+ int opt_idx;
197
+
198
+ for (opt_idx = 0; opt_idx < argc; opt_idx++)
199
+ if (!sb_trie_delete (env->sb_trie, (const TrieChar *) argv[opt_idx]))
200
+ fprintf (stderr, "No entry '%s'. Not deleted.\n", argv[opt_idx]);
201
+
202
+ return opt_idx;
203
+ }
204
+
205
+ static int
206
+ command_delete_list (int argc, char *argv[], ProgEnv *env)
207
+ {
208
+ FILE *input;
209
+ char line[256];
210
+
211
+ input = fopen (argv[0], "r");
212
+ if (!input) {
213
+ fprintf (stderr, "delete-list: Cannot open input file '%s'\n", argv[0]);
214
+ return 1;
215
+ }
216
+
217
+ while (fgets (line, sizeof line, input)) {
218
+ char *p;
219
+
220
+ p = string_trim (line);
221
+ if ('\0' != *p)
222
+ if (!sb_trie_delete (env->sb_trie, (const TrieChar *) p))
223
+ fprintf (stderr, "No entry '%s'. Not deleted.\n", p);
224
+ }
225
+
226
+ fclose (input);
227
+
228
+ return 1;
229
+ }
230
+
231
+ static int
232
+ command_query (int argc, char *argv[], ProgEnv *env)
233
+ {
234
+ TrieData data;
235
+
236
+ if (argc == 0) {
237
+ fprintf (stderr, "query: No key specified.\n");
238
+ return 0;
239
+ }
240
+
241
+ if (sb_trie_retrieve (env->sb_trie, (const TrieChar *) argv[0], &data)) {
242
+ printf ("%d\n", data);
243
+ } else {
244
+ fprintf (stderr, "query: Key '%s' not found.\n", argv[0]);
245
+ }
246
+
247
+ return 1;
248
+ }
249
+
250
+ static Bool
251
+ list_enum_func (const SBChar *key, TrieData key_data, void *user_data)
252
+ {
253
+ printf ("%s\t%d\n", key, key_data);
254
+ return TRUE;
255
+ }
256
+
257
+ static int
258
+ command_list (int argc, char *argv[], ProgEnv *env)
259
+ {
260
+ sb_trie_enumerate (env->sb_trie, list_enum_func, (void *) 0);
261
+ return 0;
262
+ }
263
+
264
+
265
/*
 * Print the help text on stdout and terminate the process.
 *
 * prog_name   - name shown in the banner and in the usage line
 * exit_status - status passed to exit(); this function does not return
 */
static void
usage (const char *prog_name, int exit_status)
{
    static const char help_text[] =
        "Options:\n"
        " -p, --path DIR set trie directory to DIR [default=.]\n"
        " -h, --help display this help and exit\n"
        " -V, --version output version information and exit\n"
        "\n"
        "Commands:\n"
        " add WORD DATA ... add WORD with DATA to trie\n"
        " add-list LISTFILE add WORD and DATA from LISTFILE to trie\n"
        " delete WORD ... delete WORD from trie\n"
        " delete-list LISTFILE delete words listed in LISTFILE from trie\n"
        " query WORD query WORD data from trie\n"
        " list list all words in trie\n";

    printf ("%s - double-array trie manipulator\n", prog_name);
    printf ("Usage: %s [OPTION]... TRIE CMD ARG ...\n", prog_name);
    /* The fixed part has no conversions, so it is written verbatim. */
    fputs (help_text, stdout);

    exit (exit_status);
}
287
+
288
/*
 * Trim white space from both ends of s, in place.
 *
 * Leading white space is skipped by advancing the returned pointer; trailing
 * white space is cut off by writing a terminating '\0' into the buffer.
 *
 * Returns a pointer into s at the first non-space character (or at the
 * terminator when s is empty or holds only white space).
 */
static char *
string_trim (char *s)
{
    char *end;

    /* skip leading white spaces; <ctype.h> needs an unsigned char value */
    while (*s && isspace ((unsigned char) *s))
        ++s;

    /* trim trailing white spaces, never stepping back past s itself so an
     * empty or all-blank string cannot be scanned out of bounds */
    end = s + strlen (s);
    while (end > s && isspace ((unsigned char) end[-1]))
        --end;
    *end = '\0';

    return s;
}
305
+
306
+ /*
307
+ vi:ts=4:ai:expandtab
308
+ */
@@ -0,0 +1,12 @@
1
# Build script for the 'trie' extension: tries to compile the bundled
# libdatrie, then generates the Makefile for the Ruby binding.

# Report a failed bundled build instead of ignoring it silently; the
# find_library check below still decides whether the build can proceed.
unless system('cd ext/libdatrie && ./configure && make')
  warn 'Building the bundled libdatrie failed.'
end

require 'mkmf'
dir_config 'trie'

# abort prints to stderr and exits with a non-zero status, so a missing
# library is not reported as a successful extconf run.
unless find_library('datrie', nil, 'ext/libdatrie')
  abort 'Need libdatrie.'
end

create_makefile 'trie'
12
+
data/ext/trie/trie.c ADDED
@@ -0,0 +1,174 @@
1
+ #include "ruby.h"
2
+ #include <datrie/sb-trie.h>
3
+ #include <stdlib.h>
4
+ #include <stdio.h>
5
+ #include <string.h>
6
+
7
+ static TrieChar* stringToTrieChar(VALUE string) {
8
+ return (TrieChar*) RSTRING(string)->ptr;
9
+ }
10
+
11
+ static void trie_free(SBTrie *sb_trie) {
12
+ if(sb_trie)
13
+ sb_trie_close(sb_trie);
14
+ }
15
+
16
+ static VALUE trie_alloc(VALUE klass) {
17
+ SBTrie *sb_trie;
18
+ VALUE obj;
19
+
20
+ obj = Data_Wrap_Struct(klass, 0, trie_free, sb_trie);
21
+ rb_iv_set(obj, "@open", Qfalse);
22
+
23
+ return obj;
24
+ }
25
+
26
+ static VALUE trie_initialize(VALUE self, VALUE path) {
27
+ SBTrie *sb_trie;
28
+
29
+ char *cpath = RSTRING(path)->ptr;
30
+ char *full_path = (char*)malloc(strlen(cpath) + 10);
31
+ sprintf(full_path, "%s/trie.sbm", cpath);
32
+
33
+ FILE *file;
34
+
35
+ file = fopen (full_path, "r");
36
+ if (!file) {
37
+ file = fopen (full_path, "w+");
38
+ fprintf(file,"[00,FF]\n");
39
+ }
40
+ fclose(file);
41
+ free (full_path);
42
+
43
+ // replace the pretend SBTrie created in alloc with a real one
44
+ RDATA(self)->data = sb_trie_open(cpath, "trie",
45
+ TRIE_IO_READ | TRIE_IO_WRITE | TRIE_IO_CREATE);
46
+
47
+ rb_iv_set(self, "@open", Qtrue);
48
+ rb_iv_set(self, "@path", path);
49
+ return self;
50
+ }
51
+
52
+ static VALUE trie_close(VALUE self) {
53
+ SBTrie *sb_trie;
54
+ Data_Get_Struct(self, SBTrie, sb_trie);
55
+
56
+ rb_iv_set(self, "@open", Qfalse);
57
+
58
+ return self;
59
+ }
60
+
61
+ static VALUE trie_has_key(VALUE self, VALUE key) {
62
+ SBTrie *sb_trie;
63
+ Data_Get_Struct(self, SBTrie, sb_trie);
64
+
65
+ const TrieChar *sb_key = (const TrieChar *)RSTRING(key)->ptr;
66
+
67
+ if(sb_trie_retrieve(sb_trie, sb_key, NULL))
68
+ return Qtrue;
69
+ else
70
+ return Qnil;
71
+ }
72
+
73
+ static VALUE trie_get(VALUE self, VALUE key) {
74
+ SBTrie *sb_trie;
75
+ TrieData trie_data;
76
+ Data_Get_Struct(self, SBTrie, sb_trie);
77
+
78
+ const TrieChar *sb_key = stringToTrieChar(key);
79
+
80
+ if(sb_trie_retrieve(sb_trie, sb_key, &trie_data)) {
81
+ return INT2FIX(trie_data);
82
+ } else
83
+ return Qnil;
84
+ }
85
+
86
+ static VALUE trie_add(VALUE self, VALUE key) {
87
+ SBTrie *sb_trie;
88
+ Data_Get_Struct(self, SBTrie, sb_trie);
89
+
90
+ const TrieChar *sb_key = stringToTrieChar(key);
91
+
92
+ if(sb_trie_store(sb_trie, sb_key, TRIE_DATA_ERROR))
93
+ return Qtrue;
94
+ else
95
+ return Qnil;
96
+ }
97
+
98
+ static VALUE trie_delete(VALUE self, VALUE key) {
99
+ SBTrie *sb_trie;
100
+ Data_Get_Struct(self, SBTrie, sb_trie);
101
+
102
+ const TrieChar *sb_key = stringToTrieChar(key);
103
+
104
+ if(sb_trie_delete(sb_trie, sb_key))
105
+ return Qtrue;
106
+ else
107
+ return Qnil;
108
+ }
109
+
110
+ static VALUE walk_all_paths(VALUE children, SBTrieState *state, char *prefix) {
111
+ int c;
112
+ for(c = 1; c < TRIE_CHAR_MAX; c++) {
113
+ if(sb_trie_state_is_walkable(state,c)) {
114
+ SBTrieState *next_state = sb_trie_state_clone(state);
115
+ sb_trie_state_walk(next_state, (TrieChar)c);
116
+
117
+ char *word = (char*) malloc(strlen(prefix) + 2);
118
+ strcat(strcpy(word, prefix), (char*)&c);
119
+
120
+ if(sb_trie_state_is_terminal(next_state))
121
+ rb_ary_push(children, rb_str_new2(word));
122
+
123
+ walk_all_paths(children, next_state, word);
124
+
125
+ sb_trie_state_free(next_state);
126
+ }
127
+ }
128
+ }
129
+
130
+ static VALUE trie_children(VALUE self, VALUE prefix) {
131
+ SBTrie *sb_trie;
132
+ Data_Get_Struct(self, SBTrie, sb_trie);
133
+
134
+ const TrieChar *sb_prefix = stringToTrieChar(prefix);
135
+
136
+ VALUE children = rb_ary_new();
137
+
138
+ SBTrieState *state = sb_trie_root(sb_trie);
139
+
140
+ TrieChar *iterator = (TrieChar*)sb_prefix;
141
+ while(*iterator != '\0') {
142
+ if(!sb_trie_state_is_walkable(state, *iterator))
143
+ return Qnil;
144
+ sb_trie_state_walk(state, *iterator);
145
+ iterator++;
146
+ }
147
+
148
+ if(sb_trie_state_is_terminal(state))
149
+ rb_ary_push(children, prefix);
150
+
151
+ walk_all_paths(children, state, (char*)sb_prefix);
152
+
153
+ sb_trie_state_free(state);
154
+ return children;
155
+ }
156
+
157
+ static VALUE trie_get_path(VALUE self) {
158
+ return rb_iv_get(self, "@path");
159
+ }
160
+
161
+ VALUE cTrie;
162
+
163
+ void Init_trie() {
164
+ cTrie = rb_define_class("Trie", rb_cObject);
165
+ rb_define_alloc_func(cTrie, trie_alloc);
166
+ rb_define_method(cTrie, "initialize", trie_initialize, 1);
167
+ rb_define_method(cTrie, "path", trie_get_path, 0);
168
+ rb_define_method(cTrie, "has_key?", trie_has_key, 1);
169
+ rb_define_method(cTrie, "get", trie_get, 1);
170
+ rb_define_method(cTrie, "add", trie_add, 1);
171
+ rb_define_method(cTrie, "delete", trie_delete, 1);
172
+ rb_define_method(cTrie, "close", trie_close, 0);
173
+ rb_define_method(cTrie, "children", trie_children, 1);
174
+ }