middlemac 3.1.0 → 3.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/CHANGELOG.md +17 -1
- data/Rakefile +16 -5
- data/documentation_project/Contents/Resources/SharedGlobalAssets/_layouts/layout-apple-modern.haml +2 -2
- data/documentation_project/Gemfile +2 -2
- data/documentation_project/config.rb +6 -5
- data/ext/trie/darray.c +673 -0
- data/ext/trie/darray.h +233 -0
- data/ext/trie/extconf.rb +23 -0
- data/ext/trie/fileutils.c +151 -0
- data/ext/trie/fileutils.h +36 -0
- data/ext/trie/tail.c +340 -0
- data/ext/trie/tail.h +207 -0
- data/ext/trie/trie-private.c +299 -0
- data/ext/trie/trie-private.h +31 -0
- data/ext/trie/trie.c +628 -0
- data/ext/trie/trie.h +40 -0
- data/ext/trie/triedefs.h +73 -0
- data/ext/trie/typedefs.h +117 -0
- data/features/helpers_features.feature +8 -9
- data/features/main_features.feature +3 -3
- data/fixtures/middlemac_app/Gemfile +2 -2
- data/fixtures/middlemac_app/config.rb +7 -9
- data/lib/middlemac.rb +1 -1
- data/lib/middlemac/extension.rb +5 -1
- data/lib/middlemac/{trie.rb → trie-extension.rb} +0 -0
- data/lib/middlemac/version.rb +1 -1
- data/middlemac.gemspec +20 -2
- metadata +51 -34
@@ -0,0 +1,31 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* trie-private.h - Private utilities for trie implementation
|
4
|
+
* Created: 2007-08-25
|
5
|
+
* Author: Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef __TRIE_PRIVATE_H
|
9
|
+
#define __TRIE_PRIVATE_H
|
10
|
+
|
11
|
+
#include "typedefs.h"
|
12
|
+
|
13
|
+
/**
|
14
|
+
* @file trie-private.h
|
15
|
+
* @brief Private utilities for trie implementation
|
16
|
+
*/
|
17
|
+
|
18
|
+
/**
 * @brief Minimum value macro
 *
 * Expands to the smaller of the two arguments; when they compare equal the
 * second argument is selected. Each argument can be evaluated twice, so do
 * not pass expressions with side effects.
 */
#define MIN_VAL(a, b) (((a) < (b)) ? (a) : (b))
|
22
|
+
/**
 * @brief Maximum value macro
 *
 * Expands to the larger of the two arguments; when they compare equal the
 * second argument is selected. Each argument can be evaluated twice, so do
 * not pass expressions with side effects.
 */
#define MAX_VAL(a, b) (((a) > (b)) ? (a) : (b))
|
26
|
+
|
27
|
+
#endif /* __TRIE_PRIVATE_H */
|
28
|
+
|
29
|
+
/*
|
30
|
+
vi:ts=4:ai:expandtab
|
31
|
+
*/
|
data/ext/trie/trie.c
ADDED
@@ -0,0 +1,628 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include "trie.h"
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <stdio.h>
|
5
|
+
#include <string.h>
|
6
|
+
|
7
|
+
VALUE cTrie, cTrieNode;
|
8
|
+
|
9
|
+
/*
|
10
|
+
* Document-class: Trie
|
11
|
+
*
|
12
|
+
* A key-value data structure for string keys which is efficient in memory usage and fast in retrieval time.
|
13
|
+
*
|
14
|
+
*/
|
15
|
+
|
16
|
+
/* Allocator registered for Trie: wraps a freshly created trie (trie_new) in
 * a Ruby data object of class klass. trie_free is installed as the free
 * function and no mark function is set. */
static VALUE rb_trie_alloc(VALUE klass) {
    return Data_Wrap_Struct(klass, 0, trie_free, trie_new());
}
|
21
|
+
|
22
|
+
void raise_ioerror(const char * message) {
|
23
|
+
VALUE rb_eIOError = rb_const_get(rb_cObject, rb_intern("IOError"));
|
24
|
+
rb_raise(rb_eIOError, "%s", message);
|
25
|
+
}
|
26
|
+
|
27
|
+
/*
|
28
|
+
* call-seq:
|
29
|
+
* read(filename_base) -> Trie
|
30
|
+
*
|
31
|
+
* Returns a new trie with data as read from disk.
|
32
|
+
*/
|
33
|
+
static VALUE rb_trie_read(VALUE self, VALUE filename_base) {
|
34
|
+
VALUE da_filename = rb_str_dup(filename_base);
|
35
|
+
rb_str_concat(da_filename, rb_str_new2(".da"));
|
36
|
+
StringValue(da_filename);
|
37
|
+
|
38
|
+
VALUE tail_filename = rb_str_dup(filename_base);
|
39
|
+
rb_str_concat(tail_filename, rb_str_new2(".tail"));
|
40
|
+
StringValue(tail_filename);
|
41
|
+
|
42
|
+
Trie *trie = trie_new();
|
43
|
+
|
44
|
+
VALUE obj;
|
45
|
+
obj = Data_Wrap_Struct(self, 0, trie_free, trie);
|
46
|
+
|
47
|
+
DArray *old_da = trie->da;
|
48
|
+
Tail *old_tail = trie->tail;
|
49
|
+
|
50
|
+
FILE *da_file = fopen(RSTRING_PTR(da_filename), "r");
|
51
|
+
if (da_file == NULL)
|
52
|
+
raise_ioerror("Error reading .da file.");
|
53
|
+
|
54
|
+
trie->da = da_read(da_file);
|
55
|
+
fclose(da_file);
|
56
|
+
|
57
|
+
FILE *tail_file = fopen(RSTRING_PTR(tail_filename), "r");
|
58
|
+
if (tail_file == NULL)
|
59
|
+
raise_ioerror("Error reading .tail file.");
|
60
|
+
|
61
|
+
trie->tail = tail_read(tail_file);
|
62
|
+
fclose(tail_file);
|
63
|
+
|
64
|
+
da_free(old_da);
|
65
|
+
tail_free(old_tail);
|
66
|
+
|
67
|
+
return obj;
|
68
|
+
}
|
69
|
+
|
70
|
+
/*
|
71
|
+
* Forward declare trie_has_key from trie-private.c, because it's not included
|
72
|
+
* in any headers. Clang is now enforcing C99, and so this mostly just silences
|
73
|
+
* an error because the correct function is linked in at build time anyway.
|
74
|
+
*/
|
75
|
+
Bool trie_has_key (const Trie *trie, const TrieChar *key);
|
76
|
+
|
77
|
+
/*
|
78
|
+
* call-seq:
|
79
|
+
* has_key?(key) -> true/false
|
80
|
+
*
|
81
|
+
* Determines whether or not a key exists in the Trie. Use this if you don't care about the value, as it
|
82
|
+
* is marginally faster than Trie#get.
|
83
|
+
*
|
84
|
+
*/
|
85
|
+
static VALUE rb_trie_has_key(VALUE self, VALUE key) {
|
86
|
+
StringValue(key);
|
87
|
+
|
88
|
+
Trie *trie;
|
89
|
+
Data_Get_Struct(self, Trie, trie);
|
90
|
+
|
91
|
+
if(trie_has_key(trie, (TrieChar*)RSTRING_PTR(key)))
|
92
|
+
return Qtrue;
|
93
|
+
else
|
94
|
+
return Qnil;
|
95
|
+
}
|
96
|
+
|
97
|
+
/*
|
98
|
+
* call-seq:
|
99
|
+
* get(key) -> value
|
100
|
+
* [key] -> value
|
101
|
+
*
|
102
|
+
* Retrieves the value for a particular key (or nil) from the Trie.
|
103
|
+
*
|
104
|
+
*/
|
105
|
+
static VALUE rb_trie_get(VALUE self, VALUE key) {
|
106
|
+
StringValue(key);
|
107
|
+
|
108
|
+
Trie *trie;
|
109
|
+
Data_Get_Struct(self, Trie, trie);
|
110
|
+
|
111
|
+
TrieData data;
|
112
|
+
if(trie_retrieve(trie, (TrieChar*)RSTRING_PTR(key), &data))
|
113
|
+
return (VALUE)data;
|
114
|
+
else
|
115
|
+
return Qnil;
|
116
|
+
}
|
117
|
+
|
118
|
+
/*
|
119
|
+
* call-seq:
|
120
|
+
* add(key)
|
121
|
+
* add(key,value)
|
122
|
+
*
|
123
|
+
* Add a key, or a key and value to the Trie. If you add a key without a value it assumes true for the value.
|
124
|
+
*
|
125
|
+
*/
|
126
|
+
static VALUE rb_trie_add(VALUE self, VALUE args) {
|
127
|
+
Trie *trie;
|
128
|
+
Data_Get_Struct(self, Trie, trie);
|
129
|
+
|
130
|
+
int size = RARRAY_LEN(args);
|
131
|
+
if(size < 1 || size > 2)
|
132
|
+
return Qnil;
|
133
|
+
|
134
|
+
VALUE key;
|
135
|
+
key = RARRAY_PTR(args)[0];
|
136
|
+
StringValue(key);
|
137
|
+
|
138
|
+
TrieData value = size == 2 ? RARRAY_PTR(args)[1] : TRIE_DATA_ERROR;
|
139
|
+
|
140
|
+
if(trie_store(trie, (TrieChar*)RSTRING_PTR(key), value))
|
141
|
+
return Qtrue;
|
142
|
+
else
|
143
|
+
return Qnil;
|
144
|
+
}
|
145
|
+
|
146
|
+
/*
|
147
|
+
* call-seq:
|
148
|
+
* delete(key)
|
149
|
+
*
|
150
|
+
* Delete a key from the Trie. Returns true if it deleted a key, nil otherwise.
|
151
|
+
*
|
152
|
+
*/
|
153
|
+
static VALUE rb_trie_delete(VALUE self, VALUE key) {
|
154
|
+
StringValue(key);
|
155
|
+
|
156
|
+
Trie *trie;
|
157
|
+
Data_Get_Struct(self, Trie, trie);
|
158
|
+
|
159
|
+
if(trie_delete(trie, (TrieChar*)RSTRING_PTR(key)))
|
160
|
+
return Qtrue;
|
161
|
+
else
|
162
|
+
return Qnil;
|
163
|
+
}
|
164
|
+
|
165
|
+
/*
 * Depth-first walk over every branch reachable from state, appending each
 * key found to the Ruby array children.
 *
 * prefix holds the NUL-terminated path walked so far and prefix_size is its
 * length; the buffer is extended in place by one byte per level and restored
 * before returning. Returns children.
 *
 * NOTE(review): nothing here bounds prefix_size against the capacity of the
 * caller's buffer, so the caller must supply a buffer large enough for the
 * longest key below state.
 */
static VALUE walk_all_paths(Trie *trie, VALUE children, TrieState *state, char *prefix, int prefix_size) {
    int c;
    for (c = 1; c < 256; c++) {
        if (!trie_state_is_walkable(state, c))
            continue;

        /* Walk on a clone so the caller's state stays at this level. */
        TrieState *next_state = trie_state_clone(state);
        trie_state_walk(next_state, c);

        prefix[prefix_size] = c;
        prefix[prefix_size + 1] = 0;

        /* rb_str_new2 copies the bytes, so the shared buffer can be used
         * directly; no temporary heap copy is needed. */
        if (trie_state_is_terminal(next_state))
            rb_ary_push(children, rb_str_new2(prefix));

        walk_all_paths(trie, children, next_state, prefix, prefix_size + 1);

        prefix[prefix_size] = 0;
        trie_state_free(next_state);
    }

    return children;
}
|
188
|
+
|
189
|
+
|
190
|
+
/*
 * Advances state along every byte of the NUL-terminated char_prefix.
 * Returns FALSE as soon as a byte cannot be walked (state is left wherever
 * the walk stopped) and TRUE once the whole prefix has been consumed.
 */
static Bool traverse(TrieState *state, TrieChar *char_prefix) {
    const TrieChar *cursor;

    for (cursor = char_prefix; *cursor != 0; cursor++) {
        if (!trie_state_is_walkable(state, *cursor)) {
            return FALSE;
        }
        trie_state_walk(state, *cursor);
    }
    return TRUE;
}
|
200
|
+
|
201
|
+
|
202
|
+
/*
|
203
|
+
* call-seq:
|
204
|
+
* children(prefix) -> [ key, ... ]
|
205
|
+
*
|
206
|
+
* Finds all keys in the Trie beginning with the given prefix.
|
207
|
+
*
|
208
|
+
*/
|
209
|
+
static VALUE rb_trie_children(VALUE self, VALUE prefix) {
|
210
|
+
if(NIL_P(prefix))
|
211
|
+
return rb_ary_new();
|
212
|
+
|
213
|
+
StringValue(prefix);
|
214
|
+
|
215
|
+
Trie *trie;
|
216
|
+
Data_Get_Struct(self, Trie, trie);
|
217
|
+
|
218
|
+
int prefix_size = RSTRING_LEN(prefix);
|
219
|
+
TrieState *state = trie_root(trie);
|
220
|
+
VALUE children = rb_ary_new();
|
221
|
+
TrieChar *char_prefix = (TrieChar*)RSTRING_PTR(prefix);
|
222
|
+
|
223
|
+
if(!traverse(state, char_prefix)) {
|
224
|
+
return children;
|
225
|
+
}
|
226
|
+
|
227
|
+
if(trie_state_is_terminal(state))
|
228
|
+
rb_ary_push(children, prefix);
|
229
|
+
|
230
|
+
char prefix_buffer[1024];
|
231
|
+
memcpy(prefix_buffer, char_prefix, prefix_size);
|
232
|
+
prefix_buffer[prefix_size] = 0;
|
233
|
+
|
234
|
+
walk_all_paths(trie, children, state, prefix_buffer, prefix_size);
|
235
|
+
|
236
|
+
trie_state_free(state);
|
237
|
+
return children;
|
238
|
+
}
|
239
|
+
|
240
|
+
/*
 * Depth-first search below state that stops at the first terminal node.
 * Returns TRUE when any branch reachable from state ends a key, FALSE
 * otherwise.
 *
 * prefix / prefix_size describe the NUL-terminated path walked so far; the
 * buffer is extended in place by one byte per level.
 *
 * NOTE(review): nothing here bounds prefix_size against the capacity of the
 * caller's buffer.
 */
static Bool walk_all_paths_until_first_terminal(Trie *trie, TrieState *state, char *prefix, int prefix_size) {
    int c;
    Bool ret = FALSE;

    for (c = 1; c < 256; c++) {
        if (!trie_state_is_walkable(state, c))
            continue;

        /* Walk on a clone so the caller's state stays at this level. */
        TrieState *next_state = trie_state_clone(state);
        trie_state_walk(next_state, c);

        prefix[prefix_size] = c;
        prefix[prefix_size + 1] = 0;

        if (trie_state_is_terminal(next_state)) {
            /* Release the clone before the early exit. */
            trie_state_free(next_state);
            return TRUE;
        }

        ret = walk_all_paths_until_first_terminal(trie, next_state, prefix, prefix_size + 1);

        prefix[prefix_size] = 0;
        trie_state_free(next_state);

        if (ret == TRUE)
            return ret;
    }

    return ret;
}
|
268
|
+
|
269
|
+
/*
 * call-seq:
 *   has_children?(prefix) -> true/false
 *
 * Reports whether at least one key in the Trie begins with the given prefix
 * (the prefix itself being a key counts). Raises ArgumentError when a
 * matching prefix does not fit the internal buffer.
 *
 * NOTE(review): a nil prefix returns an empty array, which is truthy in
 * Ruby; this looks copied from Trie#children - confirm whether false was
 * intended before changing it.
 */
static VALUE rb_trie_has_children(VALUE self, VALUE prefix) {
    if (NIL_P(prefix))
        return rb_ary_new();

    StringValue(prefix);

    Trie *trie;
    Data_Get_Struct(self, Trie, trie);

    char prefix_buffer[1024];
    long prefix_len = RSTRING_LEN(prefix);
    TrieChar *char_prefix = (TrieChar*)RSTRING_PTR(prefix);
    TrieState *state = trie_root(trie);

    if (!traverse(state, char_prefix)) {
        /* The state is ours to release on every exit path. */
        trie_state_free(state);
        return Qfalse;
    }

    if (trie_state_is_terminal(state)) {
        trie_state_free(state);
        return Qtrue;
    }

    /* The prefix plus its terminating NUL must fit in prefix_buffer.
     * NOTE(review): the walker appends further bytes without a bound. */
    if (prefix_len >= (long)sizeof(prefix_buffer)) {
        trie_state_free(state);
        rb_raise(rb_eArgError, "prefix is too long");
    }
    int prefix_size = (int)prefix_len;

    memcpy(prefix_buffer, char_prefix, prefix_size);
    prefix_buffer[prefix_size] = 0;

    Bool ret = walk_all_paths_until_first_terminal(trie, state, prefix_buffer, prefix_size);

    trie_state_free(state);
    return ret == TRUE ? Qtrue : Qfalse;
}
|
298
|
+
|
299
|
+
/*
 * Depth-first walk over every branch reachable from state, appending a
 * [key, value] pair to the Ruby array children for each key found.
 *
 * prefix holds the NUL-terminated path walked so far and prefix_size is its
 * length; the buffer is extended in place by one byte per level and restored
 * before returning. Returns children.
 *
 * NOTE(review): nothing here bounds prefix_size against the capacity of the
 * caller's buffer, so the caller must supply a buffer large enough for the
 * longest key below state.
 */
static VALUE walk_all_paths_with_values(Trie *trie, VALUE children, TrieState *state, char *prefix, int prefix_size) {
    int c;
    for (c = 1; c < 256; c++) {
        if (!trie_state_is_walkable(state, c))
            continue;

        /* Walk on a clone so the caller's state stays at this level. */
        TrieState *next_state = trie_state_clone(state);
        trie_state_walk(next_state, c);

        prefix[prefix_size] = c;
        prefix[prefix_size + 1] = 0;

        if (trie_state_is_terminal(next_state)) {
            /* Step onto the terminator of a second clone to read the data. */
            TrieState *end_state = trie_state_clone(next_state);
            trie_state_walk(end_state, '\0');

            /* rb_str_new2 copies the bytes, so the shared buffer can be
             * used directly; no temporary heap copy is needed. */
            VALUE tuple = rb_ary_new();
            rb_ary_push(tuple, rb_str_new2(prefix));

            TrieData trie_data = trie_state_get_data(end_state);
            rb_ary_push(tuple, (VALUE)trie_data);
            rb_ary_push(children, tuple);

            trie_state_free(end_state);
        }

        walk_all_paths_with_values(trie, children, next_state, prefix, prefix_size + 1);

        prefix[prefix_size] = 0;
        trie_state_free(next_state);
    }

    return children;
}
|
333
|
+
|
334
|
+
/*
|
335
|
+
* call-seq:
|
336
|
+
* children_with_values(key) -> [ [key,value], ... ]
|
337
|
+
*
|
338
|
+
* Finds all keys with their respective values in the Trie beginning with the given prefix.
|
339
|
+
*
|
340
|
+
*/
|
341
|
+
static VALUE rb_trie_children_with_values(VALUE self, VALUE prefix) {
|
342
|
+
if(NIL_P(prefix))
|
343
|
+
return rb_ary_new();
|
344
|
+
|
345
|
+
StringValue(prefix);
|
346
|
+
|
347
|
+
Trie *trie;
|
348
|
+
Data_Get_Struct(self, Trie, trie);
|
349
|
+
|
350
|
+
int prefix_size = RSTRING_LEN(prefix);
|
351
|
+
TrieChar *char_prefix = (TrieChar*)RSTRING_PTR(prefix);
|
352
|
+
|
353
|
+
VALUE children = rb_ary_new();
|
354
|
+
|
355
|
+
TrieState *state = trie_root(trie);
|
356
|
+
|
357
|
+
if(!traverse(state, char_prefix)) {
|
358
|
+
return children;
|
359
|
+
}
|
360
|
+
|
361
|
+
if(trie_state_is_terminal(state)) {
|
362
|
+
TrieState *end_state = trie_state_clone(state);
|
363
|
+
trie_state_walk(end_state, '\0');
|
364
|
+
|
365
|
+
VALUE tuple = rb_ary_new();
|
366
|
+
rb_ary_push(tuple, prefix);
|
367
|
+
TrieData trie_data = trie_state_get_data(end_state);
|
368
|
+
rb_ary_push(tuple, (VALUE)trie_data);
|
369
|
+
rb_ary_push(children, tuple);
|
370
|
+
|
371
|
+
trie_state_free(end_state);
|
372
|
+
}
|
373
|
+
|
374
|
+
char prefix_buffer[1024];
|
375
|
+
memcpy(prefix_buffer, char_prefix, prefix_size);
|
376
|
+
prefix_buffer[prefix_size] = 0;
|
377
|
+
|
378
|
+
walk_all_paths_with_values(trie, children, state, prefix_buffer, prefix_size);
|
379
|
+
|
380
|
+
trie_state_free(state);
|
381
|
+
return children;
|
382
|
+
}
|
383
|
+
|
384
|
+
static VALUE rb_trie_node_alloc(VALUE klass);
|
385
|
+
|
386
|
+
/*
|
387
|
+
* call-seq:
|
388
|
+
* root -> TrieNode
|
389
|
+
*
|
390
|
+
* Returns a TrieNode representing the root of the Trie.
|
391
|
+
*
|
392
|
+
*/
|
393
|
+
static VALUE rb_trie_root(VALUE self) {
|
394
|
+
Trie *trie;
|
395
|
+
Data_Get_Struct(self, Trie, trie);
|
396
|
+
|
397
|
+
VALUE trie_node = rb_trie_node_alloc(cTrieNode);
|
398
|
+
|
399
|
+
TrieState *state = trie_root(trie);
|
400
|
+
RDATA(trie_node)->data = state;
|
401
|
+
|
402
|
+
rb_iv_set(trie_node, "@state", Qnil);
|
403
|
+
rb_iv_set(trie_node, "@full_state", rb_str_new2(""));
|
404
|
+
return trie_node;
|
405
|
+
}
|
406
|
+
|
407
|
+
|
408
|
+
/*
|
409
|
+
* Document-class: TrieNode
|
410
|
+
*
|
411
|
+
* Represents a single node in the Trie. It can be used as a cursor to walk around the Trie.
|
412
|
+
* You can grab a TrieNode for the root of the Trie by using Trie#root.
|
413
|
+
*
|
414
|
+
*/
|
415
|
+
|
416
|
+
/* Allocator registered for TrieNode: creates a data object of class klass
 * with no state attached yet (data pointer NULL). trie_state_free is
 * installed as the free function and no mark function is set. */
static VALUE rb_trie_node_alloc(VALUE klass) {
    return Data_Wrap_Struct(klass, 0, trie_state_free, NULL);
}
|
421
|
+
|
422
|
+
/* nodoc */
|
423
|
+
static VALUE rb_trie_node_initialize_copy(VALUE self, VALUE from) {
|
424
|
+
RDATA(self)->data = trie_state_clone(RDATA(from)->data);
|
425
|
+
|
426
|
+
VALUE state = rb_iv_get(from, "@state");
|
427
|
+
rb_iv_set(self, "@state", state == Qnil ? Qnil : rb_str_dup(state));
|
428
|
+
|
429
|
+
VALUE full_state = rb_iv_get(from, "@full_state");
|
430
|
+
rb_iv_set(self, "@full_state", full_state == Qnil ? Qnil : rb_str_dup(full_state));
|
431
|
+
|
432
|
+
return self;
|
433
|
+
}
|
434
|
+
|
435
|
+
/*
|
436
|
+
* call-seq:
|
437
|
+
* state -> single character
|
438
|
+
*
|
439
|
+
* Returns the letter that the TrieNode instance points to. So, if the node is pointing at the "e" in "monkeys", the state is "e".
|
440
|
+
*
|
441
|
+
*/
|
442
|
+
static VALUE rb_trie_node_get_state(VALUE self) {
|
443
|
+
return rb_iv_get(self, "@state");
|
444
|
+
}
|
445
|
+
|
446
|
+
/*
|
447
|
+
* call-seq:
|
448
|
+
* full_state -> string
|
449
|
+
*
|
450
|
+
* Returns the full string from the root of the Trie up to this node. So if the node pointing at the "e" in "monkeys",
|
451
|
+
* the full_state is "monke".
|
452
|
+
*
|
453
|
+
*/
|
454
|
+
static VALUE rb_trie_node_get_full_state(VALUE self) {
|
455
|
+
return rb_iv_get(self, "@full_state");
|
456
|
+
}
|
457
|
+
|
458
|
+
/*
|
459
|
+
* call-seq:
|
460
|
+
* walk!(letter) -> TrieNode
|
461
|
+
*
|
462
|
+
* Tries to walk down a particular branch of the Trie. It modifies the node it is called on.
|
463
|
+
*
|
464
|
+
*/
|
465
|
+
static VALUE rb_trie_node_walk_bang(VALUE self, VALUE rchar) {
|
466
|
+
StringValue(rchar);
|
467
|
+
|
468
|
+
TrieState *state;
|
469
|
+
Data_Get_Struct(self, TrieState, state);
|
470
|
+
|
471
|
+
if(RSTRING_LEN(rchar) != 1)
|
472
|
+
return Qnil;
|
473
|
+
|
474
|
+
Bool result = trie_state_walk(state, *RSTRING_PTR(rchar));
|
475
|
+
|
476
|
+
if(result) {
|
477
|
+
rb_iv_set(self, "@state", rchar);
|
478
|
+
VALUE full_state = rb_iv_get(self, "@full_state");
|
479
|
+
rb_str_append(full_state, rchar);
|
480
|
+
rb_iv_set(self, "@full_state", full_state);
|
481
|
+
return self;
|
482
|
+
} else
|
483
|
+
return Qnil;
|
484
|
+
}
|
485
|
+
|
486
|
+
/*
|
487
|
+
* call-seq:
|
488
|
+
* walk(letter) -> TrieNode
|
489
|
+
*
|
490
|
+
* Tries to walk down a particular branch of the Trie. It clones the node it is called on and
|
491
|
+
* walks with that one, leaving the original unchanged.
|
492
|
+
*
|
493
|
+
*/
|
494
|
+
static VALUE rb_trie_node_walk(VALUE self, VALUE rchar) {
|
495
|
+
StringValue(rchar);
|
496
|
+
|
497
|
+
VALUE new_node = rb_funcall(self, rb_intern("dup"), 0);
|
498
|
+
|
499
|
+
TrieState *state;
|
500
|
+
Data_Get_Struct(new_node, TrieState, state);
|
501
|
+
|
502
|
+
if(RSTRING_LEN(rchar) != 1)
|
503
|
+
return Qnil;
|
504
|
+
|
505
|
+
Bool result = trie_state_walk(state, *RSTRING_PTR(rchar));
|
506
|
+
|
507
|
+
if(result) {
|
508
|
+
rb_iv_set(new_node, "@state", rchar);
|
509
|
+
VALUE full_state = rb_iv_get(new_node, "@full_state");
|
510
|
+
rb_str_append(full_state, rchar);
|
511
|
+
rb_iv_set(new_node, "@full_state", full_state);
|
512
|
+
return new_node;
|
513
|
+
} else
|
514
|
+
return Qnil;
|
515
|
+
}
|
516
|
+
|
517
|
+
/*
|
518
|
+
* call-seq:
|
519
|
+
* value
|
520
|
+
*
|
521
|
+
* Attempts to get the value at this node of the Trie. This only works if the node is a terminal
|
522
|
+
* (i.e. end of a key), otherwise it returns nil.
|
523
|
+
*
|
524
|
+
*/
|
525
|
+
static VALUE rb_trie_node_value(VALUE self) {
|
526
|
+
TrieState *state;
|
527
|
+
TrieState *dup;
|
528
|
+
Data_Get_Struct(self, TrieState, state);
|
529
|
+
|
530
|
+
dup = trie_state_clone(state);
|
531
|
+
|
532
|
+
trie_state_walk(dup, 0);
|
533
|
+
TrieData trie_data = trie_state_get_data(dup);
|
534
|
+
trie_state_free(dup);
|
535
|
+
|
536
|
+
return TRIE_DATA_ERROR == trie_data ? Qnil : (VALUE)trie_data;
|
537
|
+
}
|
538
|
+
|
539
|
+
/*
|
540
|
+
* call-seq:
|
541
|
+
* terminal? -> true/false
|
542
|
+
*
|
543
|
+
* Returns true if this node is at the end of a key. So if you have two keys in your Trie, "he" and
|
544
|
+
* "hello", and you walk all the way to the end of "hello", the "e" and the "o" will return true for terminal?.
|
545
|
+
*
|
546
|
+
*/
|
547
|
+
static VALUE rb_trie_node_terminal(VALUE self) {
|
548
|
+
TrieState *state;
|
549
|
+
Data_Get_Struct(self, TrieState, state);
|
550
|
+
|
551
|
+
return trie_state_is_terminal(state) ? Qtrue : Qnil;
|
552
|
+
}
|
553
|
+
|
554
|
+
/*
|
555
|
+
* call-seq:
|
556
|
+
* leaf? -> true/false
|
557
|
+
*
|
558
|
+
* Returns true if there are no branches at this node.
|
559
|
+
*/
|
560
|
+
static VALUE rb_trie_node_leaf(VALUE self) {
|
561
|
+
TrieState *state;
|
562
|
+
Data_Get_Struct(self, TrieState, state);
|
563
|
+
|
564
|
+
return trie_state_is_leaf(state) ? Qtrue : Qnil;
|
565
|
+
}
|
566
|
+
|
567
|
+
/*
|
568
|
+
* call-seq:
|
569
|
+
* save(filename_base) -> true
|
570
|
+
*
|
571
|
+
* Saves the trie data to two files, filename_base.da and filename_base.tail.
|
572
|
+
* Returns true if saving was successful.
|
573
|
+
*/
|
574
|
+
static VALUE rb_trie_save(VALUE self, VALUE filename_base) {
|
575
|
+
VALUE da_filename = rb_str_dup(filename_base);
|
576
|
+
rb_str_concat(da_filename, rb_str_new2(".da"));
|
577
|
+
StringValue(da_filename);
|
578
|
+
|
579
|
+
VALUE tail_filename = rb_str_dup(filename_base);
|
580
|
+
rb_str_concat(tail_filename, rb_str_new2(".tail"));
|
581
|
+
StringValue(tail_filename);
|
582
|
+
|
583
|
+
Trie *trie;
|
584
|
+
Data_Get_Struct(self, Trie, trie);
|
585
|
+
|
586
|
+
FILE *da_file = fopen(RSTRING_PTR(da_filename), "w");
|
587
|
+
if (da_file == NULL)
|
588
|
+
raise_ioerror("Error opening .da file for writing.");
|
589
|
+
if (da_write(trie->da, da_file) != 0)
|
590
|
+
raise_ioerror("Error writing DArray data.");
|
591
|
+
fclose(da_file);
|
592
|
+
|
593
|
+
FILE *tail_file = fopen(RSTRING_PTR(tail_filename), "w");
|
594
|
+
if (tail_file == NULL)
|
595
|
+
raise_ioerror("Error opening .tail file for writing.");
|
596
|
+
if (tail_write(trie->tail, tail_file) != 0)
|
597
|
+
raise_ioerror("Error writing Tail data.");
|
598
|
+
fclose(tail_file);
|
599
|
+
|
600
|
+
return Qtrue;
|
601
|
+
}
|
602
|
+
|
603
|
+
|
604
|
+
void Init_trie() {
|
605
|
+
cTrie = rb_define_class("Trie", rb_cObject);
|
606
|
+
rb_define_alloc_func(cTrie, rb_trie_alloc);
|
607
|
+
rb_define_module_function(cTrie, "read", rb_trie_read, 1);
|
608
|
+
rb_define_method(cTrie, "has_key?", rb_trie_has_key, 1);
|
609
|
+
rb_define_method(cTrie, "get", rb_trie_get, 1);
|
610
|
+
rb_define_method(cTrie, "add", rb_trie_add, -2);
|
611
|
+
rb_define_method(cTrie, "delete", rb_trie_delete, 1);
|
612
|
+
rb_define_method(cTrie, "children", rb_trie_children, 1);
|
613
|
+
rb_define_method(cTrie, "children_with_values", rb_trie_children_with_values, 1);
|
614
|
+
rb_define_method(cTrie, "has_children?", rb_trie_has_children, 1);
|
615
|
+
rb_define_method(cTrie, "root", rb_trie_root, 0);
|
616
|
+
rb_define_method(cTrie, "save", rb_trie_save, 1);
|
617
|
+
|
618
|
+
cTrieNode = rb_define_class("TrieNode", rb_cObject);
|
619
|
+
rb_define_alloc_func(cTrieNode, rb_trie_node_alloc);
|
620
|
+
rb_define_method(cTrieNode, "initialize_copy", rb_trie_node_initialize_copy, 1);
|
621
|
+
rb_define_method(cTrieNode, "state", rb_trie_node_get_state, 0);
|
622
|
+
rb_define_method(cTrieNode, "full_state", rb_trie_node_get_full_state, 0);
|
623
|
+
rb_define_method(cTrieNode, "walk!", rb_trie_node_walk_bang, 1);
|
624
|
+
rb_define_method(cTrieNode, "walk", rb_trie_node_walk, 1);
|
625
|
+
rb_define_method(cTrieNode, "value", rb_trie_node_value, 0);
|
626
|
+
rb_define_method(cTrieNode, "terminal?", rb_trie_node_terminal, 0);
|
627
|
+
rb_define_method(cTrieNode, "leaf?", rb_trie_node_leaf, 0);
|
628
|
+
}
|