tyler-trie 0.3.1 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +42 -1
- data/VERSION.yml +1 -1
- data/ext/trie/trie-private.c +28 -0
- data/ext/trie/trie.c +167 -42
- data/ext/trie/triedefs.h +2 -2
- data/ext/trie/typedefs.h +113 -0
- data/spec/trie_spec.rb +18 -1
- metadata +13 -5
data/README.textile
CHANGED
@@ -44,7 +44,7 @@ Or if you have some integer data to store along with the words, such as weights
|
|
44
44
|
end
|
45
45
|
</code></pre>
|
46
46
|
|
47
|
-
Great, so we've populated our trie with some words.
|
47
|
+
Great, so we've populated our trie with some words. Let's make sure those words are really there.
|
48
48
|
|
49
49
|
<pre><code>
|
50
50
|
trie.has_key?('widget') #=> true
|
@@ -88,4 +88,45 @@ There are, of course, some more interesting and advanced ways to use a trie. Fo
|
|
88
88
|
By calling <code>root</code> on a Trie object, you get a TrieNode, pointed at the root of the trie. You can then use this node to walk the trie and perceive things about each word.
|
89
89
|
|
90
90
|
|
91
|
+
h2. Performance Characteristics
|
92
|
+
|
93
|
+
Here are some quick benchmarks on my 2.4GHz Intel Core 2 Duo MacBook Pro:
|
94
|
+
|
95
|
+
For keys that are 5 characters long:
|
96
|
+
31,344 adds/second
|
97
|
+
1,827,408 searches/second
|
98
|
+
38,453 prefix searches/second
|
99
|
+
|
100
|
+
For keys that are 10 characters long:
|
101
|
+
30,653 adds/second
|
102
|
+
1,802,649 searches/second
|
103
|
+
13,553 prefix searches/second
|
104
|
+
|
105
|
+
For keys that are 20 characters long:
|
106
|
+
30,488 adds/second
|
107
|
+
1,851,461 searches/second
|
108
|
+
5,855 prefix searches/second
|
109
|
+
|
110
|
+
For keys that are 40 characters long:
|
111
|
+
30,710 adds/second
|
112
|
+
1,838,380 searches/second
|
113
|
+
2,762 prefix searches/second
|
114
|
+
|
115
|
+
|
116
|
+
There are a few takeaways from this. First, there is no strong correlation between length of keys and insert or retrieve time. They stay fairly constant as the length of keys increases. Second, doing prefix searches with this trie gets slower linearly with the length of the keys in the trie.
|
117
|
+
|
118
|
+
This points to a limitation of this type of trie. It is based on libdatrie, which is a dual-array trie. When finding branches from a particular node, we must query all possible branches to determine whether or not they exist. So for each node we do 255 of these queries.
|
119
|
+
|
120
|
+
There may be some tricks to speed this up, but for now it is simply a limitation of this trie.
|
121
|
+
|
122
|
+
Now, let's look at the effect of the size of the trie itself on query and insertion time. For this test I inserted 100, 1000, 10000, 100000, and 1000000 words in the trie. We measure the insertion and retrieval time in each. The graph below shows the results.
|
123
|
+
|
124
|
+
!http://codehallow.com/effect_of_size.png!
|
125
|
+
|
126
|
+
So, keeping in mind that we're increasing by orders of magnitude, you can see that the insertion time does take a significant hit. Retrieval also goes down but at a very gradual rate. (It decreases by about 50% in total, despite the size increasing by 1,000,000%.)
|
127
|
+
|
128
|
+
The reason the insertion time takes such a beating is, again, a limitation of the trie. Storing a trie in the dual array setup that is used is excellent for memory usage and retrieval time. Best in class, in fact. However, the more things are added into the trie the more complicated it gets to insert things. It often requires shuffling large pieces of the arrays. There may be room for optimization here, but ultimately insertion time will increase with the size of the trie.
|
129
|
+
|
130
|
+
|
131
|
+
|
91
132
|
Copyright (c) 2008 Tyler McMullen. See LICENSE for details.
|
data/VERSION.yml
CHANGED
data/ext/trie/trie-private.c
CHANGED
@@ -102,6 +102,34 @@ Bool trie_store (Trie *trie, const TrieChar *key, TrieData data) {
|
|
102
102
|
}
|
103
103
|
|
104
104
|
|
105
|
+
Bool trie_has_key (const Trie *trie, const TrieChar *key) {
|
106
|
+
TrieIndex s;
|
107
|
+
short suffix_idx;
|
108
|
+
const TrieChar *p;
|
109
|
+
|
110
|
+
/* walk through branches */
|
111
|
+
s = da_get_root (trie->da);
|
112
|
+
for (p = key; !trie_da_is_separate (trie->da, s); p++) {
|
113
|
+
if (!da_walk (trie->da, &s, *p))
|
114
|
+
return FALSE;
|
115
|
+
if (0 == *p)
|
116
|
+
break;
|
117
|
+
}
|
118
|
+
|
119
|
+
/* walk through tail */
|
120
|
+
s = trie_da_get_tail_index (trie->da, s);
|
121
|
+
suffix_idx = 0;
|
122
|
+
for ( ; ; p++) {
|
123
|
+
if (!tail_walk_char (trie->tail, s, &suffix_idx, *p))
|
124
|
+
return FALSE;
|
125
|
+
if (0 == *p)
|
126
|
+
break;
|
127
|
+
}
|
128
|
+
|
129
|
+
return TRUE;
|
130
|
+
}
|
131
|
+
|
132
|
+
|
105
133
|
Bool trie_retrieve (const Trie *trie, const TrieChar *key, TrieData *o_data) {
|
106
134
|
TrieIndex s;
|
107
135
|
short suffix_idx;
|
data/ext/trie/trie.c
CHANGED
@@ -6,33 +6,64 @@
|
|
6
6
|
|
7
7
|
VALUE cTrie, cTrieNode;
|
8
8
|
|
9
|
+
/*
|
10
|
+
* Document-class: Trie
|
11
|
+
*
|
12
|
+
* A key-value data structure for string keys which is efficient in memory usage and fast in retrieval time.
|
13
|
+
*
|
14
|
+
*/
|
15
|
+
|
9
16
|
static VALUE rb_trie_alloc(VALUE klass) {
|
10
17
|
VALUE obj;
|
11
18
|
obj = Data_Wrap_Struct(klass, 0, trie_free, trie_new());
|
12
19
|
return obj;
|
13
20
|
}
|
14
21
|
|
22
|
+
/*
|
23
|
+
* call-seq:
|
24
|
+
* has_key?(key) -> true/false
|
25
|
+
*
|
26
|
+
* Determines whether or not a key exists in the Trie. Use this if you don't care about the value, as it
|
27
|
+
* is marginally faster than Trie#get.
|
28
|
+
*
|
29
|
+
*/
|
15
30
|
static VALUE rb_trie_has_key(VALUE self, VALUE key) {
|
16
31
|
Trie *trie;
|
17
32
|
Data_Get_Struct(self, Trie, trie);
|
18
33
|
|
19
|
-
if(
|
34
|
+
if(trie_has_key(trie, (TrieChar*)RSTRING(key)->ptr))
|
20
35
|
return Qtrue;
|
21
36
|
else
|
22
37
|
return Qnil;
|
23
38
|
}
|
24
39
|
|
40
|
+
/*
|
41
|
+
* call-seq:
|
42
|
+
* get(key) -> value
|
43
|
+
* [key] -> value
|
44
|
+
*
|
45
|
+
* Retrieves the value for a particular key (or nil) from the Trie.
|
46
|
+
*
|
47
|
+
*/
|
25
48
|
static VALUE rb_trie_get(VALUE self, VALUE key) {
|
26
49
|
Trie *trie;
|
27
50
|
Data_Get_Struct(self, Trie, trie);
|
28
51
|
|
29
52
|
TrieData data;
|
30
53
|
if(trie_retrieve(trie, (TrieChar*)RSTRING(key)->ptr, &data))
|
31
|
-
return
|
54
|
+
return (VALUE)data;
|
32
55
|
else
|
33
56
|
return Qnil;
|
34
57
|
}
|
35
58
|
|
59
|
+
/*
|
60
|
+
* call-seq:
|
61
|
+
* add(key)
|
62
|
+
* add(key,value)
|
63
|
+
*
|
64
|
+
* Add a key, or a key and value to the Trie. If you add a key without a value it assumes true for the value.
|
65
|
+
*
|
66
|
+
*/
|
36
67
|
static VALUE rb_trie_add(VALUE self, VALUE args) {
|
37
68
|
Trie *trie;
|
38
69
|
Data_Get_Struct(self, Trie, trie);
|
@@ -43,7 +74,7 @@ static VALUE rb_trie_add(VALUE self, VALUE args) {
|
|
43
74
|
|
44
75
|
VALUE key;
|
45
76
|
key = RARRAY(args)->ptr[0];
|
46
|
-
TrieData value = size == 2 ?
|
77
|
+
TrieData value = size == 2 ? RARRAY(args)->ptr[1] : TRIE_DATA_ERROR;
|
47
78
|
|
48
79
|
if(trie_store(trie, (TrieChar*)RSTRING(key)->ptr, value))
|
49
80
|
return Qtrue;
|
@@ -51,6 +82,13 @@ static VALUE rb_trie_add(VALUE self, VALUE args) {
|
|
51
82
|
return Qnil;
|
52
83
|
}
|
53
84
|
|
85
|
+
/*
|
86
|
+
* call-seq:
|
87
|
+
* delete(key)
|
88
|
+
*
|
89
|
+
* Delete a key from the Trie. Returns true if it deleted a key, nil otherwise.
|
90
|
+
*
|
91
|
+
*/
|
54
92
|
static VALUE rb_trie_delete(VALUE self, VALUE key) {
|
55
93
|
Trie *trie;
|
56
94
|
Data_Get_Struct(self, Trie, trie);
|
@@ -61,14 +99,6 @@ static VALUE rb_trie_delete(VALUE self, VALUE key) {
|
|
61
99
|
return Qnil;
|
62
100
|
}
|
63
101
|
|
64
|
-
char* append_char(char* existing, int size, char c) {
|
65
|
-
char *new = (char*) malloc(size + 2);
|
66
|
-
memcpy(new, existing, size);
|
67
|
-
new[size] = c;
|
68
|
-
new[size + 1] = 0;
|
69
|
-
return new;
|
70
|
-
}
|
71
|
-
|
72
102
|
static VALUE walk_all_paths(Trie *trie, VALUE children, TrieState *state, char *prefix, int prefix_size) {
|
73
103
|
int c;
|
74
104
|
for(c = 1; c < 256; c++) {
|
@@ -93,6 +123,13 @@ static VALUE walk_all_paths(Trie *trie, VALUE children, TrieState *state, char *
|
|
93
123
|
}
|
94
124
|
}
|
95
125
|
|
126
|
+
/*
|
127
|
+
* call-seq:
|
128
|
+
* children(prefix) -> [ key, ... ]
|
129
|
+
*
|
130
|
+
* Finds all keys in the Trie beginning with the given prefix.
|
131
|
+
*
|
132
|
+
*/
|
96
133
|
static VALUE rb_trie_children(VALUE self, VALUE prefix) {
|
97
134
|
if(NIL_P(prefix))
|
98
135
|
return rb_ary_new();
|
@@ -148,7 +185,7 @@ static VALUE walk_all_paths_with_values(Trie *trie, VALUE children, TrieState *s
|
|
148
185
|
rb_ary_push(tuple, rb_str_new2(word));
|
149
186
|
|
150
187
|
TrieData trie_data = trie_state_get_data(end_state);
|
151
|
-
rb_ary_push(tuple,
|
188
|
+
rb_ary_push(tuple, (VALUE)trie_data);
|
152
189
|
rb_ary_push(children, tuple);
|
153
190
|
|
154
191
|
trie_state_free(end_state);
|
@@ -162,9 +199,13 @@ static VALUE walk_all_paths_with_values(Trie *trie, VALUE children, TrieState *s
|
|
162
199
|
}
|
163
200
|
}
|
164
201
|
|
165
|
-
|
166
|
-
|
167
|
-
|
202
|
+
/*
|
203
|
+
* call-seq:
|
204
|
+
* children_with_values(key) -> [ [key,value], ... ]
|
205
|
+
*
|
206
|
+
* Finds all keys with their respective values in the Trie beginning with the given prefix.
|
207
|
+
*
|
208
|
+
*/
|
168
209
|
static VALUE rb_trie_children_with_values(VALUE self, VALUE prefix) {
|
169
210
|
if(NIL_P(prefix))
|
170
211
|
return rb_ary_new();
|
@@ -194,7 +235,7 @@ static VALUE rb_trie_children_with_values(VALUE self, VALUE prefix) {
|
|
194
235
|
VALUE tuple = rb_ary_new();
|
195
236
|
rb_ary_push(tuple, prefix);
|
196
237
|
TrieData trie_data = trie_state_get_data(end_state);
|
197
|
-
rb_ary_push(tuple,
|
238
|
+
rb_ary_push(tuple, (VALUE)trie_data);
|
198
239
|
rb_ary_push(children, tuple);
|
199
240
|
|
200
241
|
trie_state_free(end_state);
|
@@ -210,12 +251,15 @@ static VALUE rb_trie_children_with_values(VALUE self, VALUE prefix) {
|
|
210
251
|
return children;
|
211
252
|
}
|
212
253
|
|
213
|
-
static VALUE rb_trie_node_alloc(VALUE klass)
|
214
|
-
VALUE obj;
|
215
|
-
obj = Data_Wrap_Struct(klass, 0, trie_state_free, NULL);
|
216
|
-
return obj;
|
217
|
-
}
|
254
|
+
static VALUE rb_trie_node_alloc(VALUE klass);
|
218
255
|
|
256
|
+
/*
|
257
|
+
* call-seq:
|
258
|
+
* root -> TrieNode
|
259
|
+
*
|
260
|
+
* Returns a TrieNode representing the root of the Trie.
|
261
|
+
*
|
262
|
+
*/
|
219
263
|
static VALUE rb_trie_root(VALUE self) {
|
220
264
|
Trie *trie;
|
221
265
|
Data_Get_Struct(self, Trie, trie);
|
@@ -230,13 +274,61 @@ static VALUE rb_trie_root(VALUE self) {
|
|
230
274
|
return trie_node;
|
231
275
|
}
|
232
276
|
|
277
|
+
|
278
|
+
/*
|
279
|
+
* Document-class: TrieNode
|
280
|
+
*
|
281
|
+
* Represents a single node in the Trie. It can be used as a cursor to walk around the Trie.
|
282
|
+
* You can grab a TrieNode for the root of the Trie by using Trie#root.
|
283
|
+
*
|
284
|
+
*/
|
285
|
+
|
286
|
+
static VALUE rb_trie_node_alloc(VALUE klass) {
|
287
|
+
VALUE obj;
|
288
|
+
obj = Data_Wrap_Struct(klass, 0, trie_state_free, NULL);
|
289
|
+
return obj;
|
290
|
+
}
|
291
|
+
|
292
|
+
/* nodoc */
|
293
|
+
static VALUE rb_trie_node_initialize_copy(VALUE self, VALUE from) {
|
294
|
+
RDATA(self)->data = trie_state_clone(RDATA(from)->data);
|
295
|
+
|
296
|
+
rb_iv_set(self, "@state", rb_iv_get(from, "@state"));
|
297
|
+
rb_iv_set(self, "@full_state", rb_iv_get(from, "@full_state"));
|
298
|
+
|
299
|
+
return self;
|
300
|
+
}
|
301
|
+
|
302
|
+
/*
|
303
|
+
* call-seq:
|
304
|
+
* state -> single character
|
305
|
+
*
|
306
|
+
* Returns the letter that the TrieNode instance points to. So, if the node is pointing at the "e" in "monkeys", the state is "e".
|
307
|
+
*
|
308
|
+
*/
|
233
309
|
static VALUE rb_trie_node_get_state(VALUE self) {
|
234
310
|
return rb_iv_get(self, "@state");
|
235
311
|
}
|
312
|
+
|
313
|
+
/*
|
314
|
+
* call-seq:
|
315
|
+
* full_state -> string
|
316
|
+
*
|
317
|
+
* Returns the full string from the root of the Trie up to this node. So if the node is pointing at the "e" in "monkeys",
|
318
|
+
* the full_state is "monke".
|
319
|
+
*
|
320
|
+
*/
|
236
321
|
static VALUE rb_trie_node_get_full_state(VALUE self) {
|
237
322
|
return rb_iv_get(self, "@full_state");
|
238
323
|
}
|
239
324
|
|
325
|
+
/*
|
326
|
+
* call-seq:
|
327
|
+
* walk!(letter) -> TrieNode
|
328
|
+
*
|
329
|
+
* Tries to walk down a particular branch of the Trie. It modifies the node it is called on.
|
330
|
+
*
|
331
|
+
*/
|
240
332
|
static VALUE rb_trie_node_walk_bang(VALUE self, VALUE rchar) {
|
241
333
|
TrieState *state;
|
242
334
|
Data_Get_Struct(self, TrieState, state);
|
@@ -256,6 +348,43 @@ static VALUE rb_trie_node_walk_bang(VALUE self, VALUE rchar) {
|
|
256
348
|
return Qnil;
|
257
349
|
}
|
258
350
|
|
351
|
+
/*
|
352
|
+
* call-seq:
|
353
|
+
* walk(letter) -> TrieNode
|
354
|
+
*
|
355
|
+
* Tries to walk down a particular branch of the Trie. It clones the node it is called on and
|
356
|
+
* walks with that one, leaving the original unchanged.
|
357
|
+
*
|
358
|
+
*/
|
359
|
+
static VALUE rb_trie_node_walk(VALUE self, VALUE rchar) {
|
360
|
+
VALUE new_node = rb_funcall(self, rb_intern("dup"), 0);
|
361
|
+
|
362
|
+
TrieState *state;
|
363
|
+
Data_Get_Struct(new_node, TrieState, state);
|
364
|
+
|
365
|
+
if(RSTRING(rchar)->len != 1)
|
366
|
+
return Qnil;
|
367
|
+
|
368
|
+
Bool result = trie_state_walk(state, *RSTRING(rchar)->ptr);
|
369
|
+
|
370
|
+
if(result) {
|
371
|
+
rb_iv_set(new_node, "@state", rchar);
|
372
|
+
VALUE full_state = rb_iv_get(new_node, "@full_state");
|
373
|
+
rb_str_append(full_state, rchar);
|
374
|
+
rb_iv_set(new_node, "@full_state", full_state);
|
375
|
+
return self;
|
376
|
+
} else
|
377
|
+
return Qnil;
|
378
|
+
}
|
379
|
+
|
380
|
+
/*
|
381
|
+
* call-seq:
|
382
|
+
* value
|
383
|
+
*
|
384
|
+
* Attempts to get the value at this node of the Trie. This only works if the node is a terminal
|
385
|
+
* (i.e. end of a key), otherwise it returns nil.
|
386
|
+
*
|
387
|
+
*/
|
259
388
|
static VALUE rb_trie_node_value(VALUE self) {
|
260
389
|
TrieState *state;
|
261
390
|
TrieState *dup;
|
@@ -267,9 +396,17 @@ static VALUE rb_trie_node_value(VALUE self) {
|
|
267
396
|
TrieData trie_data = trie_state_get_data(dup);
|
268
397
|
trie_state_free(dup);
|
269
398
|
|
270
|
-
return TRIE_DATA_ERROR == trie_data ? Qnil :
|
399
|
+
return TRIE_DATA_ERROR == trie_data ? Qnil : (VALUE)trie_data;
|
271
400
|
}
|
272
401
|
|
402
|
+
/*
|
403
|
+
* call-seq:
|
404
|
+
* terminal? -> true/false
|
405
|
+
*
|
406
|
+
* Returns true if this node is at the end of a key. So if you have two keys in your Trie, "he" and
|
407
|
+
* "hello", and you walk all the way to the end of "hello", the "e" and the "o" will return true for terminal?.
|
408
|
+
*
|
409
|
+
*/
|
273
410
|
static VALUE rb_trie_node_terminal(VALUE self) {
|
274
411
|
TrieState *state;
|
275
412
|
Data_Get_Struct(self, TrieState, state);
|
@@ -277,6 +414,12 @@ static VALUE rb_trie_node_terminal(VALUE self) {
|
|
277
414
|
return trie_state_is_terminal(state) ? Qtrue : Qnil;
|
278
415
|
}
|
279
416
|
|
417
|
+
/*
|
418
|
+
* call-seq:
|
419
|
+
* leaf? -> true/false
|
420
|
+
*
|
421
|
+
* Returns true if there are no branches at this node.
|
422
|
+
*/
|
280
423
|
static VALUE rb_trie_node_leaf(VALUE self) {
|
281
424
|
TrieState *state;
|
282
425
|
Data_Get_Struct(self, TrieState, state);
|
@@ -284,28 +427,10 @@ static VALUE rb_trie_node_leaf(VALUE self) {
|
|
284
427
|
return trie_state_is_leaf(state) ? Qtrue : Qnil;
|
285
428
|
}
|
286
429
|
|
287
|
-
static VALUE rb_trie_node_clone(VALUE self) {
|
288
|
-
TrieState *state;
|
289
|
-
Data_Get_Struct(self, TrieState, state);
|
290
|
-
|
291
|
-
VALUE new_node = rb_trie_node_alloc(cTrieNode);
|
292
|
-
|
293
|
-
TrieState *new_state = trie_state_clone(state);
|
294
|
-
|
295
|
-
RDATA(new_node)->data = new_state;
|
296
|
-
|
297
|
-
rb_iv_set(new_node, "@state", rb_iv_get(self, "@state"));
|
298
|
-
rb_iv_set(new_node, "@full_state", rb_iv_get(self, "@full_state"));
|
299
|
-
|
300
|
-
return new_node;
|
301
|
-
}
|
302
|
-
|
303
430
|
|
304
431
|
void Init_trie() {
|
305
432
|
cTrie = rb_define_class("Trie", rb_cObject);
|
306
433
|
rb_define_alloc_func(cTrie, rb_trie_alloc);
|
307
|
-
//rb_define_method(cTrie, "initialize", rb_trie_initialize, -2);
|
308
|
-
//rb_define_method(cTrie, "path", rb_trie_get_path, 0);
|
309
434
|
rb_define_method(cTrie, "has_key?", rb_trie_has_key, 1);
|
310
435
|
rb_define_method(cTrie, "get", rb_trie_get, 1);
|
311
436
|
rb_define_method(cTrie, "add", rb_trie_add, -2);
|
@@ -313,15 +438,15 @@ void Init_trie() {
|
|
313
438
|
rb_define_method(cTrie, "children", rb_trie_children, 1);
|
314
439
|
rb_define_method(cTrie, "children_with_values", rb_trie_children_with_values, 1);
|
315
440
|
rb_define_method(cTrie, "root", rb_trie_root, 0);
|
316
|
-
//rb_define_method(cTrie, "save", rb_trie_save, 0);
|
317
441
|
|
318
442
|
cTrieNode = rb_define_class("TrieNode", rb_cObject);
|
319
443
|
rb_define_alloc_func(cTrieNode, rb_trie_node_alloc);
|
444
|
+
rb_define_method(cTrieNode, "initialize_copy", rb_trie_node_initialize_copy, 1);
|
320
445
|
rb_define_method(cTrieNode, "state", rb_trie_node_get_state, 0);
|
321
446
|
rb_define_method(cTrieNode, "full_state", rb_trie_node_get_full_state, 0);
|
322
447
|
rb_define_method(cTrieNode, "walk!", rb_trie_node_walk_bang, 1);
|
448
|
+
rb_define_method(cTrieNode, "walk", rb_trie_node_walk, 1);
|
323
449
|
rb_define_method(cTrieNode, "value", rb_trie_node_value, 0);
|
324
450
|
rb_define_method(cTrieNode, "terminal?", rb_trie_node_terminal, 0);
|
325
451
|
rb_define_method(cTrieNode, "leaf?", rb_trie_node_leaf, 0);
|
326
|
-
rb_define_method(cTrieNode, "clone", rb_trie_node_clone, 0);
|
327
452
|
}
|
data/ext/trie/triedefs.h
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
#ifndef __TRIEDEFS_H
|
9
9
|
#define __TRIEDEFS_H
|
10
10
|
|
11
|
-
#include
|
11
|
+
#include "typedefs.h"
|
12
12
|
|
13
13
|
/**
|
14
14
|
* @file triedefs.h
|
@@ -60,7 +60,7 @@ typedef int32 TrieIndex;
|
|
60
60
|
/**
|
61
61
|
* @brief Type of value associated to trie entries
|
62
62
|
*/
|
63
|
-
typedef
|
63
|
+
typedef unsigned long TrieData;
|
64
64
|
/**
|
65
65
|
* @brief Trie error data
|
66
66
|
*/
|
data/ext/trie/typedefs.h
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
2
|
+
/*
|
3
|
+
* typedefs.h - general types
|
4
|
+
* Created : 11 Aug 2006
|
5
|
+
* Author : Theppitak Karoonboonyanan <thep@linux.thai.net>
|
6
|
+
*/
|
7
|
+
|
8
|
+
#ifndef __TYPEDEFS_H
|
9
|
+
#define __TYPEDEFS_H
|
10
|
+
|
11
|
+
#include <limits.h>
|
12
|
+
|
13
|
+
typedef enum { FALSE = 0, TRUE = 1 } Bool;
|
14
|
+
|
15
|
+
# if UCHAR_MAX == 0xff
|
16
|
+
# ifndef UINT8_TYPEDEF
|
17
|
+
# define UINT8_TYPEDEF
|
18
|
+
typedef unsigned char uint8;
|
19
|
+
# endif /* UINT8_TYPEDEF */
|
20
|
+
# endif /* UCHAR_MAX */
|
21
|
+
|
22
|
+
# if SCHAR_MAX == 0x7f
|
23
|
+
# ifndef INT8_TYPEDEF
|
24
|
+
# define INT8_TYPEDEF
|
25
|
+
typedef signed char int8;
|
26
|
+
# endif /* INT8_TYPEDEF */
|
27
|
+
# endif /* SCHAR_MAX */
|
28
|
+
|
29
|
+
# if UINT_MAX == 0xffff
|
30
|
+
# ifndef UINT16_TYPEDEF
|
31
|
+
# define UINT16_TYPEDEF
|
32
|
+
typedef unsigned int uint16;
|
33
|
+
# endif /* UINT16_TYPEDEF */
|
34
|
+
# endif /* UINT_MAX */
|
35
|
+
|
36
|
+
# if INT_MAX == 0x7fff
|
37
|
+
# ifndef INT16_TYPEDEF
|
38
|
+
# define INT16_TYPEDEF
|
39
|
+
typedef int int16;
|
40
|
+
# endif /* INT16_TYPEDEF */
|
41
|
+
# endif /* INT_MAX */
|
42
|
+
|
43
|
+
# if USHRT_MAX == 0xffff
|
44
|
+
# ifndef UINT16_TYPEDEF
|
45
|
+
# define UINT16_TYPEDEF
|
46
|
+
typedef unsigned short uint16;
|
47
|
+
# endif /* UINT16_TYPEDEF */
|
48
|
+
# endif /* USHRT_MAX */
|
49
|
+
|
50
|
+
# if SHRT_MAX == 0x7fff
|
51
|
+
# ifndef INT16_TYPEDEF
|
52
|
+
# define INT16_TYPEDEF
|
53
|
+
typedef short int16;
|
54
|
+
# endif /* INT16_TYPEDEF */
|
55
|
+
# endif /* SHRT_MAX */
|
56
|
+
|
57
|
+
# if UINT_MAX == 0xffffffff
|
58
|
+
# ifndef UINT32_TYPEDEF
|
59
|
+
# define UINT32_TYPEDEF
|
60
|
+
typedef unsigned int uint32;
|
61
|
+
# endif /* UINT32_TYPEDEF */
|
62
|
+
# endif /* UINT_MAX */
|
63
|
+
|
64
|
+
# if INT_MAX == 0x7fffffff
|
65
|
+
# ifndef INT32_TYPEDEF
|
66
|
+
# define INT32_TYPEDEF
|
67
|
+
typedef int int32;
|
68
|
+
# endif /* INT32_TYPEDEF */
|
69
|
+
# endif /* INT_MAX */
|
70
|
+
|
71
|
+
# if ULONG_MAX == 0xffffffff
|
72
|
+
# ifndef UINT32_TYPEDEF
|
73
|
+
# define UINT32_TYPEDEF
|
74
|
+
typedef unsigned long uint32;
|
75
|
+
# endif /* UINT32_TYPEDEF */
|
76
|
+
# endif /* ULONG_MAX */
|
77
|
+
|
78
|
+
# if LONG_MAX == 0x7fffffff
|
79
|
+
# ifndef INT32_TYPEDEF
|
80
|
+
# define INT32_TYPEDEF
|
81
|
+
typedef long int32;
|
82
|
+
# endif /* INT32_TYPEDEF */
|
83
|
+
# endif /* LONG_MAX */
|
84
|
+
|
85
|
+
# ifndef UINT8_TYPEDEF
|
86
|
+
# error "uint8 type is undefined!"
|
87
|
+
# endif
|
88
|
+
# ifndef INT8_TYPEDEF
|
89
|
+
# error "int8 type is undefined!"
|
90
|
+
# endif
|
91
|
+
# ifndef UINT16_TYPEDEF
|
92
|
+
# error "uint16 type is undefined!"
|
93
|
+
# endif
|
94
|
+
# ifndef INT16_TYPEDEF
|
95
|
+
# error "int16 type is undefined!"
|
96
|
+
# endif
|
97
|
+
# ifndef UINT32_TYPEDEF
|
98
|
+
# error "uint32 type is undefined!"
|
99
|
+
# endif
|
100
|
+
# ifndef INT32_TYPEDEF
|
101
|
+
# error "int32 type is undefined!"
|
102
|
+
# endif
|
103
|
+
|
104
|
+
typedef uint8 byte;
|
105
|
+
typedef uint16 word;
|
106
|
+
typedef uint32 dword;
|
107
|
+
|
108
|
+
|
109
|
+
#endif /* __TYPEDEFS_H */
|
110
|
+
|
111
|
+
/*
|
112
|
+
vi:ts=4:ai:expandtab
|
113
|
+
*/
|
data/spec/trie_spec.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/../ext/trie'
|
1
|
+
require File.dirname(__FILE__) + '/../ext/trie/trie'
|
2
2
|
|
3
3
|
describe Trie do
|
4
4
|
before :each do
|
@@ -49,6 +49,11 @@ describe Trie do
|
|
49
49
|
@trie.add('chicka', 72_000).should == true
|
50
50
|
@trie.get('chicka').should == 72_000
|
51
51
|
end
|
52
|
+
|
53
|
+
it 'adds a word with a non-numeric value to the trie' do
|
54
|
+
@trie.add('doot', 'Heeey').should == true
|
55
|
+
@trie.get('doot').should == 'Heeey'
|
56
|
+
end
|
52
57
|
end
|
53
58
|
|
54
59
|
describe :delete do
|
@@ -194,6 +199,18 @@ describe TrieNode do
|
|
194
199
|
end
|
195
200
|
end
|
196
201
|
|
202
|
+
describe :walk do
|
203
|
+
it 'returns a new node object when the walk succeeds' do
|
204
|
+
other = @node.walk('r')
|
205
|
+
other.should != @node
|
206
|
+
end
|
207
|
+
|
208
|
+
it 'returns nil when the walk fails' do
|
209
|
+
@node.walk('q').should be_nil
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
|
197
214
|
describe :value do
|
198
215
|
it 'returns nil when the node is not terminal' do
|
199
216
|
@node.walk!('r')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tyler-trie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tyler McMullen
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-04-
|
12
|
+
date: 2009-04-18 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -40,11 +40,19 @@ files:
|
|
40
40
|
- ext/trie/trie.c
|
41
41
|
- ext/trie/trie.h
|
42
42
|
- ext/trie/triedefs.h
|
43
|
-
|
43
|
+
- ext/trie/typedefs.h
|
44
|
+
has_rdoc: true
|
44
45
|
homepage: http://github.com/tyler/trie
|
45
46
|
post_install_message:
|
46
|
-
rdoc_options:
|
47
|
-
|
47
|
+
rdoc_options:
|
48
|
+
- --title
|
49
|
+
- Trie
|
50
|
+
- --line-numbers
|
51
|
+
- --op
|
52
|
+
- rdoc
|
53
|
+
- --main
|
54
|
+
- ext/trie/trie.c
|
55
|
+
- README
|
48
56
|
require_paths:
|
49
57
|
- ext
|
50
58
|
- lib
|