RubyTrie 1.0

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog ADDED
@@ -0,0 +1 @@
1
+ 1.0 added children and each methods
data/README ADDED
@@ -0,0 +1,55 @@
1
+ A trie data structure implementation for Ruby. Currently, it's a pretty minimal implementation: only Trie#[], Trie#[]=, Trie#delete, Trie#children, and Trie#each are implemented so far. It's also fairly naive: there's plenty of room for optimization, I'm sure, especially in key character sibling storage. (Currently it is just a linked list.) However, it does manage to be a bit faster than a native Ruby Hash for similar keys.
2
+
3
+ == Usage
4
+
5
+ Compile the extension:
6
+
7
+ ruby extconf.rb
8
+ make
9
+
10
+ Code:
11
+
12
+ require 'trie'
13
+
14
+ t = Trie.new
15
+ t["key"] = "value"
16
+ t["key"] # => "value"
17
+
18
+ t.children("partial key") returns an array of the values of all keys that start with the partial key (including the partial key itself, if it has a value):
19
+
20
+ t["go"] = 1
21
+ t["goes"] = 2
22
+ t["gone"] = 3
23
+ t["other"] = 4
24
+
25
+ t.children("go") => [1,2,3]
26
+
27
+ t.each("partial key") yields to the given block the value of every key that starts with the partial key:
28
+ t.each("go") {|v| puts v } => prints 1, 2 & 3
29
+
30
+ == Bugs
31
+
32
+ I'm sure there are plenty!
33
+
34
+ == License
35
+
36
+ This software is copyright (c) 2008 Matt Freels
37
+
38
+ Permission is hereby granted, free of charge, to any person obtaining
39
+ a copy of this software and associated documentation files (the
40
+ "Software"), to deal in the Software without restriction, including
41
+ without limitation the rights to use, copy, modify, merge, publish,
42
+ distribute, sublicense, and/or sell copies of the Software, and to
43
+ permit persons to whom the Software is furnished to do so, subject to
44
+ the following conditions:
45
+
46
+ The above copyright notice and this permission notice shall be
47
+ included in all copies or substantial portions of the Software.
48
+
49
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
50
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
51
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
52
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
53
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
54
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
55
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,5 @@
1
+ require 'mkmf'
2
+
3
# Name of the native extension; used both for build configuration options
# and for the generated Makefile target.
target = 'trie'

dir_config(target)
create_makefile(target)
data/ext/trie/trie.c ADDED
@@ -0,0 +1,332 @@
1
+ #include <stdlib.h> /* for malloc, free */
2
+ #include <string.h> /* for memcmp, memmove */
3
+ #include "ruby.h"
4
+
5
// typedefs!

// Minimal boolean type for builds where no header has provided one.
// <stdbool.h> defines __bool_true_false_are_defined together with the
// bool/true/false macros; if it has already been included (directly or
// through another header), redefining false/true as enumerators would not
// compile, so the fallback enum is only declared when that marker is absent.
#ifndef __bool_true_false_are_defined
typedef enum { false = 0, true } bool;
#endif
7
+
8
+ typedef struct node {
9
+ char character;
10
+ VALUE value;
11
+ struct node * first_child;
12
+ struct node * next_sibling;
13
+ } trie_node;
14
+
15
+ static VALUE rb_cTrie;
16
+
17
+ // =========================
18
+ // = function declarations =
19
+ // =========================
20
+
21
+ //trie implementation
22
+ static trie_node * trie_node_for_key(trie_node * root, char * key, bool create_missing_nodes);
23
+ static trie_node * trie_sibling_for_char(trie_node * node, char ch);
24
+ static trie_node * trie_add_sibling_for_char(trie_node * node, char ch);
25
+ static trie_node * trie_new_node_with_char(char ch);
26
+ static trie_node * trie_new_node();
27
+ static VALUE rb_trie_find_children(VALUE self, VALUE key);
28
+ static VALUE rb_trie_find_children_with_block(VALUE self, VALUE key);
29
+ static void trie_collect_values(void * t, VALUE prary);
30
+ static void trie_collect_values_with_yield(void * t);
31
+ static void trie_traverse(trie_node * trie, void (*lambda_func)(void *));
32
+ static void trie_traverse_with_context(trie_node * trie, VALUE context, void (*lambda_func)(void *, VALUE));
33
+ static void free_trie(trie_node * trie);
34
+ static void count_nodes_callback(void *n, VALUE accum);
35
+ static VALUE rb_trie_count_nodes(VALUE self);
36
+
37
+
38
+ // ========================
39
+ // = function definitions =
40
+ // ========================
41
+
42
+ // instance methods
43
+ static VALUE rb_trie_get_key(VALUE self, VALUE key) {
44
+ trie_node * root;
45
+ trie_node * node;
46
+ char * key_cstring;
47
+
48
+ //Check_Type(key, T_STRING);
49
+ key_cstring = StringValuePtr(key);
50
+
51
+ Data_Get_Struct(self, trie_node, root);
52
+
53
+ node = trie_node_for_key(root, key_cstring, false);
54
+ if (node == NULL) return Qnil;
55
+ return node->value;
56
+ }
57
+
58
+ static VALUE rb_trie_set_key_to_value(VALUE self, VALUE key, VALUE value) {
59
+ trie_node * root;
60
+ trie_node * node;
61
+ char * key_cstring;
62
+
63
+ //Check_Type(key, T_STRING);
64
+ key_cstring = StringValuePtr(key);
65
+
66
+ Data_Get_Struct(self, trie_node, root);
67
+
68
+ node = trie_node_for_key(root, key_cstring, true);
69
+ node->value = value;
70
+
71
+ return Qnil;
72
+ }
73
+
74
+ static uint mem_count = 0;
75
+
76
+ static void count_nodes_callback(void *n, VALUE accum) {
77
+ trie_node *node = (trie_node*)n;
78
+ // rb_big_plus(accum, rb_uint2big(sizeof(*node)));
79
+ mem_count+=sizeof(*node);
80
+ }
81
+
82
+ static VALUE rb_trie_count_nodes(VALUE self) {
83
+ trie_node *root;
84
+ Data_Get_Struct(self, trie_node, root);
85
+ VALUE accum = rb_uint2big(0);
86
+ mem_count = 0;
87
+ trie_traverse_with_context(root, accum, count_nodes_callback);
88
+ return rb_uint2big(mem_count);
89
+ }
90
+
91
+ static VALUE rb_trie_undef_key(VALUE self, VALUE key) {
92
+ trie_node * root, * node, * prev, * next;
93
+ VALUE return_value;
94
+ char * key_cstring;
95
+ int steps;
96
+ int i;
97
+
98
+ //Check_Type(key, T_STRING);
99
+ key_cstring = StringValuePtr(key);
100
+
101
+ Data_Get_Struct(self, trie_node, root);
102
+ next = root;
103
+ node = NULL;
104
+ prev = NULL;
105
+
106
+ steps = strlen(key_cstring);
107
+
108
+ for (i = 0; i < steps; i++) {
109
+ if (next == NULL) return Qnil;
110
+
111
+ while(next->character != key_cstring[i]) {
112
+ if (next == NULL) return Qnil;
113
+ next = next->next_sibling;
114
+ }
115
+ prev = node;
116
+ node = next;
117
+ next = node->first_child;
118
+ }
119
+
120
+ return_value = node->value;
121
+ node->value = Qnil;
122
+
123
+ if (node->first_child == NULL) { //node has no children. we can delete it.
124
+ if (prev == NULL) {
125
+ //printf("should delete root");
126
+ } else if (prev->first_child == node) {
127
+ prev->first_child = node->next_sibling;
128
+ free(node);
129
+ } else if (prev->next_sibling == node) {
130
+ prev->next_sibling = node->next_sibling;
131
+ free(node);
132
+ }
133
+ }
134
+
135
+ return return_value;
136
+ }
137
+
138
+ // garbage collection and allocation
139
+ static void trie_mark_value(void * t) {
140
+ rb_gc_mark( ((trie_node *)t)->value );
141
+ }
142
+
143
+ static void rb_trie_mark(trie_node * t) {
144
+ trie_traverse(t, trie_mark_value);
145
+ }
146
+
147
+ static void rb_trie_free(trie_node * t) {
148
+ free_trie(t);
149
+ }
150
+
151
+ static VALUE rb_trie_allocate (VALUE klass) {
152
+ trie_node * t = trie_new_node();
153
+
154
+ return Data_Wrap_Struct(klass, rb_trie_mark, rb_trie_free, t);
155
+ }
156
+
157
+ // extension init
158
+ void Init_trie() {
159
+ rb_cTrie = rb_define_class("Trie", rb_cObject);
160
+
161
+ rb_define_alloc_func (rb_cTrie, rb_trie_allocate);
162
+
163
+ int arg_count = 0;
164
+ //rb_define_method(rb_cTrie, "inspect", rb_trie_inspect, arg_count);
165
+ rb_define_method(rb_cTrie, "memory", rb_trie_count_nodes, arg_count);
166
+
167
+ arg_count = 1;
168
+ rb_define_method(rb_cTrie, "[]", rb_trie_get_key, arg_count);
169
+ rb_define_method(rb_cTrie, "delete", rb_trie_undef_key, arg_count);
170
+ rb_define_method(rb_cTrie, "children", rb_trie_find_children, arg_count);
171
+ rb_define_method(rb_cTrie, "each", rb_trie_find_children_with_block, arg_count);
172
+
173
+ arg_count = 2;
174
+ rb_define_method(rb_cTrie, "[]=", rb_trie_set_key_to_value, arg_count);
175
+ }
176
+
177
+
178
+ // =======================
179
+ // = trie implementation =
180
+ // =======================
181
+
182
+ static trie_node * trie_node_for_key(trie_node * root, char * key, bool create_missing_nodes) {
183
+ int steps, i;
184
+ trie_node * next, * node;
185
+
186
+ steps = strlen(key);
187
+ next = root;
188
+
189
+ for (i = 0; i < steps; i++) {
190
+ if (next == NULL) {
191
+ if (create_missing_nodes) {
192
+ node->first_child = trie_new_node();
193
+ next = node->first_child;
194
+ }
195
+ else return NULL;
196
+ }
197
+
198
+ node = trie_sibling_for_char(next, key[i]);
199
+
200
+ if (node == NULL) {
201
+ if (create_missing_nodes) {
202
+ node = trie_add_sibling_for_char(next, key[i]);
203
+ }
204
+ else return NULL;
205
+ }
206
+
207
+ next = node->first_child;
208
+ }
209
+
210
+ return node;
211
+ }
212
+
213
+ static void trie_collect_values(void * t, VALUE rary) {
214
+ trie_node *node = (trie_node*)t;
215
+ if (node->value != Qnil) {
216
+ rb_ary_push(rary, node->value);
217
+ }
218
+ }
219
+
220
+ static void trie_collect_values_with_yield(void * t) {
221
+ trie_node *node = (trie_node*)t;
222
+ if (node->value != Qnil) {
223
+ // rb_ary_push(rary, node->value);
224
+ rb_yield(node->value);
225
+ }
226
+ }
227
+
228
+ static VALUE rb_trie_find_children(VALUE self, VALUE key) {
229
+ trie_node * root;
230
+ trie_node * node;
231
+ char * key_cstring;
232
+ VALUE rary = rb_ary_new();
233
+
234
+ key_cstring = StringValuePtr(key);
235
+ Data_Get_Struct(self, trie_node, root);
236
+
237
+ node = trie_node_for_key(root, key_cstring, false);
238
+
239
+ if (node != NULL && node->value != Qnil) {
240
+ rb_ary_push(rary, node->value);
241
+ }
242
+
243
+ if (node == NULL || node->first_child == NULL) return rary;
244
+
245
+ trie_traverse_with_context(node->first_child, rary, trie_collect_values);
246
+ return rary;
247
+ }
248
+
249
+
250
+ static VALUE rb_trie_find_children_with_block(VALUE self, VALUE key) {
251
+ trie_node * root;
252
+ trie_node * node;
253
+ char * key_cstring;
254
+ VALUE rary = rb_ary_new();
255
+
256
+ key_cstring = StringValuePtr(key);
257
+ Data_Get_Struct(self, trie_node, root);
258
+
259
+ node = trie_node_for_key(root, key_cstring, false);
260
+
261
+ if (node != NULL && node->value != Qnil) {
262
+ rb_yield(node->value);
263
+ }
264
+
265
+ if (node == NULL || node->first_child == NULL) return rary;
266
+
267
+ trie_traverse(node->first_child, trie_collect_values_with_yield);
268
+ return rary;
269
+ }
270
+
271
+ static trie_node * trie_sibling_for_char(trie_node * node, char ch) {
272
+ while(true) {
273
+ if (node == NULL) return NULL;
274
+
275
+ if (node->character == ch) return node;
276
+
277
+ node = node->next_sibling;
278
+ }
279
+ return node;
280
+ }
281
+
282
+ static trie_node * trie_add_sibling_for_char(trie_node * node, char ch) {
283
+ trie_node * current_next;
284
+
285
+ current_next = node->next_sibling;
286
+ node->next_sibling = trie_new_node_with_char(ch);
287
+ node->next_sibling->next_sibling = current_next;
288
+
289
+ return node->next_sibling;
290
+ }
291
+
292
+ static trie_node * trie_new_node_with_char(char ch) {
293
+ trie_node * trie;
294
+ trie = malloc(sizeof(trie_node));
295
+ trie->character = ch;
296
+ trie->value = Qnil;
297
+ trie->first_child = NULL;
298
+ trie->next_sibling = NULL;
299
+ return trie;
300
+ }
301
+
302
+ static trie_node * trie_new_node() {
303
+ return trie_new_node_with_char('s'); //insert most common starting letter here.
304
+ }
305
+
306
+ static void trie_traverse(trie_node * trie, void (* lambda_func)(void *)) {
307
+ if (trie->next_sibling != NULL) {
308
+ trie_traverse(trie->next_sibling, lambda_func);
309
+ }
310
+
311
+ if (trie->first_child != NULL) {
312
+ trie_traverse(trie->first_child, lambda_func);
313
+ }
314
+
315
+ lambda_func(trie);
316
+ }
317
+
318
+ static void trie_traverse_with_context(trie_node * trie, VALUE context, void (*lambda_func)(void *, VALUE)) {
319
+ if (trie->next_sibling != NULL) {
320
+ trie_traverse_with_context(trie->next_sibling, context, lambda_func);
321
+ }
322
+
323
+ if (trie->first_child != NULL) {
324
+ trie_traverse_with_context(trie->first_child, context, lambda_func);
325
+ }
326
+
327
+ lambda_func(trie, context);
328
+ }
329
+
330
+ static void free_trie(trie_node * trie) {
331
+ trie_traverse(trie, free);
332
+ }
data/lib/ruby_trie.rb ADDED
@@ -0,0 +1 @@
1
+ require 'trie'
data/test/rubytrie.rb ADDED
@@ -0,0 +1,78 @@
1
# Pure-Ruby trie mapping String keys to values. Nodes are TrieNode objects
# linked through first_child / next_sibling.
class RubyTrie
  # default is stored in @default; #[] does not consult it and returns nil
  # for keys that are not present.
  def initialize(default = nil)
    @first_child = nil
    @default = default
  end

  # Returns the value stored under key, or nil when the key is not present
  # (this includes the empty key, which can never be stored).
  # Raises RuntimeError when key is not a String.
  def [](key)
    raise "key must be a string" unless key.is_a? String

    next_node = @first_child
    node = nil

    0.upto(key.length - 1) do |i|
      return nil if next_node.nil?
      node = next_node.sibling_for_char(key[i])
      return nil if node.nil?
      next_node = node.first_child
    end

    # node is still nil when key is empty: the loop body never ran.
    node.nil? ? nil : node.value
  end

  # Stores val under key, creating nodes as needed, and returns self.
  # Raises RuntimeError when key is not a String and ArgumentError when key
  # is empty (there is no node that could hold a value for the empty key).
  def []=(key, val)
    raise "key must be a string" unless key.is_a? String
    raise ArgumentError, "key must not be empty" if key.empty?

    @first_child = TrieNode.new(key[0]) if @first_child.nil?

    next_node = @first_child
    node = nil
    last = key.length - 1

    0.upto(last) do |i|
      char = key[i]
      node = next_node.sibling_for_char(char)
      node = next_node.add_sibling(char) if node.nil?
      next_node = node.first_child
      # Seed the next level with the NEXT character so the following
      # iteration finds it directly instead of adding a second node.
      next_node = node.add_child(key[i + 1]) if next_node.nil? && i != last
    end

    node.value = val
    return self
  end
end
46
+
47
# One node of a RubyTrie: a character, an optional value, a link to the
# next alternative at the same key position and a link to the first node
# of the next key position.
class TrieNode
  attr_accessor :char, :next_sibling, :first_child, :value

  # Creates a node for char with no value, sibling or child.
  def initialize(char)
    @char = char
    @next_sibling = nil
    @first_child = nil
    @value = nil
  end

  # Returns the first node in this sibling list (starting with self) whose
  # char equals the argument, or nil when there is none.
  def sibling_for_char(char)
    current = self
    until current.nil?
      return current if current.char == char
      current = current.next_sibling
    end
    nil
  end

  # Appends a new node for char at the end of this sibling list and
  # returns the new node.
  def add_sibling(char)
    tail = self
    tail = tail.next_sibling until tail.next_sibling.nil?
    tail.next_sibling = TrieNode.new(char)
  end

  # Replaces first_child with a new node for char and returns that node.
  def add_child(char)
    self.first_child = TrieNode.new(char)
  end
end
data/test/trie_test.rb ADDED
@@ -0,0 +1,60 @@
1
+ $:.unshift File.dirname(__FILE__) + '/../lib'
2
+ $:.unshift File.dirname(__FILE__) + '/../ext/trie'
3
+
4
+ require 'trie'
5
+
6
+ max = 1000000
7
+
8
# Runs the given block, prints how long it took in wall-clock seconds and
# returns the block's result.
def time
  started = Time.now
  result = yield
  elapsed = Time.now - started
  puts "Time elapsed: #{elapsed} seconds"
  result
end
15
+
16
# Benchmark the native Trie: insert, read back and delete `max` keys.
puts "With a native Trie..."
t = Trie.new
time do
  (1..max).each { |i| t["item #{i}"] = "sweet" }
  (1..max).each { |i| t["item #{i}"].class }
  (1..max).each { |i| t.delete("item #{i}") }
end
31
+
32
+
33
# Same workload against a built-in Hash, for comparison.
puts "With a Hash..."
t = Hash.new
time do
  (1..max).each { |i| t["item #{i}"] = "sweet" }
  (1..max).each { |i| t["item #{i}"].class }
  (1..max).each { |i| t.delete("item #{i}") }
end
48
+
49
+ # puts "With a Ruby Trie..."
50
+ # t = RubyTrie.new
51
+ # time do
52
+ # 1.upto(max) do |i|
53
+ # t["item #{i}"] = "sweet"
54
+ # end
55
+ #
56
+ # 1.upto(max) do |i|
57
+ # t["item #{i}"].class
58
+ # end
59
+ # end
60
+
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: RubyTrie
3
+ version: !ruby/object:Gem::Version
4
+ hash: 15
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 0
9
+ version: "1.0"
10
+ platform: ruby
11
+ authors:
12
+ - Matt Freels
13
+ - Petrica Ghiurca
14
+ autorequire: trie
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-09-30 00:00:00 +03:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description:
23
+ email:
24
+ executables: []
25
+
26
+ extensions:
27
+ - ext/trie/extconf.rb
28
+ extra_rdoc_files:
29
+ - README
30
+ files:
31
+ - README
32
+ - ChangeLog
33
+ - ext/trie/trie.c
34
+ - ext/trie/extconf.rb
35
+ - lib/ruby_trie.rb
36
+ - test/rubytrie.rb
37
+ - test/trie_test.rb
38
+ has_rdoc: true
39
+ homepage:
40
+ licenses: []
41
+
42
+ post_install_message:
43
+ rdoc_options:
44
+ - --title
45
+ - RubyTrie
46
+ - --main
47
+ - README
48
+ - --line-numbers
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ requirements: []
70
+
71
+ rubyforge_project:
72
+ rubygems_version: 1.3.7
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: A trie data structure implementation for Ruby, written as a C extension.
76
+ test_files:
77
+ - test/trie_test.rb