RubyTrie 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog ADDED
@@ -0,0 +1 @@
1
+ 1.0 added children and each methods
data/README ADDED
@@ -0,0 +1,55 @@
1
+ A trie data structure implementation for Ruby. Currently, it's a pretty minimal implementation: only Trie#[], Trie#[]=, Trie#delete, Trie#children, and Trie#each are implemented so far. It's also fairly naive: there's plenty of room for optimization, I'm sure, especially in key character sibling storage (currently just a linked list). However, it does manage to be a bit faster than a native Ruby Hash for similar keys.
2
+
3
+ == Usage
4
+
5
+ Compile the extension:
6
+
7
+ ruby extconf.rb
8
+ make
9
+
10
+ Code:
11
+
12
+ require 'trie'
13
+
14
+ t = Trie.new
15
+ t["key"] = "value"
16
+ t["key"] # => "value"
17
+
18
+ t.children("partial key") will return all values for all keys that match the partial key
19
+
20
+ t["go"] = 1
21
+ t["goes"] = 2
22
+ t["gone"] = 3
23
+ t["other"] = 4
24
+
25
+ t.children("go") => [1,2,3]
26
+
27
+ t.each("partial key") will yield to the given block all values that are matched by the partial key
28
+ t.each("go") {|v| puts v } => prints 1, 2 & 3
29
+
30
+ == Bugs
31
+
32
+ I'm sure there are plenty!
33
+
34
+ == License
35
+
36
+ This software is copyright (c) 2008 Matt Freels
37
+
38
+ Permission is hereby granted, free of charge, to any person obtaining
39
+ a copy of this software and associated documentation files (the
40
+ "Software"), to deal in the Software without restriction, including
41
+ without limitation the rights to use, copy, modify, merge, publish,
42
+ distribute, sublicense, and/or sell copies of the Software, and to
43
+ permit persons to whom the Software is furnished to do so, subject to
44
+ the following conditions:
45
+
46
+ The above copyright notice and this permission notice shall be
47
+ included in all copies or substantial portions of the Software.
48
+
49
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
50
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
51
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
52
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
53
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
54
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
55
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,5 @@
1
# Build configuration for the native extension: generates the Makefile
# that compiles the extension named below.
require 'mkmf'

ext_name = 'trie'

dir_config(ext_name)
create_makefile(ext_name)
data/ext/trie/trie.c ADDED
@@ -0,0 +1,332 @@
1
+ #include <stdlib.h> /* for malloc, free */
2
+ #include <string.h> /* for memcmp, memmove */
3
+ #include "ruby.h"
4
+
5
+ // typedefs!
6
// Minimal boolean type for this file. When <stdbool.h> is already in scope
// (newer ruby.h versions may pull it in), `true`/`false` are macros and
// redefining them as enum constants would not compile, so the fallback is
// only declared when <stdbool.h> has not been included.
#ifndef __bool_true_false_are_defined
typedef enum { false = 0, true } bool;
#endif
7
+
8
+ typedef struct node {
9
+ char character;
10
+ VALUE value;
11
+ struct node * first_child;
12
+ struct node * next_sibling;
13
+ } trie_node;
14
+
15
+ static VALUE rb_cTrie;
16
+
17
+ // =========================
18
+ // = function declarations =
19
+ // =========================
20
+
21
+ //trie implementation
22
+ static trie_node * trie_node_for_key(trie_node * root, char * key, bool create_missing_nodes);
23
+ static trie_node * trie_sibling_for_char(trie_node * node, char ch);
24
+ static trie_node * trie_add_sibling_for_char(trie_node * node, char ch);
25
+ static trie_node * trie_new_node_with_char(char ch);
26
+ static trie_node * trie_new_node();
27
+ static VALUE rb_trie_find_children(VALUE self, VALUE key);
28
+ static VALUE rb_trie_find_children_with_block(VALUE self, VALUE key);
29
+ static void trie_collect_values(void * t, VALUE prary);
30
+ static void trie_collect_values_with_yield(void * t);
31
+ static void trie_traverse(trie_node * trie, void (*lambda_func)(void *));
32
+ static void trie_traverse_with_context(trie_node * trie, VALUE context, void (*lambda_func)(void *, VALUE));
33
+ static void free_trie(trie_node * trie);
34
+ static void count_nodes_callback(void *n, VALUE accum);
35
+ static VALUE rb_trie_count_nodes(VALUE self);
36
+
37
+
38
+ // ========================
39
+ // = function definitions =
40
+ // ========================
41
+
42
+ // instance methods
43
+ static VALUE rb_trie_get_key(VALUE self, VALUE key) {
44
+ trie_node * root;
45
+ trie_node * node;
46
+ char * key_cstring;
47
+
48
+ //Check_Type(key, T_STRING);
49
+ key_cstring = StringValuePtr(key);
50
+
51
+ Data_Get_Struct(self, trie_node, root);
52
+
53
+ node = trie_node_for_key(root, key_cstring, false);
54
+ if (node == NULL) return Qnil;
55
+ return node->value;
56
+ }
57
+
58
+ static VALUE rb_trie_set_key_to_value(VALUE self, VALUE key, VALUE value) {
59
+ trie_node * root;
60
+ trie_node * node;
61
+ char * key_cstring;
62
+
63
+ //Check_Type(key, T_STRING);
64
+ key_cstring = StringValuePtr(key);
65
+
66
+ Data_Get_Struct(self, trie_node, root);
67
+
68
+ node = trie_node_for_key(root, key_cstring, true);
69
+ node->value = value;
70
+
71
+ return Qnil;
72
+ }
73
+
74
+ static uint mem_count = 0;
75
+
76
+ static void count_nodes_callback(void *n, VALUE accum) {
77
+ trie_node *node = (trie_node*)n;
78
+ // rb_big_plus(accum, rb_uint2big(sizeof(*node)));
79
+ mem_count+=sizeof(*node);
80
+ }
81
+
82
+ static VALUE rb_trie_count_nodes(VALUE self) {
83
+ trie_node *root;
84
+ Data_Get_Struct(self, trie_node, root);
85
+ VALUE accum = rb_uint2big(0);
86
+ mem_count = 0;
87
+ trie_traverse_with_context(root, accum, count_nodes_callback);
88
+ return rb_uint2big(mem_count);
89
+ }
90
+
91
+ static VALUE rb_trie_undef_key(VALUE self, VALUE key) {
92
+ trie_node * root, * node, * prev, * next;
93
+ VALUE return_value;
94
+ char * key_cstring;
95
+ int steps;
96
+ int i;
97
+
98
+ //Check_Type(key, T_STRING);
99
+ key_cstring = StringValuePtr(key);
100
+
101
+ Data_Get_Struct(self, trie_node, root);
102
+ next = root;
103
+ node = NULL;
104
+ prev = NULL;
105
+
106
+ steps = strlen(key_cstring);
107
+
108
+ for (i = 0; i < steps; i++) {
109
+ if (next == NULL) return Qnil;
110
+
111
+ while(next->character != key_cstring[i]) {
112
+ if (next == NULL) return Qnil;
113
+ next = next->next_sibling;
114
+ }
115
+ prev = node;
116
+ node = next;
117
+ next = node->first_child;
118
+ }
119
+
120
+ return_value = node->value;
121
+ node->value = Qnil;
122
+
123
+ if (node->first_child == NULL) { //node has no children. we can delete it.
124
+ if (prev == NULL) {
125
+ //printf("should delete root");
126
+ } else if (prev->first_child == node) {
127
+ prev->first_child = node->next_sibling;
128
+ free(node);
129
+ } else if (prev->next_sibling == node) {
130
+ prev->next_sibling = node->next_sibling;
131
+ free(node);
132
+ }
133
+ }
134
+
135
+ return return_value;
136
+ }
137
+
138
+ // garbage collection and allocation
139
+ static void trie_mark_value(void * t) {
140
+ rb_gc_mark( ((trie_node *)t)->value );
141
+ }
142
+
143
+ static void rb_trie_mark(trie_node * t) {
144
+ trie_traverse(t, trie_mark_value);
145
+ }
146
+
147
+ static void rb_trie_free(trie_node * t) {
148
+ free_trie(t);
149
+ }
150
+
151
+ static VALUE rb_trie_allocate (VALUE klass) {
152
+ trie_node * t = trie_new_node();
153
+
154
+ return Data_Wrap_Struct(klass, rb_trie_mark, rb_trie_free, t);
155
+ }
156
+
157
+ // extension init
158
+ void Init_trie() {
159
+ rb_cTrie = rb_define_class("Trie", rb_cObject);
160
+
161
+ rb_define_alloc_func (rb_cTrie, rb_trie_allocate);
162
+
163
+ int arg_count = 0;
164
+ //rb_define_method(rb_cTrie, "inspect", rb_trie_inspect, arg_count);
165
+ rb_define_method(rb_cTrie, "memory", rb_trie_count_nodes, arg_count);
166
+
167
+ arg_count = 1;
168
+ rb_define_method(rb_cTrie, "[]", rb_trie_get_key, arg_count);
169
+ rb_define_method(rb_cTrie, "delete", rb_trie_undef_key, arg_count);
170
+ rb_define_method(rb_cTrie, "children", rb_trie_find_children, arg_count);
171
+ rb_define_method(rb_cTrie, "each", rb_trie_find_children_with_block, arg_count);
172
+
173
+ arg_count = 2;
174
+ rb_define_method(rb_cTrie, "[]=", rb_trie_set_key_to_value, arg_count);
175
+ }
176
+
177
+
178
+ // =======================
179
+ // = trie implementation =
180
+ // =======================
181
+
182
+ static trie_node * trie_node_for_key(trie_node * root, char * key, bool create_missing_nodes) {
183
+ int steps, i;
184
+ trie_node * next, * node;
185
+
186
+ steps = strlen(key);
187
+ next = root;
188
+
189
+ for (i = 0; i < steps; i++) {
190
+ if (next == NULL) {
191
+ if (create_missing_nodes) {
192
+ node->first_child = trie_new_node();
193
+ next = node->first_child;
194
+ }
195
+ else return NULL;
196
+ }
197
+
198
+ node = trie_sibling_for_char(next, key[i]);
199
+
200
+ if (node == NULL) {
201
+ if (create_missing_nodes) {
202
+ node = trie_add_sibling_for_char(next, key[i]);
203
+ }
204
+ else return NULL;
205
+ }
206
+
207
+ next = node->first_child;
208
+ }
209
+
210
+ return node;
211
+ }
212
+
213
+ static void trie_collect_values(void * t, VALUE rary) {
214
+ trie_node *node = (trie_node*)t;
215
+ if (node->value != Qnil) {
216
+ rb_ary_push(rary, node->value);
217
+ }
218
+ }
219
+
220
+ static void trie_collect_values_with_yield(void * t) {
221
+ trie_node *node = (trie_node*)t;
222
+ if (node->value != Qnil) {
223
+ // rb_ary_push(rary, node->value);
224
+ rb_yield(node->value);
225
+ }
226
+ }
227
+
228
+ static VALUE rb_trie_find_children(VALUE self, VALUE key) {
229
+ trie_node * root;
230
+ trie_node * node;
231
+ char * key_cstring;
232
+ VALUE rary = rb_ary_new();
233
+
234
+ key_cstring = StringValuePtr(key);
235
+ Data_Get_Struct(self, trie_node, root);
236
+
237
+ node = trie_node_for_key(root, key_cstring, false);
238
+
239
+ if (node != NULL && node->value != Qnil) {
240
+ rb_ary_push(rary, node->value);
241
+ }
242
+
243
+ if (node == NULL || node->first_child == NULL) return rary;
244
+
245
+ trie_traverse_with_context(node->first_child, rary, trie_collect_values);
246
+ return rary;
247
+ }
248
+
249
+
250
+ static VALUE rb_trie_find_children_with_block(VALUE self, VALUE key) {
251
+ trie_node * root;
252
+ trie_node * node;
253
+ char * key_cstring;
254
+ VALUE rary = rb_ary_new();
255
+
256
+ key_cstring = StringValuePtr(key);
257
+ Data_Get_Struct(self, trie_node, root);
258
+
259
+ node = trie_node_for_key(root, key_cstring, false);
260
+
261
+ if (node != NULL && node->value != Qnil) {
262
+ rb_yield(node->value);
263
+ }
264
+
265
+ if (node == NULL || node->first_child == NULL) return rary;
266
+
267
+ trie_traverse(node->first_child, trie_collect_values_with_yield);
268
+ return rary;
269
+ }
270
+
271
+ static trie_node * trie_sibling_for_char(trie_node * node, char ch) {
272
+ while(true) {
273
+ if (node == NULL) return NULL;
274
+
275
+ if (node->character == ch) return node;
276
+
277
+ node = node->next_sibling;
278
+ }
279
+ return node;
280
+ }
281
+
282
+ static trie_node * trie_add_sibling_for_char(trie_node * node, char ch) {
283
+ trie_node * current_next;
284
+
285
+ current_next = node->next_sibling;
286
+ node->next_sibling = trie_new_node_with_char(ch);
287
+ node->next_sibling->next_sibling = current_next;
288
+
289
+ return node->next_sibling;
290
+ }
291
+
292
+ static trie_node * trie_new_node_with_char(char ch) {
293
+ trie_node * trie;
294
+ trie = malloc(sizeof(trie_node));
295
+ trie->character = ch;
296
+ trie->value = Qnil;
297
+ trie->first_child = NULL;
298
+ trie->next_sibling = NULL;
299
+ return trie;
300
+ }
301
+
302
+ static trie_node * trie_new_node() {
303
+ return trie_new_node_with_char('s'); //insert most common starting letter here.
304
+ }
305
+
306
+ static void trie_traverse(trie_node * trie, void (* lambda_func)(void *)) {
307
+ if (trie->next_sibling != NULL) {
308
+ trie_traverse(trie->next_sibling, lambda_func);
309
+ }
310
+
311
+ if (trie->first_child != NULL) {
312
+ trie_traverse(trie->first_child, lambda_func);
313
+ }
314
+
315
+ lambda_func(trie);
316
+ }
317
+
318
+ static void trie_traverse_with_context(trie_node * trie, VALUE context, void (*lambda_func)(void *, VALUE)) {
319
+ if (trie->next_sibling != NULL) {
320
+ trie_traverse_with_context(trie->next_sibling, context, lambda_func);
321
+ }
322
+
323
+ if (trie->first_child != NULL) {
324
+ trie_traverse_with_context(trie->first_child, context, lambda_func);
325
+ }
326
+
327
+ lambda_func(trie, context);
328
+ }
329
+
330
+ static void free_trie(trie_node * trie) {
331
+ trie_traverse(trie, free);
332
+ }
data/lib/ruby_trie.rb ADDED
@@ -0,0 +1 @@
1
+ require 'trie'
data/test/rubytrie.rb ADDED
@@ -0,0 +1,78 @@
1
# Pure-Ruby trie keyed by strings, built from TrieNode objects.
class RubyTrie
  # default is stored on the instance; the lookups below do not consult it.
  def initialize(default = nil)
    @first_child = nil
    @default = default
  end

  # Returns the value stored under key, or nil when the key is empty or not
  # present. Raises if key is not a String.
  def [](key)
    raise "key must be a string" unless key.is_a? String

    next_node = @first_child
    node = nil

    0.upto(key.length - 1) do |i|
      return nil if next_node.nil?
      node = next_node.sibling_for_char(key[i])
      return nil if node.nil?
      next_node = node.first_child
    end

    # node is still nil when the key was empty and no level was visited.
    node.nil? ? nil : node.value
  end

  # Stores val under key, creating the nodes along the path, and returns
  # self. Raises if key is not a String, and ArgumentError if key is empty
  # (there is no node that could hold the value).
  def []=(key, val)
    raise "key must be a string" unless key.is_a? String
    raise ArgumentError, "key must not be empty" if key.empty?

    @first_child ||= TrieNode.new(key[0])

    next_node = @first_child
    node = nil
    last = key.length - 1

    0.upto(last) do |i|
      char = key[i]
      node = next_node.sibling_for_char(char)
      node = next_node.add_sibling(char) if node.nil?
      next_node = node.first_child
      # Seed a new level with the character that will be looked up there,
      # so no throwaway node is created.
      next_node = node.add_child(key[i + 1]) if next_node.nil? && i != last
    end

    node.value = val
    return self
  end
end
46
+
47
# One node of the pure-Ruby trie: a character, an optional value, and links
# to the next sibling on the same level and to the first child one level down.
class TrieNode
  attr_accessor :char, :next_sibling, :first_child, :value

  # Creates an unlinked node for char with no value.
  def initialize(char)
    @next_sibling = nil
    @first_child = nil
    @value = nil
    @char = char
  end

  # Returns the first node in this sibling list (starting at self) whose
  # char equals the argument, or nil when there is none.
  def sibling_for_char(char)
    current = self
    while current
      return current if current.char == char
      current = current.next_sibling
    end
    nil
  end

  # Appends a new node for char at the end of this sibling list and
  # returns it.
  def add_sibling(char)
    tail = self
    tail = tail.next_sibling while tail.next_sibling

    created = TrieNode.new(char)
    tail.next_sibling = created
    created
  end

  # Replaces this node's first child with a new node for char and returns it.
  def add_child(char)
    created = TrieNode.new(char)
    self.first_child = created
    created
  end
end
data/test/trie_test.rb ADDED
@@ -0,0 +1,60 @@
1
# Benchmark script: times insert / lookup / delete of `max` keys against the
# native Trie and against a plain Hash.
$:.unshift File.dirname(__FILE__) + '/../lib'
$:.unshift File.dirname(__FILE__) + '/../ext/trie'

require 'trie'

max = 1000000

# Runs the block, prints the elapsed wall-clock time and returns the
# block's result.
def time
  started = Time.now
  result = yield
  finished = Time.now
  puts "Time elapsed: #{finished - started} seconds"
  result
end

# Prints the label, then times storing, reading and deleting `count` keys
# in `store` (anything that responds to []=, [] and delete).
def run_benchmark(label, store, count)
  puts label
  time do
    1.upto(count) { |i| store["item #{i}"] = "sweet" }
    1.upto(count) { |i| store["item #{i}"].class }
    1.upto(count) { |i| store.delete("item #{i}") }
  end
end

run_benchmark("With a native Trie...", Trie.new, max)

run_benchmark("With a Hash...", Hash.new, max)

# puts "With a Ruby Trie..."
# t = RubyTrie.new
# time do
#   1.upto(max) do |i|
#     t["item #{i}"] = "sweet"
#   end
#
#   1.upto(max) do |i|
#     t["item #{i}"].class
#   end
# end
60
+
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: RubyTrie
3
+ version: !ruby/object:Gem::Version
4
+ hash: 15
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 0
9
+ version: "1.0"
10
+ platform: ruby
11
+ authors:
12
+ - Matt Freels
13
+ - Petrica Ghiurca
14
+ autorequire: trie
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-09-30 00:00:00 +03:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description:
23
+ email:
24
+ executables: []
25
+
26
+ extensions:
27
+ - ext/trie/extconf.rb
28
+ extra_rdoc_files:
29
+ - README
30
+ files:
31
+ - README
32
+ - ChangeLog
33
+ - ext/trie/trie.c
34
+ - ext/trie/extconf.rb
35
+ - lib/ruby_trie.rb
36
+ - test/rubytrie.rb
37
+ - test/trie_test.rb
38
+ has_rdoc: true
39
+ homepage:
40
+ licenses: []
41
+
42
+ post_install_message:
43
+ rdoc_options:
44
+ - --title
45
+ - RubyTrie
46
+ - --main
47
+ - README
48
+ - --line-numbers
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ hash: 3
57
+ segments:
58
+ - 0
59
+ version: "0"
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ hash: 3
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ requirements: []
70
+
71
+ rubyforge_project:
72
+ rubygems_version: 1.3.7
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: A trie (prefix tree) data structure for Ruby, implemented as a C extension.
76
+ test_files:
77
+ - test/trie_test.rb