codders-trie 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (9) hide show
  1. data/COPYING +340 -0
  2. data/INSTALL +232 -0
  3. data/MANIFEST +8 -0
  4. data/README +6 -0
  5. data/lib/trie.rb +437 -0
  6. data/setup.rb +1551 -0
  7. data/test/tests.rb +275 -0
  8. data/trie.gemspec +14 -0
  9. metadata +54 -0
data/MANIFEST ADDED
@@ -0,0 +1,8 @@
1
+ COPYING
2
+ INSTALL
3
+ lib/trie.rb
4
+ MANIFEST
5
+ README
6
+ setup.rb
7
+ test/tests.rb
8
+ trie.gemspec
data/README ADDED
@@ -0,0 +1,6 @@
1
+ This package contains a Trie class for Ruby that implements a trie data
2
+ structure (http://en.wikipedia.org/wiki/Trie). The INSTALL file contains
3
+ installation instructions.
4
+
5
+ Daniel Erat <dan-ruby@erat.org>
6
+ http://www.erat.org/ruby/
data/lib/trie.rb ADDED
@@ -0,0 +1,437 @@
1
+ #!/usr/bin/ruby -w
2
+ #
3
+ # = Name
4
+ # Trie
5
+ #
6
+ # == Description
7
+ # This file contains an implementation of a trie data structure.
8
+ #
9
+ # == Version
10
+ # 0.0.1
11
+ #
12
+ # == Author
13
+ # Daniel Erat <dan-ruby@erat.org>
14
+ #
15
+ # == Copyright
16
+ # Copyright 2005 Daniel Erat
17
+ #
18
+ # == License
19
+ # GNU GPL; see COPYING
20
+ #
21
+ # == Changes
22
+ # 0.0.1 Initial release
23
+
24
+ require 'set'
25
+
26
+ # = Trie
27
+ #
28
+ # == Description
29
+ # Implementation of a trie data structure, well-suited for storing and
30
+ # looking up strings or other sequences.
31
+ #
32
+ # More specifically, this is an implementation of a Patricia trie
33
+ # (http://en.wikipedia.org/wiki/Patricia_trie).
34
+ #
35
+ # == Usage
36
+ # require "trie"
37
+ #
38
+ # # Create a new Trie and insert some values into it.
39
+ # t = Trie.new
40
+ # t.insert("the", 1)
41
+ # t.insert("they", 2)
42
+ # t.insert("they", 3)
43
+ # t.insert("their", 4).insert("they're", 5)
44
+ #
45
+ # # Search for an exact match of "they".
46
+ # t.find("they").values # => [2, 3]
47
+ #
48
+ # # Search for a prefix that will match all keys.
49
+ # t2 = t.find_prefix("th")
50
+ # puts t2.size # prints 5
51
+ #
52
+ # # In the sub-Trie beginning with "th", search for the prefix "ey"
53
+ # # (therefore, getting the three values with keys beginning with "they").
54
+ # t2.find_prefix("ey").each_value {|v| puts v } # prints 2, 3, and 5
55
+ #
56
+ # # Now search for "at" in the sub-Trie, which results in an empty Trie
57
+ # # (as there are no keys beginning with "that").
58
+ # puts t2.find_prefix("at").empty? # prints true
59
+ #
60
+ # # Delete all values keyed by "they" (note that this must be performed on
61
+ # # the root Trie rather than the one returned by Trie.find_prefix -- read
62
+ # # the "Notes" section to find out why).
63
+ # t.delete("they")
64
+ #
65
+ # == Notes
66
+ # Keys are stored internally as Arrays. If you use Strings as keys they
67
+ # will be automatically converted, and when you use a method to access them
68
+ # later you'll receive them as Arrays instead. For example:
69
+ #
70
+ # t = Trie.new.insert("abc", 1).insert("def", 2)
71
+ # t.keys # => [["a", "b", "c"], ["d", "e", "f"]]
72
+ # t.keys.collect {|k| k.join } # => ["abc", "def"]
73
+ #
74
+ # (I'm hesitant to add code that will return keys as Strings if the user
75
+ # has only passed in Strings so far.)
76
+ #
77
+ # Empty nodes are compressed. The strings "row" and "ruby", which would
78
+ # normally be stored as
79
+ #
80
+ # ''
81
+ # /
82
+ # r
83
+ # / \
84
+ # o u
85
+ # / \
86
+ # w b
87
+ # \
88
+ # y
89
+ #
90
+ # are actually stored as
91
+ #
92
+ # ''
93
+ # /
94
+ # r
95
+ # / \
96
+ # ow uby
97
+ #
98
+ # Because of this implementation (and to allow Trie.find to be called on
99
+ # nodes returned by Trie.find that contain compressed elements), Trie.find
100
+ # and Trie.find_prefix will in some (most) cases return Trie objects that
101
+ # are not members of the root Trie. As a result, methods such as
102
+ # Trie.insert, Trie.delete, and Trie.clear should only be called on Trie
103
+ # objects that were directly returned by Trie.new.
104
class Trie
  include Enumerable

  ##
  # Create a new empty Trie.
  #
  # ==== Example
  #  t = Trie.new
  #
  def initialize
    # Values stored under the empty key, i.e. exactly at this node.
    @values = Set.new
    # Maps the first element of a key to the sub-Trie holding the rest.
    @children = {}
    # While this node has no own values and no children, a single key is
    # kept here "compressed" instead of being spread over a chain of nodes.
    @compressed_key = []
    @compressed_values = Set.new
  end

  ##
  # Return an Array of all of the values stored under exactly this key.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 3).insert("a", 4).insert("b", 5)
  #  t["a"]  # => [3, 4]
  #
  def [](key)
    find(key).values
  end

  ##
  # Remove every key and value from the trie.  Returns self.
  #
  # ==== Example
  #  t = Trie.new.insert("blah", 3).insert("a", 1)
  #  t.clear  # t now contains no values
  #
  def clear
    @values.clear
    @children.clear
    @compressed_key.clear
    @compressed_values.clear
    self
  end

  ##
  # Delete all values with a given key.  Returns self.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("a", 2).insert("abc", 3)
  #  t.delete("a")  # t now only contains the third value
  #
  def delete(key)
    key = normalize_key(key)
    if key.empty?
      @values.clear
    elsif key == @compressed_key
      @compressed_key.clear
      @compressed_values.clear
    else
      with_child(key[0]) { |child| child.delete(key[1..-1]) }
    end
    self
  end

  ##
  # Delete all occurrences of a value, whatever key it is stored under.
  # Returns self.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("blah", 1).insert("a", 2)
  #  t.delete_value(1)  # t now only contains the third value
  #
  def delete_value(value)
    @compressed_values.delete(value)
    @compressed_key.clear if @compressed_values.empty?
    @values.delete(value)
    # delete_if prunes emptied sub-Tries without removing entries from the
    # Hash by hand while it is being iterated.
    @children.delete_if { |_, child| child.delete_value(value).empty? }
    self
  end

  ##
  # Delete a (key, value) pair.  Other values stored under the same key are
  # kept.  Returns self.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("a", 2)
  #  t.delete_pair("a", 1)  # t now only contains the second value
  #
  def delete_pair(key, value)
    key = normalize_key(key)
    if key.empty?
      @values.delete(value)
    elsif key == @compressed_key
      @compressed_values.delete(value)
      # Only forget the compressed key once its last value is gone;
      # otherwise the remaining values would end up under the empty key.
      @compressed_key.clear if @compressed_values.empty?
    else
      with_child(key[0]) { |child| child.delete_pair(key[1..-1], value) }
    end
    self
  end

  ##
  # Delete all values keyed by a given prefix.  Returns self.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("al", 2).insert("algernon", 3)
  #  t.delete_prefix("al")  # t now only contains the first value
  #
  def delete_prefix(prefix)
    prefix = normalize_key(prefix)
    if prefix.empty? || prefix == @compressed_key[0...prefix.size]
      clear
    else
      with_child(prefix[0]) { |child| child.delete_prefix(prefix[1..-1]) }
    end
    self
  end

  ##
  # Calls block once for each (key, value) pair in the Trie, passing
  # the key and value as parameters.  Returns self, or an Enumerator when
  # no block is given.
  #
  # The +prefix+ argument is prepended to every key that is yielded; it is
  # used by the recursion into sub-Tries.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("b", 2)
  #  t.each {|k, v| puts "#{k.join()}: #{v}" }  # prints "a: 1" and "b: 2"
  #
  def each(prefix = [], &block)
    return enum_for(:each, prefix) unless block_given?

    @values.each { |value| yield(prefix, value) }
    @compressed_values.each { |value| yield(prefix + @compressed_key, value) }
    @children.each { |head, child| child.each(prefix + [head], &block) }
    self
  end

  ##
  # Calls block once for each key in the Trie, passing the key as a
  # parameter.  Returns self, or an Enumerator when no block is given.
  #
  # ==== Example
  #  t = Trie.new.insert("abc", 1).insert("def", 2)
  #  t.each_key {|key| puts key.join() }  # prints "abc" and "def"
  #
  def each_key(prefix = [], &block)
    return enum_for(:each_key, prefix) unless block_given?

    yield prefix unless @values.empty?
    yield prefix + @compressed_key unless @compressed_values.empty?
    @children.each { |head, child| child.each_key(prefix + [head], &block) }
    self
  end

  ##
  # Calls block once for each (key, value) pair in the Trie, passing
  # the value as a parameter.  Returns self, or an Enumerator when no block
  # is given.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("b", 2)
  #  t.each_value {|value| puts value }  # prints "1" and "2"
  #
  def each_value(&block)
    return enum_for(:each_value) unless block_given?

    @compressed_values.each(&block)
    @values.each(&block)
    @children.each_value { |child| child.each_value(&block) }
    self
  end

  ##
  # Does this Trie contain no values?
  #
  # ==== Example
  #  t = Trie.new
  #  t.empty?  # => true
  #  t.insert("blah", 1)
  #  t.empty?  # => false
  #
  def empty?
    # Stops at the first value found instead of counting the whole Trie.
    @values.empty? && @compressed_values.empty? &&
      @children.values.all? { |child| child.empty? }
  end

  ##
  # Get a new Trie object containing all values with the passed-in key.
  # In the returned Trie the values are stored under the empty key.
  #
  # ==== Example
  #  t = Trie.new.insert("the", 1).insert("their", 2).insert("foo", 4)
  #  t.find("the")  # => Trie containing only the first value
  #
  def find(key)
    key = normalize_key(key)
    if (key.empty? && @compressed_key.empty?) || key == @compressed_key
      trie = self.class.new
      @values.each { |value| trie.insert([], value) }
      @compressed_values.each { |value| trie.insert([], value) }
      trie
    elsif @children[key[0]]
      @children[key[0]].find(key[1..-1])
    else
      self.class.new
    end
  end

  ##
  # Get a Trie object containing all values with keys that begin with
  # the passed-in prefix.  Keys in the result are relative to the prefix
  # (the prefix itself is stripped).  An empty prefix returns self.
  #
  # ==== Example
  #  t = Trie.new.insert("test", 1).insert("testing", 2)
  #  t.find_prefix("test").size                   # => 2
  #  t.find_prefix("testi").values                # => [2]
  #  t.find_prefix("te").find_prefix("st").size   # => 2
  #
  def find_prefix(prefix)
    prefix = normalize_key(prefix)
    if prefix.empty?
      self
    elsif prefix == @compressed_key[0...prefix.size]
      # The prefix ends inside the compressed key: build a Trie holding the
      # compressed values under whatever is left of that key.
      trie = self.class.new
      remainder = @compressed_key[prefix.size..-1]
      @compressed_values.each { |value| trie.insert(remainder, value) }
      trie
    elsif @children[prefix[0]]
      @children[prefix[0]].find_prefix(prefix[1..-1])
    else
      self.class.new
    end
  end

  ##
  # Insert a value into this Trie, keyed by the passed-in key,
  # which can be any sort of indexable object.  Strings are split into
  # an Array of characters.  Returns self, so calls can be chained.
  #
  # ==== Example
  #  t = Trie.new.insert("this is a string of considerable length", [ 0, 4, ])
  #  t.insert([ "abc", "def", ], "testing")
  #
  def insert(key, value)
    key = normalize_key(key)
    # A different key arrives: the compressed entry (if any) has to be
    # pushed down into a real child node first.
    expand_compressed if key != @compressed_key

    if key.empty?
      @values.add(value)
    elsif (@values.empty? && @children.empty?) || key == @compressed_key
      @compressed_key = key.dup
      @compressed_values.add(value)
    else
      insert_in_child(key, value)
    end
    self
  end

  ##
  # Get an Array containing all keys in this Trie.
  #
  # ==== Example
  #  t = Trie.new.insert("test", 1).insert([0, 1], 2)
  #  t.keys  # => [['t', 'e', 's', 't'], [0, 1]]
  #
  def keys
    collected = []
    each_key { |key| collected.push(key) }
    collected
  end

  ##
  # Get the number of nodes used to represent this Trie.
  #
  # This is only useful for testing.
  #
  def num_nodes
    @children.values.inject(1) { |count, child| count + child.num_nodes }
  end

  ##
  # Get the number of values contained in this Trie.
  #
  # ==== Example
  #  t = Trie.new.insert("test", 1).insert("foo", 2)
  #  t.size  # => 2
  #
  def size
    own = @compressed_values.size + @values.size
    @children.values.inject(own) { |count, child| count + child.size }
  end

  ##
  # Get an Array containing all values in this Trie.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("b", 2)
  #  t.values                             # => Array containing both values
  #  t.values.each {|value| puts value }  # prints "1" and "2"
  #
  def values
    collected = []
    each_value { |value| collected.push(value) }
    collected
  end

  ##
  # Custom marshalling behavior.  The Trie is dumped as a flat list of
  # (key, value) pairs.
  #
  # Keys made up only of single-character Strings are written as one joined
  # String to keep the dump small; any other key is written as the Array it
  # is, so that it survives the round trip unchanged.
  #
  def _dump(_depth)
    pairs = []
    each { |key, value| pairs << [compact_key(key), value] }
    Marshal.dump(pairs)
  end

  ##
  # Rebuild a Trie from the String produced by _dump.  Instantiates the
  # class it is called on, so subclasses are restored as themselves.
  #
  # NOTE: this calls Marshal.load, which must never be fed untrusted data.
  #
  def self._load(data)
    Marshal.load(data).inject(new) { |trie, (key, value)| trie.insert(key, value) }
  end

  private

  ##
  # Insert a value into a sub-Trie, creating one if necessary.
  #
  # Internal method called by Trie.insert.
  #
  def insert_in_child(key, value)
    (@children[key[0]] ||= self.class.new).insert(key[1..-1], value)
  end

  ##
  # Move the compressed (key, values) entry into a child node and reset
  # the compressed slot.  Does nothing if there are no compressed values.
  #
  def expand_compressed
    @compressed_values.each { |value| insert_in_child(@compressed_key, value) }
    @compressed_values.clear
    @compressed_key.clear
  end

  ##
  # Convert a String key into an Array of characters; any other key is
  # returned untouched.
  #
  def normalize_key(key)
    key.is_a?(String) ? key.split('') : key
  end

  ##
  # Yield the child stored under +head+ (if there is one) and drop it from
  # this node afterwards if the block left it empty.
  #
  def with_child(head)
    child = @children[head]
    return unless child

    yield child
    @children.delete(head) if child.empty?
  end

  ##
  # Key representation used by _dump: a joined String when every element
  # is a one-character String, otherwise the key Array itself.
  #
  def compact_key(key)
    chars_only = key.all? { |element| element.is_a?(String) && element.size == 1 }
    chars_only ? key.join : key
  end
end # class Trie
435
+
436
+ # Load extensions
437
+ require 'extensions/look_ahead_trie'