codders-trie 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +340 -0
- data/INSTALL +232 -0
- data/MANIFEST +8 -0
- data/README +6 -0
- data/lib/trie.rb +437 -0
- data/setup.rb +1551 -0
- data/test/tests.rb +275 -0
- data/trie.gemspec +14 -0
- metadata +54 -0
data/MANIFEST
ADDED
data/README
ADDED
data/lib/trie.rb
ADDED
@@ -0,0 +1,437 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
#
|
3
|
+
# = Name
|
4
|
+
# Trie
|
5
|
+
#
|
6
|
+
# == Description
|
7
|
+
# This file contains an implementation of a trie data structure.
|
8
|
+
#
|
9
|
+
# == Version
|
10
|
+
# 0.0.1
|
11
|
+
#
|
12
|
+
# == Author
|
13
|
+
# Daniel Erat <dan-ruby@erat.org>
|
14
|
+
#
|
15
|
+
# == Copyright
|
16
|
+
# Copyright 2005 Daniel Erat
|
17
|
+
#
|
18
|
+
# == License
|
19
|
+
# GNU GPL; see COPYING
|
20
|
+
#
|
21
|
+
# == Changes
|
22
|
+
# 0.0.1 Initial release
|
23
|
+
|
24
|
+
require 'set'
|
25
|
+
|
26
|
+
# = Trie
|
27
|
+
#
|
28
|
+
# == Description
|
29
|
+
# Implementation of a trie data structure, well-suited for storing and
|
30
|
+
# looking up strings or other sequences.
|
31
|
+
#
|
32
|
+
# More specifically, this is an implementation of a Patricia trie
|
33
|
+
# (http://en.wikipedia.org/wiki/Patricia_trie).
|
34
|
+
#
|
35
|
+
# == Usage
|
36
|
+
# require "trie"
|
37
|
+
#
|
38
|
+
# # Create a new Trie and insert some values into it.
|
39
|
+
# t = Trie.new
|
40
|
+
# t.insert("the", 1)
|
41
|
+
# t.insert("they", 2)
|
42
|
+
# t.insert("they", 3)
|
43
|
+
# t.insert("their", 4).insert("they're", 5)
|
44
|
+
#
|
45
|
+
# # Search for an exact match of "they".
|
46
|
+
# t.find("they").values # => [2, 3]
|
47
|
+
#
|
48
|
+
# # Search for a prefix that will match all keys.
|
49
|
+
# t2 = t.find_prefix("th")
|
50
|
+
# puts t2.size # prints 5
|
51
|
+
#
|
52
|
+
# # In the sub-Trie beginning with "th", search for the prefix "ey"
|
53
|
+
# # (therefore, getting the three values with keys beginning with "they").
|
54
|
+
# t2.find_prefix("ey").each_value {|v| puts v } # prints 2, 3, and 5
|
55
|
+
#
|
56
|
+
# # Now search for "at" in the sub-Trie, which results in an empty Trie
|
57
|
+
# # (as there are no keys beginning with "that").
|
58
|
+
# puts t2.find_prefix("at").empty? # prints true
|
59
|
+
#
|
60
|
+
# # Delete all values keyed by "they" (note that this must be performed on
|
61
|
+
# # the root Trie rather than the one returned by Trie.find_prefix -- read
|
62
|
+
# # the "Notes" section to find out why).
|
63
|
+
# t.delete("they")
|
64
|
+
#
|
65
|
+
# == Notes
|
66
|
+
# Keys are stored internally as Arrays. If you use Strings as keys they
|
67
|
+
# will be automatically converted, and when you use a method to access them
|
68
|
+
# later you'll receive them as Arrays instead. For example:
|
69
|
+
#
|
70
|
+
# t = Trie.new.insert("abc", 1).insert("def", 2)
|
71
|
+
# t.keys # => [["a", "b", "c"], ["d", "e", "f"]]
|
72
|
+
# t.keys.collect {|k| k.join } # => ["abc", "def"]
|
73
|
+
#
|
74
|
+
# (I'm hesitant to add code that will return keys as Strings if the user
|
75
|
+
# has only passed in Strings so far.)
|
76
|
+
#
|
77
|
+
# Empty nodes are compressed. The strings "row" and "ruby", which would
|
78
|
+
# normally be stored as
|
79
|
+
#
|
80
|
+
# ''
|
81
|
+
# /
|
82
|
+
# r
|
83
|
+
# / \
|
84
|
+
# o u
|
85
|
+
# / \
|
86
|
+
# w b
|
87
|
+
# \
|
88
|
+
# y
|
89
|
+
#
|
90
|
+
# are actually stored as
|
91
|
+
#
|
92
|
+
# ''
|
93
|
+
# /
|
94
|
+
# r
|
95
|
+
# / \
|
96
|
+
# ow uby
|
97
|
+
#
|
98
|
+
# Because of this implementation (and to allow Trie.find to be called on
|
99
|
+
# nodes returned by Trie.find that contain compressed elements), Trie.find
|
100
|
+
# and Trie.find_prefix will in some (most) cases return Trie objects that
|
101
|
+
# are not members of the root Trie. As a result, methods such as
|
102
|
+
# Trie.insert, Trie.delete, and Trie.clear should only be called on Trie
|
103
|
+
# objects that were directly returned by Trie.new.
|
104
|
+
class Trie
  include Enumerable

  ##
  # Create a new empty Trie.
  #
  # ==== Example
  #  t = Trie.new
  #
  def initialize
    # Values whose key ends exactly at this node.
    @values = Set.new
    # Maps the next key element to the sub-Trie holding the rest of the key.
    @children = {}
    # While a node holds values for only one (non-empty) key, that key is kept
    # whole here instead of being expanded into a chain of child nodes.
    @compressed_key = []
    @compressed_values = Set.new
  end

  ##
  # Return an Array of all of the values stored under exactly this key.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 3).insert("a", 4).insert("b", 5)
  #  t["a"]  # => [3, 4]
  #
  def [](key)
    find(key).values
  end

  ##
  # Remove every key and value from the Trie.  Returns self.
  #
  # ==== Example
  #  t = Trie.new.insert("blah", 3).insert("a", 1)
  #  t.clear  # t now contains no values
  #
  def clear
    @values.clear
    @children.clear
    @compressed_key.clear
    @compressed_values.clear
    self
  end

  ##
  # Delete all values with a given key.  Returns self.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("a", 2).insert("abc", 3)
  #  t.delete("a")  # t now only contains the third value
  #
  def delete(key)
    key = key.split('') if key.is_a?(String)
    if key.empty?
      @values.clear
    elsif key == @compressed_key
      @compressed_key.clear
      @compressed_values.clear
    else
      prune_child(key[0]) { |child| child.delete(key[1..-1]) }
    end
    self
  end

  ##
  # Delete all occurrences of a value, whatever key it is stored under.
  # Returns self.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("blah", 1).insert("a", 2)
  #  t.delete_value(1)  # t now only contains the third value
  #
  def delete_value(value)
    @compressed_values.delete(value)
    @compressed_key.clear if @compressed_values.empty?
    @values.delete(value)
    # delete_if removes emptied sub-Tries without calling Hash#delete on the
    # hash while it is being iterated.
    @children.delete_if { |_, child| child.delete_value(value).empty? }
    self
  end

  ##
  # Delete a single (key, value) pair.  Other values stored under the same
  # key are left in place.  Returns self.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("a", 2)
  #  t.delete_pair("a", 1)  # t now only contains the second value
  #
  def delete_pair(key, value)
    key = key.split('') if key.is_a?(String)
    if key.empty?
      @values.delete(value)
    elsif key == @compressed_key
      @compressed_values.delete(value)
      # Only forget the compressed key once nothing is stored under it;
      # clearing it earlier would leave the remaining values unreachable.
      @compressed_key.clear if @compressed_values.empty?
    else
      prune_child(key[0]) { |child| child.delete_pair(key[1..-1], value) }
    end
    self
  end

  ##
  # Delete all values keyed by a given prefix.  Returns self.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("al", 2).insert("algernon", 3)
  #  t.delete_prefix("al")  # t now only contains the first value
  #
  def delete_prefix(prefix)
    prefix = prefix.split('') if prefix.is_a?(String)
    if prefix.empty? || prefix == @compressed_key[0...prefix.size]
      clear
    else
      prune_child(prefix[0]) { |child| child.delete_prefix(prefix[1..-1]) }
    end
    self
  end

  ##
  # Calls block once for each (key, value) pair in the Trie, passing
  # the key (an Array) and the value as parameters.  The optional prefix is
  # prepended to every key passed to the block; it is used by the recursion
  # into sub-Tries.  Returns self, or an Enumerator if no block is given.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("b", 2)
  #  t.each {|k, v| puts "#{k.join()}: #{v}" }  # prints "a: 1" and "b: 2"
  #
  def each(prefix = [])
    return enum_for(:each, prefix) unless block_given?

    @values.each { |value| yield(prefix, value) }
    @compressed_values.each { |value| yield(prefix + @compressed_key, value) }
    @children.each do |element, child|
      child.each(prefix + [element]) { |key, value| yield(key, value) }
    end
    self
  end

  ##
  # Calls block once for each key in the Trie, passing the key (an Array)
  # as a parameter.  A key holding several values is only passed once.
  # Returns self, or an Enumerator if no block is given.
  #
  # ==== Example
  #  t = Trie.new.insert("abc", 1).insert("def", 2)
  #  t.each_key {|key| puts key.join() }  # prints "abc" and "def"
  #
  def each_key(prefix = [])
    return enum_for(:each_key, prefix) unless block_given?

    yield prefix unless @values.empty?
    yield prefix + @compressed_key unless @compressed_values.empty?
    @children.each do |element, child|
      child.each_key(prefix + [element]) { |key| yield key }
    end
    self
  end

  ##
  # Calls block once for each (key, value) pair in the Trie, passing
  # only the value as a parameter.  Returns self, or an Enumerator if no
  # block is given.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("b", 2)
  #  t.each_value {|value| puts value }  # prints "1" and "2"
  #
  def each_value
    return enum_for(:each_value) unless block_given?

    @compressed_values.each { |value| yield value }
    @values.each { |value| yield value }
    @children.each_value { |child| child.each_value { |value| yield value } }
    self
  end

  ##
  # Does this Trie contain no values?
  #
  # ==== Example
  #  t = Trie.new
  #  t.empty?  # => true
  #  t.insert("blah", 1)
  #  t.empty?  # => false
  #
  def empty?
    # Stops at the first value found rather than counting every value.
    @values.empty? && @compressed_values.empty? &&
      @children.all? { |_, child| child.empty? }
  end

  ##
  # Get a new Trie object containing all values with the passed-in key.
  # In the returned Trie the values are keyed by the empty key.
  #
  # ==== Example
  #  t = Trie.new.insert("the", 1).insert("their", 2).insert("foo", 4)
  #  t.find("the")  # => Trie containing only the first value
  #
  def find(key)
    key = key.split('') if key.is_a?(String)
    if (key.empty? && @compressed_key.empty?) || key == @compressed_key
      match = self.class.new
      @values.each { |value| match.insert([], value) }
      @compressed_values.each { |value| match.insert([], value) }
      match
    elsif @children[key[0]]
      @children[key[0]].find(key[1..-1])
    else
      self.class.new
    end
  end

  ##
  # Get a Trie object containing all values with keys that begin with
  # the passed-in prefix.  Keys in the result are relative to the prefix
  # (the prefix itself is stripped).  The result may be a node of this Trie
  # or a freshly built Trie, so it should be treated as read-only.
  #
  # ==== Example
  #  t = Trie.new.insert("test", 1).insert("testing", 2)
  #  t.find_prefix("test")          # => Trie containing both values
  #  t.find_prefix("test").keys     # => [[], ["i", "n", "g"]]
  #  # Prefix searches can be chained; this returns only the first value:
  #  t.find_prefix("t").find_prefix("es").find_prefix("t").find("")
  #
  def find_prefix(prefix)
    prefix = prefix.split('') if prefix.is_a?(String)
    if prefix.empty?
      self
    elsif prefix == @compressed_key[0...prefix.size]
      match = self.class.new
      remainder = @compressed_key[prefix.size..-1]
      @compressed_values.each { |value| match.insert(remainder, value) }
      match
    elsif @children[prefix[0]]
      @children[prefix[0]].find_prefix(prefix[1..-1])
    else
      self.class.new
    end
  end

  ##
  # Insert a value into this Trie, keyed by the passed-in key, which can be
  # a String (split into characters) or an Array of elements.  Several
  # values may share one key; inserting the same (key, value) pair twice
  # stores it once.  Returns self, so calls can be chained.
  #
  # ==== Example
  #  t = Trie.new.insert("this is a string of considerable length", [ 0, 4, ])
  #  t.insert([ "abc", "def", ], "testing")
  #
  def insert(key, value)
    key = key.split('') if key.is_a?(String)
    if key != @compressed_key
      # A second, different key arrives: expand the compressed entry into a
      # child node before storing anything else here.
      @compressed_values.each { |v| insert_in_child(@compressed_key, v) }
      @compressed_values.clear
      @compressed_key.clear
    end

    if key.empty?
      @values.add(value)
    elsif (@values.empty? && @children.empty?) || key == @compressed_key
      # Nothing else lives in this node, so keep the whole key compressed.
      @compressed_key = key.dup
      @compressed_values.add(value)
    else
      insert_in_child(key, value)
    end
    self
  end

  ##
  # Get an Array containing all keys in this Trie (each key is an Array).
  #
  # ==== Example
  #  t = Trie.new.insert("test", 1).insert([0, 1], 2)
  #  t.keys  # => [['t', 'e', 's', 't'], [0, 1]]
  #
  def keys
    found = []
    each_key { |key| found.push(key) }
    found
  end

  ##
  # Get the number of nodes used to represent this Trie (this node plus all
  # nodes below it).
  #
  # This is only useful for testing.
  #
  def num_nodes
    @children.values.inject(1) { |count, child| count + child.num_nodes }
  end

  ##
  # Get the number of values contained in this Trie.
  #
  # ==== Example
  #  t = Trie.new.insert("test", 1).insert("foo", 2)
  #  t.size  # => 2
  #
  def size
    @children.values.inject(@compressed_values.size + @values.size) do |count, child|
      count + child.size
    end
  end

  ##
  # Get an Array containing all values in this Trie.
  #
  # ==== Example
  #  t = Trie.new.insert("a", 1).insert("b", 2)
  #  t.values  # => Array containing both values
  #  t.values.each {|value| puts value }  # prints "1" and "2"
  #
  def values
    found = []
    each_value { |value| found.push(value) }
    found
  end

  ##
  # Custom marshalling behavior.  The Trie is dumped as a flat list of
  # [key, value] pairs, which marshals to a much smaller value than the
  # default node-by-node dump.  Keys made only of one-character Strings are
  # stored as a single String; any other key is stored as its Array so that
  # it is restored unchanged.  The depth argument is ignored.
  #
  def _dump(depth)
    Marshal.dump(map { |key, value| [dumpable_key(key), value] })
  end

  ##
  # Rebuild a Trie from the String produced by _dump.
  #
  # NOTE: this calls Marshal.load, which must never be given untrusted data.
  #
  def self._load(o)
    Marshal.load(o).inject(new) { |trie, (key, value)| trie.insert(key, value) }
  end

  private

  ##
  # Insert a value into a sub-Trie, creating one if necessary.
  #
  # Internal method called by Trie.insert.
  #
  def insert_in_child(key, value)
    (@children[key[0]] ||= self.class.new).insert(key[1..-1], value)
  end

  ##
  # Yield the sub-Trie stored under element (if there is one), then drop it
  # from this node if the block left it empty.
  #
  def prune_child(element)
    child = @children[element]
    return unless child

    yield child
    @children.delete(element) if child.empty?
  end

  ##
  # Return the form of a key written by _dump: a joined String when that
  # can be split back into the identical key, otherwise the key itself.
  #
  def dumpable_key(key)
    if key.all? { |element| element.is_a?(String) && element.size == 1 }
      key.join('')
    else
      key
    end
  end

end # class Trie
|
435
|
+
|
436
|
+
# Load extensions
|
437
|
+
require 'extensions/look_ahead_trie'
|