rambling-trie 0.8.1 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +1 -1
- data/lib/rambling/trie.rb +21 -9
- data/lib/rambling/trie/compressed_node.rb +112 -0
- data/lib/rambling/trie/compression.rb +13 -0
- data/lib/rambling/trie/compressor.rb +30 -31
- data/lib/rambling/trie/{root.rb → container.rb} +41 -38
- data/lib/rambling/trie/enumerable.rb +11 -7
- data/lib/rambling/trie/missing_node.rb +1 -1
- data/lib/rambling/trie/node.rb +25 -22
- data/lib/rambling/trie/plain_text_reader.rb +1 -1
- data/lib/rambling/trie/raw_node.rb +90 -0
- data/lib/rambling/trie/tasks/helpers/path.rb +13 -0
- data/lib/rambling/trie/tasks/helpers/time.rb +7 -0
- data/lib/rambling/trie/tasks/performance.rb +10 -91
- data/lib/rambling/trie/tasks/performance/all.rb +4 -0
- data/lib/rambling/trie/tasks/performance/benchmark.rb +172 -0
- data/lib/rambling/trie/tasks/performance/directory.rb +11 -0
- data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +132 -0
- data/lib/rambling/trie/tasks/performance/profile/memory.rb +116 -0
- data/lib/rambling/trie/version.rb +1 -1
- data/rambling-trie.gemspec +6 -4
- data/spec/integration/rambling/trie_spec.rb +63 -9
- data/spec/lib/rambling/trie/compressed_node_spec.rb +35 -0
- data/spec/lib/rambling/trie/compressor_spec.rb +31 -0
- data/spec/lib/rambling/trie/container_spec.rb +470 -0
- data/spec/lib/rambling/trie/enumerable_spec.rb +2 -2
- data/spec/lib/rambling/trie/inspector_spec.rb +21 -14
- data/spec/lib/rambling/trie/node_spec.rb +72 -209
- data/spec/lib/rambling/trie/raw_node_spec.rb +377 -0
- data/spec/lib/rambling/trie_spec.rb +46 -25
- metadata +57 -16
- data/lib/rambling/trie/branches.rb +0 -149
- data/spec/lib/rambling/trie/branches_spec.rb +0 -52
- data/spec/lib/rambling/trie/root_spec.rb +0 -376
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3bb17c6b1df5c473eb696bc484ff0e3b46517867
|
4
|
+
data.tar.gz: 0b6daafa67a20409389b80aaf9ac5d1bc6694cd2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ecaecb91b920793208c878134881f08a7718d9f9771857a49cb15f8059e35273e29f81ce21a497de37566cfae8273a13ff611b43f0d049694b0207c65efa35bd
|
7
|
+
data.tar.gz: 810db517696450d411d75ad59df68e410e0c1109ad1133bc7aa95363609520f8db51031a72e1b3392475524f8d10b28ca1e827bd5dbb9c3fba0aaa2417eeef41
|
data/Rakefile
CHANGED
data/lib/rambling/trie.rb
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
require 'forwardable'
|
2
2
|
%w{
|
3
|
-
|
4
|
-
|
5
|
-
plain_text_reader root version
|
3
|
+
compression compressor inspector container enumerable invalid_operation
|
4
|
+
plain_text_reader node missing_node compressed_node raw_node version
|
6
5
|
}.each do |file|
|
7
6
|
require File.join('rambling', 'trie', file)
|
8
7
|
end
|
@@ -14,14 +13,27 @@ module Rambling
|
|
14
13
|
class << self
|
15
14
|
# Creates a new Trie. Entry point for the Rambling::Trie API.
|
16
15
|
# @param [String, nil] filepath the file to load the words from.
|
17
|
-
# @return [
|
18
|
-
# @yield [
|
19
|
-
def create filepath = nil, reader =
|
20
|
-
|
21
|
-
|
22
|
-
|
16
|
+
# @return [Container] the trie just created.
|
17
|
+
# @yield [Container] the trie just created.
|
18
|
+
def create filepath = nil, reader = nil
|
19
|
+
reader ||= default_reader
|
20
|
+
|
21
|
+
Rambling::Trie::Container.new do |container|
|
22
|
+
if filepath
|
23
|
+
reader.each_word filepath do |word|
|
24
|
+
container << word
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
yield container if block_given?
|
23
29
|
end
|
24
30
|
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def default_reader
|
35
|
+
Rambling::Trie::PlainTextReader.new
|
36
|
+
end
|
25
37
|
end
|
26
38
|
end
|
27
39
|
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
# A representation of a node in a compressed Trie data structure.
|
4
|
+
class CompressedNode < Rambling::Trie::Node
|
5
|
+
# Always raises [Rambling::Trie::InvalidOperation] when trying to add a
|
6
|
+
# branch to the current trie node based on the word
|
7
|
+
# @param [String] word the word to add the branch from.
|
8
|
+
# @raise [InvalidOperation] if the trie is already compressed.
|
9
|
+
def add word
|
10
|
+
raise Rambling::Trie::InvalidOperation, 'Cannot add branch to compressed trie'
|
11
|
+
end
|
12
|
+
|
13
|
+
# Checks if a path for a set of characters exists in the trie.
|
14
|
+
# @param [Array] chars the characters to look for in the trie.
|
15
|
+
# @return [Boolean] `true` if the characters are found, `false` otherwise.
|
16
|
+
def partial_word? chars
|
17
|
+
chars.empty? || has_partial_word?(chars)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Checks if a path for a set of characters represents a word in the trie.
|
21
|
+
# @param [Array] chars the characters to look for in the trie.
|
22
|
+
# @return [Boolean] `true` if the characters are found and form a word,
|
23
|
+
# `false` otherwise.
|
24
|
+
def word? chars
|
25
|
+
if chars.empty?
|
26
|
+
terminal?
|
27
|
+
else
|
28
|
+
has_word? chars
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Returns all words that start with the specified characters.
|
33
|
+
# @param [Array] chars the characters to look for in the trie.
|
34
|
+
# @return [Array] all the words contained in the trie that start with the specified characters.
|
35
|
+
def scan chars
|
36
|
+
closest_node(chars).to_a
|
37
|
+
end
|
38
|
+
|
39
|
+
# Always returns `true` for a compressed node.
|
40
|
+
# @return [Boolean] always `true` for a compressed node.
|
41
|
+
def compressed?
|
42
|
+
true
|
43
|
+
end
|
44
|
+
|
45
|
+
protected
|
46
|
+
|
47
|
+
def closest_node chars
|
48
|
+
if chars.empty?
|
49
|
+
self
|
50
|
+
else
|
51
|
+
current_length = 0
|
52
|
+
current_key, current_key_string = current_key chars.slice!(0)
|
53
|
+
|
54
|
+
begin
|
55
|
+
current_length += 1
|
56
|
+
|
57
|
+
if current_key_string.length == current_length || chars.empty?
|
58
|
+
return children_tree[current_key].closest_node chars
|
59
|
+
end
|
60
|
+
end while current_key_string[current_length] == chars.slice!(0)
|
61
|
+
|
62
|
+
Rambling::Trie::MissingNode.new
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
def has_partial_word? chars
|
69
|
+
current_length = 0
|
70
|
+
current_key, current_key_string = current_key chars.slice!(0)
|
71
|
+
|
72
|
+
begin
|
73
|
+
current_length += 1
|
74
|
+
|
75
|
+
if current_key_string.length == current_length || chars.empty?
|
76
|
+
return children_tree[current_key].partial_word? chars
|
77
|
+
end
|
78
|
+
end while current_key_string[current_length] == chars.slice!(0)
|
79
|
+
|
80
|
+
false
|
81
|
+
end
|
82
|
+
|
83
|
+
def has_word? chars
|
84
|
+
current_key_string = ''
|
85
|
+
|
86
|
+
while !chars.empty?
|
87
|
+
current_key_string << chars.slice!(0)
|
88
|
+
current_key = current_key_string.to_sym
|
89
|
+
child = children_tree[current_key]
|
90
|
+
return child.word? chars if child
|
91
|
+
end
|
92
|
+
|
93
|
+
false
|
94
|
+
end
|
95
|
+
|
96
|
+
def current_key letter
|
97
|
+
current_key_string = current_key = ''
|
98
|
+
|
99
|
+
children_tree.keys.each do |key|
|
100
|
+
key_string = key.to_s
|
101
|
+
if key_string.start_with? letter
|
102
|
+
current_key = key
|
103
|
+
current_key_string = key_string
|
104
|
+
break
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
[current_key, current_key_string]
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
# Provides the compressing behavior for the Trie data structure.
|
4
|
+
module Compression
|
5
|
+
# Indicates if the current [Rambling::Trie::Node] can be compressed.
|
6
|
+
# @return [Boolean] `true` for non-root, non-terminal nodes with exactly one child,
|
7
|
+
# `false` otherwise.
|
8
|
+
def compressable?
|
9
|
+
!(root? || terminal?) && children_tree.size == 1
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -1,47 +1,46 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
#
|
4
|
-
|
5
|
-
#
|
6
|
-
# @
|
7
|
-
|
8
|
-
|
3
|
+
# Responsible for the compression process of a Trie data structure.
|
4
|
+
class Compressor
|
5
|
+
# Compresses a node from a Trie data structure.
|
6
|
+
# @param [RawNode] node the node to compress
|
7
|
+
# @return [CompressedNode] the compressed version of the node
|
8
|
+
def compress node, parent = nil
|
9
|
+
if node.compressable?
|
10
|
+
merge_node_with_compressed_child node, parent
|
11
|
+
else
|
12
|
+
copy_node_and_compress_children node, parent
|
13
|
+
end
|
9
14
|
end
|
10
15
|
|
11
|
-
|
12
|
-
# @return [Root, Node] the compressed node.
|
13
|
-
def compress_tree!
|
14
|
-
if compressable?
|
15
|
-
merge_with! children.first
|
16
|
-
compress_tree!
|
17
|
-
end
|
16
|
+
private
|
18
17
|
|
19
|
-
|
18
|
+
def merge_node_with_compressed_child node, parent
|
19
|
+
compressed_child = compress node.children.first
|
20
20
|
|
21
|
-
|
22
|
-
|
21
|
+
new_node = Rambling::Trie::CompressedNode.new parent
|
22
|
+
new_node.letter = node.letter.to_s << compressed_child.letter.to_s
|
23
|
+
new_node.terminal! if compressed_child.terminal?
|
24
|
+
new_node.children_tree = compressed_child.children_tree
|
23
25
|
|
24
|
-
|
26
|
+
new_node.children.each do |child|
|
27
|
+
child.parent = new_node
|
28
|
+
end
|
25
29
|
|
26
|
-
|
27
|
-
!(root? || terminal?) && children_tree.size == 1
|
30
|
+
new_node
|
28
31
|
end
|
29
32
|
|
30
|
-
def
|
31
|
-
|
32
|
-
redefine_self! child
|
33
|
+
def copy_node_and_compress_children node, parent
|
34
|
+
new_node = Rambling::Trie::CompressedNode.new parent
|
33
35
|
|
34
|
-
|
35
|
-
|
36
|
+
new_node.letter = node.letter
|
37
|
+
new_node.terminal! if node.terminal?
|
36
38
|
|
37
|
-
|
38
|
-
|
39
|
-
|
39
|
+
node.children.map do |child|
|
40
|
+
compress child, new_node
|
41
|
+
end
|
40
42
|
|
41
|
-
|
42
|
-
self.letter = letter.to_s << merged_node.letter.to_s
|
43
|
-
self.children_tree = merged_node.children_tree
|
44
|
-
self.terminal = merged_node.terminal?
|
43
|
+
new_node
|
45
44
|
end
|
46
45
|
end
|
47
46
|
end
|
@@ -1,12 +1,27 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
#
|
4
|
-
class
|
3
|
+
# Wrapper on top of Trie data structure.
|
4
|
+
class Container
|
5
|
+
extend ::Forwardable
|
6
|
+
|
7
|
+
include ::Enumerable
|
8
|
+
|
9
|
+
delegate [
|
10
|
+
:each,
|
11
|
+
:compressed?,
|
12
|
+
:[],
|
13
|
+
:letter,
|
14
|
+
:inspect
|
15
|
+
] => :root
|
16
|
+
|
5
17
|
# Creates a new Trie.
|
6
|
-
# @
|
7
|
-
|
8
|
-
|
9
|
-
|
18
|
+
# @param [Node] root the root node for the trie
|
19
|
+
# @param [Compressor] compressor responsible for compressing the trie
|
20
|
+
# @yield [Container] the trie just created.
|
21
|
+
def initialize root = nil, compressor = nil
|
22
|
+
@root = root || default_root
|
23
|
+
@compressor = compressor || default_compressor
|
24
|
+
|
10
25
|
yield self if block_given?
|
11
26
|
end
|
12
27
|
|
@@ -14,71 +29,59 @@ module Rambling
|
|
14
29
|
# @param [String] word the word to add the branch from.
|
15
30
|
# @return [Node] the just added branch's root node.
|
16
31
|
# @raise [InvalidOperation] if the trie is already compressed.
|
17
|
-
# @see
|
32
|
+
# @see RawNode#add
|
33
|
+
# @see CompressedNode#add
|
18
34
|
# @note Avoids clearing the contents of the word variable.
|
19
35
|
def add word
|
20
|
-
|
36
|
+
root.add word.clone
|
21
37
|
end
|
22
38
|
|
23
|
-
# Compresses the existing tree using redundant node elimination. Flags
|
24
|
-
#
|
39
|
+
# Compresses the existing tree using redundant node elimination. Flags
|
40
|
+
# the trie as compressed.
|
41
|
+
# @return [Container] self
|
42
|
+
# @note Avoids compressing again if the trie has already been compressed.
|
25
43
|
def compress!
|
26
|
-
self.
|
44
|
+
self.root = compressor.compress root unless root.compressed?
|
27
45
|
self
|
28
46
|
end
|
29
47
|
|
30
|
-
# Flag for compressed tries. Overrides {Compressor#compressed?}.
|
31
|
-
# @return [Boolean] `true` for compressed tries, `false` otherwise.
|
32
|
-
def compressed?
|
33
|
-
!!compressed
|
34
|
-
end
|
35
|
-
|
36
48
|
# Checks if a path for a word or partial word exists in the trie.
|
37
49
|
# @param [String] word the word or partial word to look for in the trie.
|
38
50
|
# @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
|
39
51
|
def partial_word? word = ''
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
alias_method :match?, :partial_word?
|
44
|
-
|
45
|
-
# If the current node is the root node.
|
46
|
-
# @return [Boolean] `true`
|
47
|
-
def root?
|
48
|
-
true
|
52
|
+
root.partial_word? word.chars.to_a
|
49
53
|
end
|
50
54
|
|
51
55
|
# Checks if a whole word exists in the trie.
|
52
56
|
# @param [String] word the word to look for in the trie.
|
53
57
|
# @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
|
54
58
|
def word? word = ''
|
55
|
-
|
59
|
+
root.word? word.chars.to_a
|
56
60
|
end
|
57
61
|
|
58
|
-
alias_method :include?, :word?
|
59
|
-
|
60
62
|
# Returns all words that start with the specified characters.
|
61
63
|
# @param [String] word the word to look for in the trie.
|
62
64
|
# @return [Array] all the words contained in the trie that start with the specified characters.
|
63
65
|
def scan word = ''
|
64
|
-
|
66
|
+
root.scan(word.chars).to_a
|
65
67
|
end
|
66
68
|
|
69
|
+
alias_method :include?, :word?
|
70
|
+
alias_method :match?, :partial_word?
|
67
71
|
alias_method :words, :scan
|
72
|
+
alias_method :<<, :add
|
68
73
|
|
69
74
|
private
|
70
75
|
|
71
|
-
|
76
|
+
attr_reader :compressor
|
77
|
+
attr_accessor :root
|
72
78
|
|
73
|
-
def
|
74
|
-
|
75
|
-
send method, word.chars.to_a
|
79
|
+
def default_root
|
80
|
+
Rambling::Trie::RawNode.new
|
76
81
|
end
|
77
82
|
|
78
|
-
def
|
79
|
-
|
80
|
-
|
81
|
-
send method, word.chars.to_a
|
83
|
+
def default_compressor
|
84
|
+
Rambling::Trie::Compressor.new
|
82
85
|
end
|
83
86
|
end
|
84
87
|
end
|
@@ -6,14 +6,18 @@ module Rambling
|
|
6
6
|
|
7
7
|
alias_method :size, :count
|
8
8
|
|
9
|
-
#
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
9
|
+
# Iterates over the words contained in the trie.
|
10
|
+
# @yield [String] the words contained in this trie node.
|
11
|
+
def each
|
12
|
+
return enum_for :each unless block_given?
|
13
|
+
|
14
|
+
yield as_word if terminal?
|
15
15
|
|
16
|
-
|
16
|
+
children.each do |child|
|
17
|
+
child.each do |word|
|
18
|
+
yield word
|
19
|
+
end
|
20
|
+
end
|
17
21
|
end
|
18
22
|
end
|
19
23
|
end
|
data/lib/rambling/trie/node.rb
CHANGED
@@ -2,14 +2,18 @@ module Rambling
|
|
2
2
|
module Trie
|
3
3
|
# A representation of a node in the Trie data structure.
|
4
4
|
class Node
|
5
|
-
extend Forwardable
|
5
|
+
extend ::Forwardable
|
6
6
|
|
7
|
-
delegate [
|
7
|
+
delegate [
|
8
|
+
:[],
|
9
|
+
:[]=,
|
10
|
+
:delete,
|
11
|
+
:has_key?
|
12
|
+
] => :children_tree
|
8
13
|
|
9
|
-
include
|
10
|
-
include
|
11
|
-
include
|
12
|
-
include Inspector
|
14
|
+
include Rambling::Trie::Compression
|
15
|
+
include Rambling::Trie::Enumerable
|
16
|
+
include Rambling::Trie::Inspector
|
13
17
|
|
14
18
|
# Letter or letters corresponding to this node.
|
15
19
|
# @return [Symbol, nil] the corresponding letter(s) or nil.
|
@@ -17,24 +21,17 @@ module Rambling
|
|
17
21
|
|
18
22
|
# Children nodes.
|
19
23
|
# @return [Hash] the children_tree hash, consisting of :letter => node.
|
20
|
-
|
24
|
+
attr_accessor :children_tree
|
21
25
|
|
22
26
|
# Parent node.
|
23
27
|
# @return [Node, nil] the parent node or nil for the root element.
|
24
28
|
attr_accessor :parent
|
25
29
|
|
26
30
|
# Creates a new Node.
|
27
|
-
# @param [String, nil] word the word from which to create this Node and his branch.
|
28
31
|
# @param [Node, nil] parent the parent of this node.
|
29
|
-
def initialize
|
32
|
+
def initialize parent = nil
|
30
33
|
self.parent = parent
|
31
34
|
self.children_tree = {}
|
32
|
-
|
33
|
-
unless word.nil? || word.empty?
|
34
|
-
self.letter = word.slice! 0
|
35
|
-
self.terminal = word.empty?
|
36
|
-
self << word
|
37
|
-
end
|
38
35
|
end
|
39
36
|
|
40
37
|
# String representation of the current node, if it is a terminal node.
|
@@ -52,9 +49,9 @@ module Rambling
|
|
52
49
|
end
|
53
50
|
|
54
51
|
# If the current node is the root node.
|
55
|
-
# @return [Boolean] `
|
52
|
+
# @return [Boolean] `true` only if the node does not have a parent
|
56
53
|
def root?
|
57
|
-
|
54
|
+
!parent
|
58
55
|
end
|
59
56
|
|
60
57
|
# Flag for terminal nodes.
|
@@ -63,23 +60,29 @@ module Rambling
|
|
63
60
|
!!terminal
|
64
61
|
end
|
65
62
|
|
63
|
+
# Forces the [Node] to be `terminal`.
|
64
|
+
# @return [Node] the modified node.
|
65
|
+
def terminal!
|
66
|
+
self.terminal = true
|
67
|
+
self
|
68
|
+
end
|
69
|
+
|
66
70
|
# String representation of the current node.
|
67
71
|
# @return [String] the string representation of the current node.
|
68
72
|
def to_s
|
69
73
|
parent.to_s << letter.to_s
|
70
74
|
end
|
71
75
|
|
72
|
-
protected
|
73
|
-
|
74
|
-
attr_writer :children_tree
|
75
|
-
attr_accessor :terminal
|
76
|
-
|
77
76
|
def letter= new_letter
|
78
77
|
if new_letter
|
79
78
|
@letter = new_letter.to_sym
|
80
79
|
parent[letter] = self if parent
|
81
80
|
end
|
82
81
|
end
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
attr_accessor :terminal
|
83
86
|
end
|
84
87
|
end
|
85
88
|
end
|