rambling-trie 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +1 -1
- data/lib/rambling/trie.rb +21 -9
- data/lib/rambling/trie/compressed_node.rb +112 -0
- data/lib/rambling/trie/compression.rb +13 -0
- data/lib/rambling/trie/compressor.rb +30 -31
- data/lib/rambling/trie/{root.rb → container.rb} +41 -38
- data/lib/rambling/trie/enumerable.rb +11 -7
- data/lib/rambling/trie/missing_node.rb +1 -1
- data/lib/rambling/trie/node.rb +25 -22
- data/lib/rambling/trie/plain_text_reader.rb +1 -1
- data/lib/rambling/trie/raw_node.rb +90 -0
- data/lib/rambling/trie/tasks/helpers/path.rb +13 -0
- data/lib/rambling/trie/tasks/helpers/time.rb +7 -0
- data/lib/rambling/trie/tasks/performance.rb +10 -91
- data/lib/rambling/trie/tasks/performance/all.rb +4 -0
- data/lib/rambling/trie/tasks/performance/benchmark.rb +172 -0
- data/lib/rambling/trie/tasks/performance/directory.rb +11 -0
- data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +132 -0
- data/lib/rambling/trie/tasks/performance/profile/memory.rb +116 -0
- data/lib/rambling/trie/version.rb +1 -1
- data/rambling-trie.gemspec +6 -4
- data/spec/integration/rambling/trie_spec.rb +63 -9
- data/spec/lib/rambling/trie/compressed_node_spec.rb +35 -0
- data/spec/lib/rambling/trie/compressor_spec.rb +31 -0
- data/spec/lib/rambling/trie/container_spec.rb +470 -0
- data/spec/lib/rambling/trie/enumerable_spec.rb +2 -2
- data/spec/lib/rambling/trie/inspector_spec.rb +21 -14
- data/spec/lib/rambling/trie/node_spec.rb +72 -209
- data/spec/lib/rambling/trie/raw_node_spec.rb +377 -0
- data/spec/lib/rambling/trie_spec.rb +46 -25
- metadata +57 -16
- data/lib/rambling/trie/branches.rb +0 -149
- data/spec/lib/rambling/trie/branches_spec.rb +0 -52
- data/spec/lib/rambling/trie/root_spec.rb +0 -376
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3bb17c6b1df5c473eb696bc484ff0e3b46517867
|
4
|
+
data.tar.gz: 0b6daafa67a20409389b80aaf9ac5d1bc6694cd2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ecaecb91b920793208c878134881f08a7718d9f9771857a49cb15f8059e35273e29f81ce21a497de37566cfae8273a13ff611b43f0d049694b0207c65efa35bd
|
7
|
+
data.tar.gz: 810db517696450d411d75ad59df68e410e0c1109ad1133bc7aa95363609520f8db51031a72e1b3392475524f8d10b28ca1e827bd5dbb9c3fba0aaa2417eeef41
|
data/Rakefile
CHANGED
data/lib/rambling/trie.rb
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
require 'forwardable'
|
2
2
|
%w{
|
3
|
-
|
4
|
-
|
5
|
-
plain_text_reader root version
|
3
|
+
compression compressor inspector container enumerable invalid_operation
|
4
|
+
plain_text_reader node missing_node compressed_node raw_node version
|
6
5
|
}.each do |file|
|
7
6
|
require File.join('rambling', 'trie', file)
|
8
7
|
end
|
@@ -14,14 +13,27 @@ module Rambling
|
|
14
13
|
class << self
|
15
14
|
# Creates a new Trie. Entry point for the Rambling::Trie API.
|
16
15
|
# @param [String, nil] filepath the file to load the words from.
|
17
|
-
# @return [
|
18
|
-
# @yield [
|
19
|
-
def create filepath = nil, reader =
|
20
|
-
|
21
|
-
|
22
|
-
|
16
|
+
# @return [Container] the trie just created.
|
17
|
+
# @yield [Container] the trie just created.
|
18
|
+
def create filepath = nil, reader = nil
|
19
|
+
reader ||= default_reader
|
20
|
+
|
21
|
+
Rambling::Trie::Container.new do |container|
|
22
|
+
if filepath
|
23
|
+
reader.each_word filepath do |word|
|
24
|
+
container << word
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
yield container if block_given?
|
23
29
|
end
|
24
30
|
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def default_reader
|
35
|
+
Rambling::Trie::PlainTextReader.new
|
36
|
+
end
|
25
37
|
end
|
26
38
|
end
|
27
39
|
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
# A representation of a node in a compressed Trie data structure.
|
4
|
+
class CompressedNode < Rambling::Trie::Node
|
5
|
+
# Always raises [Rambling::Trie::InvalidOperation] when trying to add a
|
6
|
+
# branch to the current trie node based on the word
|
7
|
+
# @param [String] word the word to add the branch from.
|
8
|
+
# @raise [InvalidOperation] if the trie is already compressed.
|
9
|
+
def add word
|
10
|
+
raise Rambling::Trie::InvalidOperation, 'Cannot add branch to compressed trie'
|
11
|
+
end
|
12
|
+
|
13
|
+
# Checks if a path for a set of characters exists in the trie.
|
14
|
+
# @param [Array] chars the characters to look for in the trie.
|
15
|
+
# @return [Boolean] `true` if the characters are found, `false` otherwise.
|
16
|
+
def partial_word? chars
|
17
|
+
chars.empty? || has_partial_word?(chars)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Checks if a path for a set of characters represents a word in the trie.
|
21
|
+
# @param [Array] chars the characters to look for in the trie.
|
22
|
+
# @return [Boolean] `true` if the characters are found and form a word,
|
23
|
+
# `false` otherwise.
|
24
|
+
def word? chars
|
25
|
+
if chars.empty?
|
26
|
+
terminal?
|
27
|
+
else
|
28
|
+
has_word? chars
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
# Returns all words that start with the specified characters.
|
33
|
+
# @param [Array] chars the characters to look for in the trie.
|
34
|
+
# @return [Array] all the words contained in the trie that start with the specified characters.
|
35
|
+
def scan chars
|
36
|
+
closest_node(chars).to_a
|
37
|
+
end
|
38
|
+
|
39
|
+
# Always returns `true` for a compressed node.
|
40
|
+
# @return [Boolean] always `true` for a compressed node.
|
41
|
+
def compressed?
|
42
|
+
true
|
43
|
+
end
|
44
|
+
|
45
|
+
protected
|
46
|
+
|
47
|
+
def closest_node chars
|
48
|
+
if chars.empty?
|
49
|
+
self
|
50
|
+
else
|
51
|
+
current_length = 0
|
52
|
+
current_key, current_key_string = current_key chars.slice!(0)
|
53
|
+
|
54
|
+
begin
|
55
|
+
current_length += 1
|
56
|
+
|
57
|
+
if current_key_string.length == current_length || chars.empty?
|
58
|
+
return children_tree[current_key].closest_node chars
|
59
|
+
end
|
60
|
+
end while current_key_string[current_length] == chars.slice!(0)
|
61
|
+
|
62
|
+
Rambling::Trie::MissingNode.new
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
def has_partial_word? chars
|
69
|
+
current_length = 0
|
70
|
+
current_key, current_key_string = current_key chars.slice!(0)
|
71
|
+
|
72
|
+
begin
|
73
|
+
current_length += 1
|
74
|
+
|
75
|
+
if current_key_string.length == current_length || chars.empty?
|
76
|
+
return children_tree[current_key].partial_word? chars
|
77
|
+
end
|
78
|
+
end while current_key_string[current_length] == chars.slice!(0)
|
79
|
+
|
80
|
+
false
|
81
|
+
end
|
82
|
+
|
83
|
+
def has_word? chars
|
84
|
+
current_key_string = ''
|
85
|
+
|
86
|
+
while !chars.empty?
|
87
|
+
current_key_string << chars.slice!(0)
|
88
|
+
current_key = current_key_string.to_sym
|
89
|
+
child = children_tree[current_key]
|
90
|
+
return child.word? chars if child
|
91
|
+
end
|
92
|
+
|
93
|
+
false
|
94
|
+
end
|
95
|
+
|
96
|
+
def current_key letter
|
97
|
+
current_key_string = current_key = ''
|
98
|
+
|
99
|
+
children_tree.keys.each do |key|
|
100
|
+
key_string = key.to_s
|
101
|
+
if key_string.start_with? letter
|
102
|
+
current_key = key
|
103
|
+
current_key_string = key_string
|
104
|
+
break
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
[current_key, current_key_string]
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
# Provides the compressing behavior for the Trie data structure.
|
4
|
+
module Compression
|
5
|
+
# Indicates if the current [Rambling::Trie::Node] can be compressed.
|
6
|
+
# @return [Boolean] `true` for non-terminal nodes with one child,
|
7
|
+
# `false` otherwise.
|
8
|
+
def compressable?
|
9
|
+
!(root? || terminal?) && children_tree.size == 1
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -1,47 +1,46 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
#
|
4
|
-
|
5
|
-
#
|
6
|
-
# @
|
7
|
-
|
8
|
-
|
3
|
+
# Responsible for the compression process of a Trie data structure.
|
4
|
+
class Compressor
|
5
|
+
# Compresses a node from a Trie data structure.
|
6
|
+
# @param [RawNode] node the node to compress
|
7
|
+
# @return [CompressedNode] node the compressed version of the node
|
8
|
+
def compress node, parent = nil
|
9
|
+
if node.compressable?
|
10
|
+
merge_node_with_compressed_child node, parent
|
11
|
+
else
|
12
|
+
copy_node_and_compress_children node, parent
|
13
|
+
end
|
9
14
|
end
|
10
15
|
|
11
|
-
|
12
|
-
# @return [Root, Node] the compressed node.
|
13
|
-
def compress_tree!
|
14
|
-
if compressable?
|
15
|
-
merge_with! children.first
|
16
|
-
compress_tree!
|
17
|
-
end
|
16
|
+
private
|
18
17
|
|
19
|
-
|
18
|
+
def merge_node_with_compressed_child node, parent
|
19
|
+
compressed_child = compress node.children.first
|
20
20
|
|
21
|
-
|
22
|
-
|
21
|
+
new_node = Rambling::Trie::CompressedNode.new parent
|
22
|
+
new_node.letter = node.letter.to_s << compressed_child.letter.to_s
|
23
|
+
new_node.terminal! if compressed_child.terminal?
|
24
|
+
new_node.children_tree = compressed_child.children_tree
|
23
25
|
|
24
|
-
|
26
|
+
new_node.children.each do |child|
|
27
|
+
child.parent = new_node
|
28
|
+
end
|
25
29
|
|
26
|
-
|
27
|
-
!(root? || terminal?) && children_tree.size == 1
|
30
|
+
new_node
|
28
31
|
end
|
29
32
|
|
30
|
-
def
|
31
|
-
|
32
|
-
redefine_self! child
|
33
|
+
def copy_node_and_compress_children node, parent
|
34
|
+
new_node = Rambling::Trie::CompressedNode.new parent
|
33
35
|
|
34
|
-
|
35
|
-
|
36
|
+
new_node.letter = node.letter
|
37
|
+
new_node.terminal! if node.terminal?
|
36
38
|
|
37
|
-
|
38
|
-
|
39
|
-
|
39
|
+
node.children.map do |child|
|
40
|
+
compress child, new_node
|
41
|
+
end
|
40
42
|
|
41
|
-
|
42
|
-
self.letter = letter.to_s << merged_node.letter.to_s
|
43
|
-
self.children_tree = merged_node.children_tree
|
44
|
-
self.terminal = merged_node.terminal?
|
43
|
+
new_node
|
45
44
|
end
|
46
45
|
end
|
47
46
|
end
|
@@ -1,12 +1,27 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
#
|
4
|
-
class
|
3
|
+
# Wrapper on top of Trie data structure.
|
4
|
+
class Container
|
5
|
+
extend ::Forwardable
|
6
|
+
|
7
|
+
include ::Enumerable
|
8
|
+
|
9
|
+
delegate [
|
10
|
+
:each,
|
11
|
+
:compressed?,
|
12
|
+
:[],
|
13
|
+
:letter,
|
14
|
+
:inspect
|
15
|
+
] => :root
|
16
|
+
|
5
17
|
# Creates a new Trie.
|
6
|
-
# @
|
7
|
-
|
8
|
-
|
9
|
-
|
18
|
+
# @param [Node] root the root node for the trie
|
19
|
+
# @param [Compressor] compressor responsible for compressing the trie
|
20
|
+
# @yield [Container] the trie just created.
|
21
|
+
def initialize root = nil, compressor = nil
|
22
|
+
@root = root || default_root
|
23
|
+
@compressor = compressor || default_compressor
|
24
|
+
|
10
25
|
yield self if block_given?
|
11
26
|
end
|
12
27
|
|
@@ -14,71 +29,59 @@ module Rambling
|
|
14
29
|
# @param [String] word the word to add the branch from.
|
15
30
|
# @return [Node] the just added branch's root node.
|
16
31
|
# @raise [InvalidOperation] if the trie is already compressed.
|
17
|
-
# @see
|
32
|
+
# @see RawNode#add
|
33
|
+
# @see CompressedNode#add
|
18
34
|
# @note Avoids clearing the contents of the word variable.
|
19
35
|
def add word
|
20
|
-
|
36
|
+
root.add word.clone
|
21
37
|
end
|
22
38
|
|
23
|
-
# Compresses the existing tree using redundant node elimination. Flags
|
24
|
-
#
|
39
|
+
# Compresses the existing tree using redundant node elimination. Flags
|
40
|
+
# the trie as compressed.
|
41
|
+
# @return [Container] self
|
42
|
+
# @note Avoids compressing again if the trie has already been compressed.
|
25
43
|
def compress!
|
26
|
-
self.
|
44
|
+
self.root = compressor.compress root unless root.compressed?
|
27
45
|
self
|
28
46
|
end
|
29
47
|
|
30
|
-
# Flag for compressed tries. Overrides {Compressor#compressed?}.
|
31
|
-
# @return [Boolean] `true` for compressed tries, `false` otherwise.
|
32
|
-
def compressed?
|
33
|
-
!!compressed
|
34
|
-
end
|
35
|
-
|
36
48
|
# Checks if a path for a word or partial word exists in the trie.
|
37
49
|
# @param [String] word the word or partial word to look for in the trie.
|
38
50
|
# @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
|
39
51
|
def partial_word? word = ''
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
alias_method :match?, :partial_word?
|
44
|
-
|
45
|
-
# If the current node is the root node.
|
46
|
-
# @return [Boolean] `true`
|
47
|
-
def root?
|
48
|
-
true
|
52
|
+
root.partial_word? word.chars.to_a
|
49
53
|
end
|
50
54
|
|
51
55
|
# Checks if a whole word exists in the trie.
|
52
56
|
# @param [String] word the word to look for in the trie.
|
53
57
|
# @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
|
54
58
|
def word? word = ''
|
55
|
-
|
59
|
+
root.word? word.chars.to_a
|
56
60
|
end
|
57
61
|
|
58
|
-
alias_method :include?, :word?
|
59
|
-
|
60
62
|
# Returns all words that start with the specified characters.
|
61
63
|
# @param [String] word the word to look for in the trie.
|
62
64
|
# @return [Array] all the words contained in the trie that start with the specified characters.
|
63
65
|
def scan word = ''
|
64
|
-
|
66
|
+
root.scan(word.chars).to_a
|
65
67
|
end
|
66
68
|
|
69
|
+
alias_method :include?, :word?
|
70
|
+
alias_method :match?, :partial_word?
|
67
71
|
alias_method :words, :scan
|
72
|
+
alias_method :<<, :add
|
68
73
|
|
69
74
|
private
|
70
75
|
|
71
|
-
|
76
|
+
attr_reader :compressor
|
77
|
+
attr_accessor :root
|
72
78
|
|
73
|
-
def
|
74
|
-
|
75
|
-
send method, word.chars.to_a
|
79
|
+
def default_root
|
80
|
+
Rambling::Trie::RawNode.new
|
76
81
|
end
|
77
82
|
|
78
|
-
def
|
79
|
-
|
80
|
-
|
81
|
-
send method, word.chars.to_a
|
83
|
+
def default_compressor
|
84
|
+
Rambling::Trie::Compressor.new
|
82
85
|
end
|
83
86
|
end
|
84
87
|
end
|
@@ -6,14 +6,18 @@ module Rambling
|
|
6
6
|
|
7
7
|
alias_method :size, :count
|
8
8
|
|
9
|
-
#
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
9
|
+
# Iterates over the words contained in the trie.
|
10
|
+
# @yield [String] the words contained in this trie node.
|
11
|
+
def each
|
12
|
+
return enum_for :each unless block_given?
|
13
|
+
|
14
|
+
yield as_word if terminal?
|
15
15
|
|
16
|
-
|
16
|
+
children.each do |child|
|
17
|
+
child.each do |word|
|
18
|
+
yield word
|
19
|
+
end
|
20
|
+
end
|
17
21
|
end
|
18
22
|
end
|
19
23
|
end
|
data/lib/rambling/trie/node.rb
CHANGED
@@ -2,14 +2,18 @@ module Rambling
|
|
2
2
|
module Trie
|
3
3
|
# A representation of a node in the Trie data structure.
|
4
4
|
class Node
|
5
|
-
extend Forwardable
|
5
|
+
extend ::Forwardable
|
6
6
|
|
7
|
-
delegate [
|
7
|
+
delegate [
|
8
|
+
:[],
|
9
|
+
:[]=,
|
10
|
+
:delete,
|
11
|
+
:has_key?
|
12
|
+
] => :children_tree
|
8
13
|
|
9
|
-
include
|
10
|
-
include
|
11
|
-
include
|
12
|
-
include Inspector
|
14
|
+
include Rambling::Trie::Compression
|
15
|
+
include Rambling::Trie::Enumerable
|
16
|
+
include Rambling::Trie::Inspector
|
13
17
|
|
14
18
|
# Letter or letters corresponding to this node.
|
15
19
|
# @return [Symbol, nil] the corresponding letter(s) or nil.
|
@@ -17,24 +21,17 @@ module Rambling
|
|
17
21
|
|
18
22
|
# Children nodes.
|
19
23
|
# @return [Hash] the children_tree hash, consisting of :letter => node.
|
20
|
-
|
24
|
+
attr_accessor :children_tree
|
21
25
|
|
22
26
|
# Parent node.
|
23
27
|
# @return [Node, nil] the parent node or nil for the root element.
|
24
28
|
attr_accessor :parent
|
25
29
|
|
26
30
|
# Creates a new Node.
|
27
|
-
# @param [String, nil] word the word from which to create this Node and his branch.
|
28
31
|
# @param [Node, nil] parent the parent of this node.
|
29
|
-
def initialize
|
32
|
+
def initialize parent = nil
|
30
33
|
self.parent = parent
|
31
34
|
self.children_tree = {}
|
32
|
-
|
33
|
-
unless word.nil? || word.empty?
|
34
|
-
self.letter = word.slice! 0
|
35
|
-
self.terminal = word.empty?
|
36
|
-
self << word
|
37
|
-
end
|
38
35
|
end
|
39
36
|
|
40
37
|
# String representation of the current node, if it is a terminal node.
|
@@ -52,9 +49,9 @@ module Rambling
|
|
52
49
|
end
|
53
50
|
|
54
51
|
# If the current node is the root node.
|
55
|
-
# @return [Boolean] `
|
52
|
+
# @return [Boolean] `true` only if the node does not have a parent
|
56
53
|
def root?
|
57
|
-
|
54
|
+
!parent
|
58
55
|
end
|
59
56
|
|
60
57
|
# Flag for terminal nodes.
|
@@ -63,23 +60,29 @@ module Rambling
|
|
63
60
|
!!terminal
|
64
61
|
end
|
65
62
|
|
63
|
+
# Force [Node] to be `terminal`
|
64
|
+
# @return [Node] the modified node.
|
65
|
+
def terminal!
|
66
|
+
self.terminal = true
|
67
|
+
self
|
68
|
+
end
|
69
|
+
|
66
70
|
# String representation of the current node.
|
67
71
|
# @return [String] the string representation of the current node.
|
68
72
|
def to_s
|
69
73
|
parent.to_s << letter.to_s
|
70
74
|
end
|
71
75
|
|
72
|
-
protected
|
73
|
-
|
74
|
-
attr_writer :children_tree
|
75
|
-
attr_accessor :terminal
|
76
|
-
|
77
76
|
def letter= new_letter
|
78
77
|
if new_letter
|
79
78
|
@letter = new_letter.to_sym
|
80
79
|
parent[letter] = self if parent
|
81
80
|
end
|
82
81
|
end
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
attr_accessor :terminal
|
83
86
|
end
|
84
87
|
end
|
85
88
|
end
|