rambling-trie 1.0.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile +6 -3
- data/Guardfile +3 -1
- data/README.md +30 -12
- data/Rakefile +8 -0
- data/lib/rambling-trie.rb +2 -0
- data/lib/rambling/trie.rb +48 -26
- data/lib/rambling/trie/comparable.rb +6 -3
- data/lib/rambling/trie/compressible.rb +16 -0
- data/lib/rambling/trie/compressor.rb +39 -24
- data/lib/rambling/trie/configuration.rb +3 -1
- data/lib/rambling/trie/configuration/properties.rb +18 -9
- data/lib/rambling/trie/configuration/provider_collection.rb +38 -17
- data/lib/rambling/trie/container.rb +123 -36
- data/lib/rambling/trie/enumerable.rb +6 -4
- data/lib/rambling/trie/inspectable.rb +2 -0
- data/lib/rambling/trie/invalid_operation.rb +3 -1
- data/lib/rambling/trie/nodes.rb +13 -0
- data/lib/rambling/trie/nodes/compressed.rb +98 -0
- data/lib/rambling/trie/nodes/missing.rb +12 -0
- data/lib/rambling/trie/nodes/node.rb +183 -0
- data/lib/rambling/trie/nodes/raw.rb +82 -0
- data/lib/rambling/trie/readers.rb +3 -1
- data/lib/rambling/trie/readers/plain_text.rb +3 -11
- data/lib/rambling/trie/serializers.rb +3 -1
- data/lib/rambling/trie/serializers/file.rb +2 -0
- data/lib/rambling/trie/serializers/marshal.rb +15 -5
- data/lib/rambling/trie/serializers/yaml.rb +21 -5
- data/lib/rambling/trie/serializers/zip.rb +15 -8
- data/lib/rambling/trie/stringifyable.rb +8 -2
- data/lib/rambling/trie/version.rb +3 -1
- data/rambling-trie.gemspec +21 -10
- data/spec/assets/test_words.es_DO.txt +1 -0
- data/spec/integration/rambling/trie_spec.rb +44 -35
- data/spec/lib/rambling/trie/comparable_spec.rb +8 -15
- data/spec/lib/rambling/trie/compressor_spec.rb +90 -13
- data/spec/lib/rambling/trie/configuration/properties_spec.rb +21 -13
- data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +18 -34
- data/spec/lib/rambling/trie/container_spec.rb +183 -217
- data/spec/lib/rambling/trie/enumerable_spec.rb +14 -9
- data/spec/lib/rambling/trie/inspectable_spec.rb +36 -11
- data/spec/lib/rambling/trie/nodes/compressed_spec.rb +37 -0
- data/spec/lib/rambling/trie/nodes/node_spec.rb +9 -0
- data/spec/lib/rambling/trie/nodes/raw_spec.rb +179 -0
- data/spec/lib/rambling/trie/readers/plain_text_spec.rb +3 -1
- data/spec/lib/rambling/trie/serializers/file_spec.rb +6 -4
- data/spec/lib/rambling/trie/serializers/marshal_spec.rb +5 -7
- data/spec/lib/rambling/trie/serializers/yaml_spec.rb +5 -7
- data/spec/lib/rambling/trie/serializers/zip_spec.rb +18 -20
- data/spec/lib/rambling/trie/stringifyable_spec.rb +14 -11
- data/spec/lib/rambling/trie_spec.rb +18 -11
- data/spec/spec_helper.rb +10 -5
- data/spec/support/config.rb +10 -0
- data/spec/support/helpers/add_word.rb +20 -0
- data/spec/support/helpers/one_line_heredoc.rb +11 -0
- data/spec/support/shared_examples/a_compressible_trie.rb +40 -0
- data/spec/support/shared_examples/a_serializable_trie.rb +10 -6
- data/spec/support/shared_examples/a_serializer.rb +9 -1
- data/spec/support/shared_examples/a_trie_data_structure.rb +2 -0
- data/spec/support/shared_examples/a_trie_node.rb +127 -0
- data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +25 -72
- metadata +42 -31
- data/lib/rambling/trie/compressable.rb +0 -14
- data/lib/rambling/trie/compressed_node.rb +0 -120
- data/lib/rambling/trie/missing_node.rb +0 -8
- data/lib/rambling/trie/node.rb +0 -97
- data/lib/rambling/trie/raw_node.rb +0 -96
- data/spec/lib/rambling/trie/node_spec.rb +0 -86
- data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
- data/spec/support/shared_examples/a_compressable_trie.rb +0 -26
@@ -1,10 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
module Configuration
|
4
6
|
# Collection of configurable providers.
|
5
7
|
class ProviderCollection
|
6
|
-
extend ::Forwardable
|
7
|
-
|
8
8
|
# The name of this provider collection.
|
9
9
|
# @return [String] the name of this provider collection.
|
10
10
|
attr_reader :name
|
@@ -23,13 +23,6 @@ module Rambling
|
|
23
23
|
# cannot be resolved in {ProviderCollection#resolve #resolve}.
|
24
24
|
attr_reader :default
|
25
25
|
|
26
|
-
delegate [
|
27
|
-
:[],
|
28
|
-
:[]=,
|
29
|
-
:keys,
|
30
|
-
:values,
|
31
|
-
] => :providers
|
32
|
-
|
33
26
|
# Creates a new provider collection.
|
34
27
|
# @param [String] name the name for this provider collection.
|
35
28
|
# @param [Hash] providers the configured providers.
|
@@ -52,8 +45,9 @@ module Rambling
|
|
52
45
|
end
|
53
46
|
|
54
47
|
def default= provider
|
55
|
-
|
56
|
-
raise ArgumentError,
|
48
|
+
unless contains? provider
|
49
|
+
raise ArgumentError,
|
50
|
+
"default #{name} should be part of configured #{name}s"
|
57
51
|
end
|
58
52
|
|
59
53
|
@default = provider
|
@@ -71,30 +65,57 @@ module Rambling
|
|
71
65
|
# @return [Object] the provider corresponding to the file extension in
|
72
66
|
# this provider collection. {#default} if not found.
|
73
67
|
def resolve filepath
|
74
|
-
providers[
|
68
|
+
providers[file_format filepath] || default
|
75
69
|
end
|
76
70
|
|
77
71
|
# Resets the provider collection to the initial values.
|
78
72
|
def reset
|
79
73
|
providers.clear
|
80
|
-
configured_providers.each { |k, v|
|
74
|
+
configured_providers.each { |k, v| self[k] = v }
|
81
75
|
self.default = configured_default
|
82
76
|
end
|
83
77
|
|
78
|
+
# Get the formats that have a provider configured in this collection.
|
79
|
+
# @return [Array<Symbol>] the formats with a provider in this collection.
|
80
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-keys
|
81
|
+
# Hash#keys
|
82
|
+
def formats
|
83
|
+
providers.keys
|
84
|
+
end
|
85
|
+
|
86
|
+
# Get provider corresponding to a given format.
|
87
|
+
# @param [Symbol] format the format to search for in the collection.
|
88
|
+
# @return [Object] the provider corresponding to that format.
|
89
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
|
90
|
+
# Hash#[]
|
91
|
+
def [] format
|
92
|
+
providers[format]
|
93
|
+
end
|
94
|
+
|
84
95
|
private
|
85
96
|
|
86
97
|
attr_reader :configured_providers, :configured_default
|
87
98
|
|
88
|
-
def format
|
99
|
+
def []= format, instance
|
100
|
+
providers[format] = instance
|
101
|
+
end
|
102
|
+
|
103
|
+
def values
|
104
|
+
providers.values
|
105
|
+
end
|
106
|
+
|
107
|
+
def file_format filepath
|
89
108
|
format = File.extname filepath
|
90
109
|
format.slice! 0
|
91
110
|
format.to_sym
|
92
111
|
end
|
93
112
|
|
94
|
-
def
|
95
|
-
|
96
|
-
(providers.
|
113
|
+
def contains? provider
|
114
|
+
provider.nil? ||
|
115
|
+
(providers.any? && provider_instances.include?(provider))
|
97
116
|
end
|
117
|
+
|
118
|
+
alias_method :provider_instances, :values
|
98
119
|
end
|
99
120
|
end
|
100
121
|
end
|
@@ -1,32 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
# Wrapper on top of trie data structure.
|
4
6
|
class Container
|
5
|
-
extend ::Forwardable
|
6
7
|
include ::Enumerable
|
7
8
|
|
8
|
-
delegate [
|
9
|
-
:[],
|
10
|
-
:as_word,
|
11
|
-
:children,
|
12
|
-
:children_tree,
|
13
|
-
:compressed?,
|
14
|
-
:each,
|
15
|
-
:to_a,
|
16
|
-
:has_key?,
|
17
|
-
:inspect,
|
18
|
-
:letter,
|
19
|
-
:parent,
|
20
|
-
:size,
|
21
|
-
:to_s
|
22
|
-
] => :root
|
23
|
-
|
24
9
|
# The root node of this trie.
|
25
|
-
# @return [Node] the root node of this trie.
|
10
|
+
# @return [Nodes::Node] the root node of this trie.
|
26
11
|
attr_reader :root
|
27
12
|
|
28
13
|
# Creates a new trie.
|
29
|
-
# @param [Node] root the root node for the trie
|
14
|
+
# @param [Nodes::Node] root the root node for the trie
|
30
15
|
# @param [Compressor] compressor responsible for compressing the trie
|
31
16
|
# @yield [Container] the trie just created.
|
32
17
|
def initialize root, compressor
|
@@ -36,32 +21,53 @@ module Rambling
|
|
36
21
|
yield self if block_given?
|
37
22
|
end
|
38
23
|
|
39
|
-
# Adds a word to the trie
|
24
|
+
# Adds a word to the trie.
|
40
25
|
# @param [String] word the word to add the branch from.
|
41
|
-
# @return [Node] the just added branch's root node.
|
26
|
+
# @return [Nodes::Node] the just added branch's root node.
|
42
27
|
# @raise [InvalidOperation] if the trie is already compressed.
|
43
|
-
# @see
|
44
|
-
# @see
|
45
|
-
# @note Avoids altering the contents of the word variable.
|
28
|
+
# @see Nodes::Raw#add
|
29
|
+
# @see Nodes::Compressed#add
|
46
30
|
def add word
|
47
|
-
root.add word
|
31
|
+
root.add char_symbols word
|
32
|
+
end
|
33
|
+
|
34
|
+
# Adds all provided words to the trie.
|
35
|
+
# @param [Array<String>] words the words to add the branch from.
|
36
|
+
# @return [Array<Nodes::Node>] the collection of nodes added.
|
37
|
+
# @raise [InvalidOperation] if the trie is already compressed.
|
38
|
+
# @see Nodes::Raw#add
|
39
|
+
# @see Nodes::Compressed#add
|
40
|
+
def concat words
|
41
|
+
words.map { |word| add word }
|
48
42
|
end
|
49
43
|
|
50
|
-
# Compresses the existing
|
51
|
-
# the trie as compressed.
|
44
|
+
# Compresses the existing trie using redundant node elimination. Marks
|
45
|
+
# the trie as compressed. Does nothing if the trie has already been
|
46
|
+
# compressed.
|
52
47
|
# @return [Container] self
|
53
|
-
# @note
|
48
|
+
# @note This method replaces the root {Nodes::Raw Raw} node with a
|
49
|
+
# {Nodes::Compressed Compressed} version of it.
|
54
50
|
def compress!
|
55
|
-
self.root =
|
51
|
+
self.root = compress_root unless root.compressed?
|
56
52
|
self
|
57
53
|
end
|
58
54
|
|
55
|
+
# Compresses the existing trie using redundant node elimination. Returns
|
56
|
+
# a new trie with the compressed root.
|
57
|
+
# @return [Container] A new {Container} with the {Nodes::Compressed
|
58
|
+
# Compressed} root node or self if the trie has already been
|
59
|
+
# compressed.
|
60
|
+
def compress
|
61
|
+
return self if root.compressed?
|
62
|
+
Rambling::Trie::Container.new compress_root, compressor
|
63
|
+
end
|
64
|
+
|
59
65
|
# Checks if a path for a word or partial word exists in the trie.
|
60
66
|
# @param [String] word the word or partial word to look for in the trie.
|
61
67
|
# @return [Boolean] `true` if the word or partial word is found, `false`
|
62
68
|
# otherwise.
|
63
|
-
# @see
|
64
|
-
# @see
|
69
|
+
# @see Nodes::Raw#partial_word?
|
70
|
+
# @see Nodes::Compressed#partial_word?
|
65
71
|
def partial_word? word = ''
|
66
72
|
root.partial_word? word.chars
|
67
73
|
end
|
@@ -70,8 +76,8 @@ module Rambling
|
|
70
76
|
# @param [String] word the word to look for in the trie.
|
71
77
|
# @return [Boolean] `true` only if the word is found and the last
|
72
78
|
# character corresponds to a terminal node, `false` otherwise.
|
73
|
-
# @see
|
74
|
-
# @see
|
79
|
+
# @see Nodes::Raw#word?
|
80
|
+
# @see Nodes::Compressed#word?
|
75
81
|
def word? word = ''
|
76
82
|
root.word? word.chars
|
77
83
|
end
|
@@ -80,8 +86,8 @@ module Rambling
|
|
80
86
|
# @param [String] word the word to look for in the trie.
|
81
87
|
# @return [Array<String>] all the words contained in the trie that start
|
82
88
|
# with the specified characters.
|
83
|
-
# @see
|
84
|
-
# @see
|
89
|
+
# @see Nodes::Raw#scan
|
90
|
+
# @see Nodes::Compressed#scan
|
85
91
|
def scan word = ''
|
86
92
|
root.scan(word.chars).to_a
|
87
93
|
end
|
@@ -92,7 +98,7 @@ module Rambling
|
|
92
98
|
# @return [Enumerator<String>] all the words in the given string that
|
93
99
|
# match a word in the trie.
|
94
100
|
# @yield [String] each word found in phrase.
|
95
|
-
# @see Node#words_within
|
101
|
+
# @see Nodes::Node#words_within
|
96
102
|
def words_within phrase
|
97
103
|
words_within_root(phrase).to_a
|
98
104
|
end
|
@@ -113,10 +119,81 @@ module Rambling
|
|
113
119
|
root == other.root
|
114
120
|
end
|
115
121
|
|
122
|
+
# Iterates over the words contained in the trie.
|
123
|
+
# @yield [String] the words contained in this trie node.
|
124
|
+
def each
|
125
|
+
return enum_for :each unless block_given?
|
126
|
+
|
127
|
+
root.each do |word|
|
128
|
+
yield word
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# @return [String] a string representation of the container.
|
133
|
+
def inspect
|
134
|
+
"#<#{self.class.name} root: #{root.inspect}>"
|
135
|
+
end
|
136
|
+
|
137
|
+
# Get {Nodes::Node Node} corresponding to a given letter.
|
138
|
+
# @param [Symbol] letter the letter to search for in the root node.
|
139
|
+
# @return [Nodes::Node] the node corresponding to that letter.
|
140
|
+
# @see Nodes::Node#[]
|
141
|
+
def [] letter
|
142
|
+
root[letter]
|
143
|
+
end
|
144
|
+
|
145
|
+
# Root node's child nodes.
|
146
|
+
# @return [Array<Nodes::Node>] the array of children nodes contained in
|
147
|
+
# the root node.
|
148
|
+
# @see Nodes::Node#children
|
149
|
+
def children
|
150
|
+
root.children
|
151
|
+
end
|
152
|
+
|
153
|
+
# Root node's children tree.
|
154
|
+
# @return [Hash<Symbol, Nodes::Node>] the hash of children nodes contained in
|
155
|
+
# the root node.
|
156
|
+
# @see Nodes::Node#children_tree
|
157
|
+
def children_tree
|
158
|
+
root.children_tree
|
159
|
+
end
|
160
|
+
|
161
|
+
# Indicates if the root {Nodes::Node Node} is
|
162
|
+
# compressed or not.
|
163
|
+
# @return [Boolean] `true` if the root node reports itself as compressed,
|
164
|
+
# `false` otherwise.
|
165
|
+
def compressed?
|
166
|
+
root.compressed?
|
167
|
+
end
|
168
|
+
|
169
|
+
# Array of words contained in the root {Nodes::Node Node}.
|
170
|
+
# @return [Array<String>] all words contained in this trie.
|
171
|
+
# @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-to_a
|
172
|
+
# Enumerable#to_a
|
173
|
+
def to_a
|
174
|
+
root.to_a
|
175
|
+
end
|
176
|
+
|
177
|
+
# Check if a letter is part of the root {Nodes::Node}'s children tree.
|
178
|
+
# @param [Symbol] letter the letter to search for in the root node.
|
179
|
+
# @return [Boolean] whether the letter is contained or not.
|
180
|
+
# @see Nodes::Node#key?
|
181
|
+
def key? letter
|
182
|
+
root.key? letter
|
183
|
+
end
|
184
|
+
|
185
|
+
# Size of the root {Nodes::Node Node}'s children tree.
|
186
|
+
# @return [Integer] the number of letters in the root node.
|
187
|
+
def size
|
188
|
+
root.size
|
189
|
+
end
|
190
|
+
|
116
191
|
alias_method :include?, :word?
|
117
192
|
alias_method :match?, :partial_word?
|
118
193
|
alias_method :words, :scan
|
119
194
|
alias_method :<<, :add
|
195
|
+
alias_method :has_key?, :key?
|
196
|
+
alias_method :has_letter?, :key?
|
120
197
|
|
121
198
|
private
|
122
199
|
|
@@ -134,6 +211,16 @@ module Rambling
|
|
134
211
|
end
|
135
212
|
end
|
136
213
|
end
|
214
|
+
|
215
|
+
def compress_root
|
216
|
+
compressor.compress root
|
217
|
+
end
|
218
|
+
|
219
|
+
def char_symbols word
|
220
|
+
symbols = []
|
221
|
+
word.reverse.each_char { |c| symbols << c.to_sym }
|
222
|
+
symbols
|
223
|
+
end
|
137
224
|
end
|
138
225
|
end
|
139
226
|
end
|
@@ -1,12 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
# Provides enumerable behavior to the trie data structure.
|
4
6
|
module Enumerable
|
5
7
|
include ::Enumerable
|
6
8
|
|
7
|
-
# Returns number of words contained in the trie
|
8
|
-
#
|
9
|
-
#
|
9
|
+
# Returns the number of words contained in the trie.
|
10
|
+
# @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-count
|
11
|
+
# Enumerable#count
|
10
12
|
alias_method :size, :count
|
11
13
|
|
12
14
|
# Iterates over the words contained in the trie.
|
@@ -16,7 +18,7 @@ module Rambling
|
|
16
18
|
|
17
19
|
yield as_word if terminal?
|
18
20
|
|
19
|
-
|
21
|
+
children_tree.each_value do |child|
|
20
22
|
child.each do |word|
|
21
23
|
yield word
|
22
24
|
end
|
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
# Raised when trying to execute an invalid operation on a trie data
|
4
6
|
# structure.
|
5
|
-
class InvalidOperation <
|
7
|
+
class InvalidOperation < RuntimeError
|
6
8
|
# Creates a new {InvalidOperation InvalidOperation} exception.
|
7
9
|
# @param [String, nil] message the exception message.
|
8
10
|
def initialize message = nil
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
|
4
|
+
module Trie
|
5
|
+
module Nodes
|
6
|
+
# A representation of a node in a compressed trie data structure.
|
7
|
+
class Compressed < Rambling::Trie::Nodes::Node
|
8
|
+
# Always raises {Rambling::Trie::InvalidOperation InvalidOperation} when
|
9
|
+
# trying to add a word to the current compressed trie node.
|
10
|
+
# @param [String] _ the word to add to the trie.
|
11
|
+
# @raise [InvalidOperation] if the trie is already compressed.
|
12
|
+
# @return [nil] this never returns as it always raises an exception.
|
13
|
+
def add _
|
14
|
+
raise Rambling::Trie::InvalidOperation,
|
15
|
+
'Cannot add word to compressed trie'
|
16
|
+
end
|
17
|
+
|
18
|
+
# Always returns `true` for a compressed node.
|
19
|
+
# @return [Boolean] always `true` for a compressed node.
|
20
|
+
def compressed?
|
21
|
+
true
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def partial_word_chars? chars
|
27
|
+
child = children_tree[chars.first.to_sym]
|
28
|
+
return false unless child
|
29
|
+
|
30
|
+
child_letter = child.letter.to_s
|
31
|
+
|
32
|
+
if chars.size >= child_letter.size
|
33
|
+
letter = chars.slice!(0, child_letter.size).join
|
34
|
+
return child.partial_word? chars if child_letter == letter
|
35
|
+
end
|
36
|
+
|
37
|
+
letter = chars.join
|
38
|
+
child_letter = child_letter.slice 0, letter.size
|
39
|
+
child_letter == letter
|
40
|
+
end
|
41
|
+
|
42
|
+
def word_chars? chars
|
43
|
+
letter = chars.slice! 0
|
44
|
+
letter_sym = letter.to_sym
|
45
|
+
|
46
|
+
child = children_tree[letter_sym]
|
47
|
+
return false unless child
|
48
|
+
|
49
|
+
loop do
|
50
|
+
return child.word? chars if letter_sym == child.letter
|
51
|
+
|
52
|
+
break if chars.empty?
|
53
|
+
|
54
|
+
letter << chars.slice!(0)
|
55
|
+
letter_sym = letter.to_sym
|
56
|
+
end
|
57
|
+
|
58
|
+
false
|
59
|
+
end
|
60
|
+
|
61
|
+
def closest_node chars
|
62
|
+
child = children_tree[chars.first.to_sym]
|
63
|
+
return missing unless child
|
64
|
+
|
65
|
+
child_letter = child.letter.to_s
|
66
|
+
|
67
|
+
if chars.size >= child_letter.size
|
68
|
+
letter = chars.slice!(0, child_letter.size).join
|
69
|
+
return child.scan chars if child_letter == letter
|
70
|
+
end
|
71
|
+
|
72
|
+
letter = chars.join
|
73
|
+
child_letter = child_letter.slice 0, letter.size
|
74
|
+
|
75
|
+
child_letter == letter ? child : missing
|
76
|
+
end
|
77
|
+
|
78
|
+
def children_match_prefix chars
|
79
|
+
return enum_for :children_match_prefix, chars unless block_given?
|
80
|
+
|
81
|
+
return if chars.empty?
|
82
|
+
|
83
|
+
child = children_tree[chars.first.to_sym]
|
84
|
+
return unless child
|
85
|
+
|
86
|
+
child_letter = child.letter.to_s
|
87
|
+
letter = chars.slice!(0, child_letter.size).join
|
88
|
+
|
89
|
+
return unless child_letter == letter
|
90
|
+
|
91
|
+
child.match_prefix chars do |word|
|
92
|
+
yield word
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|