rambling-trie 1.0.2 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/Gemfile +6 -3
- data/Guardfile +3 -1
- data/README.md +30 -12
- data/Rakefile +8 -0
- data/lib/rambling-trie.rb +2 -0
- data/lib/rambling/trie.rb +48 -26
- data/lib/rambling/trie/comparable.rb +6 -3
- data/lib/rambling/trie/compressible.rb +16 -0
- data/lib/rambling/trie/compressor.rb +39 -24
- data/lib/rambling/trie/configuration.rb +3 -1
- data/lib/rambling/trie/configuration/properties.rb +18 -9
- data/lib/rambling/trie/configuration/provider_collection.rb +38 -17
- data/lib/rambling/trie/container.rb +123 -36
- data/lib/rambling/trie/enumerable.rb +6 -4
- data/lib/rambling/trie/inspectable.rb +2 -0
- data/lib/rambling/trie/invalid_operation.rb +3 -1
- data/lib/rambling/trie/nodes.rb +13 -0
- data/lib/rambling/trie/nodes/compressed.rb +98 -0
- data/lib/rambling/trie/nodes/missing.rb +12 -0
- data/lib/rambling/trie/nodes/node.rb +183 -0
- data/lib/rambling/trie/nodes/raw.rb +82 -0
- data/lib/rambling/trie/readers.rb +3 -1
- data/lib/rambling/trie/readers/plain_text.rb +3 -11
- data/lib/rambling/trie/serializers.rb +3 -1
- data/lib/rambling/trie/serializers/file.rb +2 -0
- data/lib/rambling/trie/serializers/marshal.rb +15 -5
- data/lib/rambling/trie/serializers/yaml.rb +21 -5
- data/lib/rambling/trie/serializers/zip.rb +15 -8
- data/lib/rambling/trie/stringifyable.rb +8 -2
- data/lib/rambling/trie/version.rb +3 -1
- data/rambling-trie.gemspec +21 -10
- data/spec/assets/test_words.es_DO.txt +1 -0
- data/spec/integration/rambling/trie_spec.rb +44 -35
- data/spec/lib/rambling/trie/comparable_spec.rb +8 -15
- data/spec/lib/rambling/trie/compressor_spec.rb +90 -13
- data/spec/lib/rambling/trie/configuration/properties_spec.rb +21 -13
- data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +18 -34
- data/spec/lib/rambling/trie/container_spec.rb +183 -217
- data/spec/lib/rambling/trie/enumerable_spec.rb +14 -9
- data/spec/lib/rambling/trie/inspectable_spec.rb +36 -11
- data/spec/lib/rambling/trie/nodes/compressed_spec.rb +37 -0
- data/spec/lib/rambling/trie/nodes/node_spec.rb +9 -0
- data/spec/lib/rambling/trie/nodes/raw_spec.rb +179 -0
- data/spec/lib/rambling/trie/readers/plain_text_spec.rb +3 -1
- data/spec/lib/rambling/trie/serializers/file_spec.rb +6 -4
- data/spec/lib/rambling/trie/serializers/marshal_spec.rb +5 -7
- data/spec/lib/rambling/trie/serializers/yaml_spec.rb +5 -7
- data/spec/lib/rambling/trie/serializers/zip_spec.rb +18 -20
- data/spec/lib/rambling/trie/stringifyable_spec.rb +14 -11
- data/spec/lib/rambling/trie_spec.rb +18 -11
- data/spec/spec_helper.rb +10 -5
- data/spec/support/config.rb +10 -0
- data/spec/support/helpers/add_word.rb +20 -0
- data/spec/support/helpers/one_line_heredoc.rb +11 -0
- data/spec/support/shared_examples/a_compressible_trie.rb +40 -0
- data/spec/support/shared_examples/a_serializable_trie.rb +10 -6
- data/spec/support/shared_examples/a_serializer.rb +9 -1
- data/spec/support/shared_examples/a_trie_data_structure.rb +2 -0
- data/spec/support/shared_examples/a_trie_node.rb +127 -0
- data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +25 -72
- metadata +42 -31
- data/lib/rambling/trie/compressable.rb +0 -14
- data/lib/rambling/trie/compressed_node.rb +0 -120
- data/lib/rambling/trie/missing_node.rb +0 -8
- data/lib/rambling/trie/node.rb +0 -97
- data/lib/rambling/trie/raw_node.rb +0 -96
- data/spec/lib/rambling/trie/node_spec.rb +0 -86
- data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
- data/spec/support/shared_examples/a_compressable_trie.rb +0 -26
@@ -1,10 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
module Configuration
|
4
6
|
# Collection of configurable providers.
|
5
7
|
class ProviderCollection
|
6
|
-
extend ::Forwardable
|
7
|
-
|
8
8
|
# The name of this provider collection.
|
9
9
|
# @return [String] the name of this provider collection.
|
10
10
|
attr_reader :name
|
@@ -23,13 +23,6 @@ module Rambling
|
|
23
23
|
# cannot be resolved in {ProviderCollection#resolve #resolve}.
|
24
24
|
attr_reader :default
|
25
25
|
|
26
|
-
delegate [
|
27
|
-
:[],
|
28
|
-
:[]=,
|
29
|
-
:keys,
|
30
|
-
:values,
|
31
|
-
] => :providers
|
32
|
-
|
33
26
|
# Creates a new provider collection.
|
34
27
|
# @param [String] name the name for this provider collection.
|
35
28
|
# @param [Hash] providers the configured providers.
|
@@ -52,8 +45,9 @@ module Rambling
|
|
52
45
|
end
|
53
46
|
|
54
47
|
def default= provider
|
55
|
-
|
56
|
-
raise ArgumentError,
|
48
|
+
unless contains? provider
|
49
|
+
raise ArgumentError,
|
50
|
+
"default #{name} should be part of configured #{name}s"
|
57
51
|
end
|
58
52
|
|
59
53
|
@default = provider
|
@@ -71,30 +65,57 @@ module Rambling
|
|
71
65
|
# @return [Object] the provider corresponding to the file extension in
|
72
66
|
# this provider collection. {#default} if not found.
|
73
67
|
def resolve filepath
|
74
|
-
providers[
|
68
|
+
providers[file_format filepath] || default
|
75
69
|
end
|
76
70
|
|
77
71
|
# Resets the provider collection to the initial values.
|
78
72
|
def reset
|
79
73
|
providers.clear
|
80
|
-
configured_providers.each { |k, v|
|
74
|
+
configured_providers.each { |k, v| self[k] = v }
|
81
75
|
self.default = configured_default
|
82
76
|
end
|
83
77
|
|
78
|
+
# Get the formats (keys) configured in this provider collection.
|
79
|
+
# @return [Array<Symbol>] the formats configured in this collection.
|
80
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-keys
|
81
|
+
# Hash#keys
|
82
|
+
def formats
|
83
|
+
providers.keys
|
84
|
+
end
|
85
|
+
|
86
|
+
# Get provider corresponding to a given format.
|
87
|
+
# @param [Symbol] format the format to search for in the collection.
|
88
|
+
# @return [Object] the provider corresponding to that format.
|
89
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
|
90
|
+
# Hash#[]
|
91
|
+
def [] format
|
92
|
+
providers[format]
|
93
|
+
end
|
94
|
+
|
84
95
|
private
|
85
96
|
|
86
97
|
attr_reader :configured_providers, :configured_default
|
87
98
|
|
88
|
-
def format
|
99
|
+
def []= format, instance
|
100
|
+
providers[format] = instance
|
101
|
+
end
|
102
|
+
|
103
|
+
def values
|
104
|
+
providers.values
|
105
|
+
end
|
106
|
+
|
107
|
+
def file_format filepath
|
89
108
|
format = File.extname filepath
|
90
109
|
format.slice! 0
|
91
110
|
format.to_sym
|
92
111
|
end
|
93
112
|
|
94
|
-
def
|
95
|
-
|
96
|
-
(providers.
|
113
|
+
def contains? provider
|
114
|
+
provider.nil? ||
|
115
|
+
(providers.any? && provider_instances.include?(provider))
|
97
116
|
end
|
117
|
+
|
118
|
+
alias_method :provider_instances, :values
|
98
119
|
end
|
99
120
|
end
|
100
121
|
end
|
@@ -1,32 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
# Wrapper on top of trie data structure.
|
4
6
|
class Container
|
5
|
-
extend ::Forwardable
|
6
7
|
include ::Enumerable
|
7
8
|
|
8
|
-
delegate [
|
9
|
-
:[],
|
10
|
-
:as_word,
|
11
|
-
:children,
|
12
|
-
:children_tree,
|
13
|
-
:compressed?,
|
14
|
-
:each,
|
15
|
-
:to_a,
|
16
|
-
:has_key?,
|
17
|
-
:inspect,
|
18
|
-
:letter,
|
19
|
-
:parent,
|
20
|
-
:size,
|
21
|
-
:to_s
|
22
|
-
] => :root
|
23
|
-
|
24
9
|
# The root node of this trie.
|
25
|
-
# @return [Node] the root node of this trie.
|
10
|
+
# @return [Nodes::Node] the root node of this trie.
|
26
11
|
attr_reader :root
|
27
12
|
|
28
13
|
# Creates a new trie.
|
29
|
-
# @param [Node] root the root node for the trie
|
14
|
+
# @param [Nodes::Node] root the root node for the trie
|
30
15
|
# @param [Compressor] compressor responsible for compressing the trie
|
31
16
|
# @yield [Container] the trie just created.
|
32
17
|
def initialize root, compressor
|
@@ -36,32 +21,53 @@ module Rambling
|
|
36
21
|
yield self if block_given?
|
37
22
|
end
|
38
23
|
|
39
|
-
# Adds a word to the trie
|
24
|
+
# Adds a word to the trie.
|
40
25
|
# @param [String] word the word to add the branch from.
|
41
|
-
# @return [Node] the just added branch's root node.
|
26
|
+
# @return [Nodes::Node] the just added branch's root node.
|
42
27
|
# @raise [InvalidOperation] if the trie is already compressed.
|
43
|
-
# @see
|
44
|
-
# @see
|
45
|
-
# @note Avoids altering the contents of the word variable.
|
28
|
+
# @see Nodes::Raw#add
|
29
|
+
# @see Nodes::Compressed#add
|
46
30
|
def add word
|
47
|
-
root.add word
|
31
|
+
root.add char_symbols word
|
32
|
+
end
|
33
|
+
|
34
|
+
# Adds all provided words to the trie.
|
35
|
+
# @param [Array<String>] words the words to add the branch from.
|
36
|
+
# @return [Array<Nodes::Node>] the collection of nodes added.
|
37
|
+
# @raise [InvalidOperation] if the trie is already compressed.
|
38
|
+
# @see Nodes::Raw#add
|
39
|
+
# @see Nodes::Compressed#add
|
40
|
+
def concat words
|
41
|
+
words.map { |word| add word }
|
48
42
|
end
|
49
43
|
|
50
|
-
# Compresses the existing
|
51
|
-
# the trie as compressed.
|
44
|
+
# Compresses the existing trie using redundant node elimination. Marks
|
45
|
+
# the trie as compressed. Does nothing if the trie has already been
|
46
|
+
# compressed.
|
52
47
|
# @return [Container] self
|
53
|
-
# @note
|
48
|
+
# @note This method replaces the root {Nodes::Raw Raw} node with a
|
49
|
+
# {Nodes::Compressed Compressed} version of it.
|
54
50
|
def compress!
|
55
|
-
self.root =
|
51
|
+
self.root = compress_root unless root.compressed?
|
56
52
|
self
|
57
53
|
end
|
58
54
|
|
55
|
+
# Compresses the existing trie using redundant node elimination. Returns
|
56
|
+
# a new trie with the compressed root.
|
57
|
+
# @return [Container] A new {Container} with the {Nodes::Compressed
|
58
|
+
# Compressed} root node or self if the trie has already been
|
59
|
+
# compressed.
|
60
|
+
def compress
|
61
|
+
return self if root.compressed?
|
62
|
+
Rambling::Trie::Container.new compress_root, compressor
|
63
|
+
end
|
64
|
+
|
59
65
|
# Checks if a path for a word or partial word exists in the trie.
|
60
66
|
# @param [String] word the word or partial word to look for in the trie.
|
61
67
|
# @return [Boolean] `true` if the word or partial word is found, `false`
|
62
68
|
# otherwise.
|
63
|
-
# @see
|
64
|
-
# @see
|
69
|
+
# @see Nodes::Raw#partial_word?
|
70
|
+
# @see Nodes::Compressed#partial_word?
|
65
71
|
def partial_word? word = ''
|
66
72
|
root.partial_word? word.chars
|
67
73
|
end
|
@@ -70,8 +76,8 @@ module Rambling
|
|
70
76
|
# @param [String] word the word to look for in the trie.
|
71
77
|
# @return [Boolean] `true` only if the word is found and the last
|
72
78
|
# character corresponds to a terminal node, `false` otherwise.
|
73
|
-
# @see
|
74
|
-
# @see
|
79
|
+
# @see Nodes::Raw#word?
|
80
|
+
# @see Nodes::Compressed#word?
|
75
81
|
def word? word = ''
|
76
82
|
root.word? word.chars
|
77
83
|
end
|
@@ -80,8 +86,8 @@ module Rambling
|
|
80
86
|
# @param [String] word the word to look for in the trie.
|
81
87
|
# @return [Array<String>] all the words contained in the trie that start
|
82
88
|
# with the specified characters.
|
83
|
-
# @see
|
84
|
-
# @see
|
89
|
+
# @see Nodes::Raw#scan
|
90
|
+
# @see Nodes::Compressed#scan
|
85
91
|
def scan word = ''
|
86
92
|
root.scan(word.chars).to_a
|
87
93
|
end
|
@@ -92,7 +98,7 @@ module Rambling
|
|
92
98
|
# @return [Enumerator<String>] all the words in the given string that
|
93
99
|
# match a word in the trie.
|
94
100
|
# @yield [String] each word found in phrase.
|
95
|
-
# @see Node#words_within
|
101
|
+
# @see Nodes::Node#words_within
|
96
102
|
def words_within phrase
|
97
103
|
words_within_root(phrase).to_a
|
98
104
|
end
|
@@ -113,10 +119,81 @@ module Rambling
|
|
113
119
|
root == other.root
|
114
120
|
end
|
115
121
|
|
122
|
+
# Iterates over the words contained in the trie.
|
123
|
+
# @yield [String] the words contained in this trie node.
|
124
|
+
def each
|
125
|
+
return enum_for :each unless block_given?
|
126
|
+
|
127
|
+
root.each do |word|
|
128
|
+
yield word
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# @return [String] a string representation of the container.
|
133
|
+
def inspect
|
134
|
+
"#<#{self.class.name} root: #{root.inspect}>"
|
135
|
+
end
|
136
|
+
|
137
|
+
# Get {Nodes::Node Node} corresponding to a given letter.
|
138
|
+
# @param [Symbol] letter the letter to search for in the root node.
|
139
|
+
# @return [Nodes::Node] the node corresponding to that letter.
|
140
|
+
# @see Nodes::Node#[]
|
141
|
+
def [] letter
|
142
|
+
root[letter]
|
143
|
+
end
|
144
|
+
|
145
|
+
# Root node's child nodes.
|
146
|
+
# @return [Array<Nodes::Node>] the array of children nodes contained in
|
147
|
+
# the root node.
|
148
|
+
# @see Nodes::Node#children
|
149
|
+
def children
|
150
|
+
root.children
|
151
|
+
end
|
152
|
+
|
153
|
+
# Root node's children tree.
|
154
|
+
# @return [Hash<Symbol, Nodes::Node>] the children tree hash contained in
|
155
|
+
# the root node.
|
156
|
+
# @see Nodes::Node#children_tree
|
157
|
+
def children_tree
|
158
|
+
root.children_tree
|
159
|
+
end
|
160
|
+
|
161
|
+
# Indicates if the root {Nodes::Node Node} has already been
|
162
|
+
# compressed or not.
|
163
|
+
# @return [Boolean] `true` if the root node is compressed,
|
164
|
+
# `false` otherwise.
|
165
|
+
def compressed?
|
166
|
+
root.compressed?
|
167
|
+
end
|
168
|
+
|
169
|
+
# Array of words contained in the root {Nodes::Node Node}.
|
170
|
+
# @return [Array<String>] all words contained in this trie.
|
171
|
+
# @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-to_a
|
172
|
+
# Enumerable#to_a
|
173
|
+
def to_a
|
174
|
+
root.to_a
|
175
|
+
end
|
176
|
+
|
177
|
+
# Check if a letter is part of the root {Nodes::Node}'s children tree.
|
178
|
+
# @param [Symbol] letter the letter to search for in the root node.
|
179
|
+
# @return [Boolean] whether the letter is contained or not.
|
180
|
+
# @see Nodes::Node#key?
|
181
|
+
def key? letter
|
182
|
+
root.key? letter
|
183
|
+
end
|
184
|
+
|
185
|
+
# Size of the Root {Nodes::Node Node}'s children tree.
|
186
|
+
# @return [Integer] the number of letters in the root node.
|
187
|
+
def size
|
188
|
+
root.size
|
189
|
+
end
|
190
|
+
|
116
191
|
alias_method :include?, :word?
|
117
192
|
alias_method :match?, :partial_word?
|
118
193
|
alias_method :words, :scan
|
119
194
|
alias_method :<<, :add
|
195
|
+
alias_method :has_key?, :key?
|
196
|
+
alias_method :has_letter?, :key?
|
120
197
|
|
121
198
|
private
|
122
199
|
|
@@ -134,6 +211,16 @@ module Rambling
|
|
134
211
|
end
|
135
212
|
end
|
136
213
|
end
|
214
|
+
|
215
|
+
def compress_root
|
216
|
+
compressor.compress root
|
217
|
+
end
|
218
|
+
|
219
|
+
def char_symbols word
|
220
|
+
symbols = []
|
221
|
+
word.reverse.each_char { |c| symbols << c.to_sym }
|
222
|
+
symbols
|
223
|
+
end
|
137
224
|
end
|
138
225
|
end
|
139
226
|
end
|
@@ -1,12 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
# Provides enumerable behavior to the trie data structure.
|
4
6
|
module Enumerable
|
5
7
|
include ::Enumerable
|
6
8
|
|
7
|
-
# Returns number of words contained in the trie
|
8
|
-
#
|
9
|
-
#
|
9
|
+
# Returns number of words contained in the trie
|
10
|
+
# @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-count
|
11
|
+
# Enumerable#count
|
10
12
|
alias_method :size, :count
|
11
13
|
|
12
14
|
# Iterates over the words contained in the trie.
|
@@ -16,7 +18,7 @@ module Rambling
|
|
16
18
|
|
17
19
|
yield as_word if terminal?
|
18
20
|
|
19
|
-
|
21
|
+
children_tree.each_value do |child|
|
20
22
|
child.each do |word|
|
21
23
|
yield word
|
22
24
|
end
|
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
# Raised when trying to execute an invalid operation on a trie data
|
4
6
|
# structure.
|
5
|
-
class InvalidOperation <
|
7
|
+
class InvalidOperation < RuntimeError
|
6
8
|
# Creates a new {InvalidOperation InvalidOperation} exception.
|
7
9
|
# @param [String, nil] message the exception message.
|
8
10
|
def initialize message = nil
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
|
4
|
+
module Trie
|
5
|
+
module Nodes
|
6
|
+
# A representation of a node in a compressed trie data structure.
|
7
|
+
class Compressed < Rambling::Trie::Nodes::Node
|
8
|
+
# Always raises {Rambling::Trie::InvalidOperation InvalidOperation} when
|
9
|
+
# trying to add a word to the current compressed trie node
|
10
|
+
# @param [Array<Symbol>] _ the reversed characters of the word to add to the trie.
|
11
|
+
# @raise [InvalidOperation] if the trie is already compressed.
|
12
|
+
# @return [nil] this never returns as it always raises an exception.
|
13
|
+
def add _
|
14
|
+
raise Rambling::Trie::InvalidOperation,
|
15
|
+
'Cannot add word to compressed trie'
|
16
|
+
end
|
17
|
+
|
18
|
+
# Always returns `true` for a compressed node.
|
19
|
+
# @return [Boolean] always `true` for a compressed node.
|
20
|
+
def compressed?
|
21
|
+
true
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def partial_word_chars? chars
|
27
|
+
child = children_tree[chars.first.to_sym]
|
28
|
+
return false unless child
|
29
|
+
|
30
|
+
child_letter = child.letter.to_s
|
31
|
+
|
32
|
+
if chars.size >= child_letter.size
|
33
|
+
letter = chars.slice!(0, child_letter.size).join
|
34
|
+
return child.partial_word? chars if child_letter == letter
|
35
|
+
end
|
36
|
+
|
37
|
+
letter = chars.join
|
38
|
+
child_letter = child_letter.slice 0, letter.size
|
39
|
+
child_letter == letter
|
40
|
+
end
|
41
|
+
|
42
|
+
def word_chars? chars
|
43
|
+
letter = chars.slice! 0
|
44
|
+
letter_sym = letter.to_sym
|
45
|
+
|
46
|
+
child = children_tree[letter_sym]
|
47
|
+
return false unless child
|
48
|
+
|
49
|
+
loop do
|
50
|
+
return child.word? chars if letter_sym == child.letter
|
51
|
+
|
52
|
+
break if chars.empty?
|
53
|
+
|
54
|
+
letter << chars.slice!(0)
|
55
|
+
letter_sym = letter.to_sym
|
56
|
+
end
|
57
|
+
|
58
|
+
false
|
59
|
+
end
|
60
|
+
|
61
|
+
def closest_node chars
|
62
|
+
child = children_tree[chars.first.to_sym]
|
63
|
+
return missing unless child
|
64
|
+
|
65
|
+
child_letter = child.letter.to_s
|
66
|
+
|
67
|
+
if chars.size >= child_letter.size
|
68
|
+
letter = chars.slice!(0, child_letter.size).join
|
69
|
+
return child.scan chars if child_letter == letter
|
70
|
+
end
|
71
|
+
|
72
|
+
letter = chars.join
|
73
|
+
child_letter = child_letter.slice 0, letter.size
|
74
|
+
|
75
|
+
child_letter == letter ? child : missing
|
76
|
+
end
|
77
|
+
|
78
|
+
def children_match_prefix chars
|
79
|
+
return enum_for :children_match_prefix, chars unless block_given?
|
80
|
+
|
81
|
+
return if chars.empty?
|
82
|
+
|
83
|
+
child = children_tree[chars.first.to_sym]
|
84
|
+
return unless child
|
85
|
+
|
86
|
+
child_letter = child.letter.to_s
|
87
|
+
letter = chars.slice!(0, child_letter.size).join
|
88
|
+
|
89
|
+
return unless child_letter == letter
|
90
|
+
|
91
|
+
child.match_prefix chars do |word|
|
92
|
+
yield word
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|