rambling-trie 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -1
  3. data/README.md +23 -7
  4. data/Rakefile +4 -0
  5. data/lib/rambling/trie.rb +27 -21
  6. data/lib/rambling/trie/comparable.rb +3 -3
  7. data/lib/rambling/trie/compressible.rb +14 -0
  8. data/lib/rambling/trie/compressor.rb +37 -24
  9. data/lib/rambling/trie/configuration/properties.rb +8 -6
  10. data/lib/rambling/trie/configuration/provider_collection.rb +34 -16
  11. data/lib/rambling/trie/container.rb +156 -36
  12. data/lib/rambling/trie/enumerable.rb +4 -4
  13. data/lib/rambling/trie/nodes.rb +11 -0
  14. data/lib/rambling/trie/nodes/compressed.rb +115 -0
  15. data/lib/rambling/trie/nodes/missing.rb +10 -0
  16. data/lib/rambling/trie/nodes/node.rb +151 -0
  17. data/lib/rambling/trie/nodes/raw.rb +89 -0
  18. data/lib/rambling/trie/readers/plain_text.rb +1 -11
  19. data/lib/rambling/trie/serializers/marshal.rb +4 -4
  20. data/lib/rambling/trie/serializers/yaml.rb +4 -4
  21. data/lib/rambling/trie/serializers/zip.rb +9 -8
  22. data/lib/rambling/trie/version.rb +1 -1
  23. data/spec/assets/test_words.es_DO.txt +1 -0
  24. data/spec/integration/rambling/trie_spec.rb +40 -35
  25. data/spec/lib/rambling/trie/comparable_spec.rb +6 -15
  26. data/spec/lib/rambling/trie/compressor_spec.rb +88 -13
  27. data/spec/lib/rambling/trie/configuration/properties_spec.rb +7 -7
  28. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +8 -20
  29. data/spec/lib/rambling/trie/container_spec.rb +159 -168
  30. data/spec/lib/rambling/trie/enumerable_spec.rb +12 -9
  31. data/spec/lib/rambling/trie/inspectable_spec.rb +11 -11
  32. data/spec/lib/rambling/trie/nodes/compressed_spec.rb +35 -0
  33. data/spec/lib/rambling/trie/nodes/node_spec.rb +7 -0
  34. data/spec/lib/rambling/trie/nodes/raw_spec.rb +177 -0
  35. data/spec/lib/rambling/trie/serializers/file_spec.rb +4 -4
  36. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +3 -7
  37. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +3 -7
  38. data/spec/lib/rambling/trie/serializers/zip_spec.rb +16 -20
  39. data/spec/lib/rambling/trie/stringifyable_spec.rb +7 -8
  40. data/spec/lib/rambling/trie_spec.rb +2 -2
  41. data/spec/spec_helper.rb +3 -1
  42. data/spec/support/config.rb +4 -0
  43. data/spec/support/helpers/add_word.rb +18 -0
  44. data/spec/support/shared_examples/{a_compressable_trie.rb → a_compressible_trie.rb} +13 -3
  45. data/spec/support/shared_examples/a_serializable_trie.rb +8 -6
  46. data/spec/support/shared_examples/a_serializer.rb +6 -0
  47. data/spec/{lib/rambling/trie/node_spec.rb → support/shared_examples/a_trie_node.rb} +61 -30
  48. data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +18 -69
  49. metadata +22 -15
  50. data/lib/rambling/trie/compressable.rb +0 -14
  51. data/lib/rambling/trie/compressed_node.rb +0 -120
  52. data/lib/rambling/trie/missing_node.rb +0 -8
  53. data/lib/rambling/trie/node.rb +0 -97
  54. data/lib/rambling/trie/raw_node.rb +0 -96
  55. data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
@@ -2,31 +2,14 @@ module Rambling
2
2
  module Trie
3
3
  # Wrapper on top of trie data structure.
4
4
  class Container
5
- extend ::Forwardable
6
5
  include ::Enumerable
7
6
 
8
- delegate [
9
- :[],
10
- :as_word,
11
- :children,
12
- :children_tree,
13
- :compressed?,
14
- :each,
15
- :to_a,
16
- :has_key?,
17
- :inspect,
18
- :letter,
19
- :parent,
20
- :size,
21
- :to_s
22
- ] => :root
23
-
24
7
  # The root node of this trie.
25
- # @return [Node] the root node of this trie.
8
+ # @return [Nodes::Node] the root node of this trie.
26
9
  attr_reader :root
27
10
 
28
11
  # Creates a new trie.
29
- # @param [Node] root the root node for the trie
12
+ # @param [Nodes::Node] root the root node for the trie
30
13
  # @param [Compressor] compressor responsible for compressing the trie
31
14
  # @yield [Container] the trie just created.
32
15
  def initialize root, compressor
@@ -36,32 +19,53 @@ module Rambling
36
19
  yield self if block_given?
37
20
  end
38
21
 
39
- # Adds a word to the trie, without altering the passed word.
22
+ # Adds a word to the trie.
40
23
  # @param [String] word the word to add the branch from.
41
- # @return [Node] the just added branch's root node.
24
+ # @return [Nodes::Node] the just added branch's root node.
42
25
  # @raise [InvalidOperation] if the trie is already compressed.
43
- # @see RawNode#add
44
- # @see CompressedNode#add
45
- # @note Avoids altering the contents of the word variable.
26
+ # @see Nodes::Raw#add
27
+ # @see Nodes::Compressed#add
46
28
  def add word
47
- root.add word.clone
29
+ root.add char_symbols word
48
30
  end
49
31
 
50
- # Compresses the existing tree using redundant node elimination. Marks
51
- # the trie as compressed.
32
+ # Adds all provided words to the trie.
33
+ # @param [Array<String>] words the words to add the branch from.
34
+ # @return [Array<Nodes::Node>] the collection of nodes added.
35
+ # @raise [InvalidOperation] if the trie is already compressed.
36
+ # @see Nodes::Raw#add
37
+ # @see Nodes::Compressed#add
38
+ def concat words
39
+ words.map { |word| add word }
40
+ end
41
+
42
+ # Compresses the existing trie using redundant node elimination. Marks
43
+ # the trie as compressed. Does nothing if the trie has already been
44
+ # compressed.
52
45
  # @return [Container] self
53
- # @note Only compresses tries that have not already been compressed.
46
+ # @note This method replaces the root {Nodes::Raw Raw} node with a
47
+ # {Nodes::Compressed Compressed} version of it.
54
48
  def compress!
55
- self.root = compressor.compress root unless root.compressed?
49
+ self.root = compress_root unless root.compressed?
56
50
  self
57
51
  end
58
52
 
53
+ # Compresses the existing trie using redundant node elimination. Returns
54
+ # a new trie with the compressed root.
55
+ # @return [Container] A new {Container} with the {Nodes::Compressed
56
+ # Compressed} root node or self if the trie has already been
57
+ # compressed.
58
+ def compress
59
+ return self if root.compressed?
60
+ Rambling::Trie::Container.new compress_root, compressor
61
+ end
62
+
59
63
  # Checks if a path for a word or partial word exists in the trie.
60
64
  # @param [String] word the word or partial word to look for in the trie.
61
65
  # @return [Boolean] `true` if the word or partial word is found, `false`
62
66
  # otherwise.
63
- # @see RawNode#partial_word?
64
- # @see CompressedNode#partial_word?
67
+ # @see Nodes::Raw#partial_word?
68
+ # @see Nodes::Compressed#partial_word?
65
69
  def partial_word? word = ''
66
70
  root.partial_word? word.chars
67
71
  end
@@ -70,8 +74,8 @@ module Rambling
70
74
  # @param [String] word the word to look for in the trie.
71
75
  # @return [Boolean] `true` only if the word is found and the last
72
76
  # character corresponds to a terminal node, `false` otherwise.
73
- # @see RawNode#word?
74
- # @see CompressedNode#word?
77
+ # @see Nodes::Raw#word?
78
+ # @see Nodes::Compressed#word?
75
79
  def word? word = ''
76
80
  root.word? word.chars
77
81
  end
@@ -80,8 +84,8 @@ module Rambling
80
84
  # @param [String] word the word to look for in the trie.
81
85
  # @return [Array<String>] all the words contained in the trie that start
82
86
  # with the specified characters.
83
- # @see RawNode#scan
84
- # @see CompressedNode#scan
87
+ # @see Nodes::Raw#scan
88
+ # @see Nodes::Compressed#scan
85
89
  def scan word = ''
86
90
  root.scan(word.chars).to_a
87
91
  end
@@ -92,7 +96,7 @@ module Rambling
92
96
  # @return [Enumerator<String>] all the words in the given string that
93
97
  # match a word in the trie.
94
98
  # @yield [String] each word found in phrase.
95
- # @see Node#words_within
99
+ # @see Nodes::Node#words_within
96
100
  def words_within phrase
97
101
  words_within_root(phrase).to_a
98
102
  end
@@ -113,10 +117,116 @@ module Rambling
113
117
  root == other.root
114
118
  end
115
119
 
120
+ # Iterates over the words contained in the trie.
121
+ # @yield [String] the words contained in this trie node.
122
+ def each
123
+ return enum_for :each unless block_given?
124
+
125
+ root.each do |word|
126
+ yield word
127
+ end
128
+ end
129
+
130
+ # @return [String] a string representation of the container.
131
+ def inspect
132
+ "#<#{self.class.name} root: #{root.inspect}>"
133
+ end
134
+
135
+ # Get {Nodes::Node Node} corresponding to a given letter.
136
+ # @param [Symbol] letter the letter to search for in the root node.
137
+ # @return [Nodes::Node] the node corresponding to that letter.
138
+ # @see Nodes::Node#[]
139
+ def [] letter
140
+ root[letter]
141
+ end
142
+
143
+ # Root node's child nodes.
144
+ # @return [Array<Nodes::Node>] the array of children nodes contained in
145
+ # the root node.
146
+ # @see Nodes::Node#children
147
+ def children
148
+ root.children
149
+ end
150
+
151
+ # Root node's children tree.
152
+ # @return [Hash] the children tree hash of the root node, consisting of
153
+ # `:letter => node`.
154
+ # @see Nodes::Node#children_tree
155
+ def children_tree
156
+ root.children_tree
157
+ end
158
+
159
+ # Indicates if the root {Nodes::Node Node} is
160
+ # compressed or not.
161
+ # @return [Boolean] `true` if the root node is compressed,
162
+ # `false` otherwise.
163
+ def compressed?
164
+ root.compressed?
165
+ end
166
+
167
+ # Array of words contained in the root {Nodes::Node Node}.
168
+ # @return [Array<String>] all words contained in this trie.
169
+ # @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-to_a
170
+ # Enumerable#to_a
171
+ def to_a
172
+ root.to_a
173
+ end
174
+
175
+ # Check if a letter is part of the root {Nodes::Node}'s children tree.
176
+ # @param [Symbol] letter the letter to search for in the root node.
177
+ # @return [Boolean] whether the letter is contained or not.
178
+ # @see Nodes::Node#has_key?
179
+ def has_key? letter
180
+ root.has_key? letter
181
+ end
182
+
183
+ # Number of words contained in the trie.
184
+ # @return [Integer] the number of words in the root node.
185
+ def size
186
+ root.size
187
+ end
188
+
189
+ # String representation of the current node, if it is a terminal node.
190
+ # @return [String] the string representation of the current node.
191
+ # @raise [InvalidOperation] if node is not terminal or is root.
192
+ # @deprecated This will always raise an {InvalidOperation} exception.
193
+ def as_word
194
+ warn '[DEPRECATION WARNING] `#as_word` is deprecated. Please use `#root#as_word` instead.'
195
+ root.as_word
196
+ end
197
+
198
+ # Root {Nodes::Node Node}'s letter.
199
+ # @return [Symbol] the root node's letter
200
+ # @see Nodes::Node#letter
201
+ # @deprecated This will always return `nil`.
202
+ def letter
203
+ warn '[DEPRECATION WARNING] `#letter` is deprecated. Please use `#root#letter` instead.'
204
+ root.letter
205
+ end
206
+
207
+ # Root {Nodes::Node Node}'s parent.
208
+ # @return [Nodes::Node, nil] the root node's parent
209
+ # @see Nodes::Node#parent
210
+ # @deprecated This will always return `nil`.
211
+ def parent
212
+ warn '[DEPRECATION WARNING] `#parent` is deprecated. Please use `#root#parent` instead.'
213
+ root.parent
214
+ end
215
+
216
+ # String representation of root {Nodes::Node Node}.
217
+ # @return [String] the root node's string representation.
218
+ # @see Stringifyable#to_s
219
+ # @deprecated This will always return an empty string (`''`).
220
+ def to_s
221
+ warn '[DEPRECATION WARNING] `#to_s` is deprecated. Please use `#root#to_s` instead.'
222
+ root.to_s
223
+ end
224
+
116
225
  alias_method :include?, :word?
117
226
  alias_method :match?, :partial_word?
118
227
  alias_method :words, :scan
119
228
  alias_method :<<, :add
229
+ alias_method :has_letter?, :has_key?
120
230
 
121
231
  private
122
232
 
@@ -134,6 +244,16 @@ module Rambling
134
244
  end
135
245
  end
136
246
  end
247
+
248
+ def compress_root
249
+ compressor.compress root
250
+ end
251
+
252
+ def char_symbols word
253
+ symbols = []
254
+ word.reverse.each_char { |c| symbols << c.to_sym }
255
+ symbols
256
+ end
137
257
  end
138
258
  end
139
259
  end
@@ -4,9 +4,9 @@ module Rambling
4
4
  module Enumerable
5
5
  include ::Enumerable
6
6
 
7
- # Returns number of words contained in the trie. See
8
- # {https://ruby-doc.org/core-2.4.0/Enumerable.html#method-i-count
9
- # Enumerable}
7
+ # Returns number of words contained in the trie
8
+ # @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-count
9
+ # Enumerable#count
10
10
  alias_method :size, :count
11
11
 
12
12
  # Iterates over the words contained in the trie.
@@ -16,7 +16,7 @@ module Rambling
16
16
 
17
17
  yield as_word if terminal?
18
18
 
19
- children.each do |child|
19
+ children_tree.each_value do |child|
20
20
  child.each do |word|
21
21
  yield word
22
22
  end
@@ -0,0 +1,11 @@
1
+ %w{node missing compressed raw}.each do |file|
2
+ require File.join('rambling', 'trie', 'nodes', file)
3
+ end
4
+
5
+ module Rambling
6
+ module Trie
7
+ # Namespace for all nodes.
8
+ module Nodes
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,115 @@
1
+ module Rambling
2
+ module Trie
3
+ module Nodes
4
+ # A representation of a node in a compressed trie data structure.
5
+ class Compressed < Rambling::Trie::Nodes::Node
6
+ # Always raises {Rambling::Trie::InvalidOperation InvalidOperation} when
7
+ # trying to add a word to the current compressed trie node
8
+ # @param [String] word the word to add to the trie.
9
+ # @raise [InvalidOperation] if the trie is already compressed.
10
+ # @return [nil] this never returns as it always raises an exception.
11
+ def add word
12
+ raise Rambling::Trie::InvalidOperation, 'Cannot add word to compressed trie'
13
+ end
14
+
15
+ # Checks if a path for a set of characters exists in the trie.
16
+ # @param [Array<String>] chars the characters to look for in the trie.
17
+ # @return [Boolean] `true` if the characters are found, `false` otherwise.
18
+ def partial_word? chars
19
+ chars.empty? || has_partial_word?(chars)
20
+ end
21
+
22
+ # Checks if a path for a set of characters represents a word in the trie.
23
+ # @param [Array<String>] chars the characters to look for in the trie.
24
+ # @return [Boolean] `true` if the characters are found and form a word,
25
+ # `false` otherwise.
26
+ def word? chars
27
+ chars.empty? ? terminal? : has_word?(chars)
28
+ end
29
+
30
+ # Always returns `true` for a compressed node.
31
+ # @return [Boolean] always `true` for a compressed node.
32
+ def compressed?
33
+ true
34
+ end
35
+
36
+ private
37
+
38
+ def has_partial_word? chars
39
+ recursive_get(:partial_word?, chars) || false
40
+ end
41
+
42
+ def has_word? chars
43
+ current_key = nil
44
+
45
+ while !chars.empty?
46
+ if current_key
47
+ current_key << chars.slice!(0)
48
+ else
49
+ current_key = chars.slice!(0)
50
+ end
51
+
52
+ child = children_tree[current_key.to_sym]
53
+ return child.word? chars if child
54
+ end
55
+
56
+ false
57
+ end
58
+
59
+ def closest_node chars
60
+ recursive_get(:scan, chars) || Rambling::Trie::Nodes::Missing.new
61
+ end
62
+
63
+ def children_match_prefix chars
64
+ return enum_for :children_match_prefix, chars unless block_given?
65
+
66
+ current_key = nil
67
+
68
+ while !chars.empty?
69
+ if current_key
70
+ current_key << chars.slice!(0)
71
+ else
72
+ current_key = chars.slice!(0)
73
+ end
74
+
75
+ child = children_tree[current_key.to_sym]
76
+
77
+ next unless child
78
+
79
+ child.match_prefix chars do |word|
80
+ yield word
81
+ end
82
+ end
83
+ end
84
+
85
+ def recursive_get method, chars
86
+ current_length = 0
87
+ current_key = current_key chars.slice!(0)
88
+
89
+ begin
90
+ current_length += 1
91
+
92
+ if current_key && (current_key.length == current_length || chars.empty?)
93
+ return children_tree[current_key.to_sym].send method, chars
94
+ end
95
+ end while current_key && current_key[current_length] == chars.slice!(0)
96
+ end
97
+
98
+ def current_key letter
99
+ current_key = nil
100
+
101
+ children_tree.each_key do |letters|
102
+ letters_string = letters.to_s
103
+
104
+ if letters_string.start_with? letter
105
+ current_key = letters_string
106
+ break
107
+ end
108
+ end
109
+
110
+ current_key
111
+ end
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,10 @@
1
+ module Rambling
2
+ module Trie
3
+ module Nodes
4
+ # A representation of a missing node in the trie data structure. Returned
5
+ # when a node is not found.
6
+ class Missing < Rambling::Trie::Nodes::Node
7
+ end
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,151 @@
1
+ module Rambling
2
+ module Trie
3
+ module Nodes
4
+ # A representation of a node in the trie data structure.
5
+ class Node
6
+ include Rambling::Trie::Compressible
7
+ include Rambling::Trie::Enumerable
8
+ include Rambling::Trie::Comparable
9
+ include Rambling::Trie::Stringifyable
10
+ include Rambling::Trie::Inspectable
11
+
12
+ # @overload letter
13
+ # Letter(s) corresponding to the current node.
14
+ # @overload letter=(letter)
15
+ # Sets the letter(s) corresponding to the current node. Ensures the
16
+ # {Node#letter #letter} in the {Node#parent #parent}'s
17
+ # {Node#children_tree #children_tree} is updated.
18
+ # @param [String, Symbol, nil] letter the letter value.
19
+ # @return [Symbol, nil] the corresponding letter(s).
20
+ attr_reader :letter
21
+
22
+ # Child nodes tree.
23
+ # @return [Hash] the children_tree hash, consisting of `:letter => node`.
24
+ attr_accessor :children_tree
25
+
26
+ # Parent node.
27
+ # @return [Node, nil] the parent of the current node.
28
+ attr_accessor :parent
29
+
30
+ # Creates a new node.
31
+ # @param [Symbol, nil] letter the Node's letter value
32
+ # @param [Node, nil] parent the parent of the current node.
33
+ def initialize letter = nil, parent = nil, children_tree = {}
34
+ @letter = letter
35
+ @parent = parent
36
+ @children_tree = children_tree
37
+ end
38
+
39
+ # Child nodes.
40
+ # @return [Array<Node>] the array of children nodes contained
41
+ # in the current node.
42
+ def children
43
+ children_tree.values
44
+ end
45
+
46
+ # First child node.
47
+ # @return [Node, nil] the first child contained in the current node.
48
+ def first_child
49
+ return if children_tree.empty?
50
+
51
+ children_tree.each_value do |child|
52
+ return child
53
+ end
54
+ end
55
+
56
+ # Indicates if the current node is the root node.
57
+ # @return [Boolean] `true` if the node does not have a parent, `false`
58
+ # otherwise.
59
+ def root?
60
+ !parent
61
+ end
62
+
63
+ # Indicates if a {Node Node} is terminal or not.
64
+ # @return [Boolean] `true` for terminal nodes, `false` otherwise.
65
+ def terminal?
66
+ !!terminal
67
+ end
68
+
69
+ # Mark {Node Node} as terminal.
70
+ # @return [Node] the modified node.
71
+ def terminal!
72
+ self.terminal = true
73
+ self
74
+ end
75
+
76
+ def letter= letter
77
+ @letter = letter.to_sym if letter
78
+ end
79
+
80
+ # Returns the node that starts with the specified characters.
81
+ # @param [Array<String>] chars the characters to look for in the trie.
82
+ # @return [Node] the node that matches the specified characters.
83
+ # {Missing Missing} when not found.
84
+ def scan chars
85
+ return self if chars.empty?
86
+
87
+ closest_node chars
88
+ end
89
+
90
+ # Returns all words that match a prefix of any length within chars.
91
+ # @param [String] chars the chars to base the prefix on.
92
+ # @return [Enumerator<String>] all the words that match a prefix given by
93
+ # chars.
94
+ # @yield [String] each word found.
95
+ def match_prefix chars
96
+ return enum_for :match_prefix, chars unless block_given?
97
+
98
+ yield as_word if terminal?
99
+ children_match_prefix chars do |word|
100
+ yield word
101
+ end
102
+ end
103
+
104
+ # Get {Node Node} corresponding to a given letter.
105
+ # @param [Symbol] letter the letter to search for in the node.
106
+ # @return [Node] the node corresponding to that letter.
107
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
108
+ # Hash#[]
109
+ def [] letter
110
+ children_tree[letter]
111
+ end
112
+
113
+ # Set the {Node Node} that corresponds to a given letter.
114
+ # @param [Symbol] letter the letter to insert or update in the node's children tree.
115
+ # @param [Node] node the {Node Node} to assign to that letter.
116
+ # @return [Node] the node corresponding to the inserted or
117
+ # updated letter.
118
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D-3D
119
+ # Hash#[]=
120
+ def []= letter, node
121
+ children_tree[letter] = node
122
+ end
123
+
124
+ # Check if a {Node Node}'s children tree contains a given
125
+ # letter.
126
+ # @param [Symbol] letter the letter to search for in the node.
127
+ # @return [Boolean] `true` if the letter is present, `false` otherwise
128
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-has_key-3F
129
+ # Hash#has_key?
130
+ def has_key? letter
131
+ children_tree.has_key? letter
132
+ end
133
+
134
+ # Delete a given letter and its corresponding {Node Node} from
135
+ # this {Node Node}'s children tree.
136
+ # @param [Symbol] letter the letter to delete from the node's children
137
+ # tree.
138
+ # @return [Node] the node corresponding to the deleted letter.
139
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-delete
140
+ # Hash#delete
141
+ def delete letter
142
+ children_tree.delete letter
143
+ end
144
+
145
+ private
146
+
147
+ attr_accessor :terminal
148
+ end
149
+ end
150
+ end
151
+ end