rambling-trie 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -1
  3. data/README.md +23 -7
  4. data/Rakefile +4 -0
  5. data/lib/rambling/trie.rb +27 -21
  6. data/lib/rambling/trie/comparable.rb +3 -3
  7. data/lib/rambling/trie/compressible.rb +14 -0
  8. data/lib/rambling/trie/compressor.rb +37 -24
  9. data/lib/rambling/trie/configuration/properties.rb +8 -6
  10. data/lib/rambling/trie/configuration/provider_collection.rb +34 -16
  11. data/lib/rambling/trie/container.rb +156 -36
  12. data/lib/rambling/trie/enumerable.rb +4 -4
  13. data/lib/rambling/trie/nodes.rb +11 -0
  14. data/lib/rambling/trie/nodes/compressed.rb +115 -0
  15. data/lib/rambling/trie/nodes/missing.rb +10 -0
  16. data/lib/rambling/trie/nodes/node.rb +151 -0
  17. data/lib/rambling/trie/nodes/raw.rb +89 -0
  18. data/lib/rambling/trie/readers/plain_text.rb +1 -11
  19. data/lib/rambling/trie/serializers/marshal.rb +4 -4
  20. data/lib/rambling/trie/serializers/yaml.rb +4 -4
  21. data/lib/rambling/trie/serializers/zip.rb +9 -8
  22. data/lib/rambling/trie/version.rb +1 -1
  23. data/spec/assets/test_words.es_DO.txt +1 -0
  24. data/spec/integration/rambling/trie_spec.rb +40 -35
  25. data/spec/lib/rambling/trie/comparable_spec.rb +6 -15
  26. data/spec/lib/rambling/trie/compressor_spec.rb +88 -13
  27. data/spec/lib/rambling/trie/configuration/properties_spec.rb +7 -7
  28. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +8 -20
  29. data/spec/lib/rambling/trie/container_spec.rb +159 -168
  30. data/spec/lib/rambling/trie/enumerable_spec.rb +12 -9
  31. data/spec/lib/rambling/trie/inspectable_spec.rb +11 -11
  32. data/spec/lib/rambling/trie/nodes/compressed_spec.rb +35 -0
  33. data/spec/lib/rambling/trie/nodes/node_spec.rb +7 -0
  34. data/spec/lib/rambling/trie/nodes/raw_spec.rb +177 -0
  35. data/spec/lib/rambling/trie/serializers/file_spec.rb +4 -4
  36. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +3 -7
  37. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +3 -7
  38. data/spec/lib/rambling/trie/serializers/zip_spec.rb +16 -20
  39. data/spec/lib/rambling/trie/stringifyable_spec.rb +7 -8
  40. data/spec/lib/rambling/trie_spec.rb +2 -2
  41. data/spec/spec_helper.rb +3 -1
  42. data/spec/support/config.rb +4 -0
  43. data/spec/support/helpers/add_word.rb +18 -0
  44. data/spec/support/shared_examples/{a_compressable_trie.rb → a_compressible_trie.rb} +13 -3
  45. data/spec/support/shared_examples/a_serializable_trie.rb +8 -6
  46. data/spec/support/shared_examples/a_serializer.rb +6 -0
  47. data/spec/{lib/rambling/trie/node_spec.rb → support/shared_examples/a_trie_node.rb} +61 -30
  48. data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +18 -69
  49. metadata +22 -15
  50. data/lib/rambling/trie/compressable.rb +0 -14
  51. data/lib/rambling/trie/compressed_node.rb +0 -120
  52. data/lib/rambling/trie/missing_node.rb +0 -8
  53. data/lib/rambling/trie/node.rb +0 -97
  54. data/lib/rambling/trie/raw_node.rb +0 -96
  55. data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
@@ -2,31 +2,14 @@ module Rambling
2
2
  module Trie
3
3
  # Wrapper on top of trie data structure.
4
4
  class Container
5
- extend ::Forwardable
6
5
  include ::Enumerable
7
6
 
8
- delegate [
9
- :[],
10
- :as_word,
11
- :children,
12
- :children_tree,
13
- :compressed?,
14
- :each,
15
- :to_a,
16
- :has_key?,
17
- :inspect,
18
- :letter,
19
- :parent,
20
- :size,
21
- :to_s
22
- ] => :root
23
-
24
7
  # The root node of this trie.
25
- # @return [Node] the root node of this trie.
8
+ # @return [Nodes::Node] the root node of this trie.
26
9
  attr_reader :root
27
10
 
28
11
  # Creates a new trie.
29
- # @param [Node] root the root node for the trie
12
+ # @param [Nodes::Node] root the root node for the trie
30
13
  # @param [Compressor] compressor responsible for compressing the trie
31
14
  # @yield [Container] the trie just created.
32
15
  def initialize root, compressor
@@ -36,32 +19,53 @@ module Rambling
36
19
  yield self if block_given?
37
20
  end
38
21
 
39
- # Adds a word to the trie, without altering the passed word.
22
+ # Adds a word to the trie.
40
23
  # @param [String] word the word to add the branch from.
41
- # @return [Node] the just added branch's root node.
24
+ # @return [Nodes::Node] the just added branch's root node.
42
25
  # @raise [InvalidOperation] if the trie is already compressed.
43
- # @see RawNode#add
44
- # @see CompressedNode#add
45
- # @note Avoids altering the contents of the word variable.
26
+ # @see Nodes::Raw#add
27
+ # @see Nodes::Compressed#add
46
28
  def add word
47
- root.add word.clone
29
+ root.add char_symbols word
48
30
  end
49
31
 
50
- # Compresses the existing tree using redundant node elimination. Marks
51
- # the trie as compressed.
32
+ # Adds all provided words to the trie.
33
+ # @param [Array<String>] words the words to add the branch from.
34
+ # @return [Array<Nodes::Node>] the collection of nodes added.
35
+ # @raise [InvalidOperation] if the trie is already compressed.
36
+ # @see Nodes::Raw#add
37
+ # @see Nodes::Compressed#add
38
+ def concat words
39
+ words.map { |word| add word }
40
+ end
41
+
42
+ # Compresses the existing trie using redundant node elimination. Marks
43
+ # the trie as compressed. Does nothing if the trie has already been
44
+ # compressed.
52
45
  # @return [Container] self
53
- # @note Only compresses tries that have not already been compressed.
46
+ # @note This method replaces the root {Nodes::Raw Raw} node with a
47
+ # {Nodes::Compressed Compressed} version of it.
54
48
  def compress!
55
- self.root = compressor.compress root unless root.compressed?
49
+ self.root = compress_root unless root.compressed?
56
50
  self
57
51
  end
58
52
 
53
+ # Compresses the existing trie using redundant node elimination. Returns
54
+ # a new trie with the compressed root.
55
+ # @return [Container] A new {Container} with the {Nodes::Compressed
56
+ # Compressed} root node or self if the trie has already been
57
+ # compressed.
58
+ def compress
59
+ return self if root.compressed?
60
+ Rambling::Trie::Container.new compress_root, compressor
61
+ end
62
+
59
63
  # Checks if a path for a word or partial word exists in the trie.
60
64
  # @param [String] word the word or partial word to look for in the trie.
61
65
  # @return [Boolean] `true` if the word or partial word is found, `false`
62
66
  # otherwise.
63
- # @see RawNode#partial_word?
64
- # @see CompressedNode#partial_word?
67
+ # @see Nodes::Raw#partial_word?
68
+ # @see Nodes::Compressed#partial_word?
65
69
  def partial_word? word = ''
66
70
  root.partial_word? word.chars
67
71
  end
@@ -70,8 +74,8 @@ module Rambling
70
74
  # @param [String] word the word to look for in the trie.
71
75
  # @return [Boolean] `true` only if the word is found and the last
72
76
  # character corresponds to a terminal node, `false` otherwise.
73
- # @see RawNode#word?
74
- # @see CompressedNode#word?
77
+ # @see Nodes::Raw#word?
78
+ # @see Nodes::Compressed#word?
75
79
  def word? word = ''
76
80
  root.word? word.chars
77
81
  end
@@ -80,8 +84,8 @@ module Rambling
80
84
  # @param [String] word the word to look for in the trie.
81
85
  # @return [Array<String>] all the words contained in the trie that start
82
86
  # with the specified characters.
83
- # @see RawNode#scan
84
- # @see CompressedNode#scan
87
+ # @see Nodes::Raw#scan
88
+ # @see Nodes::Compressed#scan
85
89
  def scan word = ''
86
90
  root.scan(word.chars).to_a
87
91
  end
@@ -92,7 +96,7 @@ module Rambling
92
96
  # @return [Enumerator<String>] all the words in the given string that
93
97
  # match a word in the trie.
94
98
  # @yield [String] each word found in phrase.
95
- # @see Node#words_within
99
+ # @see Nodes::Node#words_within
96
100
  def words_within phrase
97
101
  words_within_root(phrase).to_a
98
102
  end
@@ -113,10 +117,116 @@ module Rambling
113
117
  root == other.root
114
118
  end
115
119
 
120
+ # Iterates over the words contained in the trie.
121
+ # @yield [String] the words contained in this trie node.
122
+ def each
123
+ return enum_for :each unless block_given?
124
+
125
+ root.each do |word|
126
+ yield word
127
+ end
128
+ end
129
+
130
+ # @return [String] a string representation of the container.
131
+ def inspect
132
+ "#<#{self.class.name} root: #{root.inspect}>"
133
+ end
134
+
135
+ # Get {Nodes::Node Node} corresponding to a given letter.
136
+ # @param [Symbol] letter the letter to search for in the root node.
137
+ # @return [Nodes::Node] the node corresponding to that letter.
138
+ # @see Nodes::Node#[]
139
+ def [] letter
140
+ root[letter]
141
+ end
142
+
143
+ # Root node's child nodes.
144
+ # @return [Array<Nodes::Node>] the array of children nodes contained in
145
+ # the root node.
146
+ # @see Nodes::Node#children
147
+ def children
148
+ root.children
149
+ end
150
+
151
+ # Root node's children tree.
152
+ # @return [Array<Nodes::Node>] the array of children nodes contained in
153
+ # the root node.
154
+ # @see Nodes::Node#children_tree
155
+ def children_tree
156
+ root.children_tree
157
+ end
158
+
159
+ # Indicates whether the root {Nodes::Node Node} has been
160
+ # compressed or not.
161
+ # @return [Boolean] `true` if the root node is compressed, `false`
162
+ # otherwise.
163
+ def compressed?
164
+ root.compressed?
165
+ end
166
+
167
+ # Array of words contained in the root {Nodes::Node Node}.
168
+ # @return [Array<String>] all words contained in this trie.
169
+ # @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-to_a
170
+ # Enumerable#to_a
171
+ def to_a
172
+ root.to_a
173
+ end
174
+
175
+ # Check if a letter is part of the root {Nodes::Node}'s children tree.
176
+ # @param [Symbol] letter the letter to search for in the root node.
177
+ # @return [Boolean] whether the letter is contained or not.
178
+ # @see Nodes::Node#has_key?
179
+ def has_key? letter
180
+ root.has_key? letter
181
+ end
182
+
183
+ # Number of words reachable from the root {Nodes::Node Node}.
184
+ # @return [Integer] the number of words contained in the trie.
185
+ def size
186
+ root.size
187
+ end
188
+
189
+ # String representation of the current node, if it is a terminal node.
190
+ # @return [String] the string representation of the current node.
191
+ # @raise [InvalidOperation] if node is not terminal or is root.
192
+ # @deprecated This will always raise an {InvalidOperation} exception.
193
+ def as_word
194
+ warn '[DEPRECATION WARNING] `#as_word` is deprecated. Please use `#root#as_word` instead.'
195
+ root.as_word
196
+ end
197
+
198
+ # Root {Nodes::Node Node}'s letter.
199
+ # @return [Symbol] the root node's letter
200
+ # @see Nodes::Node#letter
201
+ # @deprecated This will always return `nil`.
202
+ def letter
203
+ warn '[DEPRECATION WARNING] `#letter` is deprecated. Please use `#root#letter` instead.'
204
+ root.letter
205
+ end
206
+
207
+ # Root {Nodes::Node Node}'s parent.
208
+ # @return [Nodes::Node, nil] the root node's parent
209
+ # @see Nodes::Node#parent
210
+ # @deprecated This will always return `nil`.
211
+ def parent
212
+ warn '[DEPRECATION WARNING] `#parent` is deprecated. Please use `#root#parent` instead.'
213
+ root.parent
214
+ end
215
+
216
+ # String representation of root {Nodes::Node Node}.
217
+ # @return [String] the root node's string representation.
218
+ # @see Stringifyable#to_s
219
+ # @deprecated This will always return an empty string (`''`).
220
+ def to_s
221
+ warn '[DEPRECATION WARNING] `#to_s` is deprecated. Please use `#root#to_s` instead.'
222
+ root.to_s
223
+ end
224
+
116
225
  alias_method :include?, :word?
117
226
  alias_method :match?, :partial_word?
118
227
  alias_method :words, :scan
119
228
  alias_method :<<, :add
229
+ alias_method :has_letter?, :has_key?
120
230
 
121
231
  private
122
232
 
@@ -134,6 +244,16 @@ module Rambling
134
244
  end
135
245
  end
136
246
  end
247
+
248
+ def compress_root
249
+ compressor.compress root
250
+ end
251
+
252
+ def char_symbols word
253
+ symbols = []
254
+ word.reverse.each_char { |c| symbols << c.to_sym }
255
+ symbols
256
+ end
137
257
  end
138
258
  end
139
259
  end
@@ -4,9 +4,9 @@ module Rambling
4
4
  module Enumerable
5
5
  include ::Enumerable
6
6
 
7
- # Returns number of words contained in the trie. See
8
- # {https://ruby-doc.org/core-2.4.0/Enumerable.html#method-i-count
9
- # Enumerable}
7
+ # Returns the number of words contained in the trie.
8
+ # @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-count
9
+ # Enumerable#count
10
10
  alias_method :size, :count
11
11
 
12
12
  # Iterates over the words contained in the trie.
@@ -16,7 +16,7 @@ module Rambling
16
16
 
17
17
  yield as_word if terminal?
18
18
 
19
- children.each do |child|
19
+ children_tree.each_value do |child|
20
20
  child.each do |word|
21
21
  yield word
22
22
  end
@@ -0,0 +1,11 @@
1
+ %w{node missing compressed raw}.each do |file|
2
+ require File.join('rambling', 'trie', 'nodes', file)
3
+ end
4
+
5
+ module Rambling
6
+ module Trie
7
+ # Namespace for all nodes.
8
+ module Nodes
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,115 @@
1
+ module Rambling
2
+ module Trie
3
+ module Nodes
4
+ # A representation of a node in a compressed trie data structure.
5
+ class Compressed < Rambling::Trie::Nodes::Node
6
+ # Always raises {Rambling::Trie::InvalidOperation InvalidOperation} when
7
+ # trying to add a word to the current compressed trie node
8
+ # @param [String] word the word to add to the trie.
9
+ # @raise [InvalidOperation] if the trie is already compressed.
10
+ # @return [nil] this never returns as it always raises an exception.
11
+ def add word
12
+ raise Rambling::Trie::InvalidOperation, 'Cannot add word to compressed trie'
13
+ end
14
+
15
+ # Checks if a path for a set of characters exists in the trie.
16
+ # @param [Array<String>] chars the characters to look for in the trie.
17
+ # @return [Boolean] `true` if the characters are found, `false` otherwise.
18
+ def partial_word? chars
19
+ chars.empty? || has_partial_word?(chars)
20
+ end
21
+
22
+ # Checks if a path for a set of characters represents a word in the trie.
23
+ # @param [Array<String>] chars the characters to look for in the trie.
24
+ # @return [Boolean] `true` if the characters are found and form a word,
25
+ # `false` otherwise.
26
+ def word? chars
27
+ chars.empty? ? terminal? : has_word?(chars)
28
+ end
29
+
30
+ # Always return `true` for a compressed node.
31
+ # @return [Boolean] always `true` for a compressed node.
32
+ def compressed?
33
+ true
34
+ end
35
+
36
+ private
37
+
38
+ def has_partial_word? chars
39
+ recursive_get(:partial_word?, chars) || false
40
+ end
41
+
42
+ def has_word? chars
43
+ current_key = nil
44
+
45
+ while !chars.empty?
46
+ if current_key
47
+ current_key << chars.slice!(0)
48
+ else
49
+ current_key = chars.slice!(0)
50
+ end
51
+
52
+ child = children_tree[current_key.to_sym]
53
+ return child.word? chars if child
54
+ end
55
+
56
+ false
57
+ end
58
+
59
+ def closest_node chars
60
+ recursive_get(:scan, chars) || Rambling::Trie::Nodes::Missing.new
61
+ end
62
+
63
+ def children_match_prefix chars
64
+ return enum_for :children_match_prefix, chars unless block_given?
65
+
66
+ current_key = nil
67
+
68
+ while !chars.empty?
69
+ if current_key
70
+ current_key << chars.slice!(0)
71
+ else
72
+ current_key = chars.slice!(0)
73
+ end
74
+
75
+ child = children_tree[current_key.to_sym]
76
+
77
+ next unless child
78
+
79
+ child.match_prefix chars do |word|
80
+ yield word
81
+ end
82
+ end
83
+ end
84
+
85
+ def recursive_get method, chars
86
+ current_length = 0
87
+ current_key = current_key chars.slice!(0)
88
+
89
+ begin
90
+ current_length += 1
91
+
92
+ if current_key && (current_key.length == current_length || chars.empty?)
93
+ return children_tree[current_key.to_sym].send method, chars
94
+ end
95
+ end while current_key && current_key[current_length] == chars.slice!(0)
96
+ end
97
+
98
+ def current_key letter
99
+ current_key = nil
100
+
101
+ children_tree.each_key do |letters|
102
+ letters_string = letters.to_s
103
+
104
+ if letters_string.start_with? letter
105
+ current_key = letters_string
106
+ break
107
+ end
108
+ end
109
+
110
+ current_key
111
+ end
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,10 @@
1
+ module Rambling
2
+ module Trie
3
+ module Nodes
4
+ # A representation of a missing node in the trie data structure. Returned
5
+ # when a node is not found.
6
+ class Missing < Rambling::Trie::Nodes::Node
7
+ end
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,151 @@
1
+ module Rambling
2
+ module Trie
3
+ module Nodes
4
+ # A representation of a node in the trie data structure.
5
+ class Node
6
+ include Rambling::Trie::Compressible
7
+ include Rambling::Trie::Enumerable
8
+ include Rambling::Trie::Comparable
9
+ include Rambling::Trie::Stringifyable
10
+ include Rambling::Trie::Inspectable
11
+
12
+ # @overload letter
13
+ # Letter(s) corresponding to the current node.
14
+ # @overload letter=(letter)
15
+ # Sets the letter(s) corresponding to the current node. Ensures the
16
+ # {Node#letter #letter} in the {Node#parent #parent}'s
17
+ # {Node#children_tree #children_tree} is updated.
18
+ # @param [String, Symbol, nil] letter the letter value.
19
+ # @return [Symbol, nil] the corresponding letter(s).
20
+ attr_reader :letter
21
+
22
+ # Child nodes tree.
23
+ # @return [Hash] the children_tree hash, consisting of `:letter => node`.
24
+ attr_accessor :children_tree
25
+
26
+ # Parent node.
27
+ # @return [Node, nil] the parent of the current node.
28
+ attr_accessor :parent
29
+
30
+ # Creates a new node.
31
+ # @param [Symbol, nil] letter the Node's letter value
32
+ # @param [Node, nil] parent the parent of the current node.
33
+ def initialize letter = nil, parent = nil, children_tree = {}
34
+ @letter = letter
35
+ @parent = parent
36
+ @children_tree = children_tree
37
+ end
38
+
39
+ # Child nodes.
40
+ # @return [Array<Node>] the array of children nodes contained
41
+ # in the current node.
42
+ def children
43
+ children_tree.values
44
+ end
45
+
46
+ # First child node.
47
+ # @return [Node, nil] the first child contained in the current node.
48
+ def first_child
49
+ return if children_tree.empty?
50
+
51
+ children_tree.each_value do |child|
52
+ return child
53
+ end
54
+ end
55
+
56
+ # Indicates if the current node is the root node.
57
+ # @return [Boolean] `true` if the node does not have a parent, `false`
58
+ # otherwise.
59
+ def root?
60
+ !parent
61
+ end
62
+
63
+ # Indicates if a {Node Node} is terminal or not.
64
+ # @return [Boolean] `true` for terminal nodes, `false` otherwise.
65
+ def terminal?
66
+ !!terminal
67
+ end
68
+
69
+ # Mark {Node Node} as terminal.
70
+ # @return [Node] the modified node.
71
+ def terminal!
72
+ self.terminal = true
73
+ self
74
+ end
75
+
76
+ def letter= letter
77
+ @letter = letter.to_sym if letter
78
+ end
79
+
80
+ # Returns the node that starts with the specified characters.
81
+ # @param [Array<String>] chars the characters to look for in the trie.
82
+ # @return [Node] the node that matches the specified characters.
83
+ # {Missing Missing} when not found.
84
+ def scan chars
85
+ return self if chars.empty?
86
+
87
+ closest_node chars
88
+ end
89
+
90
+ # Returns all words that match a prefix of any length within chars.
91
+ # @param [String] chars the chars to base the prefix on.
92
+ # @return [Enumerator<String>] all the words that match a prefix given by
93
+ # chars.
94
+ # @yield [String] each word found.
95
+ def match_prefix chars
96
+ return enum_for :match_prefix, chars unless block_given?
97
+
98
+ yield as_word if terminal?
99
+ children_match_prefix chars do |word|
100
+ yield word
101
+ end
102
+ end
103
+
104
+ # Get {Node Node} corresponding to a given letter.
105
+ # @param [Symbol] letter the letter to search for in the node.
106
+ # @return [Node] the node corresponding to that letter.
107
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
108
+ # Hash#[]
109
+ def [] letter
110
+ children_tree[letter]
111
+ end
112
+
113
+ # Set the {Node Node} that corresponds to a given letter.
114
+ # @param [Symbol] letter the letter to insert or update in the node.
115
+ # @param [Node] node the {Node Node} to assign to that letter.
116
+ # @return [Node] the node corresponding to the inserted or
117
+ # updated letter.
118
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D-3D
119
+ # Hash#[]=
120
+ def []= letter, node
121
+ children_tree[letter] = node
122
+ end
123
+
124
+ # Check if a {Node Node}'s children tree contains a given
125
+ # letter.
126
+ # @param [Symbol] letter the letter to search for in the node.
127
+ # @return [Boolean] `true` if the letter is present, `false` otherwise
128
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-has_key-3F
129
+ # Hash#has_key?
130
+ def has_key? letter
131
+ children_tree.has_key? letter
132
+ end
133
+
134
+ # Delete a given letter and its corresponding {Node Node} from
135
+ # this {Node Node}'s children tree.
136
+ # @param [Symbol] letter the letter to delete from the node's children
137
+ # tree.
138
+ # @return [Node] the node corresponding to the deleted letter.
139
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-delete
140
+ # Hash#delete
141
+ def delete letter
142
+ children_tree.delete letter
143
+ end
144
+
145
+ private
146
+
147
+ attr_accessor :terminal
148
+ end
149
+ end
150
+ end
151
+ end