rambling-trie 1.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +5 -5
  2. data/Gemfile +6 -3
  3. data/Guardfile +3 -1
  4. data/README.md +30 -12
  5. data/Rakefile +8 -0
  6. data/lib/rambling-trie.rb +2 -0
  7. data/lib/rambling/trie.rb +48 -26
  8. data/lib/rambling/trie/comparable.rb +6 -3
  9. data/lib/rambling/trie/compressible.rb +16 -0
  10. data/lib/rambling/trie/compressor.rb +39 -24
  11. data/lib/rambling/trie/configuration.rb +3 -1
  12. data/lib/rambling/trie/configuration/properties.rb +18 -9
  13. data/lib/rambling/trie/configuration/provider_collection.rb +38 -17
  14. data/lib/rambling/trie/container.rb +123 -36
  15. data/lib/rambling/trie/enumerable.rb +6 -4
  16. data/lib/rambling/trie/inspectable.rb +2 -0
  17. data/lib/rambling/trie/invalid_operation.rb +3 -1
  18. data/lib/rambling/trie/nodes.rb +13 -0
  19. data/lib/rambling/trie/nodes/compressed.rb +98 -0
  20. data/lib/rambling/trie/nodes/missing.rb +12 -0
  21. data/lib/rambling/trie/nodes/node.rb +183 -0
  22. data/lib/rambling/trie/nodes/raw.rb +82 -0
  23. data/lib/rambling/trie/readers.rb +3 -1
  24. data/lib/rambling/trie/readers/plain_text.rb +3 -11
  25. data/lib/rambling/trie/serializers.rb +3 -1
  26. data/lib/rambling/trie/serializers/file.rb +2 -0
  27. data/lib/rambling/trie/serializers/marshal.rb +15 -5
  28. data/lib/rambling/trie/serializers/yaml.rb +21 -5
  29. data/lib/rambling/trie/serializers/zip.rb +15 -8
  30. data/lib/rambling/trie/stringifyable.rb +8 -2
  31. data/lib/rambling/trie/version.rb +3 -1
  32. data/rambling-trie.gemspec +21 -10
  33. data/spec/assets/test_words.es_DO.txt +1 -0
  34. data/spec/integration/rambling/trie_spec.rb +44 -35
  35. data/spec/lib/rambling/trie/comparable_spec.rb +8 -15
  36. data/spec/lib/rambling/trie/compressor_spec.rb +90 -13
  37. data/spec/lib/rambling/trie/configuration/properties_spec.rb +21 -13
  38. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +18 -34
  39. data/spec/lib/rambling/trie/container_spec.rb +183 -217
  40. data/spec/lib/rambling/trie/enumerable_spec.rb +14 -9
  41. data/spec/lib/rambling/trie/inspectable_spec.rb +36 -11
  42. data/spec/lib/rambling/trie/nodes/compressed_spec.rb +37 -0
  43. data/spec/lib/rambling/trie/nodes/node_spec.rb +9 -0
  44. data/spec/lib/rambling/trie/nodes/raw_spec.rb +179 -0
  45. data/spec/lib/rambling/trie/readers/plain_text_spec.rb +3 -1
  46. data/spec/lib/rambling/trie/serializers/file_spec.rb +6 -4
  47. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +5 -7
  48. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +5 -7
  49. data/spec/lib/rambling/trie/serializers/zip_spec.rb +18 -20
  50. data/spec/lib/rambling/trie/stringifyable_spec.rb +14 -11
  51. data/spec/lib/rambling/trie_spec.rb +18 -11
  52. data/spec/spec_helper.rb +10 -5
  53. data/spec/support/config.rb +10 -0
  54. data/spec/support/helpers/add_word.rb +20 -0
  55. data/spec/support/helpers/one_line_heredoc.rb +11 -0
  56. data/spec/support/shared_examples/a_compressible_trie.rb +40 -0
  57. data/spec/support/shared_examples/a_serializable_trie.rb +10 -6
  58. data/spec/support/shared_examples/a_serializer.rb +9 -1
  59. data/spec/support/shared_examples/a_trie_data_structure.rb +2 -0
  60. data/spec/support/shared_examples/a_trie_node.rb +127 -0
  61. data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +25 -72
  62. metadata +42 -31
  63. data/lib/rambling/trie/compressable.rb +0 -14
  64. data/lib/rambling/trie/compressed_node.rb +0 -120
  65. data/lib/rambling/trie/missing_node.rb +0 -8
  66. data/lib/rambling/trie/node.rb +0 -97
  67. data/lib/rambling/trie/raw_node.rb +0 -96
  68. data/spec/lib/rambling/trie/node_spec.rb +0 -86
  69. data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
  70. data/spec/support/shared_examples/a_compressable_trie.rb +0 -26
@@ -1,10 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Configuration
4
6
  # Collection of configurable providers.
5
7
  class ProviderCollection
6
- extend ::Forwardable
7
-
8
8
  # The name of this provider collection.
9
9
  # @return [String] the name of this provider collection.
10
10
  attr_reader :name
@@ -23,13 +23,6 @@ module Rambling
23
23
  # cannot be resolved in {ProviderCollection#resolve #resolve}.
24
24
  attr_reader :default
25
25
 
26
- delegate [
27
- :[],
28
- :[]=,
29
- :keys,
30
- :values,
31
- ] => :providers
32
-
33
26
  # Creates a new provider collection.
34
27
  # @param [String] name the name for this provider collection.
35
28
  # @param [Hash] providers the configured providers.
@@ -52,8 +45,9 @@ module Rambling
52
45
  end
53
46
 
54
47
  def default= provider
55
- if provider_not_in_list? provider
56
- raise ArgumentError, "default #{name} should be part of configured #{name}s"
48
+ unless contains? provider
49
+ raise ArgumentError,
50
+ "default #{name} should be part of configured #{name}s"
57
51
  end
58
52
 
59
53
  @default = provider
@@ -71,30 +65,57 @@ module Rambling
71
65
  # @return [Object] the provider corresponding to the file extension in
72
66
  # this provider collection. {#default} if not found.
73
67
  def resolve filepath
74
- providers[format filepath] || default
68
+ providers[file_format filepath] || default
75
69
  end
76
70
 
77
71
  # Resets the provider collection to the initial values.
78
72
  def reset
79
73
  providers.clear
80
- configured_providers.each { |k, v| providers[k] = v }
74
+ configured_providers.each { |k, v| self[k] = v }
81
75
  self.default = configured_default
82
76
  end
83
77
 
78
+ # Get the formats configured in this provider collection.
79
+ # @return [Array<Symbol>] the formats configured in this provider collection.
80
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-keys
81
+ # Hash#keys
82
+ def formats
83
+ providers.keys
84
+ end
85
+
86
+ # Get provider corresponding to a given format.
87
+ # @param [Symbol] format the format to search for in the collection.
88
+ # @return [Object] the provider corresponding to that format.
89
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
90
+ # Hash#[]
91
+ def [] format
92
+ providers[format]
93
+ end
94
+
84
95
  private
85
96
 
86
97
  attr_reader :configured_providers, :configured_default
87
98
 
88
- def format filepath
99
+ def []= format, instance
100
+ providers[format] = instance
101
+ end
102
+
103
+ def values
104
+ providers.values
105
+ end
106
+
107
+ def file_format filepath
89
108
  format = File.extname filepath
90
109
  format.slice! 0
91
110
  format.to_sym
92
111
  end
93
112
 
94
- def provider_not_in_list? provider
95
- (provider && providers.values.empty?) ||
96
- (providers.values.any? && !providers.values.include?(provider))
113
+ def contains? provider
114
+ provider.nil? ||
115
+ (providers.any? && provider_instances.include?(provider))
97
116
  end
117
+
118
+ alias_method :provider_instances, :values
98
119
  end
99
120
  end
100
121
  end
@@ -1,32 +1,17 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  # Wrapper on top of trie data structure.
4
6
  class Container
5
- extend ::Forwardable
6
7
  include ::Enumerable
7
8
 
8
- delegate [
9
- :[],
10
- :as_word,
11
- :children,
12
- :children_tree,
13
- :compressed?,
14
- :each,
15
- :to_a,
16
- :has_key?,
17
- :inspect,
18
- :letter,
19
- :parent,
20
- :size,
21
- :to_s
22
- ] => :root
23
-
24
9
  # The root node of this trie.
25
- # @return [Node] the root node of this trie.
10
+ # @return [Nodes::Node] the root node of this trie.
26
11
  attr_reader :root
27
12
 
28
13
  # Creates a new trie.
29
- # @param [Node] root the root node for the trie
14
+ # @param [Nodes::Node] root the root node for the trie
30
15
  # @param [Compressor] compressor responsible for compressing the trie
31
16
  # @yield [Container] the trie just created.
32
17
  def initialize root, compressor
@@ -36,32 +21,53 @@ module Rambling
36
21
  yield self if block_given?
37
22
  end
38
23
 
39
- # Adds a word to the trie, without altering the passed word.
24
+ # Adds a word to the trie.
40
25
  # @param [String] word the word to add the branch from.
41
- # @return [Node] the just added branch's root node.
26
+ # @return [Nodes::Node] the just added branch's root node.
42
27
  # @raise [InvalidOperation] if the trie is already compressed.
43
- # @see RawNode#add
44
- # @see CompressedNode#add
45
- # @note Avoids altering the contents of the word variable.
28
+ # @see Nodes::Raw#add
29
+ # @see Nodes::Compressed#add
46
30
  def add word
47
- root.add word.clone
31
+ root.add char_symbols word
32
+ end
33
+
34
+ # Adds all provided words to the trie.
35
+ # @param [Array<String>] words the words to add the branch from.
36
+ # @return [Array<Nodes::Node>] the collection of nodes added.
37
+ # @raise [InvalidOperation] if the trie is already compressed.
38
+ # @see Nodes::Raw#add
39
+ # @see Nodes::Compressed#add
40
+ def concat words
41
+ words.map { |word| add word }
48
42
  end
49
43
 
50
- # Compresses the existing tree using redundant node elimination. Marks
51
- # the trie as compressed.
44
+ # Compresses the existing trie using redundant node elimination. Marks
45
+ # the trie as compressed. Does nothing if the trie has already been
46
+ # compressed.
52
47
  # @return [Container] self
53
- # @note Only compresses tries that have not already been compressed.
48
+ # @note This method replaces the root {Nodes::Raw Raw} node with a
49
+ # {Nodes::Compressed Compressed} version of it.
54
50
  def compress!
55
- self.root = compressor.compress root unless root.compressed?
51
+ self.root = compress_root unless root.compressed?
56
52
  self
57
53
  end
58
54
 
55
+ # Compresses the existing trie using redundant node elimination. Returns
56
+ # a new trie with the compressed root.
57
+ # @return [Container] A new {Container} with the {Nodes::Compressed
58
+ # Compressed} root node or self if the trie has already been
59
+ # compressed.
60
+ def compress
61
+ return self if root.compressed?
62
+ Rambling::Trie::Container.new compress_root, compressor
63
+ end
64
+
59
65
  # Checks if a path for a word or partial word exists in the trie.
60
66
  # @param [String] word the word or partial word to look for in the trie.
61
67
  # @return [Boolean] `true` if the word or partial word is found, `false`
62
68
  # otherwise.
63
- # @see RawNode#partial_word?
64
- # @see CompressedNode#partial_word?
69
+ # @see Nodes::Raw#partial_word?
70
+ # @see Nodes::Compressed#partial_word?
65
71
  def partial_word? word = ''
66
72
  root.partial_word? word.chars
67
73
  end
@@ -70,8 +76,8 @@ module Rambling
70
76
  # @param [String] word the word to look for in the trie.
71
77
  # @return [Boolean] `true` only if the word is found and the last
72
78
  # character corresponds to a terminal node, `false` otherwise.
73
- # @see RawNode#word?
74
- # @see CompressedNode#word?
79
+ # @see Nodes::Raw#word?
80
+ # @see Nodes::Compressed#word?
75
81
  def word? word = ''
76
82
  root.word? word.chars
77
83
  end
@@ -80,8 +86,8 @@ module Rambling
80
86
  # @param [String] word the word to look for in the trie.
81
87
  # @return [Array<String>] all the words contained in the trie that start
82
88
  # with the specified characters.
83
- # @see RawNode#scan
84
- # @see CompressedNode#scan
89
+ # @see Nodes::Raw#scan
90
+ # @see Nodes::Compressed#scan
85
91
  def scan word = ''
86
92
  root.scan(word.chars).to_a
87
93
  end
@@ -92,7 +98,7 @@ module Rambling
92
98
  # @return [Enumerator<String>] all the words in the given string that
93
99
  # match a word in the trie.
94
100
  # @yield [String] each word found in phrase.
95
- # @see Node#words_within
101
+ # @see Nodes::Node#words_within
96
102
  def words_within phrase
97
103
  words_within_root(phrase).to_a
98
104
  end
@@ -113,10 +119,81 @@ module Rambling
113
119
  root == other.root
114
120
  end
115
121
 
122
+ # Iterates over the words contained in the trie.
123
+ # @yield [String] the words contained in this trie node.
124
+ def each
125
+ return enum_for :each unless block_given?
126
+
127
+ root.each do |word|
128
+ yield word
129
+ end
130
+ end
131
+
132
+ # @return [String] a string representation of the container.
133
+ def inspect
134
+ "#<#{self.class.name} root: #{root.inspect}>"
135
+ end
136
+
137
+ # Get {Nodes::Node Node} corresponding to a given letter.
138
+ # @param [Symbol] letter the letter to search for in the root node.
139
+ # @return [Nodes::Node] the node corresponding to that letter.
140
+ # @see Nodes::Node#[]
141
+ def [] letter
142
+ root[letter]
143
+ end
144
+
145
+ # Root node's child nodes.
146
+ # @return [Array<Nodes::Node>] the array of children nodes contained in
147
+ # the root node.
148
+ # @see Nodes::Node#children
149
+ def children
150
+ root.children
151
+ end
152
+
153
+ # Root node's children tree.
154
+ # @return [Hash<Symbol, Nodes::Node>] the children tree of the root node,
155
+ # keyed by letter.
156
+ # @see Nodes::Node#children_tree
157
+ def children_tree
158
+ root.children_tree
159
+ end
160
+
161
+ # Indicates if the root {Nodes::Node Node} is
162
+ # compressed or not.
163
+ # @return [Boolean] `true` if the root node is compressed, `false`
164
+ # otherwise.
165
+ def compressed?
166
+ root.compressed?
167
+ end
168
+
169
+ # Array of words contained in the root {Nodes::Node Node}.
170
+ # @return [Array<String>] all words contained in this trie.
171
+ # @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-to_a
172
+ # Enumerable#to_a
173
+ def to_a
174
+ root.to_a
175
+ end
176
+
177
+ # Check if a letter is part of the root {Nodes::Node}'s children tree.
178
+ # @param [Symbol] letter the letter to search for in the root node.
179
+ # @return [Boolean] whether the letter is contained or not.
180
+ # @see Nodes::Node#key?
181
+ def key? letter
182
+ root.key? letter
183
+ end
184
+
185
+ # Size of the Root {Nodes::Node Node}'s children tree.
186
+ # @return [Integer] the number of letters in the root node.
187
+ def size
188
+ root.size
189
+ end
190
+
116
191
  alias_method :include?, :word?
117
192
  alias_method :match?, :partial_word?
118
193
  alias_method :words, :scan
119
194
  alias_method :<<, :add
195
+ alias_method :has_key?, :key?
196
+ alias_method :has_letter?, :key?
120
197
 
121
198
  private
122
199
 
@@ -134,6 +211,16 @@ module Rambling
134
211
  end
135
212
  end
136
213
  end
214
+
215
+ def compress_root
216
+ compressor.compress root
217
+ end
218
+
219
+ def char_symbols word
220
+ symbols = []
221
+ word.reverse.each_char { |c| symbols << c.to_sym }
222
+ symbols
223
+ end
137
224
  end
138
225
  end
139
226
  end
@@ -1,12 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  # Provides enumerable behavior to the trie data structure.
4
6
  module Enumerable
5
7
  include ::Enumerable
6
8
 
7
- # Returns number of words contained in the trie. See
8
- # {https://ruby-doc.org/core-2.4.0/Enumerable.html#method-i-count
9
- # Enumerable}
9
+ # Returns number of words contained in the trie
10
+ # @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-count
11
+ # Enumerable#count
10
12
  alias_method :size, :count
11
13
 
12
14
  # Iterates over the words contained in the trie.
@@ -16,7 +18,7 @@ module Rambling
16
18
 
17
19
  yield as_word if terminal?
18
20
 
19
- children.each do |child|
21
+ children_tree.each_value do |child|
20
22
  child.each do |word|
21
23
  yield word
22
24
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  # Provides pretty printing behavior for the trie data structure.
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  # Raised when trying to execute an invalid operation on a trie data
4
6
  # structure.
5
- class InvalidOperation < Exception
7
+ class InvalidOperation < RuntimeError
6
8
  # Creates a new {InvalidOperation InvalidOperation} exception.
7
9
  # @param [String, nil] message the exception message.
8
10
  def initialize message = nil
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ %w(node missing compressed raw).each do |file|
4
+ require File.join('rambling', 'trie', 'nodes', file)
5
+ end
6
+
7
+ module Rambling
8
+ module Trie
9
+ # Namespace for all nodes.
10
+ module Nodes
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rambling
4
+ module Trie
5
+ module Nodes
6
+ # A representation of a node in a compressed trie data structure.
7
+ class Compressed < Rambling::Trie::Nodes::Node
8
+ # Always raises {Rambling::Trie::InvalidOperation InvalidOperation} when
9
+ # trying to add a word to the current compressed trie node
10
+ # @param [String] _ the word to add to the trie.
11
+ # @raise [InvalidOperation] if the trie is already compressed.
12
+ # @return [nil] this never returns as it always raises an exception.
13
+ def add _
14
+ raise Rambling::Trie::InvalidOperation,
15
+ 'Cannot add word to compressed trie'
16
+ end
17
+
18
+ # Always return `true` for a compressed node.
19
+ # @return [Boolean] always `true` for a compressed node.
20
+ def compressed?
21
+ true
22
+ end
23
+
24
+ private
25
+
26
+ def partial_word_chars? chars
27
+ child = children_tree[chars.first.to_sym]
28
+ return false unless child
29
+
30
+ child_letter = child.letter.to_s
31
+
32
+ if chars.size >= child_letter.size
33
+ letter = chars.slice!(0, child_letter.size).join
34
+ return child.partial_word? chars if child_letter == letter
35
+ end
36
+
37
+ letter = chars.join
38
+ child_letter = child_letter.slice 0, letter.size
39
+ child_letter == letter
40
+ end
41
+
42
+ def word_chars? chars
43
+ letter = chars.slice! 0
44
+ letter_sym = letter.to_sym
45
+
46
+ child = children_tree[letter_sym]
47
+ return false unless child
48
+
49
+ loop do
50
+ return child.word? chars if letter_sym == child.letter
51
+
52
+ break if chars.empty?
53
+
54
+ letter << chars.slice!(0)
55
+ letter_sym = letter.to_sym
56
+ end
57
+
58
+ false
59
+ end
60
+
61
+ def closest_node chars
62
+ child = children_tree[chars.first.to_sym]
63
+ return missing unless child
64
+
65
+ child_letter = child.letter.to_s
66
+
67
+ if chars.size >= child_letter.size
68
+ letter = chars.slice!(0, child_letter.size).join
69
+ return child.scan chars if child_letter == letter
70
+ end
71
+
72
+ letter = chars.join
73
+ child_letter = child_letter.slice 0, letter.size
74
+
75
+ child_letter == letter ? child : missing
76
+ end
77
+
78
+ def children_match_prefix chars
79
+ return enum_for :children_match_prefix, chars unless block_given?
80
+
81
+ return if chars.empty?
82
+
83
+ child = children_tree[chars.first.to_sym]
84
+ return unless child
85
+
86
+ child_letter = child.letter.to_s
87
+ letter = chars.slice!(0, child_letter.size).join
88
+
89
+ return unless child_letter == letter
90
+
91
+ child.match_prefix chars do |word|
92
+ yield word
93
+ end
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end