rambling-trie 1.0.2 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +5 -5
  2. data/Gemfile +6 -3
  3. data/Guardfile +3 -1
  4. data/README.md +30 -12
  5. data/Rakefile +8 -0
  6. data/lib/rambling-trie.rb +2 -0
  7. data/lib/rambling/trie.rb +48 -26
  8. data/lib/rambling/trie/comparable.rb +6 -3
  9. data/lib/rambling/trie/compressible.rb +16 -0
  10. data/lib/rambling/trie/compressor.rb +39 -24
  11. data/lib/rambling/trie/configuration.rb +3 -1
  12. data/lib/rambling/trie/configuration/properties.rb +18 -9
  13. data/lib/rambling/trie/configuration/provider_collection.rb +38 -17
  14. data/lib/rambling/trie/container.rb +123 -36
  15. data/lib/rambling/trie/enumerable.rb +6 -4
  16. data/lib/rambling/trie/inspectable.rb +2 -0
  17. data/lib/rambling/trie/invalid_operation.rb +3 -1
  18. data/lib/rambling/trie/nodes.rb +13 -0
  19. data/lib/rambling/trie/nodes/compressed.rb +98 -0
  20. data/lib/rambling/trie/nodes/missing.rb +12 -0
  21. data/lib/rambling/trie/nodes/node.rb +183 -0
  22. data/lib/rambling/trie/nodes/raw.rb +82 -0
  23. data/lib/rambling/trie/readers.rb +3 -1
  24. data/lib/rambling/trie/readers/plain_text.rb +3 -11
  25. data/lib/rambling/trie/serializers.rb +3 -1
  26. data/lib/rambling/trie/serializers/file.rb +2 -0
  27. data/lib/rambling/trie/serializers/marshal.rb +15 -5
  28. data/lib/rambling/trie/serializers/yaml.rb +21 -5
  29. data/lib/rambling/trie/serializers/zip.rb +15 -8
  30. data/lib/rambling/trie/stringifyable.rb +8 -2
  31. data/lib/rambling/trie/version.rb +3 -1
  32. data/rambling-trie.gemspec +21 -10
  33. data/spec/assets/test_words.es_DO.txt +1 -0
  34. data/spec/integration/rambling/trie_spec.rb +44 -35
  35. data/spec/lib/rambling/trie/comparable_spec.rb +8 -15
  36. data/spec/lib/rambling/trie/compressor_spec.rb +90 -13
  37. data/spec/lib/rambling/trie/configuration/properties_spec.rb +21 -13
  38. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +18 -34
  39. data/spec/lib/rambling/trie/container_spec.rb +183 -217
  40. data/spec/lib/rambling/trie/enumerable_spec.rb +14 -9
  41. data/spec/lib/rambling/trie/inspectable_spec.rb +36 -11
  42. data/spec/lib/rambling/trie/nodes/compressed_spec.rb +37 -0
  43. data/spec/lib/rambling/trie/nodes/node_spec.rb +9 -0
  44. data/spec/lib/rambling/trie/nodes/raw_spec.rb +179 -0
  45. data/spec/lib/rambling/trie/readers/plain_text_spec.rb +3 -1
  46. data/spec/lib/rambling/trie/serializers/file_spec.rb +6 -4
  47. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +5 -7
  48. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +5 -7
  49. data/spec/lib/rambling/trie/serializers/zip_spec.rb +18 -20
  50. data/spec/lib/rambling/trie/stringifyable_spec.rb +14 -11
  51. data/spec/lib/rambling/trie_spec.rb +18 -11
  52. data/spec/spec_helper.rb +10 -5
  53. data/spec/support/config.rb +10 -0
  54. data/spec/support/helpers/add_word.rb +20 -0
  55. data/spec/support/helpers/one_line_heredoc.rb +11 -0
  56. data/spec/support/shared_examples/a_compressible_trie.rb +40 -0
  57. data/spec/support/shared_examples/a_serializable_trie.rb +10 -6
  58. data/spec/support/shared_examples/a_serializer.rb +9 -1
  59. data/spec/support/shared_examples/a_trie_data_structure.rb +2 -0
  60. data/spec/support/shared_examples/a_trie_node.rb +127 -0
  61. data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +25 -72
  62. metadata +42 -31
  63. data/lib/rambling/trie/compressable.rb +0 -14
  64. data/lib/rambling/trie/compressed_node.rb +0 -120
  65. data/lib/rambling/trie/missing_node.rb +0 -8
  66. data/lib/rambling/trie/node.rb +0 -97
  67. data/lib/rambling/trie/raw_node.rb +0 -96
  68. data/spec/lib/rambling/trie/node_spec.rb +0 -86
  69. data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
  70. data/spec/support/shared_examples/a_compressable_trie.rb +0 -26
@@ -1,10 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Configuration
4
6
  # Collection of configurable providers.
5
7
  class ProviderCollection
6
- extend ::Forwardable
7
-
8
8
  # The name of this provider collection.
9
9
  # @return [String] the name of this provider collection.
10
10
  attr_reader :name
@@ -23,13 +23,6 @@ module Rambling
23
23
  # cannot be resolved in {ProviderCollection#resolve #resolve}.
24
24
  attr_reader :default
25
25
 
26
- delegate [
27
- :[],
28
- :[]=,
29
- :keys,
30
- :values,
31
- ] => :providers
32
-
33
26
  # Creates a new provider collection.
34
27
  # @param [String] name the name for this provider collection.
35
28
  # @param [Hash] providers the configured providers.
@@ -52,8 +45,9 @@ module Rambling
52
45
  end
53
46
 
54
47
  def default= provider
55
- if provider_not_in_list? provider
56
- raise ArgumentError, "default #{name} should be part of configured #{name}s"
48
+ unless contains? provider
49
+ raise ArgumentError,
50
+ "default #{name} should be part of configured #{name}s"
57
51
  end
58
52
 
59
53
  @default = provider
@@ -71,30 +65,57 @@ module Rambling
71
65
  # @return [Object] the provider corresponding to the file extension in
72
66
  # this provider collection. {#default} if not found.
73
67
  def resolve filepath
74
- providers[format filepath] || default
68
+ providers[file_format filepath] || default
75
69
  end
76
70
 
77
71
  # Resets the provider collection to the initial values.
78
72
  def reset
79
73
  providers.clear
80
- configured_providers.each { |k, v| providers[k] = v }
74
+ configured_providers.each { |k, v| self[k] = v }
81
75
  self.default = configured_default
82
76
  end
83
77
 
78
+ # Get all formats configured in this provider collection.
79
+ # @return [Array<Symbol>] the formats configured in this collection.
80
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-keys
81
+ # Hash#keys
82
+ def formats
83
+ providers.keys
84
+ end
85
+
86
+ # Get provider corresponding to a given format.
87
+ # @param [Symbol] format the format to search for in the collection.
88
+ # @return [Object] the provider corresponding to that format.
89
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
90
+ # Hash#[]
91
+ def [] format
92
+ providers[format]
93
+ end
94
+
84
95
  private
85
96
 
86
97
  attr_reader :configured_providers, :configured_default
87
98
 
88
- def format filepath
99
+ def []= format, instance
100
+ providers[format] = instance
101
+ end
102
+
103
+ def values
104
+ providers.values
105
+ end
106
+
107
+ def file_format filepath
89
108
  format = File.extname filepath
90
109
  format.slice! 0
91
110
  format.to_sym
92
111
  end
93
112
 
94
- def provider_not_in_list? provider
95
- (provider && providers.values.empty?) ||
96
- (providers.values.any? && !providers.values.include?(provider))
113
+ def contains? provider
114
+ provider.nil? ||
115
+ (providers.any? && provider_instances.include?(provider))
97
116
  end
117
+
118
+ alias_method :provider_instances, :values
98
119
  end
99
120
  end
100
121
  end
@@ -1,32 +1,17 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  # Wrapper on top of trie data structure.
4
6
  class Container
5
- extend ::Forwardable
6
7
  include ::Enumerable
7
8
 
8
- delegate [
9
- :[],
10
- :as_word,
11
- :children,
12
- :children_tree,
13
- :compressed?,
14
- :each,
15
- :to_a,
16
- :has_key?,
17
- :inspect,
18
- :letter,
19
- :parent,
20
- :size,
21
- :to_s
22
- ] => :root
23
-
24
9
  # The root node of this trie.
25
- # @return [Node] the root node of this trie.
10
+ # @return [Nodes::Node] the root node of this trie.
26
11
  attr_reader :root
27
12
 
28
13
  # Creates a new trie.
29
- # @param [Node] root the root node for the trie
14
+ # @param [Nodes::Node] root the root node for the trie
30
15
  # @param [Compressor] compressor responsible for compressing the trie
31
16
  # @yield [Container] the trie just created.
32
17
  def initialize root, compressor
@@ -36,32 +21,53 @@ module Rambling
36
21
  yield self if block_given?
37
22
  end
38
23
 
39
- # Adds a word to the trie, without altering the passed word.
24
+ # Adds a word to the trie.
40
25
  # @param [String] word the word to add the branch from.
41
- # @return [Node] the just added branch's root node.
26
+ # @return [Nodes::Node] the just added branch's root node.
42
27
  # @raise [InvalidOperation] if the trie is already compressed.
43
- # @see RawNode#add
44
- # @see CompressedNode#add
45
- # @note Avoids altering the contents of the word variable.
28
+ # @see Nodes::Raw#add
29
+ # @see Nodes::Compressed#add
46
30
  def add word
47
- root.add word.clone
31
+ root.add char_symbols word
32
+ end
33
+
34
+ # Adds all provided words to the trie.
35
+ # @param [Array<String>] words the words to add the branch from.
36
+ # @return [Array<Nodes::Node>] the collection of nodes added.
37
+ # @raise [InvalidOperation] if the trie is already compressed.
38
+ # @see Nodes::Raw#add
39
+ # @see Nodes::Compressed#add
40
+ def concat words
41
+ words.map { |word| add word }
48
42
  end
49
43
 
50
- # Compresses the existing tree using redundant node elimination. Marks
51
- # the trie as compressed.
44
+ # Compresses the existing trie using redundant node elimination. Marks
45
+ # the trie as compressed. Does nothing if the trie has already been
46
+ # compressed.
52
47
  # @return [Container] self
53
- # @note Only compresses tries that have not already been compressed.
48
+ # @note This method replaces the root {Nodes::Raw Raw} node with a
49
+ # {Nodes::Compressed Compressed} version of it.
54
50
  def compress!
55
- self.root = compressor.compress root unless root.compressed?
51
+ self.root = compress_root unless root.compressed?
56
52
  self
57
53
  end
58
54
 
55
+ # Compresses the existing trie using redundant node elimination. Returns
56
+ # a new trie with the compressed root.
57
+ # @return [Container] A new {Container} with the {Nodes::Compressed
58
+ # Compressed} root node or self if the trie has already been
59
+ # compressed.
60
+ def compress
61
+ return self if root.compressed?
62
+ Rambling::Trie::Container.new compress_root, compressor
63
+ end
64
+
59
65
  # Checks if a path for a word or partial word exists in the trie.
60
66
  # @param [String] word the word or partial word to look for in the trie.
61
67
  # @return [Boolean] `true` if the word or partial word is found, `false`
62
68
  # otherwise.
63
- # @see RawNode#partial_word?
64
- # @see CompressedNode#partial_word?
69
+ # @see Nodes::Raw#partial_word?
70
+ # @see Nodes::Compressed#partial_word?
65
71
  def partial_word? word = ''
66
72
  root.partial_word? word.chars
67
73
  end
@@ -70,8 +76,8 @@ module Rambling
70
76
  # @param [String] word the word to look for in the trie.
71
77
  # @return [Boolean] `true` only if the word is found and the last
72
78
  # character corresponds to a terminal node, `false` otherwise.
73
- # @see RawNode#word?
74
- # @see CompressedNode#word?
79
+ # @see Nodes::Raw#word?
80
+ # @see Nodes::Compressed#word?
75
81
  def word? word = ''
76
82
  root.word? word.chars
77
83
  end
@@ -80,8 +86,8 @@ module Rambling
80
86
  # @param [String] word the word to look for in the trie.
81
87
  # @return [Array<String>] all the words contained in the trie that start
82
88
  # with the specified characters.
83
- # @see RawNode#scan
84
- # @see CompressedNode#scan
89
+ # @see Nodes::Raw#scan
90
+ # @see Nodes::Compressed#scan
85
91
  def scan word = ''
86
92
  root.scan(word.chars).to_a
87
93
  end
@@ -92,7 +98,7 @@ module Rambling
92
98
  # @return [Enumerator<String>] all the words in the given string that
93
99
  # match a word in the trie.
94
100
  # @yield [String] each word found in phrase.
95
- # @see Node#words_within
101
+ # @see Nodes::Node#words_within
96
102
  def words_within phrase
97
103
  words_within_root(phrase).to_a
98
104
  end
@@ -113,10 +119,81 @@ module Rambling
113
119
  root == other.root
114
120
  end
115
121
 
122
+ # Iterates over the words contained in the trie.
123
+ # @yield [String] the words contained in this trie node.
124
+ def each
125
+ return enum_for :each unless block_given?
126
+
127
+ root.each do |word|
128
+ yield word
129
+ end
130
+ end
131
+
132
+ # @return [String] a string representation of the container.
133
+ def inspect
134
+ "#<#{self.class.name} root: #{root.inspect}>"
135
+ end
136
+
137
+ # Get {Nodes::Node Node} corresponding to a given letter.
138
+ # @param [Symbol] letter the letter to search for in the root node.
139
+ # @return [Nodes::Node] the node corresponding to that letter.
140
+ # @see Nodes::Node#[]
141
+ def [] letter
142
+ root[letter]
143
+ end
144
+
145
+ # Root node's child nodes.
146
+ # @return [Array<Nodes::Node>] the array of children nodes contained in
147
+ # the root node.
148
+ # @see Nodes::Node#children
149
+ def children
150
+ root.children
151
+ end
152
+
153
+ # Root node's children tree.
154
+ # @return [Hash<Symbol, Nodes::Node>] the children nodes contained in
155
+ # the root node, keyed by letter.
156
+ # @see Nodes::Node#children_tree
157
+ def children_tree
158
+ root.children_tree
159
+ end
160
+
161
+ # Indicates if the root {Nodes::Node Node} has already been
162
+ # compressed or not.
163
+ # @return [Boolean] `true` if the root node is compressed, `false`
164
+ # otherwise.
165
+ def compressed?
166
+ root.compressed?
167
+ end
168
+
169
+ # Array of words contained in the root {Nodes::Node Node}.
170
+ # @return [Array<String>] all words contained in this trie.
171
+ # @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-to_a
172
+ # Enumerable#to_a
173
+ def to_a
174
+ root.to_a
175
+ end
176
+
177
+ # Check if a letter is part of the root {Nodes::Node}'s children tree.
178
+ # @param [Symbol] letter the letter to search for in the root node.
179
+ # @return [Boolean] whether the letter is contained or not.
180
+ # @see Nodes::Node#key?
181
+ def key? letter
182
+ root.key? letter
183
+ end
184
+
185
+ # Size of the Root {Nodes::Node Node}'s children tree.
186
+ # @return [Integer] the number of letters in the root node.
187
+ def size
188
+ root.size
189
+ end
190
+
116
191
  alias_method :include?, :word?
117
192
  alias_method :match?, :partial_word?
118
193
  alias_method :words, :scan
119
194
  alias_method :<<, :add
195
+ alias_method :has_key?, :key?
196
+ alias_method :has_letter?, :key?
120
197
 
121
198
  private
122
199
 
@@ -134,6 +211,16 @@ module Rambling
134
211
  end
135
212
  end
136
213
  end
214
+
215
+ def compress_root
216
+ compressor.compress root
217
+ end
218
+
219
+ def char_symbols word
220
+ symbols = []
221
+ word.reverse.each_char { |c| symbols << c.to_sym }
222
+ symbols
223
+ end
137
224
  end
138
225
  end
139
226
  end
@@ -1,12 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  # Provides enumerable behavior to the trie data structure.
4
6
  module Enumerable
5
7
  include ::Enumerable
6
8
 
7
- # Returns number of words contained in the trie. See
8
- # {https://ruby-doc.org/core-2.4.0/Enumerable.html#method-i-count
9
- # Enumerable}
9
+ # Returns the number of words contained in the trie.
10
+ # @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-count
11
+ # Enumerable#count
10
12
  alias_method :size, :count
11
13
 
12
14
  # Iterates over the words contained in the trie.
@@ -16,7 +18,7 @@ module Rambling
16
18
 
17
19
  yield as_word if terminal?
18
20
 
19
- children.each do |child|
21
+ children_tree.each_value do |child|
20
22
  child.each do |word|
21
23
  yield word
22
24
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  # Provides pretty printing behavior for the trie data structure.
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  # Raised when trying to execute an invalid operation on a trie data
4
6
  # structure.
5
- class InvalidOperation < Exception
7
+ class InvalidOperation < RuntimeError
6
8
  # Creates a new {InvalidOperation InvalidOperation} exception.
7
9
  # @param [String, nil] message the exception message.
8
10
  def initialize message = nil
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ %w(node missing compressed raw).each do |file|
4
+ require File.join('rambling', 'trie', 'nodes', file)
5
+ end
6
+
7
+ module Rambling
8
+ module Trie
9
+ # Namespace for all nodes.
10
+ module Nodes
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rambling
4
+ module Trie
5
+ module Nodes
6
+ # A representation of a node in a compressed trie data structure.
7
+ class Compressed < Rambling::Trie::Nodes::Node
8
+ # Always raises {Rambling::Trie::InvalidOperation InvalidOperation} when
9
+ # trying to add a word to the current compressed trie node
10
+ # @param [String] _ the word to add to the trie.
11
+ # @raise [InvalidOperation] if the trie is already compressed.
12
+ # @return [nil] this never returns as it always raises an exception.
13
+ def add _
14
+ raise Rambling::Trie::InvalidOperation,
15
+ 'Cannot add word to compressed trie'
16
+ end
17
+
18
+ # Always return `true` for a compressed node.
19
+ # @return [Boolean] always `true` for a compressed node.
20
+ def compressed?
21
+ true
22
+ end
23
+
24
+ private
25
+
26
+ def partial_word_chars? chars
27
+ child = children_tree[chars.first.to_sym]
28
+ return false unless child
29
+
30
+ child_letter = child.letter.to_s
31
+
32
+ if chars.size >= child_letter.size
33
+ letter = chars.slice!(0, child_letter.size).join
34
+ return child.partial_word? chars if child_letter == letter
35
+ end
36
+
37
+ letter = chars.join
38
+ child_letter = child_letter.slice 0, letter.size
39
+ child_letter == letter
40
+ end
41
+
42
+ def word_chars? chars
43
+ letter = chars.slice! 0
44
+ letter_sym = letter.to_sym
45
+
46
+ child = children_tree[letter_sym]
47
+ return false unless child
48
+
49
+ loop do
50
+ return child.word? chars if letter_sym == child.letter
51
+
52
+ break if chars.empty?
53
+
54
+ letter << chars.slice!(0)
55
+ letter_sym = letter.to_sym
56
+ end
57
+
58
+ false
59
+ end
60
+
61
+ def closest_node chars
62
+ child = children_tree[chars.first.to_sym]
63
+ return missing unless child
64
+
65
+ child_letter = child.letter.to_s
66
+
67
+ if chars.size >= child_letter.size
68
+ letter = chars.slice!(0, child_letter.size).join
69
+ return child.scan chars if child_letter == letter
70
+ end
71
+
72
+ letter = chars.join
73
+ child_letter = child_letter.slice 0, letter.size
74
+
75
+ child_letter == letter ? child : missing
76
+ end
77
+
78
+ def children_match_prefix chars
79
+ return enum_for :children_match_prefix, chars unless block_given?
80
+
81
+ return if chars.empty?
82
+
83
+ child = children_tree[chars.first.to_sym]
84
+ return unless child
85
+
86
+ child_letter = child.letter.to_s
87
+ letter = chars.slice!(0, child_letter.size).join
88
+
89
+ return unless child_letter == letter
90
+
91
+ child.match_prefix chars do |word|
92
+ yield word
93
+ end
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end