rambling-trie 1.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +5 -5
  2. data/Gemfile +6 -3
  3. data/Guardfile +3 -1
  4. data/README.md +30 -12
  5. data/Rakefile +8 -0
  6. data/lib/rambling-trie.rb +2 -0
  7. data/lib/rambling/trie.rb +48 -26
  8. data/lib/rambling/trie/comparable.rb +6 -3
  9. data/lib/rambling/trie/compressible.rb +16 -0
  10. data/lib/rambling/trie/compressor.rb +39 -24
  11. data/lib/rambling/trie/configuration.rb +3 -1
  12. data/lib/rambling/trie/configuration/properties.rb +18 -9
  13. data/lib/rambling/trie/configuration/provider_collection.rb +38 -17
  14. data/lib/rambling/trie/container.rb +123 -36
  15. data/lib/rambling/trie/enumerable.rb +6 -4
  16. data/lib/rambling/trie/inspectable.rb +2 -0
  17. data/lib/rambling/trie/invalid_operation.rb +3 -1
  18. data/lib/rambling/trie/nodes.rb +13 -0
  19. data/lib/rambling/trie/nodes/compressed.rb +98 -0
  20. data/lib/rambling/trie/nodes/missing.rb +12 -0
  21. data/lib/rambling/trie/nodes/node.rb +183 -0
  22. data/lib/rambling/trie/nodes/raw.rb +82 -0
  23. data/lib/rambling/trie/readers.rb +3 -1
  24. data/lib/rambling/trie/readers/plain_text.rb +3 -11
  25. data/lib/rambling/trie/serializers.rb +3 -1
  26. data/lib/rambling/trie/serializers/file.rb +2 -0
  27. data/lib/rambling/trie/serializers/marshal.rb +15 -5
  28. data/lib/rambling/trie/serializers/yaml.rb +21 -5
  29. data/lib/rambling/trie/serializers/zip.rb +15 -8
  30. data/lib/rambling/trie/stringifyable.rb +8 -2
  31. data/lib/rambling/trie/version.rb +3 -1
  32. data/rambling-trie.gemspec +21 -10
  33. data/spec/assets/test_words.es_DO.txt +1 -0
  34. data/spec/integration/rambling/trie_spec.rb +44 -35
  35. data/spec/lib/rambling/trie/comparable_spec.rb +8 -15
  36. data/spec/lib/rambling/trie/compressor_spec.rb +90 -13
  37. data/spec/lib/rambling/trie/configuration/properties_spec.rb +21 -13
  38. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +18 -34
  39. data/spec/lib/rambling/trie/container_spec.rb +183 -217
  40. data/spec/lib/rambling/trie/enumerable_spec.rb +14 -9
  41. data/spec/lib/rambling/trie/inspectable_spec.rb +36 -11
  42. data/spec/lib/rambling/trie/nodes/compressed_spec.rb +37 -0
  43. data/spec/lib/rambling/trie/nodes/node_spec.rb +9 -0
  44. data/spec/lib/rambling/trie/nodes/raw_spec.rb +179 -0
  45. data/spec/lib/rambling/trie/readers/plain_text_spec.rb +3 -1
  46. data/spec/lib/rambling/trie/serializers/file_spec.rb +6 -4
  47. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +5 -7
  48. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +5 -7
  49. data/spec/lib/rambling/trie/serializers/zip_spec.rb +18 -20
  50. data/spec/lib/rambling/trie/stringifyable_spec.rb +14 -11
  51. data/spec/lib/rambling/trie_spec.rb +18 -11
  52. data/spec/spec_helper.rb +10 -5
  53. data/spec/support/config.rb +10 -0
  54. data/spec/support/helpers/add_word.rb +20 -0
  55. data/spec/support/helpers/one_line_heredoc.rb +11 -0
  56. data/spec/support/shared_examples/a_compressible_trie.rb +40 -0
  57. data/spec/support/shared_examples/a_serializable_trie.rb +10 -6
  58. data/spec/support/shared_examples/a_serializer.rb +9 -1
  59. data/spec/support/shared_examples/a_trie_data_structure.rb +2 -0
  60. data/spec/support/shared_examples/a_trie_node.rb +127 -0
  61. data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +25 -72
  62. metadata +42 -31
  63. data/lib/rambling/trie/compressable.rb +0 -14
  64. data/lib/rambling/trie/compressed_node.rb +0 -120
  65. data/lib/rambling/trie/missing_node.rb +0 -8
  66. data/lib/rambling/trie/node.rb +0 -97
  67. data/lib/rambling/trie/raw_node.rb +0 -96
  68. data/spec/lib/rambling/trie/node_spec.rb +0 -86
  69. data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
  70. data/spec/support/shared_examples/a_compressable_trie.rb +0 -26
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rambling
4
+ module Trie
5
+ module Nodes
6
+ # A representation of a missing node in the trie data structure. Returned
7
+ # when a node is not found.
8
+ class Missing < Rambling::Trie::Nodes::Node
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,183 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rambling
4
+ module Trie
5
+ module Nodes
6
+ # A representation of a node in the trie data structure.
7
+ class Node
8
+ include Rambling::Trie::Compressible
9
+ include Rambling::Trie::Enumerable
10
+ include Rambling::Trie::Comparable
11
+ include Rambling::Trie::Stringifyable
12
+ include Rambling::Trie::Inspectable
13
+
14
+ # @overload letter
15
+ # Letter(s) corresponding to the current node.
16
+ # @overload letter=(letter)
17
+ # Sets the letter(s) corresponding to the current node. Ensures the
18
+ # {Node#letter #letter} in the {Node#parent #parent}'s
19
+ # {Node#children_tree #children_tree} is updated.
20
+ # @param [String, Symbol, nil] letter the letter value.
21
+ # @return [Symbol, nil] the corresponding letter(s).
22
+ attr_reader :letter
23
+
24
+ # Child nodes tree.
25
+ # @return [Hash] the children_tree hash, consisting of `:letter =>
26
+ # node`.
27
+ attr_accessor :children_tree
28
+
29
+ # Parent node.
30
+ # @return [Node, nil] the parent of the current node.
31
+ attr_accessor :parent
32
+
33
+ # Creates a new node.
34
+ # @param [Symbol, nil] letter the Node's letter value
35
+ # @param [Node, nil] parent the parent of the current node.
36
+ def initialize letter = nil, parent = nil, children_tree = {}
37
+ @letter = letter
38
+ @parent = parent
39
+ @children_tree = children_tree
40
+ end
41
+
42
+ # Child nodes.
43
+ # @return [Array<Node>] the array of children nodes contained
44
+ # in the current node.
45
+ def children
46
+ children_tree.values
47
+ end
48
+
49
+ # First child node.
50
+ # @return [Node, nil] the first child contained in the current node.
51
+ def first_child
52
+ return if children_tree.empty?
53
+
54
+ children_tree.each_value do |child|
55
+ return child
56
+ end
57
+ end
58
+
59
+ # Indicates if the current node is the root node.
60
+ # @return [Boolean] `true` if the node does not have a parent, `false`
61
+ # otherwise.
62
+ def root?
63
+ !parent
64
+ end
65
+
66
+ # Indicates if a {Node Node} is terminal or not.
67
+ # @return [Boolean] `true` for terminal nodes, `false` otherwise.
68
+ def terminal?
69
+ !!terminal
70
+ end
71
+
72
+ # Mark {Node Node} as terminal.
73
+ # @return [Node] the modified node.
74
+ def terminal!
75
+ self.terminal = true
76
+ self
77
+ end
78
+
79
+ def letter= letter
80
+ @letter = letter.to_sym if letter
81
+ end
82
+
83
+ # Checks if a path for a set of characters exists in the trie.
84
+ # @param [Array<String>] chars the characters to look for in the trie.
85
+ # @return [Boolean] `true` if the characters are found, `false`
86
+ # otherwise.
87
+ def partial_word? chars
88
+ return true if chars.empty?
89
+
90
+ partial_word_chars? chars
91
+ end
92
+
93
+ # Checks if a path for a set of characters represents a word in the trie.
94
+ # @param [Array<String>] chars the characters to look for in the trie.
95
+ # @return [Boolean] `true` if the characters are found and form a word,
96
+ # `false` otherwise.
97
+ def word? chars = []
98
+ return terminal? if chars.empty?
99
+
100
+ word_chars? chars
101
+ end
102
+
103
+ # Returns the node that starts with the specified characters.
104
+ # @param [Array<String>] chars the characters to look for in the trie.
105
+ # @return [Node] the node that matches the specified characters.
106
+ # {Missing Missing} when not found.
107
+ def scan chars
108
+ return self if chars.empty?
109
+
110
+ closest_node chars
111
+ end
112
+
113
+ # Returns all words that match a prefix of any length within chars.
114
+ # @param [Array<String>] chars the chars to base the prefix on.
115
+ # @return [Enumerator<String>] all the words that match a prefix given
116
+ # by chars.
117
+ # @yield [String] each word found.
118
+ def match_prefix chars
119
+ return enum_for :match_prefix, chars unless block_given?
120
+
121
+ yield as_word if terminal?
122
+
123
+ children_match_prefix chars do |word|
124
+ yield word
125
+ end
126
+ end
127
+
128
+ # Get {Node Node} corresponding to a given letter.
129
+ # @param [Symbol] letter the letter to search for in the node.
130
+ # @return [Node] the node corresponding to that letter.
131
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
132
+ # Hash#[]
133
+ def [] letter
134
+ children_tree[letter]
135
+ end
136
+
137
+ # Set the {Node Node} that corresponds to a given letter.
138
+ # @param [Symbol] letter the letter to insert or update in the node's children tree.
139
+ # @param [Node] node the {Node Node} to assign to that letter.
140
+ # @return [Node] the node corresponding to the inserted or
141
+ # updated letter.
142
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D-3D
143
+ # Hash#[]=
144
+ def []= letter, node
145
+ children_tree[letter] = node
146
+ end
147
+
148
+ # Check if a {Node Node}'s children tree contains a given
149
+ # letter.
150
+ # @param [Symbol] letter the letter to search for in the node.
151
+ # @return [Boolean] `true` if the letter is present, `false` otherwise
152
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-has_key-3F
153
+ # Hash#key?
154
+ def key? letter
155
+ children_tree.key? letter
156
+ end
157
+
158
+ # Delete a given letter and its corresponding {Node Node} from
159
+ # this {Node Node}'s children tree.
160
+ # @param [Symbol] letter the letter to delete from the node's children
161
+ # tree.
162
+ # @return [Node] the node corresponding to the deleted letter.
163
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-delete
164
+ # Hash#delete
165
+ def delete letter
166
+ children_tree.delete letter
167
+ end
168
+
169
+ alias_method :has_key?, :key?
170
+
171
+ protected
172
+
173
+ def missing
174
+ Rambling::Trie::Nodes::Missing.new
175
+ end
176
+
177
+ private
178
+
179
+ attr_accessor :terminal
180
+ end
181
+ end
182
+ end
183
+ end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rambling
4
+ module Trie
5
+ module Nodes
6
+ # A representation of a node in an uncompressed trie data structure.
7
+ class Raw < Rambling::Trie::Nodes::Node
8
+ # Adds a word to the current raw (uncompressed) trie node.
9
+ # @param [Array<Symbol>] chars the char array to add to the trie.
10
+ # @return [Raw] the added/modified node based on the word added.
11
+ # @note This method clears the contents of the chars variable.
12
+ def add chars
13
+ if chars.empty?
14
+ terminal!
15
+ else
16
+ add_to_children_tree chars
17
+ end
18
+ end
19
+
20
+ # Always return `false` for a raw (uncompressed) node.
21
+ # @return [Boolean] always `false` for a raw (uncompressed) node.
22
+ def compressed?
23
+ false
24
+ end
25
+
26
+ private
27
+
28
+ def add_to_children_tree chars
29
+ letter = chars.pop
30
+ child = children_tree[letter] || new_node(letter)
31
+ child.add chars
32
+ child
33
+ end
34
+
35
+ def new_node letter
36
+ node = Rambling::Trie::Nodes::Raw.new letter, self
37
+ children_tree[letter] = node
38
+ node
39
+ end
40
+
41
+ def partial_word_chars? chars = []
42
+ letter = chars.shift.to_sym
43
+ child = children_tree[letter]
44
+ return false unless child
45
+
46
+ child.partial_word? chars
47
+ end
48
+
49
+ def word_chars? chars = []
50
+ letter = chars.shift.to_sym
51
+ child = children_tree[letter]
52
+ return false unless child
53
+
54
+ child.word? chars
55
+ end
56
+
57
+ def closest_node chars
58
+ letter = chars.shift.to_sym
59
+ child = children_tree[letter]
60
+ return missing unless child
61
+
62
+ child.scan chars
63
+ end
64
+
65
+ def children_match_prefix chars
66
+ return enum_for :children_match_prefix, chars unless block_given?
67
+
68
+ return if chars.empty?
69
+
70
+ letter = chars.shift.to_sym
71
+ child = children_tree[letter]
72
+
73
+ return unless child
74
+
75
+ child.match_prefix chars do |word|
76
+ yield word
77
+ end
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end
@@ -1,4 +1,6 @@
1
- %w{plain_text}.each do |file|
1
+ # frozen_string_literal: true
2
+
3
+ %w(plain_text).each do |file|
2
4
  require File.join('rambling', 'trie', 'readers', file)
3
5
  end
4
6
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Readers
@@ -8,17 +10,7 @@ module Rambling
8
10
  # from.
9
11
  # @yield [String] Each line read from the file.
10
12
  def each_word filepath
11
- each_line(filepath) { |line| yield line.chomp! }
12
- end
13
-
14
- private
15
-
16
- def each_line filepath
17
- open(filepath) { |file| file.each_line { |line| yield line } }
18
- end
19
-
20
- def open filepath
21
- File.open(filepath) { |file| yield file }
13
+ File.foreach(filepath) { |line| yield line.chomp! }
22
14
  end
23
15
  end
24
16
  end
@@ -1,4 +1,6 @@
1
- %w{file marshal yaml zip}.each do |file|
1
+ # frozen_string_literal: true
2
+
3
+ %w(file marshal yaml zip).each do |file|
2
4
  require File.join('rambling', 'trie', 'serializers', file)
3
5
  end
4
6
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Serializers
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Serializers
@@ -11,20 +13,28 @@ module Rambling
11
13
  end
12
14
 
13
15
  # Loads marshaled object from contents in filepath and deserializes it
14
- # into a {Node Node}.
16
+ # into a {Nodes::Node Node}.
15
17
  # @param [String] filepath the full path of the file to load the
16
18
  # marshaled object from.
17
- # @return [Node] The deserialized {Node Node}.
19
+ # @return [Nodes::Node] The deserialized {Nodes::Node Node}.
20
+ # @see https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-load
21
+ # Marshal.load
22
+ # @note Use of
23
+ # {https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-load
24
+ # Marshal.load} is generally discouraged. Only use this with trusted
25
+ # input.
18
26
  def load filepath
19
27
  ::Marshal.load serializer.load filepath
20
28
  end
21
29
 
22
- # Serializes a {Node Node} and dumps it as a marshaled object into
23
- # filepath.
24
- # @param [Node] node the node to serialize
30
+ # Serializes a {Nodes::Node Node} and dumps it as a marshaled object
31
+ # into filepath.
32
+ # @param [Nodes::Node] node the node to serialize
25
33
  # @param [String] filepath the full path of the file to dump the
26
34
  # marshaled object into.
27
35
  # @return [Numeric] number of bytes written to disk.
36
+ # @see https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-dump
37
+ # Marshal.dump
28
38
  def dump node, filepath
29
39
  serializer.dump ::Marshal.dump(node), filepath
30
40
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Serializers
@@ -11,20 +13,34 @@ module Rambling
11
13
  end
12
14
 
13
15
  # Loads serialized object from YAML file in filepath and deserializes
14
- # it into a {Node Node}.
16
+ # it into a {Nodes::Node Node}.
15
17
  # @param [String] filepath the full path of the file to load the
16
18
  # serialized YAML object from.
17
- # @return [Node] The deserialized {Node Node}.
19
+ # @return [Nodes::Node] The deserialized {Nodes::Node Node}.
20
+ # @see https://ruby-doc.org/stdlib-2.5.0/libdoc/psych/rdoc/Psych.html#method-c-safe_load
21
+ # Psych.safe_load
18
22
  def load filepath
19
23
  require 'yaml'
20
- ::YAML.load serializer.load filepath
24
+ ::YAML.safe_load(
25
+ serializer.load(filepath),
26
+ [
27
+ Symbol,
28
+ Rambling::Trie::Nodes::Raw,
29
+ Rambling::Trie::Nodes::Compressed,
30
+ ],
31
+ [],
32
+ true,
33
+ )
21
34
  end
22
35
 
23
- # Serializes a {Node Node} and dumps it as a YAML object into filepath.
24
- # @param [Node] node the node to serialize
36
+ # Serializes a {Nodes::Node Node} and dumps it as a YAML object into
37
+ # filepath.
38
+ # @param [Nodes::Node] node the node to serialize
25
39
  # @param [String] filepath the full path of the file to dump the YAML
26
40
  # object into.
27
41
  # @return [Numeric] number of bytes written to disk.
42
+ # @see https://ruby-doc.org/stdlib-2.5.0/libdoc/psych/rdoc/Psych.html#method-c-dump
43
+ # Psych.dump
28
44
  def dump node, filepath
29
45
  require 'yaml'
30
46
  serializer.dump ::YAML.dump(node), filepath
@@ -1,14 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Serializers
4
6
  # Zip file serializer. Dumps/loads contents from zip files. Automatically
5
7
  # detects if zip file contains `.marshal` or `.yml` file
6
8
  class Zip
7
- extend ::Forwardable
8
-
9
9
  # Creates a new Zip serializer.
10
- # @param [Properties] properties the configuration properties set up so
11
- # far.
10
+ # @param [Configuration::Properties] properties the configuration
11
+ # properties set up so far.
12
12
  def initialize properties
13
13
  @properties = properties
14
14
  end
@@ -17,6 +17,8 @@ module Rambling
17
17
  # unzipped files.
18
18
  # @param [String] filepath the filepath to load contents from.
19
19
  # @return [String] all contents of the unzipped loaded file.
20
+ # @see https://github.com/rubyzip/rubyzip#reading-a-zip-file Zip
21
+ # reading a file
20
22
  def load filepath
21
23
  require 'zip'
22
24
 
@@ -34,6 +36,8 @@ module Rambling
34
36
  # @param [String] contents the contents to dump.
35
37
  # @param [String] filepath the filepath to dump the contents to.
36
38
  # @return [Numeric] number of bytes written to disk.
39
+ # @see https://github.com/rubyzip/rubyzip#basic-zip-archive-creation
40
+ # Zip archive creation
37
41
  def dump contents, filepath
38
42
  require 'zip'
39
43
 
@@ -52,10 +56,13 @@ module Rambling
52
56
 
53
57
  attr_reader :properties
54
58
 
55
- delegate [
56
- :serializers,
57
- :tmp_path
58
- ] => :properties
59
+ def serializers
60
+ properties.serializers
61
+ end
62
+
63
+ def tmp_path
64
+ properties.tmp_path
65
+ end
59
66
 
60
67
  def path filename
61
68
  require 'securerandom'