rambling-trie 1.0.2 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +5 -5
  2. data/Gemfile +6 -3
  3. data/Guardfile +3 -1
  4. data/README.md +30 -12
  5. data/Rakefile +8 -0
  6. data/lib/rambling-trie.rb +2 -0
  7. data/lib/rambling/trie.rb +48 -26
  8. data/lib/rambling/trie/comparable.rb +6 -3
  9. data/lib/rambling/trie/compressible.rb +16 -0
  10. data/lib/rambling/trie/compressor.rb +39 -24
  11. data/lib/rambling/trie/configuration.rb +3 -1
  12. data/lib/rambling/trie/configuration/properties.rb +18 -9
  13. data/lib/rambling/trie/configuration/provider_collection.rb +38 -17
  14. data/lib/rambling/trie/container.rb +123 -36
  15. data/lib/rambling/trie/enumerable.rb +6 -4
  16. data/lib/rambling/trie/inspectable.rb +2 -0
  17. data/lib/rambling/trie/invalid_operation.rb +3 -1
  18. data/lib/rambling/trie/nodes.rb +13 -0
  19. data/lib/rambling/trie/nodes/compressed.rb +98 -0
  20. data/lib/rambling/trie/nodes/missing.rb +12 -0
  21. data/lib/rambling/trie/nodes/node.rb +183 -0
  22. data/lib/rambling/trie/nodes/raw.rb +82 -0
  23. data/lib/rambling/trie/readers.rb +3 -1
  24. data/lib/rambling/trie/readers/plain_text.rb +3 -11
  25. data/lib/rambling/trie/serializers.rb +3 -1
  26. data/lib/rambling/trie/serializers/file.rb +2 -0
  27. data/lib/rambling/trie/serializers/marshal.rb +15 -5
  28. data/lib/rambling/trie/serializers/yaml.rb +21 -5
  29. data/lib/rambling/trie/serializers/zip.rb +15 -8
  30. data/lib/rambling/trie/stringifyable.rb +8 -2
  31. data/lib/rambling/trie/version.rb +3 -1
  32. data/rambling-trie.gemspec +21 -10
  33. data/spec/assets/test_words.es_DO.txt +1 -0
  34. data/spec/integration/rambling/trie_spec.rb +44 -35
  35. data/spec/lib/rambling/trie/comparable_spec.rb +8 -15
  36. data/spec/lib/rambling/trie/compressor_spec.rb +90 -13
  37. data/spec/lib/rambling/trie/configuration/properties_spec.rb +21 -13
  38. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +18 -34
  39. data/spec/lib/rambling/trie/container_spec.rb +183 -217
  40. data/spec/lib/rambling/trie/enumerable_spec.rb +14 -9
  41. data/spec/lib/rambling/trie/inspectable_spec.rb +36 -11
  42. data/spec/lib/rambling/trie/nodes/compressed_spec.rb +37 -0
  43. data/spec/lib/rambling/trie/nodes/node_spec.rb +9 -0
  44. data/spec/lib/rambling/trie/nodes/raw_spec.rb +179 -0
  45. data/spec/lib/rambling/trie/readers/plain_text_spec.rb +3 -1
  46. data/spec/lib/rambling/trie/serializers/file_spec.rb +6 -4
  47. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +5 -7
  48. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +5 -7
  49. data/spec/lib/rambling/trie/serializers/zip_spec.rb +18 -20
  50. data/spec/lib/rambling/trie/stringifyable_spec.rb +14 -11
  51. data/spec/lib/rambling/trie_spec.rb +18 -11
  52. data/spec/spec_helper.rb +10 -5
  53. data/spec/support/config.rb +10 -0
  54. data/spec/support/helpers/add_word.rb +20 -0
  55. data/spec/support/helpers/one_line_heredoc.rb +11 -0
  56. data/spec/support/shared_examples/a_compressible_trie.rb +40 -0
  57. data/spec/support/shared_examples/a_serializable_trie.rb +10 -6
  58. data/spec/support/shared_examples/a_serializer.rb +9 -1
  59. data/spec/support/shared_examples/a_trie_data_structure.rb +2 -0
  60. data/spec/support/shared_examples/a_trie_node.rb +127 -0
  61. data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +25 -72
  62. metadata +42 -31
  63. data/lib/rambling/trie/compressable.rb +0 -14
  64. data/lib/rambling/trie/compressed_node.rb +0 -120
  65. data/lib/rambling/trie/missing_node.rb +0 -8
  66. data/lib/rambling/trie/node.rb +0 -97
  67. data/lib/rambling/trie/raw_node.rb +0 -96
  68. data/spec/lib/rambling/trie/node_spec.rb +0 -86
  69. data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
  70. data/spec/support/shared_examples/a_compressable_trie.rb +0 -26
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rambling
4
+ module Trie
5
+ module Nodes
6
+ # A representation of a missing node in the trie data structure. Returned
7
+ # when a node is not found.
8
+ class Missing < Rambling::Trie::Nodes::Node
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,183 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rambling
4
+ module Trie
5
+ module Nodes
6
+ # A representation of a node in the trie data structure.
7
+ class Node
8
+ include Rambling::Trie::Compressible
9
+ include Rambling::Trie::Enumerable
10
+ include Rambling::Trie::Comparable
11
+ include Rambling::Trie::Stringifyable
12
+ include Rambling::Trie::Inspectable
13
+
14
+ # @overload letter
15
+ # Letter(s) corresponding to the current node.
16
+ # @overload letter=(letter)
17
+ # Sets the letter(s) corresponding to the current node. Ensures the
18
+ # {Node#letter #letter} in the {Node#parent #parent}'s
19
+ # {Node#children_tree #children_tree} is updated.
20
+ # @param [String, Symbol, nil] letter the letter value.
21
+ # @return [Symbol, nil] the corresponding letter(s).
22
+ attr_reader :letter
23
+
24
+ # Child nodes tree.
25
+ # @return [Hash] the children_tree hash, consisting of `:letter =>
26
+ # node`.
27
+ attr_accessor :children_tree
28
+
29
+ # Parent node.
30
+ # @return [Node, nil] the parent of the current node.
31
+ attr_accessor :parent
32
+
33
+ # Creates a new node.
34
+ # @param [Symbol, nil] letter the Node's letter value
35
+ # @param [Node, nil] parent the parent of the current node.
36
+ def initialize letter = nil, parent = nil, children_tree = {}
37
+ @letter = letter
38
+ @parent = parent
39
+ @children_tree = children_tree
40
+ end
41
+
42
+ # Child nodes.
43
+ # @return [Array<Node>] the array of children nodes contained
44
+ # in the current node.
45
+ def children
46
+ children_tree.values
47
+ end
48
+
49
+ # First child node.
50
+ # @return [Node, nil] the first child contained in the current node.
51
+ def first_child
52
+ return if children_tree.empty?
53
+
54
+ children_tree.each_value do |child|
55
+ return child
56
+ end
57
+ end
58
+
59
+ # Indicates if the current node is the root node.
60
+ # @return [Boolean] `true` if the node does not have a parent, `false`
61
+ # otherwise.
62
+ def root?
63
+ !parent
64
+ end
65
+
66
+ # Indicates if a {Node Node} is terminal or not.
67
+ # @return [Boolean] `true` for terminal nodes, `false` otherwise.
68
+ def terminal?
69
+ !!terminal
70
+ end
71
+
72
+ # Mark {Node Node} as terminal.
73
+ # @return [Node] the modified node.
74
+ def terminal!
75
+ self.terminal = true
76
+ self
77
+ end
78
+
79
+ def letter= letter
80
+ @letter = letter.to_sym if letter
81
+ end
82
+
83
+ # Checks if a path for a set of characters exists in the trie.
84
+ # @param [Array<String>] chars the characters to look for in the trie.
85
+ # @return [Boolean] `true` if the characters are found, `false`
86
+ # otherwise.
87
+ def partial_word? chars
88
+ return true if chars.empty?
89
+
90
+ partial_word_chars? chars
91
+ end
92
+
93
+ # Checks if a path for set of characters represents a word in the trie.
94
+ # @param [Array<String>] chars the characters to look for in the trie.
95
+ # @return [Boolean] `true` if the characters are found and form a word,
96
+ # `false` otherwise.
97
+ def word? chars = []
98
+ return terminal? if chars.empty?
99
+
100
+ word_chars? chars
101
+ end
102
+
103
+ # Returns the node that starts with the specified characters.
104
+ # @param [Array<String>] chars the characters to look for in the trie.
105
+ # @return [Node] the node that matches the specified characters.
106
+ # {Missing Missing} when not found.
107
+ def scan chars
108
+ return self if chars.empty?
109
+
110
+ closest_node chars
111
+ end
112
+
113
+ # Returns all words that match a prefix of any length within chars.
114
+ # @param [Array<String>] chars the chars to base the prefix on.
115
+ # @return [Enumerator<String>] all the words that match a prefix given
116
+ # by chars.
117
+ # @yield [String] each word found.
118
+ def match_prefix chars
119
+ return enum_for :match_prefix, chars unless block_given?
120
+
121
+ yield as_word if terminal?
122
+
123
+ children_match_prefix chars do |word|
124
+ yield word
125
+ end
126
+ end
127
+
128
+ # Get {Node Node} corresponding to a given letter.
129
+ # @param [Symbol] letter the letter to search for in the node.
130
+ # @return [Node] the node corresponding to that letter.
131
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
132
+ # Hash#[]
133
+ def [] letter
134
+ children_tree[letter]
135
+ end
136
+
137
+ # Set the {Node Node} that corresponds to a given letter.
138
+ # @param [Symbol] letter the letter to insert or update in the node's children tree.
139
+ # @param [Node] node the {Node Node} to assign to that letter.
140
+ # @return [Node] the node corresponding to the inserted or
141
+ # updated letter.
142
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D-3D
143
+ # Hash#[]=
144
+ def []= letter, node
145
+ children_tree[letter] = node
146
+ end
147
+
148
+ # Check if a {Node Node}'s children tree contains a given
149
+ # letter.
150
+ # @param [Symbol] letter the letter to search for in the node.
151
+ # @return [Boolean] `true` if the letter is present, `false` otherwise
152
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-has_key-3F
153
+ # Hash#key?
154
+ def key? letter
155
+ children_tree.key? letter
156
+ end
157
+
158
+ # Delete a given letter and its corresponding {Node Node} from
159
+ # this {Node Node}'s children tree.
160
+ # @param [Symbol] letter the letter to delete from the node's children
161
+ # tree.
162
+ # @return [Node] the node corresponding to the deleted letter.
163
+ # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-delete
164
+ # Hash#delete
165
+ def delete letter
166
+ children_tree.delete letter
167
+ end
168
+
169
+ alias_method :has_key?, :key?
170
+
171
+ protected
172
+
173
+ def missing
174
+ Rambling::Trie::Nodes::Missing.new
175
+ end
176
+
177
+ private
178
+
179
+ attr_accessor :terminal
180
+ end
181
+ end
182
+ end
183
+ end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rambling
4
+ module Trie
5
+ module Nodes
6
+ # A representation of a node in an uncompressed trie data structure.
7
+ class Raw < Rambling::Trie::Nodes::Node
8
+ # Adds a word to the current raw (uncompressed) trie node.
9
+ # @param [Array<Symbol>] chars the char array to add to the trie.
10
+ # @return [Raw] the added/modified node based on the word added.
11
+ # @note This method clears the contents of the chars variable.
12
+ def add chars
13
+ if chars.empty?
14
+ terminal!
15
+ else
16
+ add_to_children_tree chars
17
+ end
18
+ end
19
+
20
+ # Always return `false` for a raw (uncompressed) node.
21
+ # @return [Boolean] always `false` for a raw (uncompressed) node.
22
+ def compressed?
23
+ false
24
+ end
25
+
26
+ private
27
+
28
+ def add_to_children_tree chars
29
+ letter = chars.pop
30
+ child = children_tree[letter] || new_node(letter)
31
+ child.add chars
32
+ child
33
+ end
34
+
35
+ def new_node letter
36
+ node = Rambling::Trie::Nodes::Raw.new letter, self
37
+ children_tree[letter] = node
38
+ node
39
+ end
40
+
41
+ def partial_word_chars? chars = []
42
+ letter = chars.shift.to_sym
43
+ child = children_tree[letter]
44
+ return false unless child
45
+
46
+ child.partial_word? chars
47
+ end
48
+
49
+ def word_chars? chars = []
50
+ letter = chars.shift.to_sym
51
+ child = children_tree[letter]
52
+ return false unless child
53
+
54
+ child.word? chars
55
+ end
56
+
57
+ def closest_node chars
58
+ letter = chars.shift.to_sym
59
+ child = children_tree[letter]
60
+ return missing unless child
61
+
62
+ child.scan chars
63
+ end
64
+
65
+ def children_match_prefix chars
66
+ return enum_for :children_match_prefix, chars unless block_given?
67
+
68
+ return if chars.empty?
69
+
70
+ letter = chars.shift.to_sym
71
+ child = children_tree[letter]
72
+
73
+ return unless child
74
+
75
+ child.match_prefix chars do |word|
76
+ yield word
77
+ end
78
+ end
79
+ end
80
+ end
81
+ end
82
+ end
@@ -1,4 +1,6 @@
1
- %w{plain_text}.each do |file|
1
+ # frozen_string_literal: true
2
+
3
+ %w(plain_text).each do |file|
2
4
  require File.join('rambling', 'trie', 'readers', file)
3
5
  end
4
6
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Readers
@@ -8,17 +10,7 @@ module Rambling
8
10
  # from.
9
11
  # @yield [String] Each line read from the file.
10
12
  def each_word filepath
11
- each_line(filepath) { |line| yield line.chomp! }
12
- end
13
-
14
- private
15
-
16
- def each_line filepath
17
- open(filepath) { |file| file.each_line { |line| yield line } }
18
- end
19
-
20
- def open filepath
21
- File.open(filepath) { |file| yield file }
13
+ File.foreach(filepath) { |line| yield line.chomp! }
22
14
  end
23
15
  end
24
16
  end
@@ -1,4 +1,6 @@
1
- %w{file marshal yaml zip}.each do |file|
1
+ # frozen_string_literal: true
2
+
3
+ %w(file marshal yaml zip).each do |file|
2
4
  require File.join('rambling', 'trie', 'serializers', file)
3
5
  end
4
6
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Serializers
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Serializers
@@ -11,20 +13,28 @@ module Rambling
11
13
  end
12
14
 
13
15
  # Loads marshaled object from contents in filepath and deserializes it
14
- # into a {Node Node}.
16
+ # into a {Nodes::Node Node}.
15
17
  # @param [String] filepath the full path of the file to load the
16
18
  # marshaled object from.
17
- # @return [Node] The deserialized {Node Node}.
19
+ # @return [Nodes::Node] The deserialized {Nodes::Node Node}.
20
+ # @see https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-load
21
+ # Marshal.load
22
+ # @note Use of
23
+ # {https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-load
24
+ # Marshal.load} is generally discouraged. Only use this with trusted
25
+ # input.
18
26
  def load filepath
19
27
  ::Marshal.load serializer.load filepath
20
28
  end
21
29
 
22
- # Serializes a {Node Node} and dumps it as a marshaled object into
23
- # filepath.
24
- # @param [Node] node the node to serialize
30
+ # Serializes a {Nodes::Node Node} and dumps it as a marshaled object
31
+ # into filepath.
32
+ # @param [Nodes::Node] node the node to serialize
25
33
  # @param [String] filepath the full path of the file to dump the
26
34
  # marshaled object into.
27
35
  # @return [Numeric] number of bytes written to disk.
36
+ # @see https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-dump
37
+ # Marshal.dump
28
38
  def dump node, filepath
29
39
  serializer.dump ::Marshal.dump(node), filepath
30
40
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Serializers
@@ -11,20 +13,34 @@ module Rambling
11
13
  end
12
14
 
13
15
  # Loads serialized object from YAML file in filepath and deserializes
14
- # it into a {Node Node}.
16
+ # it into a {Nodes::Node Node}.
15
17
  # @param [String] filepath the full path of the file to load the
16
18
  # serialized YAML object from.
17
- # @return [Node] The deserialized {Node Node}.
19
+ # @return [Nodes::Node] The deserialized {Nodes::Node Node}.
20
+ # @see https://ruby-doc.org/stdlib-2.5.0/libdoc/psych/rdoc/Psych.html#method-c-safe_load
21
+ # Psych.safe_load
18
22
  def load filepath
19
23
  require 'yaml'
20
- ::YAML.load serializer.load filepath
24
+ ::YAML.safe_load(
25
+ serializer.load(filepath),
26
+ [
27
+ Symbol,
28
+ Rambling::Trie::Nodes::Raw,
29
+ Rambling::Trie::Nodes::Compressed,
30
+ ],
31
+ [],
32
+ true,
33
+ )
21
34
  end
22
35
 
23
- # Serializes a {Node Node} and dumps it as a YAML object into filepath.
24
- # @param [Node] node the node to serialize
36
+ # Serializes a {Nodes::Node Node} and dumps it as a YAML object into
37
+ # filepath.
38
+ # @param [Nodes::Node] node the node to serialize
25
39
  # @param [String] filepath the full path of the file to dump the YAML
26
40
  # object into.
27
41
  # @return [Numeric] number of bytes written to disk.
42
+ # @see https://ruby-doc.org/stdlib-2.5.0/libdoc/psych/rdoc/Psych.html#method-c-dump
43
+ # Psych.dump
28
44
  def dump node, filepath
29
45
  require 'yaml'
30
46
  serializer.dump ::YAML.dump(node), filepath
@@ -1,14 +1,14 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rambling
2
4
  module Trie
3
5
  module Serializers
4
6
  # Zip file serializer. Dumps/loads contents from zip files. Automatically
5
7
  # detects if zip file contains `.marshal` or `.yml` file
6
8
  class Zip
7
- extend ::Forwardable
8
-
9
9
  # Creates a new Zip serializer.
10
- # @param [Properties] properties the configuration properties set up so
11
- # far.
10
+ # @param [Configuration::Properties] properties the configuration
11
+ # properties set up so far.
12
12
  def initialize properties
13
13
  @properties = properties
14
14
  end
@@ -17,6 +17,8 @@ module Rambling
17
17
  # unzipped files.
18
18
  # @param [String] filepath the filepath to load contents from.
19
19
  # @return [String] all contents of the unzipped loaded file.
20
+ # @see https://github.com/rubyzip/rubyzip#reading-a-zip-file Zip
21
+ # reading a file
20
22
  def load filepath
21
23
  require 'zip'
22
24
 
@@ -34,6 +36,8 @@ module Rambling
34
36
  # @param [String] contents the contents to dump.
35
37
  # @param [String] filepath the filepath to dump the contents to.
36
38
  # @return [Numeric] number of bytes written to disk.
39
+ # @see https://github.com/rubyzip/rubyzip#basic-zip-archive-creation
40
+ # Zip archive creation
37
41
  def dump contents, filepath
38
42
  require 'zip'
39
43
 
@@ -52,10 +56,13 @@ module Rambling
52
56
 
53
57
  attr_reader :properties
54
58
 
55
- delegate [
56
- :serializers,
57
- :tmp_path
58
- ] => :properties
59
+ def serializers
60
+ properties.serializers
61
+ end
62
+
63
+ def tmp_path
64
+ properties.tmp_path
65
+ end
59
66
 
60
67
  def path filename
61
68
  require 'securerandom'