rambling-trie 1.0.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile +6 -3
- data/Guardfile +3 -1
- data/README.md +30 -12
- data/Rakefile +8 -0
- data/lib/rambling-trie.rb +2 -0
- data/lib/rambling/trie.rb +48 -26
- data/lib/rambling/trie/comparable.rb +6 -3
- data/lib/rambling/trie/compressible.rb +16 -0
- data/lib/rambling/trie/compressor.rb +39 -24
- data/lib/rambling/trie/configuration.rb +3 -1
- data/lib/rambling/trie/configuration/properties.rb +18 -9
- data/lib/rambling/trie/configuration/provider_collection.rb +38 -17
- data/lib/rambling/trie/container.rb +123 -36
- data/lib/rambling/trie/enumerable.rb +6 -4
- data/lib/rambling/trie/inspectable.rb +2 -0
- data/lib/rambling/trie/invalid_operation.rb +3 -1
- data/lib/rambling/trie/nodes.rb +13 -0
- data/lib/rambling/trie/nodes/compressed.rb +98 -0
- data/lib/rambling/trie/nodes/missing.rb +12 -0
- data/lib/rambling/trie/nodes/node.rb +183 -0
- data/lib/rambling/trie/nodes/raw.rb +82 -0
- data/lib/rambling/trie/readers.rb +3 -1
- data/lib/rambling/trie/readers/plain_text.rb +3 -11
- data/lib/rambling/trie/serializers.rb +3 -1
- data/lib/rambling/trie/serializers/file.rb +2 -0
- data/lib/rambling/trie/serializers/marshal.rb +15 -5
- data/lib/rambling/trie/serializers/yaml.rb +21 -5
- data/lib/rambling/trie/serializers/zip.rb +15 -8
- data/lib/rambling/trie/stringifyable.rb +8 -2
- data/lib/rambling/trie/version.rb +3 -1
- data/rambling-trie.gemspec +21 -10
- data/spec/assets/test_words.es_DO.txt +1 -0
- data/spec/integration/rambling/trie_spec.rb +44 -35
- data/spec/lib/rambling/trie/comparable_spec.rb +8 -15
- data/spec/lib/rambling/trie/compressor_spec.rb +90 -13
- data/spec/lib/rambling/trie/configuration/properties_spec.rb +21 -13
- data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +18 -34
- data/spec/lib/rambling/trie/container_spec.rb +183 -217
- data/spec/lib/rambling/trie/enumerable_spec.rb +14 -9
- data/spec/lib/rambling/trie/inspectable_spec.rb +36 -11
- data/spec/lib/rambling/trie/nodes/compressed_spec.rb +37 -0
- data/spec/lib/rambling/trie/nodes/node_spec.rb +9 -0
- data/spec/lib/rambling/trie/nodes/raw_spec.rb +179 -0
- data/spec/lib/rambling/trie/readers/plain_text_spec.rb +3 -1
- data/spec/lib/rambling/trie/serializers/file_spec.rb +6 -4
- data/spec/lib/rambling/trie/serializers/marshal_spec.rb +5 -7
- data/spec/lib/rambling/trie/serializers/yaml_spec.rb +5 -7
- data/spec/lib/rambling/trie/serializers/zip_spec.rb +18 -20
- data/spec/lib/rambling/trie/stringifyable_spec.rb +14 -11
- data/spec/lib/rambling/trie_spec.rb +18 -11
- data/spec/spec_helper.rb +10 -5
- data/spec/support/config.rb +10 -0
- data/spec/support/helpers/add_word.rb +20 -0
- data/spec/support/helpers/one_line_heredoc.rb +11 -0
- data/spec/support/shared_examples/a_compressible_trie.rb +40 -0
- data/spec/support/shared_examples/a_serializable_trie.rb +10 -6
- data/spec/support/shared_examples/a_serializer.rb +9 -1
- data/spec/support/shared_examples/a_trie_data_structure.rb +2 -0
- data/spec/support/shared_examples/a_trie_node.rb +127 -0
- data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +25 -72
- metadata +42 -31
- data/lib/rambling/trie/compressable.rb +0 -14
- data/lib/rambling/trie/compressed_node.rb +0 -120
- data/lib/rambling/trie/missing_node.rb +0 -8
- data/lib/rambling/trie/node.rb +0 -97
- data/lib/rambling/trie/raw_node.rb +0 -96
- data/spec/lib/rambling/trie/node_spec.rb +0 -86
- data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
- data/spec/support/shared_examples/a_compressable_trie.rb +0 -26
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
|
4
|
+
module Trie
|
5
|
+
module Nodes
|
6
|
+
# A representation of a missing node in the trie data structure. Returned
|
7
|
+
# when a node is not found.
|
8
|
+
class Missing < Rambling::Trie::Nodes::Node
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
|
4
|
+
module Trie
|
5
|
+
module Nodes
|
6
|
+
# A representation of a node in the trie data structure.
|
7
|
+
class Node
|
8
|
+
include Rambling::Trie::Compressible
|
9
|
+
include Rambling::Trie::Enumerable
|
10
|
+
include Rambling::Trie::Comparable
|
11
|
+
include Rambling::Trie::Stringifyable
|
12
|
+
include Rambling::Trie::Inspectable
|
13
|
+
|
14
|
+
# @overload letter
|
15
|
+
# Letter(s) corresponding to the current node.
|
16
|
+
# @overload letter=(letter)
|
17
|
+
# Sets the letter(s) corresponding to the current node. Ensures the
|
18
|
+
# {Node#letter #letter} in the {Node#parent #parent}'s
|
19
|
+
# {Node#children_tree #children_tree} is updated.
|
20
|
+
# @param [String, Symbol, nil] letter the letter value.
|
21
|
+
# @return [Symbol, nil] the corresponding letter(s).
|
22
|
+
attr_reader :letter
|
23
|
+
|
24
|
+
# Child nodes tree.
|
25
|
+
# @return [Hash] the children_tree hash, consisting of `:letter =>
|
26
|
+
# node`.
|
27
|
+
attr_accessor :children_tree
|
28
|
+
|
29
|
+
# Parent node.
|
30
|
+
# @return [Node, nil] the parent of the current node.
|
31
|
+
attr_accessor :parent
|
32
|
+
|
33
|
+
# Creates a new node.
|
34
|
+
# @param [Symbol, nil] letter the Node's letter value
|
35
|
+
# @param [Node, nil] parent the parent of the current node.
|
36
|
+
def initialize letter = nil, parent = nil, children_tree = {}
|
37
|
+
@letter = letter
|
38
|
+
@parent = parent
|
39
|
+
@children_tree = children_tree
|
40
|
+
end
|
41
|
+
|
42
|
+
# Child nodes.
|
43
|
+
# @return [Array<Node>] the array of children nodes contained
|
44
|
+
# in the current node.
|
45
|
+
def children
|
46
|
+
children_tree.values
|
47
|
+
end
|
48
|
+
|
49
|
+
# First child node.
|
50
|
+
# @return [Node, nil] the first child contained in the current node.
|
51
|
+
def first_child
|
52
|
+
return if children_tree.empty?
|
53
|
+
|
54
|
+
children_tree.each_value do |child|
|
55
|
+
return child
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# Indicates if the current node is the root node.
|
60
|
+
# @return [Boolean] `true` if the node does not have a parent, `false`
|
61
|
+
# otherwise.
|
62
|
+
def root?
|
63
|
+
!parent
|
64
|
+
end
|
65
|
+
|
66
|
+
# Indicates if a {Node Node} is terminal or not.
|
67
|
+
# @return [Boolean] `true` for terminal nodes, `false` otherwise.
|
68
|
+
def terminal?
|
69
|
+
!!terminal
|
70
|
+
end
|
71
|
+
|
72
|
+
# Mark {Node Node} as terminal.
|
73
|
+
# @return [Node] the modified node.
|
74
|
+
def terminal!
|
75
|
+
self.terminal = true
|
76
|
+
self
|
77
|
+
end
|
78
|
+
|
79
|
+
def letter= letter
|
80
|
+
@letter = letter.to_sym if letter
|
81
|
+
end
|
82
|
+
|
83
|
+
# Checks if a path for a set of characters exists in the trie.
|
84
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
85
|
+
# @return [Boolean] `true` if the characters are found, `false`
|
86
|
+
# otherwise.
|
87
|
+
def partial_word? chars
|
88
|
+
return true if chars.empty?
|
89
|
+
|
90
|
+
partial_word_chars? chars
|
91
|
+
end
|
92
|
+
|
93
|
+
# Checks if a path for set of characters represents a word in the trie.
|
94
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
95
|
+
# @return [Boolean] `true` if the characters are found and form a word,
|
96
|
+
# `false` otherwise.
|
97
|
+
def word? chars = []
|
98
|
+
return terminal? if chars.empty?
|
99
|
+
|
100
|
+
word_chars? chars
|
101
|
+
end
|
102
|
+
|
103
|
+
# Returns the node that starts with the specified characters.
|
104
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
105
|
+
# @return [Node] the node that matches the specified characters.
|
106
|
+
# {Missing Missing} when not found.
|
107
|
+
def scan chars
|
108
|
+
return self if chars.empty?
|
109
|
+
|
110
|
+
closest_node chars
|
111
|
+
end
|
112
|
+
|
113
|
+
# Returns all words that match a prefix of any length within chars.
|
114
|
+
# @param [String] chars the chars to base the prefix on.
|
115
|
+
# @return [Enumerator<String>] all the words that match a prefix given
|
116
|
+
# by chars.
|
117
|
+
# @yield [String] each word found.
|
118
|
+
def match_prefix chars
|
119
|
+
return enum_for :match_prefix, chars unless block_given?
|
120
|
+
|
121
|
+
yield as_word if terminal?
|
122
|
+
|
123
|
+
children_match_prefix chars do |word|
|
124
|
+
yield word
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
# Get {Node Node} corresponding to a given letter.
|
129
|
+
# @param [Symbol] letter the letter to search for in the node.
|
130
|
+
# @return [Node] the node corresponding to that letter.
|
131
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
|
132
|
+
# Hash#[]
|
133
|
+
def [] letter
|
134
|
+
children_tree[letter]
|
135
|
+
end
|
136
|
+
|
137
|
+
# Set the {Node Node} that corresponds to a given letter.
|
138
|
+
# @param [Symbol] letter the letter to insert or update in the node's
|
139
|
+
# @param [Node] node the {Node Node} to assign to that letter.
|
140
|
+
# @return [Node] the node corresponding to the inserted or
|
141
|
+
# updated letter.
|
142
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
|
143
|
+
# Hash#[]
|
144
|
+
def []= letter, node
|
145
|
+
children_tree[letter] = node
|
146
|
+
end
|
147
|
+
|
148
|
+
# Check if a {Node Node}'s children tree contains a given
|
149
|
+
# letter.
|
150
|
+
# @param [Symbol] letter the letter to search for in the node.
|
151
|
+
# @return [Boolean] `true` if the letter is present, `false` otherwise
|
152
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-has_key-3F
|
153
|
+
# Hash#key?
|
154
|
+
def key? letter
|
155
|
+
children_tree.key? letter
|
156
|
+
end
|
157
|
+
|
158
|
+
# Delete a given letter and its corresponding {Node Node} from
|
159
|
+
# this {Node Node}'s children tree.
|
160
|
+
# @param [Symbol] letter the letter to delete from the node's children
|
161
|
+
# tree.
|
162
|
+
# @return [Node] the node corresponding to the deleted letter.
|
163
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-delete
|
164
|
+
# Hash#delete
|
165
|
+
def delete letter
|
166
|
+
children_tree.delete letter
|
167
|
+
end
|
168
|
+
|
169
|
+
alias_method :has_key?, :key?
|
170
|
+
|
171
|
+
protected
|
172
|
+
|
173
|
+
def missing
|
174
|
+
Rambling::Trie::Nodes::Missing.new
|
175
|
+
end
|
176
|
+
|
177
|
+
private
|
178
|
+
|
179
|
+
attr_accessor :terminal
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
|
4
|
+
module Trie
|
5
|
+
module Nodes
|
6
|
+
# A representation of a node in an uncompressed trie data structure.
|
7
|
+
class Raw < Rambling::Trie::Nodes::Node
|
8
|
+
# Adds a word to the current raw (uncompressed) trie node.
|
9
|
+
# @param [Array<Symbol>] chars the char array to add to the trie.
|
10
|
+
# @return [Raw] the added/modified node based on the word added.
|
11
|
+
# @note This method clears the contents of the chars variable.
|
12
|
+
def add chars
|
13
|
+
if chars.empty?
|
14
|
+
terminal!
|
15
|
+
else
|
16
|
+
add_to_children_tree chars
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# Always return `false` for a raw (uncompressed) node.
|
21
|
+
# @return [Boolean] always `false` for a raw (uncompressed) node.
|
22
|
+
def compressed?
|
23
|
+
false
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def add_to_children_tree chars
|
29
|
+
letter = chars.pop
|
30
|
+
child = children_tree[letter] || new_node(letter)
|
31
|
+
child.add chars
|
32
|
+
child
|
33
|
+
end
|
34
|
+
|
35
|
+
def new_node letter
|
36
|
+
node = Rambling::Trie::Nodes::Raw.new letter, self
|
37
|
+
children_tree[letter] = node
|
38
|
+
node
|
39
|
+
end
|
40
|
+
|
41
|
+
def partial_word_chars? chars = []
|
42
|
+
letter = chars.shift.to_sym
|
43
|
+
child = children_tree[letter]
|
44
|
+
return false unless child
|
45
|
+
|
46
|
+
child.partial_word? chars
|
47
|
+
end
|
48
|
+
|
49
|
+
def word_chars? chars = []
|
50
|
+
letter = chars.shift.to_sym
|
51
|
+
child = children_tree[letter]
|
52
|
+
return false unless child
|
53
|
+
|
54
|
+
child.word? chars
|
55
|
+
end
|
56
|
+
|
57
|
+
def closest_node chars
|
58
|
+
letter = chars.shift.to_sym
|
59
|
+
child = children_tree[letter]
|
60
|
+
return missing unless child
|
61
|
+
|
62
|
+
child.scan chars
|
63
|
+
end
|
64
|
+
|
65
|
+
def children_match_prefix chars
|
66
|
+
return enum_for :children_match_prefix, chars unless block_given?
|
67
|
+
|
68
|
+
return if chars.empty?
|
69
|
+
|
70
|
+
letter = chars.shift.to_sym
|
71
|
+
child = children_tree[letter]
|
72
|
+
|
73
|
+
return unless child
|
74
|
+
|
75
|
+
child.match_prefix chars do |word|
|
76
|
+
yield word
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
module Readers
|
@@ -8,17 +10,7 @@ module Rambling
|
|
8
10
|
# from.
|
9
11
|
# @yield [String] Each line read from the file.
|
10
12
|
def each_word filepath
|
11
|
-
|
12
|
-
end
|
13
|
-
|
14
|
-
private
|
15
|
-
|
16
|
-
def each_line filepath
|
17
|
-
open(filepath) { |file| file.each_line { |line| yield line } }
|
18
|
-
end
|
19
|
-
|
20
|
-
def open filepath
|
21
|
-
File.open(filepath) { |file| yield file }
|
13
|
+
File.foreach(filepath) { |line| yield line.chomp! }
|
22
14
|
end
|
23
15
|
end
|
24
16
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
module Serializers
|
@@ -11,20 +13,28 @@ module Rambling
|
|
11
13
|
end
|
12
14
|
|
13
15
|
# Loads marshaled object from contents in filepath and deserializes it
|
14
|
-
# into a {Node Node}.
|
16
|
+
# into a {Nodes::Node Node}.
|
15
17
|
# @param [String] filepath the full path of the file to load the
|
16
18
|
# marshaled object from.
|
17
|
-
# @return [Node] The deserialized {Node Node}.
|
19
|
+
# @return [Nodes::Node] The deserialized {Nodes::Node Node}.
|
20
|
+
# @see https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-load
|
21
|
+
# Marshal.load
|
22
|
+
# @note Use of
|
23
|
+
# {https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-load
|
24
|
+
# Marshal.load} is generally discouraged. Only use this with trusted
|
25
|
+
# input.
|
18
26
|
def load filepath
|
19
27
|
::Marshal.load serializer.load filepath
|
20
28
|
end
|
21
29
|
|
22
|
-
# Serializes a {Node Node} and dumps it as a marshaled object
|
23
|
-
# filepath.
|
24
|
-
# @param [Node] node the node to serialize
|
30
|
+
# Serializes a {Nodes::Node Node} and dumps it as a marshaled object
|
31
|
+
# into filepath.
|
32
|
+
# @param [Nodes::Node] node the node to serialize
|
25
33
|
# @param [String] filepath the full path of the file to dump the
|
26
34
|
# marshaled object into.
|
27
35
|
# @return [Numeric] number of bytes written to disk.
|
36
|
+
# @see https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-dump
|
37
|
+
# Marshal.dump
|
28
38
|
def dump node, filepath
|
29
39
|
serializer.dump ::Marshal.dump(node), filepath
|
30
40
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
module Serializers
|
@@ -11,20 +13,34 @@ module Rambling
|
|
11
13
|
end
|
12
14
|
|
13
15
|
# Loads serialized object from YAML file in filepath and deserializes
|
14
|
-
# it into a {Node Node}.
|
16
|
+
# it into a {Nodes::Node Node}.
|
15
17
|
# @param [String] filepath the full path of the file to load the
|
16
18
|
# serialized YAML object from.
|
17
|
-
# @return [Node] The deserialized {Node Node}.
|
19
|
+
# @return [Nodes::Node] The deserialized {Nodes::Node Node}.
|
20
|
+
# @see https://ruby-doc.org/stdlib-2.5.0/libdoc/psych/rdoc/Psych.html#method-c-safe_load
|
21
|
+
# Psych.safe_load
|
18
22
|
def load filepath
|
19
23
|
require 'yaml'
|
20
|
-
::YAML.
|
24
|
+
::YAML.safe_load(
|
25
|
+
serializer.load(filepath),
|
26
|
+
[
|
27
|
+
Symbol,
|
28
|
+
Rambling::Trie::Nodes::Raw,
|
29
|
+
Rambling::Trie::Nodes::Compressed,
|
30
|
+
],
|
31
|
+
[],
|
32
|
+
true,
|
33
|
+
)
|
21
34
|
end
|
22
35
|
|
23
|
-
# Serializes a {Node Node} and dumps it as a YAML object into
|
24
|
-
#
|
36
|
+
# Serializes a {Nodes::Node Node} and dumps it as a YAML object into
|
37
|
+
# filepath.
|
38
|
+
# @param [Nodes::Node] node the node to serialize
|
25
39
|
# @param [String] filepath the full path of the file to dump the YAML
|
26
40
|
# object into.
|
27
41
|
# @return [Numeric] number of bytes written to disk.
|
42
|
+
# @see https://ruby-doc.org/stdlib-2.5.0/libdoc/psych/rdoc/Psych.html#method-c-dump
|
43
|
+
# Psych.dump
|
28
44
|
def dump node, filepath
|
29
45
|
require 'yaml'
|
30
46
|
serializer.dump ::YAML.dump(node), filepath
|
@@ -1,14 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
module Serializers
|
4
6
|
# Zip file serializer. Dumps/loads contents from zip files. Automatically
|
5
7
|
# detects if zip file contains `.marshal` or `.yml` file
|
6
8
|
class Zip
|
7
|
-
extend ::Forwardable
|
8
|
-
|
9
9
|
# Creates a new Zip serializer.
|
10
|
-
# @param [Properties] properties the configuration
|
11
|
-
# far.
|
10
|
+
# @param [Configuration::Properties] properties the configuration
|
11
|
+
# properties set up so far.
|
12
12
|
def initialize properties
|
13
13
|
@properties = properties
|
14
14
|
end
|
@@ -17,6 +17,8 @@ module Rambling
|
|
17
17
|
# unzipped files.
|
18
18
|
# @param [String] filepath the filepath to load contents from.
|
19
19
|
# @return [String] all contents of the unzipped loaded file.
|
20
|
+
# @see https://github.com/rubyzip/rubyzip#reading-a-zip-file Zip
|
21
|
+
# reading a file
|
20
22
|
def load filepath
|
21
23
|
require 'zip'
|
22
24
|
|
@@ -34,6 +36,8 @@ module Rambling
|
|
34
36
|
# @param [String] contents the contents to dump.
|
35
37
|
# @param [String] filepath the filepath to dump the contents to.
|
36
38
|
# @return [Numeric] number of bytes written to disk.
|
39
|
+
# @see https://github.com/rubyzip/rubyzip#basic-zip-archive-creation
|
40
|
+
# Zip archive creation
|
37
41
|
def dump contents, filepath
|
38
42
|
require 'zip'
|
39
43
|
|
@@ -52,10 +56,13 @@ module Rambling
|
|
52
56
|
|
53
57
|
attr_reader :properties
|
54
58
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
+
def serializers
|
60
|
+
properties.serializers
|
61
|
+
end
|
62
|
+
|
63
|
+
def tmp_path
|
64
|
+
properties.tmp_path
|
65
|
+
end
|
59
66
|
|
60
67
|
def path filename
|
61
68
|
require 'securerandom'
|