rambling-trie 1.0.2 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/Gemfile +6 -3
- data/Guardfile +3 -1
- data/README.md +30 -12
- data/Rakefile +8 -0
- data/lib/rambling-trie.rb +2 -0
- data/lib/rambling/trie.rb +48 -26
- data/lib/rambling/trie/comparable.rb +6 -3
- data/lib/rambling/trie/compressible.rb +16 -0
- data/lib/rambling/trie/compressor.rb +39 -24
- data/lib/rambling/trie/configuration.rb +3 -1
- data/lib/rambling/trie/configuration/properties.rb +18 -9
- data/lib/rambling/trie/configuration/provider_collection.rb +38 -17
- data/lib/rambling/trie/container.rb +123 -36
- data/lib/rambling/trie/enumerable.rb +6 -4
- data/lib/rambling/trie/inspectable.rb +2 -0
- data/lib/rambling/trie/invalid_operation.rb +3 -1
- data/lib/rambling/trie/nodes.rb +13 -0
- data/lib/rambling/trie/nodes/compressed.rb +98 -0
- data/lib/rambling/trie/nodes/missing.rb +12 -0
- data/lib/rambling/trie/nodes/node.rb +183 -0
- data/lib/rambling/trie/nodes/raw.rb +82 -0
- data/lib/rambling/trie/readers.rb +3 -1
- data/lib/rambling/trie/readers/plain_text.rb +3 -11
- data/lib/rambling/trie/serializers.rb +3 -1
- data/lib/rambling/trie/serializers/file.rb +2 -0
- data/lib/rambling/trie/serializers/marshal.rb +15 -5
- data/lib/rambling/trie/serializers/yaml.rb +21 -5
- data/lib/rambling/trie/serializers/zip.rb +15 -8
- data/lib/rambling/trie/stringifyable.rb +8 -2
- data/lib/rambling/trie/version.rb +3 -1
- data/rambling-trie.gemspec +21 -10
- data/spec/assets/test_words.es_DO.txt +1 -0
- data/spec/integration/rambling/trie_spec.rb +44 -35
- data/spec/lib/rambling/trie/comparable_spec.rb +8 -15
- data/spec/lib/rambling/trie/compressor_spec.rb +90 -13
- data/spec/lib/rambling/trie/configuration/properties_spec.rb +21 -13
- data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +18 -34
- data/spec/lib/rambling/trie/container_spec.rb +183 -217
- data/spec/lib/rambling/trie/enumerable_spec.rb +14 -9
- data/spec/lib/rambling/trie/inspectable_spec.rb +36 -11
- data/spec/lib/rambling/trie/nodes/compressed_spec.rb +37 -0
- data/spec/lib/rambling/trie/nodes/node_spec.rb +9 -0
- data/spec/lib/rambling/trie/nodes/raw_spec.rb +179 -0
- data/spec/lib/rambling/trie/readers/plain_text_spec.rb +3 -1
- data/spec/lib/rambling/trie/serializers/file_spec.rb +6 -4
- data/spec/lib/rambling/trie/serializers/marshal_spec.rb +5 -7
- data/spec/lib/rambling/trie/serializers/yaml_spec.rb +5 -7
- data/spec/lib/rambling/trie/serializers/zip_spec.rb +18 -20
- data/spec/lib/rambling/trie/stringifyable_spec.rb +14 -11
- data/spec/lib/rambling/trie_spec.rb +18 -11
- data/spec/spec_helper.rb +10 -5
- data/spec/support/config.rb +10 -0
- data/spec/support/helpers/add_word.rb +20 -0
- data/spec/support/helpers/one_line_heredoc.rb +11 -0
- data/spec/support/shared_examples/a_compressible_trie.rb +40 -0
- data/spec/support/shared_examples/a_serializable_trie.rb +10 -6
- data/spec/support/shared_examples/a_serializer.rb +9 -1
- data/spec/support/shared_examples/a_trie_data_structure.rb +2 -0
- data/spec/support/shared_examples/a_trie_node.rb +127 -0
- data/spec/{lib/rambling/trie/compressed_node_spec.rb → support/shared_examples/a_trie_node_implementation.rb} +25 -72
- metadata +42 -31
- data/lib/rambling/trie/compressable.rb +0 -14
- data/lib/rambling/trie/compressed_node.rb +0 -120
- data/lib/rambling/trie/missing_node.rb +0 -8
- data/lib/rambling/trie/node.rb +0 -97
- data/lib/rambling/trie/raw_node.rb +0 -96
- data/spec/lib/rambling/trie/node_spec.rb +0 -86
- data/spec/lib/rambling/trie/raw_node_spec.rb +0 -389
- data/spec/support/shared_examples/a_compressable_trie.rb +0 -26
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
|
4
|
+
module Trie
|
5
|
+
module Nodes
|
6
|
+
# A representation of a missing node in the trie data structure. Returned
|
7
|
+
# when a node is not found.
|
8
|
+
class Missing < Rambling::Trie::Nodes::Node
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
|
4
|
+
module Trie
|
5
|
+
module Nodes
|
6
|
+
# A representation of a node in the trie data structure.
|
7
|
+
class Node
|
8
|
+
include Rambling::Trie::Compressible
|
9
|
+
include Rambling::Trie::Enumerable
|
10
|
+
include Rambling::Trie::Comparable
|
11
|
+
include Rambling::Trie::Stringifyable
|
12
|
+
include Rambling::Trie::Inspectable
|
13
|
+
|
14
|
+
# @overload letter
|
15
|
+
# Letter(s) corresponding to the current node.
|
16
|
+
# @overload letter=(letter)
|
17
|
+
# Sets the letter(s) corresponding to the current node. Ensures the
|
18
|
+
# {Node#letter #letter} in the {Node#parent #parent}'s
|
19
|
+
# {Node#children_tree #children_tree} is updated.
|
20
|
+
# @param [String, Symbol, nil] letter the letter value.
|
21
|
+
# @return [Symbol, nil] the corresponding letter(s).
|
22
|
+
attr_reader :letter
|
23
|
+
|
24
|
+
# Child nodes tree.
|
25
|
+
# @return [Hash] the children_tree hash, consisting of `:letter =>
|
26
|
+
# node`.
|
27
|
+
attr_accessor :children_tree
|
28
|
+
|
29
|
+
# Parent node.
|
30
|
+
# @return [Node, nil] the parent of the current node.
|
31
|
+
attr_accessor :parent
|
32
|
+
|
33
|
+
# Creates a new node.
|
34
|
+
# @param [Symbol, nil] letter the Node's letter value
|
35
|
+
# @param [Node, nil] parent the parent of the current node.
|
36
|
+
def initialize letter = nil, parent = nil, children_tree = {}
|
37
|
+
@letter = letter
|
38
|
+
@parent = parent
|
39
|
+
@children_tree = children_tree
|
40
|
+
end
|
41
|
+
|
42
|
+
# Child nodes.
|
43
|
+
# @return [Array<Node>] the array of children nodes contained
|
44
|
+
# in the current node.
|
45
|
+
def children
|
46
|
+
children_tree.values
|
47
|
+
end
|
48
|
+
|
49
|
+
# First child node.
|
50
|
+
# @return [Node, nil] the first child contained in the current node.
|
51
|
+
def first_child
|
52
|
+
return if children_tree.empty?
|
53
|
+
|
54
|
+
children_tree.each_value do |child|
|
55
|
+
return child
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# Indicates if the current node is the root node.
|
60
|
+
# @return [Boolean] `true` if the node does not have a parent, `false`
|
61
|
+
# otherwise.
|
62
|
+
def root?
|
63
|
+
!parent
|
64
|
+
end
|
65
|
+
|
66
|
+
# Indicates if a {Node Node} is terminal or not.
|
67
|
+
# @return [Boolean] `true` for terminal nodes, `false` otherwise.
|
68
|
+
def terminal?
|
69
|
+
!!terminal
|
70
|
+
end
|
71
|
+
|
72
|
+
# Mark {Node Node} as terminal.
|
73
|
+
# @return [Node] the modified node.
|
74
|
+
def terminal!
|
75
|
+
self.terminal = true
|
76
|
+
self
|
77
|
+
end
|
78
|
+
|
79
|
+
def letter= letter
|
80
|
+
@letter = letter.to_sym if letter
|
81
|
+
end
|
82
|
+
|
83
|
+
# Checks if a path for a set of characters exists in the trie.
|
84
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
85
|
+
# @return [Boolean] `true` if the characters are found, `false`
|
86
|
+
# otherwise.
|
87
|
+
def partial_word? chars
|
88
|
+
return true if chars.empty?
|
89
|
+
|
90
|
+
partial_word_chars? chars
|
91
|
+
end
|
92
|
+
|
93
|
+
# Checks if a path for a set of characters represents a word in the trie.
|
94
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
95
|
+
# @return [Boolean] `true` if the characters are found and form a word,
|
96
|
+
# `false` otherwise.
|
97
|
+
def word? chars = []
|
98
|
+
return terminal? if chars.empty?
|
99
|
+
|
100
|
+
word_chars? chars
|
101
|
+
end
|
102
|
+
|
103
|
+
# Returns the node that starts with the specified characters.
|
104
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
105
|
+
# @return [Node] the node that matches the specified characters.
|
106
|
+
# {Missing Missing} when not found.
|
107
|
+
def scan chars
|
108
|
+
return self if chars.empty?
|
109
|
+
|
110
|
+
closest_node chars
|
111
|
+
end
|
112
|
+
|
113
|
+
# Returns all words that match a prefix of any length within chars.
|
114
|
+
# @param [Array<String>] chars the chars to base the prefix on.
|
115
|
+
# @return [Enumerator<String>] all the words that match a prefix given
|
116
|
+
# by chars.
|
117
|
+
# @yield [String] each word found.
|
118
|
+
def match_prefix chars
|
119
|
+
return enum_for :match_prefix, chars unless block_given?
|
120
|
+
|
121
|
+
yield as_word if terminal?
|
122
|
+
|
123
|
+
children_match_prefix chars do |word|
|
124
|
+
yield word
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
# Get {Node Node} corresponding to a given letter.
|
129
|
+
# @param [Symbol] letter the letter to search for in the node.
|
130
|
+
# @return [Node] the node corresponding to that letter.
|
131
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
|
132
|
+
# Hash#[]
|
133
|
+
def [] letter
|
134
|
+
children_tree[letter]
|
135
|
+
end
|
136
|
+
|
137
|
+
# Set the {Node Node} that corresponds to a given letter.
|
138
|
+
# @param [Symbol] letter the letter to insert or update in the node's children tree.
|
139
|
+
# @param [Node] node the {Node Node} to assign to that letter.
|
140
|
+
# @return [Node] the node corresponding to the inserted or
|
141
|
+
# updated letter.
|
142
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D-3D
|
143
|
+
# Hash#[]=
|
144
|
+
def []= letter, node
|
145
|
+
children_tree[letter] = node
|
146
|
+
end
|
147
|
+
|
148
|
+
# Check if a {Node Node}'s children tree contains a given
|
149
|
+
# letter.
|
150
|
+
# @param [Symbol] letter the letter to search for in the node.
|
151
|
+
# @return [Boolean] `true` if the letter is present, `false` otherwise
|
152
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-has_key-3F
|
153
|
+
# Hash#key?
|
154
|
+
def key? letter
|
155
|
+
children_tree.key? letter
|
156
|
+
end
|
157
|
+
|
158
|
+
# Delete a given letter and its corresponding {Node Node} from
|
159
|
+
# this {Node Node}'s children tree.
|
160
|
+
# @param [Symbol] letter the letter to delete from the node's children
|
161
|
+
# tree.
|
162
|
+
# @return [Node] the node corresponding to the deleted letter.
|
163
|
+
# @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-delete
|
164
|
+
# Hash#delete
|
165
|
+
def delete letter
|
166
|
+
children_tree.delete letter
|
167
|
+
end
|
168
|
+
|
169
|
+
alias_method :has_key?, :key?
|
170
|
+
|
171
|
+
protected
|
172
|
+
|
173
|
+
def missing
|
174
|
+
Rambling::Trie::Nodes::Missing.new
|
175
|
+
end
|
176
|
+
|
177
|
+
private
|
178
|
+
|
179
|
+
attr_accessor :terminal
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
|
4
|
+
module Trie
|
5
|
+
module Nodes
|
6
|
+
# A representation of a node in an uncompressed trie data structure.
|
7
|
+
class Raw < Rambling::Trie::Nodes::Node
|
8
|
+
# Adds a word to the current raw (uncompressed) trie node.
|
9
|
+
# @param [Array<Symbol>] chars the char array to add to the trie.
|
10
|
+
# @return [Raw] the added/modified node based on the word added.
|
11
|
+
# @note This method clears the contents of the chars variable.
|
12
|
+
def add chars
|
13
|
+
if chars.empty?
|
14
|
+
terminal!
|
15
|
+
else
|
16
|
+
add_to_children_tree chars
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# Always return `false` for a raw (uncompressed) node.
|
21
|
+
# @return [Boolean] always `false` for a raw (uncompressed) node.
|
22
|
+
def compressed?
|
23
|
+
false
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def add_to_children_tree chars
|
29
|
+
letter = chars.pop
|
30
|
+
child = children_tree[letter] || new_node(letter)
|
31
|
+
child.add chars
|
32
|
+
child
|
33
|
+
end
|
34
|
+
|
35
|
+
def new_node letter
|
36
|
+
node = Rambling::Trie::Nodes::Raw.new letter, self
|
37
|
+
children_tree[letter] = node
|
38
|
+
node
|
39
|
+
end
|
40
|
+
|
41
|
+
def partial_word_chars? chars = []
|
42
|
+
letter = chars.shift.to_sym
|
43
|
+
child = children_tree[letter]
|
44
|
+
return false unless child
|
45
|
+
|
46
|
+
child.partial_word? chars
|
47
|
+
end
|
48
|
+
|
49
|
+
def word_chars? chars = []
|
50
|
+
letter = chars.shift.to_sym
|
51
|
+
child = children_tree[letter]
|
52
|
+
return false unless child
|
53
|
+
|
54
|
+
child.word? chars
|
55
|
+
end
|
56
|
+
|
57
|
+
def closest_node chars
|
58
|
+
letter = chars.shift.to_sym
|
59
|
+
child = children_tree[letter]
|
60
|
+
return missing unless child
|
61
|
+
|
62
|
+
child.scan chars
|
63
|
+
end
|
64
|
+
|
65
|
+
def children_match_prefix chars
|
66
|
+
return enum_for :children_match_prefix, chars unless block_given?
|
67
|
+
|
68
|
+
return if chars.empty?
|
69
|
+
|
70
|
+
letter = chars.shift.to_sym
|
71
|
+
child = children_tree[letter]
|
72
|
+
|
73
|
+
return unless child
|
74
|
+
|
75
|
+
child.match_prefix chars do |word|
|
76
|
+
yield word
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
module Readers
|
@@ -8,17 +10,7 @@ module Rambling
|
|
8
10
|
# from.
|
9
11
|
# @yield [String] Each line read from the file.
|
10
12
|
def each_word filepath
|
11
|
-
|
12
|
-
end
|
13
|
-
|
14
|
-
private
|
15
|
-
|
16
|
-
def each_line filepath
|
17
|
-
open(filepath) { |file| file.each_line { |line| yield line } }
|
18
|
-
end
|
19
|
-
|
20
|
-
def open filepath
|
21
|
-
File.open(filepath) { |file| yield file }
|
13
|
+
File.foreach(filepath) { |line| yield line.chomp! }
|
22
14
|
end
|
23
15
|
end
|
24
16
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
module Serializers
|
@@ -11,20 +13,28 @@ module Rambling
|
|
11
13
|
end
|
12
14
|
|
13
15
|
# Loads marshaled object from contents in filepath and deserializes it
|
14
|
-
# into a {Node Node}.
|
16
|
+
# into a {Nodes::Node Node}.
|
15
17
|
# @param [String] filepath the full path of the file to load the
|
16
18
|
# marshaled object from.
|
17
|
-
# @return [Node] The deserialized {Node Node}.
|
19
|
+
# @return [Nodes::Node] The deserialized {Nodes::Node Node}.
|
20
|
+
# @see https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-load
|
21
|
+
# Marshal.load
|
22
|
+
# @note Use of
|
23
|
+
# {https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-load
|
24
|
+
# Marshal.load} is generally discouraged. Only use this with trusted
|
25
|
+
# input.
|
18
26
|
def load filepath
|
19
27
|
::Marshal.load serializer.load filepath
|
20
28
|
end
|
21
29
|
|
22
|
-
# Serializes a {Node Node} and dumps it as a marshaled object
|
23
|
-
# filepath.
|
24
|
-
# @param [Node] node the node to serialize
|
30
|
+
# Serializes a {Nodes::Node Node} and dumps it as a marshaled object
|
31
|
+
# into filepath.
|
32
|
+
# @param [Nodes::Node] node the node to serialize
|
25
33
|
# @param [String] filepath the full path of the file to dump the
|
26
34
|
# marshaled object into.
|
27
35
|
# @return [Numeric] number of bytes written to disk.
|
36
|
+
# @see https://ruby-doc.org/core-2.5.0/Marshal.html#method-c-dump
|
37
|
+
# Marshal.dump
|
28
38
|
def dump node, filepath
|
29
39
|
serializer.dump ::Marshal.dump(node), filepath
|
30
40
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
module Serializers
|
@@ -11,20 +13,34 @@ module Rambling
|
|
11
13
|
end
|
12
14
|
|
13
15
|
# Loads serialized object from YAML file in filepath and deserializes
|
14
|
-
# it into a {Node Node}.
|
16
|
+
# it into a {Nodes::Node Node}.
|
15
17
|
# @param [String] filepath the full path of the file to load the
|
16
18
|
# serialized YAML object from.
|
17
|
-
# @return [Node] The deserialized {Node Node}.
|
19
|
+
# @return [Nodes::Node] The deserialized {Nodes::Node Node}.
|
20
|
+
# @see https://ruby-doc.org/stdlib-2.5.0/libdoc/psych/rdoc/Psych.html#method-c-safe_load
|
21
|
+
# Psych.safe_load
|
18
22
|
def load filepath
|
19
23
|
require 'yaml'
|
20
|
-
::YAML.load serializer.load filepath
|
24
|
+
::YAML.safe_load(
|
25
|
+
serializer.load(filepath),
|
26
|
+
[
|
27
|
+
Symbol,
|
28
|
+
Rambling::Trie::Nodes::Raw,
|
29
|
+
Rambling::Trie::Nodes::Compressed,
|
30
|
+
],
|
31
|
+
[],
|
32
|
+
true,
|
33
|
+
)
|
21
34
|
end
|
22
35
|
|
23
|
-
# Serializes a {Node Node} and dumps it as a YAML object into
|
24
|
-
#
|
36
|
+
# Serializes a {Nodes::Node Node} and dumps it as a YAML object into
|
37
|
+
# filepath.
|
38
|
+
# @param [Nodes::Node] node the node to serialize
|
25
39
|
# @param [String] filepath the full path of the file to dump the YAML
|
26
40
|
# object into.
|
27
41
|
# @return [Numeric] number of bytes written to disk.
|
42
|
+
# @see https://ruby-doc.org/stdlib-2.5.0/libdoc/psych/rdoc/Psych.html#method-c-dump
|
43
|
+
# Psych.dump
|
28
44
|
def dump node, filepath
|
29
45
|
require 'yaml'
|
30
46
|
serializer.dump ::YAML.dump(node), filepath
|
@@ -1,14 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rambling
|
2
4
|
module Trie
|
3
5
|
module Serializers
|
4
6
|
# Zip file serializer. Dumps/loads contents from zip files. Automatically
|
5
7
|
# detects if zip file contains `.marshal` or `.yml` file
|
6
8
|
class Zip
|
7
|
-
extend ::Forwardable
|
8
|
-
|
9
9
|
# Creates a new Zip serializer.
|
10
|
-
# @param [Properties] properties the configuration
|
11
|
-
# far.
|
10
|
+
# @param [Configuration::Properties] properties the configuration
|
11
|
+
# properties set up so far.
|
12
12
|
def initialize properties
|
13
13
|
@properties = properties
|
14
14
|
end
|
@@ -17,6 +17,8 @@ module Rambling
|
|
17
17
|
# unzipped files.
|
18
18
|
# @param [String] filepath the filepath to load contents from.
|
19
19
|
# @return [String] all contents of the unzipped loaded file.
|
20
|
+
# @see https://github.com/rubyzip/rubyzip#reading-a-zip-file Zip
|
21
|
+
# reading a file
|
20
22
|
def load filepath
|
21
23
|
require 'zip'
|
22
24
|
|
@@ -34,6 +36,8 @@ module Rambling
|
|
34
36
|
# @param [String] contents the contents to dump.
|
35
37
|
# @param [String] filepath the filepath to dump the contents to.
|
36
38
|
# @return [Numeric] number of bytes written to disk.
|
39
|
+
# @see https://github.com/rubyzip/rubyzip#basic-zip-archive-creation
|
40
|
+
# Zip archive creation
|
37
41
|
def dump contents, filepath
|
38
42
|
require 'zip'
|
39
43
|
|
@@ -52,10 +56,13 @@ module Rambling
|
|
52
56
|
|
53
57
|
attr_reader :properties
|
54
58
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
+
def serializers
|
60
|
+
properties.serializers
|
61
|
+
end
|
62
|
+
|
63
|
+
def tmp_path
|
64
|
+
properties.tmp_path
|
65
|
+
end
|
59
66
|
|
60
67
|
def path filename
|
61
68
|
require 'securerandom'
|