rambling-trie-opal 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +26 -0
- data/Guardfile +10 -0
- data/LICENSE +26 -0
- data/README.md +301 -0
- data/Rakefile +15 -0
- data/lib/rambling-trie.rb +3 -0
- data/lib/rambling/trie.rb +119 -0
- data/lib/rambling/trie/comparable.rb +19 -0
- data/lib/rambling/trie/compressible.rb +16 -0
- data/lib/rambling/trie/compressor.rb +64 -0
- data/lib/rambling/trie/configuration.rb +16 -0
- data/lib/rambling/trie/configuration/properties.rb +75 -0
- data/lib/rambling/trie/configuration/provider_collection.rb +122 -0
- data/lib/rambling/trie/container.rb +226 -0
- data/lib/rambling/trie/enumerable.rb +29 -0
- data/lib/rambling/trie/inspectable.rb +39 -0
- data/lib/rambling/trie/invalid_operation.rb +15 -0
- data/lib/rambling/trie/nodes.rb +18 -0
- data/lib/rambling/trie/nodes/compressed.rb +98 -0
- data/lib/rambling/trie/nodes/missing.rb +12 -0
- data/lib/rambling/trie/nodes/node.rb +183 -0
- data/lib/rambling/trie/nodes/raw.rb +82 -0
- data/lib/rambling/trie/readers.rb +15 -0
- data/lib/rambling/trie/readers/plain_text.rb +18 -0
- data/lib/rambling/trie/serializers.rb +18 -0
- data/lib/rambling/trie/serializers/file.rb +27 -0
- data/lib/rambling/trie/serializers/marshal.rb +48 -0
- data/lib/rambling/trie/serializers/yaml.rb +55 -0
- data/lib/rambling/trie/serializers/zip.rb +74 -0
- data/lib/rambling/trie/stringifyable.rb +26 -0
- data/lib/rambling/trie/version.rb +8 -0
- data/rambling-trie-opal.gemspec +36 -0
- data/spec/assets/test_words.en_US.txt +23 -0
- data/spec/assets/test_words.es_DO.txt +24 -0
- data/spec/integration/rambling/trie_spec.rb +87 -0
- data/spec/lib/rambling/trie/comparable_spec.rb +97 -0
- data/spec/lib/rambling/trie/compressor_spec.rb +108 -0
- data/spec/lib/rambling/trie/configuration/properties_spec.rb +57 -0
- data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +149 -0
- data/spec/lib/rambling/trie/container_spec.rb +591 -0
- data/spec/lib/rambling/trie/enumerable_spec.rb +42 -0
- data/spec/lib/rambling/trie/inspectable_spec.rb +56 -0
- data/spec/lib/rambling/trie/nodes/compressed_spec.rb +37 -0
- data/spec/lib/rambling/trie/nodes/node_spec.rb +9 -0
- data/spec/lib/rambling/trie/nodes/raw_spec.rb +179 -0
- data/spec/lib/rambling/trie/readers/plain_text_spec.rb +16 -0
- data/spec/lib/rambling/trie/serializers/file_spec.rb +13 -0
- data/spec/lib/rambling/trie/serializers/marshal_spec.rb +12 -0
- data/spec/lib/rambling/trie/serializers/yaml_spec.rb +12 -0
- data/spec/lib/rambling/trie/serializers/zip_spec.rb +28 -0
- data/spec/lib/rambling/trie/stringifyable_spec.rb +85 -0
- data/spec/lib/rambling/trie_spec.rb +182 -0
- data/spec/spec_helper.rb +37 -0
- data/spec/support/config.rb +15 -0
- data/spec/support/helpers/add_word.rb +20 -0
- data/spec/support/helpers/one_line_heredoc.rb +11 -0
- data/spec/support/shared_examples/a_compressible_trie.rb +40 -0
- data/spec/support/shared_examples/a_serializable_trie.rb +30 -0
- data/spec/support/shared_examples/a_serializer.rb +37 -0
- data/spec/support/shared_examples/a_trie_data_structure.rb +31 -0
- data/spec/support/shared_examples/a_trie_node.rb +127 -0
- data/spec/support/shared_examples/a_trie_node_implementation.rb +152 -0
- data/spec/tmp/.gitkeep +0 -0
- metadata +179 -0
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
  module Trie
    # Mixin giving a trie node +Enumerable+ behavior over the words stored at
    # and below it. The including object is expected to respond to
    # +terminal?+, +as_word+ and +children_tree+.
    module Enumerable
      include ::Enumerable

      # Number of words reachable from this node; same as Enumerable#count.
      # @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-count
      #   Enumerable#count
      alias_method :size, :count

      # Visits every word reachable from this node: the node's own word first
      # (when the node is terminal), then the words of each child in
      # +children_tree+ order.
      # @yield [String] each word contained in this trie node.
      # @return [Enumerator] when no block is given.
      def each(&block)
        return to_enum(:each) unless block

        block.call(as_word) if terminal?

        # Hand the same block down so children yield straight to the caller.
        children_tree.each_value { |child| child.each(&block) }
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
  module Trie
    # Mixin that builds a compact +inspect+ string for a trie node out of its
    # +letter+, +terminal+ flag and the keys of its +children_tree+.
    module Inspectable
      # @return [String] a string representation of the current node, in the
      #   form `#<ClassName letter: ..., terminal: ..., children: [...]>`.
      def inspect
        ['#<', class_name, ' ', attributes, '>'].join
      end

      private

      # Name of the including object's class.
      def class_name
        self.class.name
      end

      # Comma-separated list of the inspected attributes.
      def attributes
        "#{letter_inspect}, #{terminal_inspect}, #{children_inspect}"
      end

      def letter_inspect
        ['letter', letter.inspect].join ': '
      end

      def terminal_inspect
        ['terminal', terminal.inspect].join ': '
      end

      # Only the child keys are shown, not the child nodes themselves.
      def children_inspect
        ['children', children_tree.keys.inspect].join ': '
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
  module Trie
    # Error raised when an operation that is not valid for a trie data
    # structure is attempted.
    class InvalidOperation < RuntimeError
      # Builds a new {InvalidOperation InvalidOperation} exception.
      # @param [String, nil] message the exception message; when +nil+ the
      #   standard exception default message is used.
      def initialize(message = nil)
        super(message)
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# %w(node missing compressed raw).each do |file|
|
4
|
+
# require File.join('rambling', 'trie', 'nodes', file)
|
5
|
+
# end
|
6
|
+
|
7
|
+
require 'rambling/trie/nodes/node'
|
8
|
+
require 'rambling/trie/nodes/compressed'
|
9
|
+
require 'rambling/trie/nodes/missing'
|
10
|
+
require 'rambling/trie/nodes/raw'
|
11
|
+
|
12
|
+
module Rambling
  module Trie
    # Namespace under which all trie node classes are defined.
    module Nodes; end
  end
end
|
@@ -0,0 +1,98 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
  module Trie
    module Nodes
      # A representation of a node in a compressed trie data structure.
      #
      # The lookups below fetch a child from +children_tree+ by the first
      # pending character and then compare the child's (possibly
      # multi-character) letter against the leading characters of the input.
      class Compressed < Rambling::Trie::Nodes::Node
        # Always raises {Rambling::Trie::InvalidOperation InvalidOperation} when
        # trying to add a word to the current compressed trie node
        # @param [String] _ the word to add to the trie.
        # @raise [InvalidOperation] if the trie is already compressed.
        # @return [nil] this never returns as it always raises an exception.
        def add _
          raise Rambling::Trie::InvalidOperation,
            'Cannot add word to compressed trie'
        end

        # Always return `true` for a compressed node.
        # @return [Boolean] always `true` for a compressed node.
        def compressed?
          true
        end

        private

        # Checks whether +chars+ is a prefix of some path starting at this node.
        # @param [Array<String>] chars the pending characters (non-empty);
        #   consumed in place when they cover the child's whole letter.
        # @return [Boolean] `true` when the characters match, `false` otherwise.
        def partial_word_chars? chars
          child = children_tree[chars.first.to_sym]
          return false unless child

          child_letter = child.letter.to_s

          if chars.size >= child_letter.size
            segment = chars.slice!(0, child_letter.size).join
            # A full-length mismatch is final. Falling through here would
            # compare the *remaining* characters against the child's letter
            # and could report a false positive (e.g. "abdabc" against "abc").
            return child_letter == segment && child.partial_word?(chars)
          end

          # Fewer characters than the child's letter: they must be its prefix.
          child_letter.start_with? chars.join
        end

        # Checks whether +chars+ spells a complete word starting at this node.
        # @param [Array<String>] chars the pending characters (non-empty);
        #   consumed in place.
        # @return [Boolean] `true` when the characters form a word, `false`
        #   otherwise.
        def word_chars? chars
          letter = chars.slice! 0
          letter_sym = letter.to_sym

          child = children_tree[letter_sym]
          return false unless child

          loop do
            return child.word? chars if letter_sym == child.letter

            break if chars.empty?

            # Build a new string instead of appending in place: `<<` would
            # mutate the caller's character, fails on frozen strings and is
            # unavailable where strings are immutable (Opal).
            letter += chars.slice!(0)
            letter_sym = letter.to_sym
          end

          false
        end

        # Finds the node reached by following +chars+ from this node.
        # @param [Array<String>] chars the pending characters (non-empty);
        #   consumed in place when they cover the child's whole letter.
        # @return [Node] the matching node, or a {Missing Missing} node when
        #   there is no match.
        def closest_node chars
          child = children_tree[chars.first.to_sym]
          return missing unless child

          child_letter = child.letter.to_s

          if chars.size >= child_letter.size
            segment = chars.slice!(0, child_letter.size).join
            # As in #partial_word_chars?, a full-length mismatch ends the search.
            return child_letter == segment ? child.scan(chars) : missing
          end

          # Fewer characters than the child's letter: the child is the closest
          # node only when they are a prefix of its letter.
          child_letter.start_with?(chars.join) ? child : missing
        end

        # Yields the words below this node that are prefixes of +chars+.
        # @param [Array<String>] chars the pending characters; consumed in
        #   place.
        # @yield [String] each word found.
        # @return [Enumerator] when no block is given.
        def children_match_prefix chars
          return enum_for :children_match_prefix, chars unless block_given?

          return if chars.empty?

          child = children_tree[chars.first.to_sym]
          return unless child

          child_letter = child.letter.to_s
          segment = chars.slice!(0, child_letter.size).join

          return unless child_letter == segment

          child.match_prefix chars do |word|
            yield word
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
  module Trie
    module Nodes
      # Placeholder node handed back when a lookup finds no matching node in
      # the trie data structure. Adds nothing on top of {Node Node}.
      class Missing < Rambling::Trie::Nodes::Node; end
    end
  end
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
  module Trie
    module Nodes
      # A representation of a node in the trie data structure.
      #
      # Subclasses supply the private lookup helpers used below:
      # +partial_word_chars?+, +word_chars?+, +closest_node+ and
      # +children_match_prefix+.
      class Node
        include Rambling::Trie::Compressible
        include Rambling::Trie::Enumerable
        include Rambling::Trie::Comparable
        include Rambling::Trie::Stringifyable
        include Rambling::Trie::Inspectable

        # @overload letter
        #   Letter(s) corresponding to the current node.
        # @overload letter=(letter)
        #   Sets the letter(s) corresponding to the current node. The value is
        #   converted with +to_sym+; a +nil+ argument leaves the current
        #   letter untouched. Only this node is modified: the key under which
        #   the {Node#parent #parent} holds it in its
        #   {Node#children_tree #children_tree} is not changed by this setter.
        #   @param [String, Symbol, nil] letter the letter value.
        # @return [Symbol, nil] the corresponding letter(s).
        attr_reader :letter

        # Child nodes tree.
        # @return [Hash] the children_tree hash, consisting of `:letter =>
        #   node`.
        attr_accessor :children_tree

        # Parent node.
        # @return [Node, nil] the parent of the current node.
        attr_accessor :parent

        # Creates a new node.
        # @param [String, Symbol, nil] letter the Node's letter value; stored
        #   as a Symbol, the same normalization {#letter=} applies.
        # @param [Node, nil] parent the parent of the current node.
        # @param [Hash] children_tree the initial children, consisting of
        #   `:letter => node`.
        def initialize letter = nil, parent = nil, children_tree = {}
          @letter = letter&.to_sym
          @parent = parent
          @children_tree = children_tree
        end

        # Child nodes.
        # @return [Array<Node>] the array of children nodes contained
        #   in the current node.
        def children
          children_tree.values
        end

        # First child node.
        # @return [Node, nil] the first child contained in the current node,
        #   `nil` when there are no children.
        def first_child
          # Returning from inside the iteration stops at the first value
          # without building the full values array.
          children_tree.each_value { |child| return child }
          nil
        end

        # Indicates if the current node is the root node.
        # @return [Boolean] `true` if the node does not have a parent, `false`
        #   otherwise.
        def root?
          !parent
        end

        # Indicates if a {Node Node} is terminal or not.
        # @return [Boolean] `true` for terminal nodes, `false` otherwise.
        def terminal?
          !!terminal
        end

        # Mark {Node Node} as terminal.
        # @return [Node] the modified node.
        def terminal!
          self.terminal = true
          self
        end

        def letter= letter
          @letter = letter.to_sym if letter
        end

        # Checks if a path for a set of characters exists in the trie.
        # @param [Array<String>] chars the characters to look for in the trie.
        # @return [Boolean] `true` if the characters are found, `false`
        #   otherwise. An empty +chars+ always yields `true`.
        def partial_word? chars
          return true if chars.empty?

          partial_word_chars? chars
        end

        # Checks if a path for set of characters represents a word in the trie.
        # @param [Array<String>] chars the characters to look for in the trie.
        # @return [Boolean] `true` if the characters are found and form a word,
        #   `false` otherwise. With no characters left, the answer is whether
        #   this node is terminal.
        def word? chars = []
          return terminal? if chars.empty?

          word_chars? chars
        end

        # Returns the node that starts with the specified characters.
        # @param [Array<String>] chars the characters to look for in the trie.
        # @return [Node] the node that matches the specified characters
        #   (this node when +chars+ is empty). {Missing Missing} when not
        #   found.
        def scan chars
          return self if chars.empty?

          closest_node chars
        end

        # Returns all words that match a prefix of any length within chars.
        # @param [Array<String>] chars the chars to base the prefix on.
        # @return [Enumerator<String>] all the words that match a prefix given
        #   by chars.
        # @yield [String] each word found.
        def match_prefix chars
          return enum_for :match_prefix, chars unless block_given?

          yield as_word if terminal?

          children_match_prefix chars do |word|
            yield word
          end
        end

        # Get {Node Node} corresponding to a given letter.
        # @param [Symbol] letter the letter to search for in the node.
        # @return [Node] the node corresponding to that letter.
        # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
        #   Hash#[]
        def [] letter
          children_tree[letter]
        end

        # Set the {Node Node} that corresponds to a given letter.
        # @param [Symbol] letter the letter to insert or update in the node's
        #   children tree.
        # @param [Node] node the {Node Node} to assign to that letter.
        # @return [Node] the node corresponding to the inserted or
        #   updated letter.
        # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D-3D
        #   Hash#[]=
        def []= letter, node
          children_tree[letter] = node
        end

        # Check if a {Node Node}'s children tree contains a given
        # letter.
        # @param [Symbol] letter the letter to search for in the node.
        # @return [Boolean] `true` if the letter is present, `false` otherwise
        # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-has_key-3F
        #   Hash#key?
        def key? letter
          children_tree.key? letter
        end

        # Delete a given letter and its corresponding {Node Node} from
        # this {Node Node}'s children tree.
        # @param [Symbol] letter the letter to delete from the node's children
        #   tree.
        # @return [Node] the node corresponding to the deleted letter.
        # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-delete
        #   Hash#delete
        def delete letter
          children_tree.delete letter
        end

        alias_method :has_key?, :key?

        protected

        # Builds a fresh {Missing Missing} node to signal "not found".
        def missing
          Rambling::Trie::Nodes::Missing.new
        end

        private

        # Raw terminal flag; read through {#terminal?} and set through
        # {#terminal!}.
        attr_accessor :terminal
      end
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rambling
  module Trie
    module Nodes
      # Node of an uncompressed trie: each child is stored in +children_tree+
      # under the single letter taken from the input characters.
      class Raw < Rambling::Trie::Nodes::Node
        # Adds a word to the current raw (uncompressed) trie node.
        # @param [Array<Symbol>] chars the char array to add to the trie.
        #   Letters are taken from the END of the array.
        # @return [Raw] the added/modified node based on the word added.
        # @note This method clears the contents of the chars variable.
        def add(chars)
          return terminal! if chars.empty?

          add_to_children_tree chars
        end

        # A raw node is never compressed.
        # @return [Boolean] always `false` for a raw (uncompressed) node.
        def compressed?
          false
        end

        private

        # Pops the next letter, finds or creates its child and lets that child
        # add the remaining characters.
        def add_to_children_tree(chars)
          key = chars.pop
          (children_tree[key] || new_node(key)).tap do |target|
            target.add chars
          end
        end

        # Creates a child for +letter+ with this node as parent and registers
        # it in +children_tree+.
        def new_node(letter)
          children_tree[letter] = Rambling::Trie::Nodes::Raw.new letter, self
        end

        # Consumes the first character and continues the partial-word check
        # in the matching child, if any.
        def partial_word_chars?(chars = [])
          next_node = children_tree[chars.shift.to_sym]
          next_node ? next_node.partial_word?(chars) : false
        end

        # Consumes the first character and continues the word check in the
        # matching child, if any.
        def word_chars?(chars = [])
          next_node = children_tree[chars.shift.to_sym]
          next_node ? next_node.word?(chars) : false
        end

        # Consumes the first character and continues the scan in the matching
        # child; answers a missing node when there is no such child.
        def closest_node(chars)
          next_node = children_tree[chars.shift.to_sym]
          next_node ? next_node.scan(chars) : missing
        end

        # Yields the words found by following +chars+ one letter at a time.
        def children_match_prefix(chars, &block)
          return enum_for :children_match_prefix, chars unless block

          return if chars.empty?

          next_node = children_tree[chars.shift.to_sym]
          return unless next_node

          next_node.match_prefix(chars, &block)
        end
      end
    end
  end
end
|