rambling-trie-opal 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +26 -0
  3. data/Guardfile +10 -0
  4. data/LICENSE +26 -0
  5. data/README.md +301 -0
  6. data/Rakefile +15 -0
  7. data/lib/rambling-trie.rb +3 -0
  8. data/lib/rambling/trie.rb +119 -0
  9. data/lib/rambling/trie/comparable.rb +19 -0
  10. data/lib/rambling/trie/compressible.rb +16 -0
  11. data/lib/rambling/trie/compressor.rb +64 -0
  12. data/lib/rambling/trie/configuration.rb +16 -0
  13. data/lib/rambling/trie/configuration/properties.rb +75 -0
  14. data/lib/rambling/trie/configuration/provider_collection.rb +122 -0
  15. data/lib/rambling/trie/container.rb +226 -0
  16. data/lib/rambling/trie/enumerable.rb +29 -0
  17. data/lib/rambling/trie/inspectable.rb +39 -0
  18. data/lib/rambling/trie/invalid_operation.rb +15 -0
  19. data/lib/rambling/trie/nodes.rb +18 -0
  20. data/lib/rambling/trie/nodes/compressed.rb +98 -0
  21. data/lib/rambling/trie/nodes/missing.rb +12 -0
  22. data/lib/rambling/trie/nodes/node.rb +183 -0
  23. data/lib/rambling/trie/nodes/raw.rb +82 -0
  24. data/lib/rambling/trie/readers.rb +15 -0
  25. data/lib/rambling/trie/readers/plain_text.rb +18 -0
  26. data/lib/rambling/trie/serializers.rb +18 -0
  27. data/lib/rambling/trie/serializers/file.rb +27 -0
  28. data/lib/rambling/trie/serializers/marshal.rb +48 -0
  29. data/lib/rambling/trie/serializers/yaml.rb +55 -0
  30. data/lib/rambling/trie/serializers/zip.rb +74 -0
  31. data/lib/rambling/trie/stringifyable.rb +26 -0
  32. data/lib/rambling/trie/version.rb +8 -0
  33. data/rambling-trie-opal.gemspec +36 -0
  34. data/spec/assets/test_words.en_US.txt +23 -0
  35. data/spec/assets/test_words.es_DO.txt +24 -0
  36. data/spec/integration/rambling/trie_spec.rb +87 -0
  37. data/spec/lib/rambling/trie/comparable_spec.rb +97 -0
  38. data/spec/lib/rambling/trie/compressor_spec.rb +108 -0
  39. data/spec/lib/rambling/trie/configuration/properties_spec.rb +57 -0
  40. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +149 -0
  41. data/spec/lib/rambling/trie/container_spec.rb +591 -0
  42. data/spec/lib/rambling/trie/enumerable_spec.rb +42 -0
  43. data/spec/lib/rambling/trie/inspectable_spec.rb +56 -0
  44. data/spec/lib/rambling/trie/nodes/compressed_spec.rb +37 -0
  45. data/spec/lib/rambling/trie/nodes/node_spec.rb +9 -0
  46. data/spec/lib/rambling/trie/nodes/raw_spec.rb +179 -0
  47. data/spec/lib/rambling/trie/readers/plain_text_spec.rb +16 -0
  48. data/spec/lib/rambling/trie/serializers/file_spec.rb +13 -0
  49. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +12 -0
  50. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +12 -0
  51. data/spec/lib/rambling/trie/serializers/zip_spec.rb +28 -0
  52. data/spec/lib/rambling/trie/stringifyable_spec.rb +85 -0
  53. data/spec/lib/rambling/trie_spec.rb +182 -0
  54. data/spec/spec_helper.rb +37 -0
  55. data/spec/support/config.rb +15 -0
  56. data/spec/support/helpers/add_word.rb +20 -0
  57. data/spec/support/helpers/one_line_heredoc.rb +11 -0
  58. data/spec/support/shared_examples/a_compressible_trie.rb +40 -0
  59. data/spec/support/shared_examples/a_serializable_trie.rb +30 -0
  60. data/spec/support/shared_examples/a_serializer.rb +37 -0
  61. data/spec/support/shared_examples/a_trie_data_structure.rb +31 -0
  62. data/spec/support/shared_examples/a_trie_node.rb +127 -0
  63. data/spec/support/shared_examples/a_trie_node_implementation.rb +152 -0
  64. data/spec/tmp/.gitkeep +0 -0
  65. metadata +179 -0
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    # Provides the comparable behavior for the trie data structure.
    module Comparable
      # Compares two nodes.
      # @param [Nodes::Node] other the node to compare against.
      # @return [Boolean] `true` if the nodes' {Nodes::Node#letter #letter},
      #   {Nodes::Node#terminal? #terminal?} flag and
      #   {Nodes::Node#children_tree #children_tree} are all equal, `false`
      #   otherwise. Objects that do not expose those three methods (for
      #   example `nil`) are never equal to a node.
      def == other
        # Equality checks should not raise: anything that is not node-like is
        # simply reported as different.
        node_like = %i(letter terminal? children_tree).all? do |message|
          other.respond_to? message
        end
        return false unless node_like

        letter == other.letter &&
          terminal? == other.terminal? &&
          children_tree == other.children_tree
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    # Provides the compressible behavior for the trie data structure.
    module Compressible
      # Tells whether the current {Rambling::Trie::Nodes::Node Node} is a
      # candidate for compression.
      # @return [Boolean] `true` when the node is neither the root nor
      #   {Nodes::Node#terminal? terminal} and has exactly one child, `false`
      #   otherwise.
      def compressible?
        return false if root? || terminal?

        children_tree.size == 1
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    # Responsible for the compression process of a trie data structure.
    class Compressor
      # Compresses a {Nodes::Node Node} from a trie data structure.
      # @param [Nodes::Raw] node the node to compress.
      # @return [Nodes::Compressed] node the compressed version of the node.
      def compress node
        if node.compressible?
          compress_child_and_merge node
        else
          compress_children_and_copy node
        end
      end

      private

      # Compresses the node's first child and merges the node with the result.
      def compress_child_and_merge node
        merge node, compress(node.first_child)
      end

      # Builds a compressed node whose letter is the concatenation of both
      # nodes' letters. It keeps `node`'s parent and takes the children tree
      # and terminal flag from `other`.
      def merge node, other
        # Interpolation builds a fresh string instead of mutating the result of
        # `Symbol#to_s` in place; it also copes with a `nil` letter.
        # NOTE(review): in-place String mutation (`<<`) is reportedly
        # unsupported under Opal, which this package targets - confirm.
        letter = "#{node.letter}#{other.letter}".to_sym

        new_compressed_node(
          letter,
          node.parent,
          other.children_tree,
          other.terminal?,
        )
      end

      # Builds a compressed copy of the node in which every child has been
      # compressed as well.
      def compress_children_and_copy node
        new_compressed_node(
          node.letter,
          node.parent,
          compress_children(node.children_tree),
          node.terminal?,
        )
      end

      # Compresses every child of the given tree. The resulting hash keeps the
      # keys of the original tree.
      def compress_children tree
        tree.each_with_object({}) do |(letter, child), new_tree|
          new_tree[letter] = compress child
        end
      end

      # Instantiates a compressed node, marks it as terminal when requested and
      # re-parents every child in `tree` to the new node.
      def new_compressed_node letter, parent, tree, terminal
        node = Rambling::Trie::Nodes::Compressed.new letter, parent, tree
        node.terminal! if terminal

        tree.each_value { |child| child.parent = node }
        node
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ # %w(properties provider_collection).each do |file|
4
+ # require File.join('rambling', 'trie', 'configuration', file)
5
+ # end
6
+
7
+ require 'rambling/trie/configuration/properties'
8
+ require 'rambling/trie/configuration/provider_collection'
9
+
10
module Rambling
  module Trie
    # Namespace for configuration classes. The module itself defines no
    # behavior; this file requires
    # `rambling/trie/configuration/properties` and
    # `rambling/trie/configuration/provider_collection`, which are expected to
    # define the classes living under this namespace.
    module Configuration
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    module Configuration
      # Holds the configurable properties for Rambling::Trie.
      class Properties
        # The configured {Readers Readers}.
        # @return [ProviderCollection] the mapping of configured {Readers
        #   Readers}.
        attr_reader :readers

        # The configured {Serializers Serializers}.
        # @return [ProviderCollection] the mapping of configured {Serializers
        #   Serializers}.
        attr_reader :serializers

        # The configured {Compressor Compressor}.
        # @return [Compressor] the configured compressor.
        attr_accessor :compressor

        # The configured root_builder, which should return a {Nodes::Node Node}
        # when called.
        # @return [Proc<Nodes::Node>] the configured root_builder.
        attr_accessor :root_builder

        # The configured tmp_path, which will be used for throwaway files.
        # @return [String] the configured tmp_path.
        attr_accessor :tmp_path

        # Builds a properties instance populated with the default values.
        def initialize
          reset
        end

        # Restores every property to its default value: a plain text reader,
        # the marshal and yaml serializers, a fresh compressor, a root builder
        # producing raw nodes and `'/tmp'` as the temporary path.
        def reset
          reset_readers
          reset_serializers

          self.compressor = Rambling::Trie::Compressor.new
          self.root_builder = lambda { Rambling::Trie::Nodes::Raw.new }
          self.tmp_path = '/tmp'
        end

        private

        attr_writer :readers, :serializers

        # Rebuilds the readers collection with the plain text reader registered
        # for the `txt` extension.
        def reset_readers
          defaults = { txt: Rambling::Trie::Readers::PlainText.new }

          self.readers = Rambling::Trie::Configuration::ProviderCollection.new(
            :reader,
            defaults,
          )
        end

        # Rebuilds the serializers collection. The `yml` and `yaml` extensions
        # share a single yaml serializer instance.
        # NOTE(review): no zip serializer is registered here; presumably
        # intentional for this build - confirm.
        def reset_serializers
          marshal = Rambling::Trie::Serializers::Marshal.new
          yaml = Rambling::Trie::Serializers::Yaml.new
          defaults = { marshal: marshal, yml: yaml, yaml: yaml }

          self.serializers =
            Rambling::Trie::Configuration::ProviderCollection.new(
              :serializer,
              defaults,
            )
        end
      end
    end
  end
end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    module Configuration
      # Collection of configurable providers.
      class ProviderCollection
        # The name of this provider collection.
        # @return [String] the name of this provider collection.
        attr_reader :name

        # @overload default
        #   The default provider. Used when a provider cannot be resolved in
        #   {ProviderCollection#resolve #resolve}.
        # @overload default=(provider)
        #   Sets the default provider. Needs to be one of the configured
        #   providers.
        #   @param [Object] provider the provider to use as default.
        #   @raise [ArgumentError] when the given provider is not in the
        #     provider collection.
        #   @note If no providers have been configured, `nil` will be assigned.
        # @return [Object, nil] the default provider to use when a provider
        #   cannot be resolved in {ProviderCollection#resolve #resolve}.
        attr_reader :default

        # Creates a new provider collection.
        # @param [String] name the name for this provider collection.
        # @param [Hash] providers the configured providers.
        # @param [Object] default the configured default provider. When
        #   omitted, the first configured provider (if any) is used.
        # @raise [ArgumentError] when `default` is given but is not one of the
        #   configured providers.
        def initialize name, providers = {}, default = nil
          @name = name
          @configured_providers = providers
          @configured_default = default || providers.values.first

          reset
        end

        # Adds a new provider to the provider collection.
        # @param [Symbol] extension the extension that the provider will
        #   correspond to.
        # @param [Object] provider the provider to add to the provider
        #   collection.
        # @return [Object] the provider just added.
        def add extension, provider
          providers[extension] = provider
        end

        # Sets the default provider; `nil` is always accepted.
        # @param [Object, nil] provider the provider to use as default.
        # @raise [ArgumentError] when the given provider is not in the
        #   provider collection.
        def default= provider
          unless contains? provider
            raise ArgumentError,
              "default #{name} should be part of configured #{name}s"
          end

          @default = provider
        end

        # List of configured providers.
        # @return [Hash] the mapping of extensions to their corresponding
        #   providers.
        def providers
          @providers ||= {}
        end

        # Resolves the provider from a filepath based on the file extension.
        # @param [String] filepath the filepath to resolve into a provider.
        # @return [Object] the provider corresponding to the file extension in
        #   this provider collection. {#default} if not found.
        def resolve filepath
          providers[file_format filepath] || default
        end

        # Resets the provider collection to the initial values: the providers
        # and default given at construction time.
        def reset
          providers.clear
          configured_providers.each { |k, v| self[k] = v }
          self.default = configured_default
        end

        # List of formats that currently have a provider.
        # @return [Array<Symbol>] the formats (extensions) registered in this
        #   collection.
        # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-keys
        #   Hash#keys
        def formats
          providers.keys
        end

        # Get provider corresponding to a given format.
        # @param [Symbol] format the format to search for in the collection.
        # @return [Object] the provider corresponding to that format.
        # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
        #   Hash#[]
        def [] format
          providers[format]
        end

        private

        attr_reader :configured_providers, :configured_default

        # Registers a provider for a format; used by {#reset}.
        def []= format, instance
          providers[format] = instance
        end

        # The provider instances currently registered.
        def values
          providers.values
        end

        # Extracts the extension of the filepath, without the leading dot, as a
        # symbol (`:""` when the filepath has no extension).
        def file_format filepath
          # Non-mutating on purpose: `delete_prefix` returns a new string
          # rather than altering the one returned by `File.extname`.
          File.extname(filepath).delete_prefix('.').to_sym
        end

        # `nil` is always allowed as "no default"; any other provider has to be
        # one of the registered instances.
        def contains? provider
          provider.nil? || provider_instances.include?(provider)
        end

        alias_method :provider_instances, :values
      end
    end
  end
end
@@ -0,0 +1,226 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    # Wrapper on top of trie data structure.
    class Container
      include ::Enumerable

      # The root node of this trie.
      # @return [Nodes::Node] the root node of this trie.
      attr_reader :root

      # Creates a new trie.
      # @param [Nodes::Node] root the root node for the trie
      # @param [Compressor] compressor responsible for compressing the trie
      # @yield [Container] the trie just created.
      def initialize root, compressor
        @root = root
        @compressor = compressor

        yield self if block_given?
      end

      # Adds a word to the trie.
      # @param [String] word the word to add the branch from.
      # @return [Nodes::Node] the just added branch's root node.
      # @raise [InvalidOperation] if the trie is already compressed.
      # @see Nodes::Raw#add
      # @see Nodes::Compressed#add
      def add word
        root.add char_symbols word
      end

      # Adds all provided words to the trie.
      # @param [Array<String>] words the words to add the branch from.
      # @return [Array<Nodes::Node>] the collection of nodes added.
      # @raise [InvalidOperation] if the trie is already compressed.
      # @see Nodes::Raw#add
      # @see Nodes::Compressed#add
      def concat words
        words.map { |word| add word }
      end

      # Compresses the existing trie using redundant node elimination. Marks
      # the trie as compressed. Does nothing if the trie has already been
      # compressed.
      # @return [Container] self
      # @note This method replaces the root {Nodes::Raw Raw} node with a
      #   {Nodes::Compressed Compressed} version of it.
      def compress!
        self.root = compress_root unless root.compressed?
        self
      end

      # Compresses the existing trie using redundant node elimination. Returns
      # a new trie with the compressed root.
      # @return [Container] A new {Container} with the {Nodes::Compressed
      #   Compressed} root node or self if the trie has already been
      #   compressed.
      def compress
        return self if root.compressed?

        Rambling::Trie::Container.new compress_root, compressor
      end

      # Checks if a path for a word or partial word exists in the trie.
      # @param [String] word the word or partial word to look for in the trie.
      # @return [Boolean] `true` if the word or partial word is found, `false`
      #   otherwise.
      # @see Nodes::Raw#partial_word?
      # @see Nodes::Compressed#partial_word?
      def partial_word? word = ''
        root.partial_word? word.chars
      end

      # Checks if a whole word exists in the trie.
      # @param [String] word the word to look for in the trie.
      # @return [Boolean] `true` only if the word is found and the last
      #   character corresponds to a terminal node, `false` otherwise.
      # @see Nodes::Raw#word?
      # @see Nodes::Compressed#word?
      def word? word = ''
        root.word? word.chars
      end

      # Returns all words that start with the specified characters.
      # @param [String] word the word to look for in the trie.
      # @return [Array<String>] all the words contained in the trie that start
      #   with the specified characters.
      # @see Nodes::Raw#scan
      # @see Nodes::Compressed#scan
      def scan word = ''
        root.scan(word.chars).to_a
      end

      # Returns all words within a string that match a word contained in the
      # trie.
      # @param [String] phrase the string to look for matching words in.
      # @return [Array<String>] all the words in the given string that match a
      #   word in the trie.
      # @see Nodes::Node#words_within
      def words_within phrase
        words_within_root(phrase).to_a
      end

      # Checks if there are any valid words in a given string.
      # @param [String] phrase the string to look for matching words in.
      # @return [Boolean] `true` if any word within phrase is contained in the
      #   trie, `false` otherwise.
      # @see Container#words_within
      def words_within? phrase
        words_within_root(phrase).any?
      end

      # Compares two trie data structures.
      # @param [Container] other the trie to compare against.
      # @return [Boolean] `true` if the root nodes of both tries are equal,
      #   `false` otherwise (including when `other` does not expose a root,
      #   e.g. `nil`).
      def == other
        # Equality checks should not raise for unrelated objects.
        return false unless other.respond_to? :root

        root == other.root
      end

      # Iterates over the words contained in the trie.
      # @yield [String] the words contained in this trie node.
      # @return [Enumerator] when no block is given.
      def each
        return enum_for :each unless block_given?

        root.each { |word| yield word }
      end

      # @return [String] a string representation of the container.
      def inspect
        "#<#{self.class.name} root: #{root.inspect}>"
      end

      # Get {Nodes::Node Node} corresponding to a given letter.
      # @param [Symbol] letter the letter to search for in the root node.
      # @return [Nodes::Node] the node corresponding to that letter.
      # @see Nodes::Node#[]
      def [] letter
        root[letter]
      end

      # Root node's child nodes.
      # @return [Array<Nodes::Node>] the array of children nodes contained in
      #   the root node.
      # @see Nodes::Node#children
      def children
        root.children
      end

      # Root node's children tree.
      # @return [Object] whatever the root node exposes as its children tree
      #   (presumably a Hash of letters to child nodes; verify against
      #   Nodes::Node#children_tree).
      # @see Nodes::Node#children_tree
      def children_tree
        root.children_tree
      end

      # Indicates if the root {Nodes::Node Node} is compressed or not.
      # @return [Boolean] the result of asking the root node whether it is
      #   compressed.
      def compressed?
        root.compressed?
      end

      # Array of words contained in the root {Nodes::Node Node}.
      # @return [Array<String>] all words contained in this trie.
      # @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-to_a
      #   Enumerable#to_a
      def to_a
        root.to_a
      end

      # Check if a letter is part of the root {Nodes::Node}'s children tree.
      # @param [Symbol] letter the letter to search for in the root node.
      # @return [Boolean] whether the letter is contained or not.
      # @see Nodes::Node#key?
      def key? letter
        root.key? letter
      end

      # Size of the Root {Nodes::Node Node}'s children tree.
      # @return [Integer] the size reported by the root node.
      def size
        root.size
      end

      alias_method :include?, :word?
      alias_method :match?, :partial_word?
      alias_method :words, :scan
      alias_method :<<, :add
      alias_method :has_key?, :key?
      alias_method :has_letter?, :key?

      private

      attr_reader :compressor
      attr_writer :root

      # Yields every trie word that is a prefix of some suffix of the phrase,
      # scanning the suffixes from the longest to the shortest. Returns an
      # Enumerator when no block is given.
      def words_within_root phrase
        return enum_for :words_within_root, phrase unless block_given?

        chars = phrase.chars
        chars.each_index do |starting_index|
          root.match_prefix chars.drop(starting_index) do |word|
            yield word
          end
        end
      end

      # Delegates the compression of the root node to the compressor.
      def compress_root
        compressor.compress root
      end

      # Converts the word into its letters as symbols, in reverse order.
      # NOTE(review): the reversal presumably lets the nodes consume letters
      # from the end of the array - confirm against Nodes::Raw#add.
      def char_symbols word
        word.reverse.chars.map(&:to_sym)
      end
    end
  end
end