rambling-trie-opal 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +26 -0
  3. data/Guardfile +10 -0
  4. data/LICENSE +26 -0
  5. data/README.md +301 -0
  6. data/Rakefile +15 -0
  7. data/lib/rambling-trie.rb +3 -0
  8. data/lib/rambling/trie.rb +119 -0
  9. data/lib/rambling/trie/comparable.rb +19 -0
  10. data/lib/rambling/trie/compressible.rb +16 -0
  11. data/lib/rambling/trie/compressor.rb +64 -0
  12. data/lib/rambling/trie/configuration.rb +16 -0
  13. data/lib/rambling/trie/configuration/properties.rb +75 -0
  14. data/lib/rambling/trie/configuration/provider_collection.rb +122 -0
  15. data/lib/rambling/trie/container.rb +226 -0
  16. data/lib/rambling/trie/enumerable.rb +29 -0
  17. data/lib/rambling/trie/inspectable.rb +39 -0
  18. data/lib/rambling/trie/invalid_operation.rb +15 -0
  19. data/lib/rambling/trie/nodes.rb +18 -0
  20. data/lib/rambling/trie/nodes/compressed.rb +98 -0
  21. data/lib/rambling/trie/nodes/missing.rb +12 -0
  22. data/lib/rambling/trie/nodes/node.rb +183 -0
  23. data/lib/rambling/trie/nodes/raw.rb +82 -0
  24. data/lib/rambling/trie/readers.rb +15 -0
  25. data/lib/rambling/trie/readers/plain_text.rb +18 -0
  26. data/lib/rambling/trie/serializers.rb +18 -0
  27. data/lib/rambling/trie/serializers/file.rb +27 -0
  28. data/lib/rambling/trie/serializers/marshal.rb +48 -0
  29. data/lib/rambling/trie/serializers/yaml.rb +55 -0
  30. data/lib/rambling/trie/serializers/zip.rb +74 -0
  31. data/lib/rambling/trie/stringifyable.rb +26 -0
  32. data/lib/rambling/trie/version.rb +8 -0
  33. data/rambling-trie-opal.gemspec +36 -0
  34. data/spec/assets/test_words.en_US.txt +23 -0
  35. data/spec/assets/test_words.es_DO.txt +24 -0
  36. data/spec/integration/rambling/trie_spec.rb +87 -0
  37. data/spec/lib/rambling/trie/comparable_spec.rb +97 -0
  38. data/spec/lib/rambling/trie/compressor_spec.rb +108 -0
  39. data/spec/lib/rambling/trie/configuration/properties_spec.rb +57 -0
  40. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +149 -0
  41. data/spec/lib/rambling/trie/container_spec.rb +591 -0
  42. data/spec/lib/rambling/trie/enumerable_spec.rb +42 -0
  43. data/spec/lib/rambling/trie/inspectable_spec.rb +56 -0
  44. data/spec/lib/rambling/trie/nodes/compressed_spec.rb +37 -0
  45. data/spec/lib/rambling/trie/nodes/node_spec.rb +9 -0
  46. data/spec/lib/rambling/trie/nodes/raw_spec.rb +179 -0
  47. data/spec/lib/rambling/trie/readers/plain_text_spec.rb +16 -0
  48. data/spec/lib/rambling/trie/serializers/file_spec.rb +13 -0
  49. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +12 -0
  50. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +12 -0
  51. data/spec/lib/rambling/trie/serializers/zip_spec.rb +28 -0
  52. data/spec/lib/rambling/trie/stringifyable_spec.rb +85 -0
  53. data/spec/lib/rambling/trie_spec.rb +182 -0
  54. data/spec/spec_helper.rb +37 -0
  55. data/spec/support/config.rb +15 -0
  56. data/spec/support/helpers/add_word.rb +20 -0
  57. data/spec/support/helpers/one_line_heredoc.rb +11 -0
  58. data/spec/support/shared_examples/a_compressible_trie.rb +40 -0
  59. data/spec/support/shared_examples/a_serializable_trie.rb +30 -0
  60. data/spec/support/shared_examples/a_serializer.rb +37 -0
  61. data/spec/support/shared_examples/a_trie_data_structure.rb +31 -0
  62. data/spec/support/shared_examples/a_trie_node.rb +127 -0
  63. data/spec/support/shared_examples/a_trie_node_implementation.rb +152 -0
  64. data/spec/tmp/.gitkeep +0 -0
  65. metadata +179 -0
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    # Provides the comparable behavior for the trie data structure.
    #
    # Intended to be mixed into node classes: the including object must
    # respond to `#letter`, `#terminal?` and `#children_tree`.
    module Comparable
      # Compares two nodes.
      # @param [Nodes::Node] other the node to compare against.
      # @return [Boolean] `true` if the nodes' {Nodes::Node#letter #letter},
      #   {Nodes::Node#terminal? #terminal?} flag and
      #   {Nodes::Node#children_tree #children_tree} are all equal, `false`
      #   otherwise. Objects that do not expose the node interface (for
      #   example `nil`) are never equal.
      def == other
        # `==` is expected to answer `false` for unrelated objects rather
        # than raise NoMethodError, so check the interface before using it.
        return false unless other.respond_to?(:letter) &&
          other.respond_to?(:terminal?) &&
          other.respond_to?(:children_tree)

        letter == other.letter &&
          terminal? == other.terminal? &&
          children_tree == other.children_tree
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    # Mixin that tells whether a trie node can be merged with its only child.
    #
    # The including object must respond to `#root?`, `#terminal?` and
    # `#children_tree`.
    module Compressible
      # Tells whether the current {Rambling::Trie::Nodes::Node Node} is a
      # candidate for compression.
      # @return [Boolean] `true` when the node is neither the root nor
      #   {Nodes::Node#terminal? terminal} and has exactly one child, `false`
      #   otherwise.
      def compressible?
        # Root and terminal nodes are never compressed.
        return false if root? || terminal?

        children_tree.size == 1
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    # Responsible for the compression process of a trie data structure.
    #
    # Walks a tree of nodes and builds an equivalent tree of
    # {Nodes::Compressed Compressed} nodes in which every compressible node
    # is merged with its only child.
    class Compressor
      # Compresses a {Nodes::Node Node} from a trie data structure.
      # @param [Nodes::Raw] node the node to compress.
      # @return [Nodes::Compressed] node the compressed version of the node.
      def compress node
        if node.compressible?
          compress_child_and_merge node
        else
          compress_children_and_copy node
        end
      end

      private

      # Compresses the node's only child and folds `node` into the result.
      def compress_child_and_merge node
        merge node, compress(node.first_child)
      end

      # Builds the node that replaces `node` plus its (already compressed)
      # child `other`: the letters are joined, while children and the
      # terminal flag are taken from `other`.
      def merge node, other
        # Build a brand new string instead of appending to the result of
        # `#to_s`: `String#to_s` returns the receiver itself, so `<<` would
        # mutate a String letter in place (or raise if it is frozen).
        letter = "#{node.letter}#{other.letter}".to_sym

        new_compressed_node(
          letter,
          node.parent,
          other.children_tree,
          other.terminal?,
        )
      end

      # Copies `node` as a compressed node whose children are compressed too.
      def compress_children_and_copy node
        new_compressed_node(
          node.letter,
          node.parent,
          compress_children(node.children_tree),
          node.terminal?,
        )
      end

      # Returns a new hash with the same keys as `tree`, each child replaced
      # by its compressed version. Keys are kept as-is, so a merged child
      # stays registered under its original first letter.
      def compress_children tree
        tree.each_with_object({}) do |(letter, child), compressed_tree|
          compressed_tree[letter] = compress child
        end
      end

      # Creates the compressed node and re-parents the given children to it.
      def new_compressed_node letter, parent, tree, terminal
        node = Rambling::Trie::Nodes::Compressed.new letter, parent, tree
        node.terminal! if terminal

        tree.each_value { |child| child.parent = node }
        node
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ # %w(properties provider_collection).each do |file|
4
+ # require File.join('rambling', 'trie', 'configuration', file)
5
+ # end
6
+
7
+ require 'rambling/trie/configuration/properties'
8
+ require 'rambling/trie/configuration/provider_collection'
9
+
10
+ module Rambling
11
+ module Trie
12
+ # Namespace for the configuration classes ({Properties} and {ProviderCollection}) required at the top of this file.
13
+ module Configuration
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    module Configuration
      # Holds the configurable settings of Rambling::Trie and knows how to
      # restore their defaults.
      class Properties
        # The configured {Readers Readers}.
        # @return [ProviderCollection] the collection of configured
        #   {Readers Readers}.
        attr_reader :readers

        # The configured {Serializers Serializers}.
        # @return [ProviderCollection] the collection of configured
        #   {Serializers Serializers}.
        attr_reader :serializers

        # The configured {Compressor Compressor}.
        # @return [Compressor] the compressor in use.
        attr_accessor :compressor

        # The configured root_builder; calling it should produce a
        # {Nodes::Node Node}.
        # @return [Proc<Nodes::Node>] the root_builder in use.
        attr_accessor :root_builder

        # The configured tmp_path, the location used for throwaway files.
        # @return [String] the tmp_path in use.
        attr_accessor :tmp_path

        # Builds a properties instance populated with the default values.
        def initialize
          reset
        end

        # Restores every property to its default: a plain text reader for
        # `txt`, the marshal and yaml serializers, a fresh {Compressor}, a
        # root_builder producing {Nodes::Raw} nodes and `/tmp` as tmp_path.
        def reset
          reset_readers
          reset_serializers

          @compressor = Compressor.new
          @root_builder = -> { Nodes::Raw.new }
          @tmp_path = '/tmp'
        end

        private

        attr_writer :readers, :serializers

        # Rebuilds the readers collection with only the plain text reader.
        def reset_readers
          @readers = ProviderCollection.new(
            :reader,
            { txt: Readers::PlainText.new },
          )
        end

        # Rebuilds the serializers collection. `yml` and `yaml` share one
        # serializer instance, and marshal comes first in the mapping.
        def reset_serializers
          marshal = Serializers::Marshal.new
          yaml = Serializers::Yaml.new
          # The zip serializer is left unregistered; its setup is kept here,
          # disabled, as in the original source:
          #   zip_serializer = Rambling::Trie::Serializers::Zip.new self
          #   zip: zip_serializer,

          @serializers = ProviderCollection.new(
            :serializer,
            { marshal: marshal, yml: yaml, yaml: yaml },
          )
        end
      end
    end
  end
end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    module Configuration
      # Collection of configurable providers, keyed by the format (file
      # extension) they handle.
      class ProviderCollection
        # The name of this provider collection.
        # @return [String, Symbol] the name of this provider collection.
        attr_reader :name

        # @overload default
        #   The default provider. Used when a provider cannot be resolved in
        #   {ProviderCollection#resolve #resolve}.
        # @overload default=(provider)
        #   Sets the default provider. Needs to be one of the configured
        #   providers.
        #   @param [Object] provider the provider to use as default.
        #   @raise [ArgumentError] when the given provider is not in the
        #     provider collection.
        #   @note If no providers have been configured, `nil` will be assigned.
        # @return [Object, nil] the default provider to use when a provider
        #   cannot be resolved in {ProviderCollection#resolve #resolve}.
        attr_reader :default

        # Creates a new provider collection.
        # @param [String, Symbol] name the name for this provider collection.
        # @param [Hash] providers the configured providers.
        # @param [Object] default the configured default provider. Falls back
        #   to the first configured provider when omitted.
        # @raise [ArgumentError] when `default` is given but is not one of
        #   the configured providers.
        def initialize name, providers = {}, default = nil
          @name = name
          @configured_providers = providers
          @configured_default = default || providers.values.first

          reset
        end

        # Adds a new provider to the provider collection.
        # @param [Symbol] extension the extension that the provider will
        #   correspond to.
        # @param [provider] provider the provider to add to the provider
        #   collection.
        def add extension, provider
          providers[extension] = provider
        end

        def default= provider
          unless contains? provider
            raise ArgumentError,
              "default #{name} should be part of configured #{name}s"
          end

          @default = provider
        end

        # List of configured providers.
        # @return [Hash] the mapping of extensions to their corresponding
        #   providers.
        def providers
          @providers ||= {}
        end

        # Resolves the provider from a filepath based on the file extension.
        # @param [String] filepath the filepath to resolve into a provider.
        # @return [Object] the provider corresponding to the file extension in
        #   this provider collection. {#default} if not found.
        def resolve filepath
          providers[file_format filepath] || default
        end

        # Resets the provider collection to the initial values: the
        # providers and default given to {#initialize}. Providers registered
        # later through {#add} are dropped.
        def reset
          providers.clear
          configured_providers.each { |k, v| self[k] = v }
          self.default = configured_default
        end

        # List of formats (extensions) that have a provider configured.
        # @return [Array<Symbol>] the formats known to this collection.
        # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-keys
        #   Hash#keys
        def formats
          providers.keys
        end

        # Get provider corresponding to a given format.
        # @param [Symbol] format the format to search for in the collection.
        # @return [Object] the provider corresponding to that format.
        # @see https://ruby-doc.org/core-2.5.0/Hash.html#method-i-5B-5D
        #   Hash#[]
        def [] format
          providers[format]
        end

        private

        attr_reader :configured_providers, :configured_default

        def []= format, instance
          providers[format] = instance
        end

        def values
          providers.values
        end

        # Extracts the format from a filepath: its extension without the
        # leading dot, as a Symbol (`:""` when there is no extension).
        def file_format filepath
          # Non-destructive on purpose: no string is modified in place.
          File.extname(filepath).sub(/\A\./, '').to_sym
        end

        # `nil` is always accepted, so the default can be cleared; any other
        # provider must already be registered in the collection.
        def contains? provider
          provider.nil? ||
            (providers.any? && provider_instances.include?(provider))
        end

        alias_method :provider_instances, :values
      end
    end
  end
end
@@ -0,0 +1,226 @@
1
+ # frozen_string_literal: true
2
+
3
module Rambling
  module Trie
    # Wrapper on top of trie data structure.
    #
    # Holds the root node plus the compressor and forwards most operations
    # to the root node.
    class Container
      include ::Enumerable

      # The root node of this trie.
      # @return [Nodes::Node] the root node of this trie.
      attr_reader :root

      # Creates a new trie.
      # @param [Nodes::Node] root the root node for the trie
      # @param [Compressor] compressor responsible for compressing the trie
      # @yield [Container] the trie just created.
      def initialize root, compressor
        @root = root
        @compressor = compressor

        yield self if block_given?
      end

      # Adds a word to the trie.
      # @param [String] word the word to add the branch from.
      # @return [Nodes::Node] the just added branch's root node.
      # @raise [InvalidOperation] if the trie is already compressed.
      # @see Nodes::Raw#add
      # @see Nodes::Compressed#add
      def add word
        root.add char_symbols word
      end

      # Adds all provided words to the trie.
      # @param [Array<String>] words the words to add the branch from.
      # @return [Array<Nodes::Node>] the collection of nodes added.
      # @raise [InvalidOperation] if the trie is already compressed.
      # @see Nodes::Raw#add
      # @see Nodes::Compressed#add
      def concat words
        words.map { |word| add word }
      end

      # Compresses the existing trie using redundant node elimination. Marks
      # the trie as compressed. Does nothing if the trie has already been
      # compressed.
      # @return [Container] self
      # @note This method replaces the root {Nodes::Raw Raw} node with a
      #   {Nodes::Compressed Compressed} version of it.
      def compress!
        self.root = compress_root unless root.compressed?
        self
      end

      # Compresses the existing trie using redundant node elimination. Returns
      # a new trie with the compressed root.
      # @return [Container] A new {Container} with the {Nodes::Compressed
      #   Compressed} root node or self if the trie has already been
      #   compressed.
      def compress
        return self if root.compressed?

        Rambling::Trie::Container.new compress_root, compressor
      end

      # Checks if a path for a word or partial word exists in the trie.
      # @param [String] word the word or partial word to look for in the trie.
      # @return [Boolean] `true` if the word or partial word is found, `false`
      #   otherwise.
      # @see Nodes::Raw#partial_word?
      # @see Nodes::Compressed#partial_word?
      def partial_word? word = ''
        root.partial_word? word.chars
      end

      # Checks if a whole word exists in the trie.
      # @param [String] word the word to look for in the trie.
      # @return [Boolean] `true` only if the word is found and the last
      #   character corresponds to a terminal node, `false` otherwise.
      # @see Nodes::Raw#word?
      # @see Nodes::Compressed#word?
      def word? word = ''
        root.word? word.chars
      end

      # Returns all words that start with the specified characters.
      # @param [String] word the word to look for in the trie.
      # @return [Array<String>] all the words contained in the trie that start
      #   with the specified characters.
      # @see Nodes::Raw#scan
      # @see Nodes::Compressed#scan
      def scan word = ''
        root.scan(word.chars).to_a
      end

      # Returns all words within a string that match a word contained in the
      # trie.
      # @param [String] phrase the string to look for matching words in.
      # @return [Array<String>] all the words in the given string that
      #   match a word in the trie, in the order they are found. A word is
      #   listed once per position at which it matches.
      # @see Nodes::Node#words_within
      def words_within phrase
        words_within_root(phrase).to_a
      end

      # Checks if there are any valid words in a given string.
      # @param [String] phrase the string to look for matching words in.
      # @return [Boolean] `true` if any word within phrase is contained in the
      #   trie, `false` otherwise.
      # @see Container#words_within
      def words_within? phrase
        words_within_root(phrase).any?
      end

      # Compares two trie data structures.
      # @param [Container] other the trie to compare against.
      # @return [Boolean] `true` if the root nodes of both tries are equal,
      #   `false` otherwise (including when `other` is not a trie, e.g. `nil`).
      def == other
        # Answer `false` for objects that are not tries instead of raising
        # NoMethodError on `other.root`.
        return false unless other.respond_to? :root

        root == other.root
      end

      # Iterates over the words contained in the trie.
      # @yield [String] the words contained in this trie node.
      # @return [Enumerator] when no block is given.
      def each
        return enum_for :each unless block_given?

        root.each { |word| yield word }
      end

      # @return [String] a string representation of the container.
      def inspect
        "#<#{self.class.name} root: #{root.inspect}>"
      end

      # Get {Nodes::Node Node} corresponding to a given letter.
      # @param [Symbol] letter the letter to search for in the root node.
      # @return [Nodes::Node] the node corresponding to that letter.
      # @see Nodes::Node#[]
      def [] letter
        root[letter]
      end

      # Root node's child nodes.
      # @return [Array<Nodes::Node>] the array of children nodes contained in
      #   the root node.
      # @see Nodes::Node#children
      def children
        root.children
      end

      # Root node's children tree.
      # @return [Hash<Symbol, Nodes::Node>] the children nodes contained in
      #   the root node, keyed by letter.
      # @see Nodes::Node#children_tree
      def children_tree
        root.children_tree
      end

      # Indicates if the root {Nodes::Node Node} is compressed or not.
      # @return [Boolean] `true` if the root node reports itself as
      #   compressed, `false` otherwise.
      def compressed?
        root.compressed?
      end

      # Array of words contained in the root {Nodes::Node Node}.
      # @return [Array<String>] all words contained in this trie.
      # @see https://ruby-doc.org/core-2.5.0/Enumerable.html#method-i-to_a
      #   Enumerable#to_a
      def to_a
        root.to_a
      end

      # Check if a letter is part of the root {Nodes::Node}'s children tree.
      # @param [Symbol] letter the letter to search for in the root node.
      # @return [Boolean] whether the letter is contained or not.
      # @see Nodes::Node#key?
      def key? letter
        root.key? letter
      end

      # Size of the Root {Nodes::Node Node}'s children tree.
      # @return [Integer] the number of letters in the root node.
      def size
        root.size
      end

      alias_method :include?, :word?
      alias_method :match?, :partial_word?
      alias_method :words, :scan
      alias_method :<<, :add
      alias_method :has_key?, :key?
      alias_method :has_letter?, :key?

      private

      attr_reader :compressor
      attr_writer :root

      # Yields every trie word that is a prefix of some suffix of `phrase`,
      # trying each starting position from left to right. Returns a lazy
      # enumerator when called without a block.
      def words_within_root phrase
        return enum_for :words_within_root, phrase unless block_given?

        chars = phrase.chars
        chars.each_index do |starting_index|
          root.match_prefix(chars[starting_index..-1]) { |word| yield word }
        end
      end

      def compress_root
        compressor.compress root
      end

      # Converts a word into its letters as Symbols, in reverse order
      # (last letter first).
      # NOTE(review): presumably the root node consumes the letters from the
      # end of the array — confirm against Nodes::Raw#add.
      def char_symbols word
        word.reverse.each_char.map(&:to_sym)
      end
    end
  end
end