rambling-trie 0.9.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/LICENSE +1 -1
- data/README.md +133 -26
- data/Rakefile +1 -2
- data/lib/rambling/trie.rb +53 -9
- data/lib/rambling/trie/comparable.rb +16 -0
- data/lib/rambling/trie/compressable.rb +14 -0
- data/lib/rambling/trie/compressed_node.rb +38 -14
- data/lib/rambling/trie/compressor.rb +14 -10
- data/lib/rambling/trie/configuration.rb +11 -0
- data/lib/rambling/trie/configuration/properties.rb +66 -0
- data/lib/rambling/trie/configuration/provider_collection.rb +101 -0
- data/lib/rambling/trie/container.rb +57 -17
- data/lib/rambling/trie/enumerable.rb +1 -1
- data/lib/rambling/trie/forwardable.rb +9 -4
- data/lib/rambling/trie/inspectable.rb +37 -0
- data/lib/rambling/trie/invalid_operation.rb +3 -2
- data/lib/rambling/trie/missing_node.rb +2 -1
- data/lib/rambling/trie/node.rb +40 -30
- data/lib/rambling/trie/raw_node.rb +29 -13
- data/lib/rambling/trie/readers.rb +11 -0
- data/lib/rambling/trie/readers/plain_text.rb +26 -0
- data/lib/rambling/trie/serializers.rb +11 -0
- data/lib/rambling/trie/serializers/file.rb +25 -0
- data/lib/rambling/trie/serializers/marshal.rb +38 -0
- data/lib/rambling/trie/serializers/yaml.rb +39 -0
- data/lib/rambling/trie/serializers/zip.rb +67 -0
- data/lib/rambling/trie/stringifyable.rb +20 -0
- data/lib/rambling/trie/version.rb +1 -1
- data/rambling-trie.gemspec +2 -2
- data/spec/integration/rambling/trie_spec.rb +45 -49
- data/spec/lib/rambling/trie/comparable_spec.rb +104 -0
- data/spec/lib/rambling/trie/compressed_node_spec.rb +44 -0
- data/spec/lib/rambling/trie/configuration/properties_spec.rb +49 -0
- data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +165 -0
- data/spec/lib/rambling/trie/container_spec.rb +127 -38
- data/spec/lib/rambling/trie/{inspector_spec.rb → inspectable_spec.rb} +7 -5
- data/spec/lib/rambling/trie/raw_node_spec.rb +22 -41
- data/spec/lib/rambling/trie/readers/plain_text_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/file_spec.rb +11 -0
- data/spec/lib/rambling/trie/serializers/marshal_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/yaml_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/zip_spec.rb +30 -0
- data/spec/lib/rambling/trie/stringifyable_spec.rb +82 -0
- data/spec/lib/rambling/trie_spec.rb +120 -7
- data/spec/spec_helper.rb +7 -1
- data/spec/support/config.rb +5 -0
- data/spec/support/shared_examples/a_compressable_trie.rb +26 -0
- data/spec/support/shared_examples/a_serializable_trie.rb +26 -0
- data/spec/support/shared_examples/a_serializer.rb +29 -0
- data/spec/support/shared_examples/a_trie_data_structure.rb +29 -0
- data/spec/tmp/.gitkeep +0 -0
- metadata +51 -24
- data/lib/rambling/trie/compression.rb +0 -13
- data/lib/rambling/trie/inspector.rb +0 -11
- data/lib/rambling/trie/plain_text_reader.rb +0 -23
- data/lib/rambling/trie/tasks/gem.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/path.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/performance_report.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/time.rb +0 -7
- data/lib/rambling/trie/tasks/performance.rb +0 -15
- data/lib/rambling/trie/tasks/performance/all.rb +0 -17
- data/lib/rambling/trie/tasks/performance/benchmark.rb +0 -201
- data/lib/rambling/trie/tasks/performance/directory.rb +0 -11
- data/lib/rambling/trie/tasks/performance/flamegraph.rb +0 -119
- data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +0 -147
- data/lib/rambling/trie/tasks/performance/profile/memory.rb +0 -143
- data/spec/lib/rambling/trie/plain_text_reader_spec.rb +0 -18
@@ -1,10 +1,10 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# Responsible for the compression process of a
|
3
|
+
# Responsible for the compression process of a trie data structure.
|
4
4
|
class Compressor
|
5
|
-
# Compresses a
|
6
|
-
# @param [RawNode] node the node to compress
|
7
|
-
# @return [CompressedNode] node the compressed version of the node
|
5
|
+
# Compresses a {Node Node} from a trie data structure.
|
6
|
+
# @param [RawNode] node the node to compress.
|
7
|
+
# @return [CompressedNode] node the compressed version of the node.
|
8
8
|
def compress node
|
9
9
|
if node.compressable?
|
10
10
|
merge_with_child_and_compress node
|
@@ -18,18 +18,15 @@ module Rambling
|
|
18
18
|
def merge_with_child_and_compress node
|
19
19
|
child = node.children.first
|
20
20
|
|
21
|
-
|
22
|
-
new_node
|
23
|
-
new_node.terminal! if child.terminal?
|
21
|
+
letter = node.letter.to_s << child.letter.to_s
|
22
|
+
new_node = new_compressed_node node, letter, child.terminal?
|
24
23
|
new_node.children_tree = child.children_tree
|
25
24
|
|
26
25
|
compress new_node
|
27
26
|
end
|
28
27
|
|
29
28
|
def copy_node_and_compress_children node
|
30
|
-
new_node =
|
31
|
-
new_node.letter = node.letter
|
32
|
-
new_node.terminal! if node.terminal?
|
29
|
+
new_node = new_compressed_node node, node.letter, node.terminal?
|
33
30
|
|
34
31
|
node.children.each do |child|
|
35
32
|
compressed_child = compress child
|
@@ -40,6 +37,13 @@ module Rambling
|
|
40
37
|
|
41
38
|
new_node
|
42
39
|
end
|
40
|
+
|
41
|
+
def new_compressed_node node, letter, terminal
|
42
|
+
new_node = Rambling::Trie::CompressedNode.new node.parent
|
43
|
+
new_node.letter = letter
|
44
|
+
new_node.terminal! if terminal
|
45
|
+
new_node
|
46
|
+
end
|
43
47
|
end
|
44
48
|
end
|
45
49
|
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
module Configuration
|
4
|
+
# Provides configurable properties for Rambling::Trie.
|
5
|
+
class Properties
|
6
|
+
# The configured {Readers Readers}.
|
7
|
+
# @return [ProviderCollection] the mapping of configured {Readers
|
8
|
+
# Readers}.
|
9
|
+
attr_reader :readers
|
10
|
+
|
11
|
+
# The configured {Serializers Serializers}.
|
12
|
+
# @return [ProviderCollection] the mapping of configured {Serializers
|
13
|
+
# Serializers}.
|
14
|
+
attr_reader :serializers
|
15
|
+
|
16
|
+
# The configured {Compressor Compressor}.
|
17
|
+
# @return [Compressor] the configured compressor.
|
18
|
+
attr_accessor :compressor
|
19
|
+
|
20
|
+
# The configured root_builder, which should return a {Node Node} when
|
21
|
+
# called.
|
22
|
+
# @return [Proc<Node>] the configured root_builder.
|
23
|
+
attr_accessor :root_builder
|
24
|
+
|
25
|
+
attr_accessor :tmp_path
|
26
|
+
|
27
|
+
# Returns a new properties instance.
|
28
|
+
def initialize
|
29
|
+
reset
|
30
|
+
end
|
31
|
+
|
32
|
+
# Resets back to default properties.
|
33
|
+
def reset
|
34
|
+
reset_readers
|
35
|
+
reset_serializers
|
36
|
+
|
37
|
+
self.compressor = Rambling::Trie::Compressor.new
|
38
|
+
self.root_builder = lambda { Rambling::Trie::RawNode.new }
|
39
|
+
self.tmp_path = '/tmp'
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
attr_writer :readers, :serializers
|
45
|
+
|
46
|
+
def reset_readers
|
47
|
+
plain_text_reader = Rambling::Trie::Readers::PlainText.new
|
48
|
+
|
49
|
+
self.readers = Rambling::Trie::Configuration::ProviderCollection.new 'reader', txt: plain_text_reader
|
50
|
+
end
|
51
|
+
|
52
|
+
def reset_serializers
|
53
|
+
marshal_serializer = Rambling::Trie::Serializers::Marshal.new
|
54
|
+
yaml_serializer = Rambling::Trie::Serializers::Yaml.new
|
55
|
+
zip_serializer = Rambling::Trie::Serializers::Zip.new self
|
56
|
+
|
57
|
+
self.serializers = Rambling::Trie::Configuration::ProviderCollection.new 'serializer',
|
58
|
+
marshal: marshal_serializer,
|
59
|
+
yml: yaml_serializer,
|
60
|
+
yaml: yaml_serializer,
|
61
|
+
zip: zip_serializer
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
module Configuration
|
4
|
+
# Collection of configurable providers.
|
5
|
+
class ProviderCollection
|
6
|
+
extend Rambling::Trie::Forwardable
|
7
|
+
|
8
|
+
# The name of this provider collection.
|
9
|
+
# @return [String] the name of this provider collection.
|
10
|
+
attr_reader :name
|
11
|
+
|
12
|
+
# @overload default
|
13
|
+
# The default provider. Used when a provider cannot be resolved in
|
14
|
+
# {ProviderCollection#resolve #resolve}.
|
15
|
+
# @overload default=(provider)
|
16
|
+
# Sets the default provider. Needs to be one of the configured
|
17
|
+
# providers.
|
18
|
+
# @param [Object] provider the provider to use as default.
|
19
|
+
# @raise [ArgumentError] when the given provider is not in the
|
20
|
+
# provider collection.
|
21
|
+
# @note If no providers have been configured, `nil` will be assigned.
|
22
|
+
# @return [Object, nil] the default provider to use when a provider
|
23
|
+
# cannot be resolved in {ProviderCollection#resolve #resolve}.
|
24
|
+
attr_reader :default
|
25
|
+
|
26
|
+
delegate [
|
27
|
+
:[],
|
28
|
+
:[]=,
|
29
|
+
:keys,
|
30
|
+
:values,
|
31
|
+
] => :providers
|
32
|
+
|
33
|
+
# Creates a new provider collection.
|
34
|
+
# @param [String] name the name for this provider collection.
|
35
|
+
# @param [Hash] providers the configured providers.
|
36
|
+
# @param [Object] default the configured default provider.
|
37
|
+
def initialize name, providers = {}, default = nil
|
38
|
+
@name = name
|
39
|
+
@configured_providers = providers
|
40
|
+
@configured_default = default || providers.values.first
|
41
|
+
|
42
|
+
reset
|
43
|
+
end
|
44
|
+
|
45
|
+
# Adds a new provider to the provider collection.
|
46
|
+
# @param [Symbol] extension the extension that the provider will
|
47
|
+
# correspond to.
|
48
|
+
# @param [Object] provider the provider to add to the provider
|
49
|
+
# collection.
|
50
|
+
def add extension, provider
|
51
|
+
providers[extension] = provider
|
52
|
+
end
|
53
|
+
|
54
|
+
def default= provider
|
55
|
+
if provider_not_in_list? provider
|
56
|
+
raise ArgumentError, "default #{name} should be part of configured #{name}s"
|
57
|
+
end
|
58
|
+
|
59
|
+
@default = provider
|
60
|
+
end
|
61
|
+
|
62
|
+
# List of configured providers.
|
63
|
+
# @return [Hash] the mapping of extensions to their corresponding
|
64
|
+
# providers.
|
65
|
+
def providers
|
66
|
+
@providers ||= {}
|
67
|
+
end
|
68
|
+
|
69
|
+
# Resolves the provider from a filepath based on the file extension.
|
70
|
+
# @param [String] filepath the filepath to resolve into a provider.
|
71
|
+
# @return [Object] the provider corresponding to the file extension in
|
72
|
+
# this provider collection. {#default} if not found.
|
73
|
+
def resolve filepath
|
74
|
+
providers[format filepath] || default
|
75
|
+
end
|
76
|
+
|
77
|
+
# Resets the provider collection to the initial values.
|
78
|
+
def reset
|
79
|
+
providers.clear
|
80
|
+
configured_providers.each { |k, v| providers[k] = v }
|
81
|
+
self.default = configured_default
|
82
|
+
end
|
83
|
+
|
84
|
+
private
|
85
|
+
|
86
|
+
attr_reader :configured_providers, :configured_default
|
87
|
+
|
88
|
+
def format filepath
|
89
|
+
format = File.extname filepath
|
90
|
+
format.slice! 0
|
91
|
+
format.to_sym
|
92
|
+
end
|
93
|
+
|
94
|
+
def provider_not_in_list? provider
|
95
|
+
(provider && providers.values.empty?) ||
|
96
|
+
(providers.values.any? && !providers.values.include?(provider))
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# Wrapper on top of
|
3
|
+
# Wrapper on top of a trie data structure.
|
4
4
|
class Container
|
5
5
|
extend Rambling::Trie::Forwardable
|
6
6
|
include ::Enumerable
|
@@ -25,32 +25,32 @@ module Rambling
|
|
25
25
|
# @return [Node] the root node of this trie.
|
26
26
|
attr_reader :root
|
27
27
|
|
28
|
-
# Creates a new
|
28
|
+
# Creates a new trie.
|
29
29
|
# @param [Node] root the root node for the trie
|
30
30
|
# @param [Compressor] compressor responsible for compressing the trie
|
31
31
|
# @yield [Container] the trie just created.
|
32
|
-
def initialize root
|
33
|
-
@root = root
|
34
|
-
@compressor = compressor
|
32
|
+
def initialize root, compressor
|
33
|
+
@root = root
|
34
|
+
@compressor = compressor
|
35
35
|
|
36
36
|
yield self if block_given?
|
37
37
|
end
|
38
38
|
|
39
|
-
# Adds a
|
39
|
+
# Adds a word to the trie, without altering the passed word.
|
40
40
|
# @param [String] word the word to add the branch from.
|
41
41
|
# @return [Node] the just added branch's root node.
|
42
42
|
# @raise [InvalidOperation] if the trie is already compressed.
|
43
43
|
# @see RawNode#add
|
44
44
|
# @see CompressedNode#add
|
45
|
-
# @note Avoids
|
45
|
+
# @note Avoids altering the contents of the word variable.
|
46
46
|
def add word
|
47
47
|
root.add word.clone
|
48
48
|
end
|
49
49
|
|
50
|
-
# Compresses the existing tree using redundant node elimination.
|
50
|
+
# Compresses the existing tree using redundant node elimination. Marks
|
51
51
|
# the trie as compressed.
|
52
52
|
# @return [Container] self
|
53
|
-
# @note
|
53
|
+
# @note Only compresses tries that have not already been compressed.
|
54
54
|
def compress!
|
55
55
|
self.root = compressor.compress root unless root.compressed?
|
56
56
|
self
|
@@ -58,25 +58,61 @@ module Rambling
|
|
58
58
|
|
59
59
|
# Checks if a path for a word or partial word exists in the trie.
|
60
60
|
# @param [String] word the word or partial word to look for in the trie.
|
61
|
-
# @return [Boolean] `true` if the word or partial word is found, `false`
|
61
|
+
# @return [Boolean] `true` if the word or partial word is found, `false`
|
62
|
+
# otherwise.
|
63
|
+
# @see RawNode#partial_word?
|
64
|
+
# @see CompressedNode#partial_word?
|
62
65
|
def partial_word? word = ''
|
63
66
|
root.partial_word? word.chars
|
64
67
|
end
|
65
68
|
|
66
69
|
# Checks if a whole word exists in the trie.
|
67
70
|
# @param [String] word the word to look for in the trie.
|
68
|
-
# @return [Boolean] `true` only if the word is found and the last
|
71
|
+
# @return [Boolean] `true` only if the word is found and the last
|
72
|
+
# character corresponds to a terminal node, `false` otherwise.
|
73
|
+
# @see RawNode#word?
|
74
|
+
# @see CompressedNode#word?
|
69
75
|
def word? word = ''
|
70
76
|
root.word? word.chars
|
71
77
|
end
|
72
78
|
|
73
79
|
# Returns all words that start with the specified characters.
|
74
80
|
# @param [String] word the word to look for in the trie.
|
75
|
-
# @return [Array] all the words contained in the trie that start
|
81
|
+
# @return [Array<String>] all the words contained in the trie that start
|
82
|
+
# with the specified characters.
|
83
|
+
# @see RawNode#scan
|
84
|
+
# @see CompressedNode#scan
|
76
85
|
def scan word = ''
|
77
86
|
root.scan(word.chars).to_a
|
78
87
|
end
|
79
88
|
|
89
|
+
# Returns all words within a string that match a word contained in the
|
90
|
+
# trie.
|
91
|
+
# @param [String] phrase the string to look for matching words in.
|
92
|
+
# @return [Array<String>] all the words in the given string that
|
93
|
+
# match a word in the trie.
|
94
|
+
# @yield [String] each word found in phrase.
|
95
|
+
# @see Node#words_within
|
96
|
+
def words_within phrase
|
97
|
+
words_within_root(phrase).to_a
|
98
|
+
end
|
99
|
+
|
100
|
+
# Checks if there are any valid words in a given string.
|
101
|
+
# @param [String] phrase the string to look for matching words in.
|
102
|
+
# @return [Boolean] `true` if any word within phrase is contained in the
|
103
|
+
# trie, `false` otherwise.
|
104
|
+
# @see Container#words_within
|
105
|
+
def words_within? phrase
|
106
|
+
words_within_root(phrase).any?
|
107
|
+
end
|
108
|
+
|
109
|
+
# Compares two trie data structures.
|
110
|
+
# @param [Container] other the trie to compare against.
|
111
|
+
# @return [Boolean] `true` if the tries are equal, `false` otherwise.
|
112
|
+
def == other
|
113
|
+
root == other.root
|
114
|
+
end
|
115
|
+
|
80
116
|
alias_method :include?, :word?
|
81
117
|
alias_method :match?, :partial_word?
|
82
118
|
alias_method :words, :scan
|
@@ -87,12 +123,16 @@ module Rambling
|
|
87
123
|
attr_reader :compressor
|
88
124
|
attr_writer :root
|
89
125
|
|
90
|
-
def
|
91
|
-
|
92
|
-
end
|
126
|
+
def words_within_root phrase
|
127
|
+
return enum_for :words_within_root, phrase unless block_given?
|
93
128
|
|
94
|
-
|
95
|
-
|
129
|
+
chars = phrase.chars
|
130
|
+
0.upto(chars.length - 1).each do |starting_index|
|
131
|
+
new_phrase = chars.slice starting_index..(chars.length - 1)
|
132
|
+
root.match_prefix new_phrase do |word|
|
133
|
+
yield word
|
134
|
+
end
|
135
|
+
end
|
96
136
|
end
|
97
137
|
end
|
98
138
|
end
|
@@ -1,12 +1,17 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# Provides delegation behavior
|
3
|
+
# Provides delegation behavior.
|
4
4
|
module Forwardable
|
5
|
-
|
6
|
-
|
5
|
+
# Custom delegation behavior due to Ruby 2.4 delegation performance
|
6
|
+
# degradation. See {https://bugs.ruby-lang.org/issues/13111 Bug #13111}.
|
7
|
+
# @param [Hash] methods_to_target a Hash consisting of the methods to be
|
8
|
+
# delegated and the target to delegate those methods to.
|
9
|
+
# @return [Hash] the `methods_to_target` parameter.
|
10
|
+
def delegate methods_to_target
|
11
|
+
methods_to_target.each do |methods, target|
|
7
12
|
methods.each do |method|
|
8
13
|
define_method method do |*args|
|
9
|
-
send(
|
14
|
+
send(target).send method, *args
|
10
15
|
end
|
11
16
|
end
|
12
17
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
# Provides pretty printing behavior for the trie data structure.
|
4
|
+
module Inspectable
|
5
|
+
# @return [String] a string representation of the current node.
|
6
|
+
def inspect
|
7
|
+
"#<#{class_name} #{attributes}>"
|
8
|
+
end
|
9
|
+
|
10
|
+
private
|
11
|
+
|
12
|
+
def class_name
|
13
|
+
self.class.name
|
14
|
+
end
|
15
|
+
|
16
|
+
def attributes
|
17
|
+
[
|
18
|
+
letter_inspect,
|
19
|
+
terminal_inspect,
|
20
|
+
children_inspect,
|
21
|
+
].join ', '
|
22
|
+
end
|
23
|
+
|
24
|
+
def letter_inspect
|
25
|
+
"letter: #{letter.inspect}"
|
26
|
+
end
|
27
|
+
|
28
|
+
def terminal_inspect
|
29
|
+
"terminal: #{terminal.inspect}"
|
30
|
+
end
|
31
|
+
|
32
|
+
def children_inspect
|
33
|
+
"children: #{children_tree.keys.inspect}"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# Raised when trying to execute an invalid operation on a
|
3
|
+
# Raised when trying to execute an invalid operation on a trie data
|
4
|
+
# structure.
|
4
5
|
class InvalidOperation < Exception
|
5
|
-
# Creates a new InvalidOperation exception.
|
6
|
+
# Creates a new {InvalidOperation InvalidOperation} exception.
|
6
7
|
# @param [String, nil] message the exception message.
|
7
8
|
def initialize message = nil
|
8
9
|
super
|