rambling-trie 0.9.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/LICENSE +1 -1
- data/README.md +133 -26
- data/Rakefile +1 -2
- data/lib/rambling/trie.rb +53 -9
- data/lib/rambling/trie/comparable.rb +16 -0
- data/lib/rambling/trie/compressable.rb +14 -0
- data/lib/rambling/trie/compressed_node.rb +38 -14
- data/lib/rambling/trie/compressor.rb +14 -10
- data/lib/rambling/trie/configuration.rb +11 -0
- data/lib/rambling/trie/configuration/properties.rb +66 -0
- data/lib/rambling/trie/configuration/provider_collection.rb +101 -0
- data/lib/rambling/trie/container.rb +57 -17
- data/lib/rambling/trie/enumerable.rb +1 -1
- data/lib/rambling/trie/forwardable.rb +9 -4
- data/lib/rambling/trie/inspectable.rb +37 -0
- data/lib/rambling/trie/invalid_operation.rb +3 -2
- data/lib/rambling/trie/missing_node.rb +2 -1
- data/lib/rambling/trie/node.rb +40 -30
- data/lib/rambling/trie/raw_node.rb +29 -13
- data/lib/rambling/trie/readers.rb +11 -0
- data/lib/rambling/trie/readers/plain_text.rb +26 -0
- data/lib/rambling/trie/serializers.rb +11 -0
- data/lib/rambling/trie/serializers/file.rb +25 -0
- data/lib/rambling/trie/serializers/marshal.rb +38 -0
- data/lib/rambling/trie/serializers/yaml.rb +39 -0
- data/lib/rambling/trie/serializers/zip.rb +67 -0
- data/lib/rambling/trie/stringifyable.rb +20 -0
- data/lib/rambling/trie/version.rb +1 -1
- data/rambling-trie.gemspec +2 -2
- data/spec/integration/rambling/trie_spec.rb +45 -49
- data/spec/lib/rambling/trie/comparable_spec.rb +104 -0
- data/spec/lib/rambling/trie/compressed_node_spec.rb +44 -0
- data/spec/lib/rambling/trie/configuration/properties_spec.rb +49 -0
- data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +165 -0
- data/spec/lib/rambling/trie/container_spec.rb +127 -38
- data/spec/lib/rambling/trie/{inspector_spec.rb → inspectable_spec.rb} +7 -5
- data/spec/lib/rambling/trie/raw_node_spec.rb +22 -41
- data/spec/lib/rambling/trie/readers/plain_text_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/file_spec.rb +11 -0
- data/spec/lib/rambling/trie/serializers/marshal_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/yaml_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/zip_spec.rb +30 -0
- data/spec/lib/rambling/trie/stringifyable_spec.rb +82 -0
- data/spec/lib/rambling/trie_spec.rb +120 -7
- data/spec/spec_helper.rb +7 -1
- data/spec/support/config.rb +5 -0
- data/spec/support/shared_examples/a_compressable_trie.rb +26 -0
- data/spec/support/shared_examples/a_serializable_trie.rb +26 -0
- data/spec/support/shared_examples/a_serializer.rb +29 -0
- data/spec/support/shared_examples/a_trie_data_structure.rb +29 -0
- data/spec/tmp/.gitkeep +0 -0
- metadata +51 -24
- data/lib/rambling/trie/compression.rb +0 -13
- data/lib/rambling/trie/inspector.rb +0 -11
- data/lib/rambling/trie/plain_text_reader.rb +0 -23
- data/lib/rambling/trie/tasks/gem.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/path.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/performance_report.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/time.rb +0 -7
- data/lib/rambling/trie/tasks/performance.rb +0 -15
- data/lib/rambling/trie/tasks/performance/all.rb +0 -17
- data/lib/rambling/trie/tasks/performance/benchmark.rb +0 -201
- data/lib/rambling/trie/tasks/performance/directory.rb +0 -11
- data/lib/rambling/trie/tasks/performance/flamegraph.rb +0 -119
- data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +0 -147
- data/lib/rambling/trie/tasks/performance/profile/memory.rb +0 -143
- data/spec/lib/rambling/trie/plain_text_reader_spec.rb +0 -18
@@ -1,10 +1,10 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# Responsible for the compression process of a
|
3
|
+
# Responsible for the compression process of a trie data structure.
|
4
4
|
class Compressor
|
5
|
-
# Compresses a
|
6
|
-
# @param [RawNode] node the node to compress
|
7
|
-
# @return [CompressedNode] node the compressed version of the node
|
5
|
+
# Compresses a {Node Node} from a trie data structure.
|
6
|
+
# @param [RawNode] node the node to compress.
|
7
|
+
# @return [CompressedNode] node the compressed version of the node.
|
8
8
|
def compress node
|
9
9
|
if node.compressable?
|
10
10
|
merge_with_child_and_compress node
|
@@ -18,18 +18,15 @@ module Rambling
|
|
18
18
|
def merge_with_child_and_compress node
|
19
19
|
child = node.children.first
|
20
20
|
|
21
|
-
|
22
|
-
new_node
|
23
|
-
new_node.terminal! if child.terminal?
|
21
|
+
letter = node.letter.to_s << child.letter.to_s
|
22
|
+
new_node = new_compressed_node node, letter, child.terminal?
|
24
23
|
new_node.children_tree = child.children_tree
|
25
24
|
|
26
25
|
compress new_node
|
27
26
|
end
|
28
27
|
|
29
28
|
def copy_node_and_compress_children node
|
30
|
-
new_node =
|
31
|
-
new_node.letter = node.letter
|
32
|
-
new_node.terminal! if node.terminal?
|
29
|
+
new_node = new_compressed_node node, node.letter, node.terminal?
|
33
30
|
|
34
31
|
node.children.each do |child|
|
35
32
|
compressed_child = compress child
|
@@ -40,6 +37,13 @@ module Rambling
|
|
40
37
|
|
41
38
|
new_node
|
42
39
|
end
|
40
|
+
|
41
|
+
def new_compressed_node node, letter, terminal
|
42
|
+
new_node = Rambling::Trie::CompressedNode.new node.parent
|
43
|
+
new_node.letter = letter
|
44
|
+
new_node.terminal! if terminal
|
45
|
+
new_node
|
46
|
+
end
|
43
47
|
end
|
44
48
|
end
|
45
49
|
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
module Configuration
|
4
|
+
# Provides configurable properties for Rambling::Trie.
|
5
|
+
class Properties
|
6
|
+
# The configured {Readers Readers}.
|
7
|
+
# @return [ProviderCollection] the mapping of configured {Readers
|
8
|
+
# Readers}.
|
9
|
+
attr_reader :readers
|
10
|
+
|
11
|
+
# The configured {Serializers Serializers}.
|
12
|
+
# @return [ProviderCollection] the mapping of configured {Serializers
|
13
|
+
# Serializers}.
|
14
|
+
attr_reader :serializers
|
15
|
+
|
16
|
+
# The configured {Compressor Compressor}.
|
17
|
+
# @return [Compressor] the configured compressor.
|
18
|
+
attr_accessor :compressor
|
19
|
+
|
20
|
+
# The configured root_builder, which should return a {Node Node} when
|
21
|
+
# called.
|
22
|
+
# @return [Proc<Node>] the configured root_builder.
|
23
|
+
attr_accessor :root_builder
|
24
|
+
|
25
|
+
attr_accessor :tmp_path
|
26
|
+
|
27
|
+
# Returns a new properties instance.
|
28
|
+
def initialize
|
29
|
+
reset
|
30
|
+
end
|
31
|
+
|
32
|
+
# Resets back to default properties.
|
33
|
+
def reset
|
34
|
+
reset_readers
|
35
|
+
reset_serializers
|
36
|
+
|
37
|
+
self.compressor = Rambling::Trie::Compressor.new
|
38
|
+
self.root_builder = lambda { Rambling::Trie::RawNode.new }
|
39
|
+
self.tmp_path = '/tmp'
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
attr_writer :readers, :serializers
|
45
|
+
|
46
|
+
def reset_readers
|
47
|
+
plain_text_reader = Rambling::Trie::Readers::PlainText.new
|
48
|
+
|
49
|
+
self.readers = Rambling::Trie::Configuration::ProviderCollection.new 'reader', txt: plain_text_reader
|
50
|
+
end
|
51
|
+
|
52
|
+
def reset_serializers
|
53
|
+
marshal_serializer = Rambling::Trie::Serializers::Marshal.new
|
54
|
+
yaml_serializer = Rambling::Trie::Serializers::Yaml.new
|
55
|
+
zip_serializer = Rambling::Trie::Serializers::Zip.new self
|
56
|
+
|
57
|
+
self.serializers = Rambling::Trie::Configuration::ProviderCollection.new 'serializer',
|
58
|
+
marshal: marshal_serializer,
|
59
|
+
yml: yaml_serializer,
|
60
|
+
yaml: yaml_serializer,
|
61
|
+
zip: zip_serializer
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
module Configuration
|
4
|
+
# Collection of configurable providers.
|
5
|
+
class ProviderCollection
|
6
|
+
extend Rambling::Trie::Forwardable
|
7
|
+
|
8
|
+
# The name of this provider collection.
|
9
|
+
# @return [String] the name of this provider collection.
|
10
|
+
attr_reader :name
|
11
|
+
|
12
|
+
# @overload default
|
13
|
+
# The default provider. Used when a provider cannot be resolved in
|
14
|
+
# {ProviderCollection#resolve #resolve}.
|
15
|
+
# @overload default=(provider)
|
16
|
+
# Sets the default provider. Needs to be one of the configured
|
17
|
+
# providers.
|
18
|
+
# @param [Object] provider the provider to use as default.
|
19
|
+
# @raise [ArgumentError] when the given provider is not in the
|
20
|
+
# provider collection.
|
21
|
+
# @note If no providers have been configured, `nil` will be assigned.
|
22
|
+
# @return [Object, nil] the default provider to use when a provider
|
23
|
+
# cannot be resolved in {ProviderCollection#resolve #resolve}.
|
24
|
+
attr_reader :default
|
25
|
+
|
26
|
+
delegate [
|
27
|
+
:[],
|
28
|
+
:[]=,
|
29
|
+
:keys,
|
30
|
+
:values,
|
31
|
+
] => :providers
|
32
|
+
|
33
|
+
# Creates a new provider collection.
|
34
|
+
# @param [String] name the name for this provider collection.
|
35
|
+
# @param [Hash] providers the configured providers.
|
36
|
+
# @param [Object] default the configured default provider.
|
37
|
+
def initialize name, providers = {}, default = nil
|
38
|
+
@name = name
|
39
|
+
@configured_providers = providers
|
40
|
+
@configured_default = default || providers.values.first
|
41
|
+
|
42
|
+
reset
|
43
|
+
end
|
44
|
+
|
45
|
+
# Adds a new provider to the provider collection.
|
46
|
+
# @param [Symbol] extension the extension that the provider will
|
47
|
+
# correspond to.
|
48
|
+
# @param [provider] provider the provider to add to the provider
|
49
|
+
# collection.
|
50
|
+
def add extension, provider
|
51
|
+
providers[extension] = provider
|
52
|
+
end
|
53
|
+
|
54
|
+
def default= provider
|
55
|
+
if provider_not_in_list? provider
|
56
|
+
raise ArgumentError, "default #{name} should be part of configured #{name}s"
|
57
|
+
end
|
58
|
+
|
59
|
+
@default = provider
|
60
|
+
end
|
61
|
+
|
62
|
+
# List of configured providers.
|
63
|
+
# @return [Hash] the mapping of extensions to their corresponding
|
64
|
+
# providers.
|
65
|
+
def providers
|
66
|
+
@providers ||= {}
|
67
|
+
end
|
68
|
+
|
69
|
+
# Resolves the provider from a filepath based on the file extension.
|
70
|
+
# @param [String] filepath the filepath to resolve into a provider.
|
71
|
+
# @return [Object] the provider corresponding to the file extension in
|
72
|
+
# this provider collection. {#default} if not found.
|
73
|
+
def resolve filepath
|
74
|
+
providers[format filepath] || default
|
75
|
+
end
|
76
|
+
|
77
|
+
# Resets the provider collection to the initial values.
|
78
|
+
def reset
|
79
|
+
providers.clear
|
80
|
+
configured_providers.each { |k, v| providers[k] = v }
|
81
|
+
self.default = configured_default
|
82
|
+
end
|
83
|
+
|
84
|
+
private
|
85
|
+
|
86
|
+
attr_reader :configured_providers, :configured_default
|
87
|
+
|
88
|
+
def format filepath
|
89
|
+
format = File.extname filepath
|
90
|
+
format.slice! 0
|
91
|
+
format.to_sym
|
92
|
+
end
|
93
|
+
|
94
|
+
def provider_not_in_list? provider
|
95
|
+
(provider && providers.values.empty?) ||
|
96
|
+
(providers.values.any? && !providers.values.include?(provider))
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# Wrapper on top of
|
3
|
+
# Wrapper on top of trie data structure.
|
4
4
|
class Container
|
5
5
|
extend Rambling::Trie::Forwardable
|
6
6
|
include ::Enumerable
|
@@ -25,32 +25,32 @@ module Rambling
|
|
25
25
|
# @return [Node] the root node of this trie.
|
26
26
|
attr_reader :root
|
27
27
|
|
28
|
-
# Creates a new
|
28
|
+
# Creates a new trie.
|
29
29
|
# @param [Node] root the root node for the trie
|
30
30
|
# @param [Compressor] compressor responsible for compressing the trie
|
31
31
|
# @yield [Container] the trie just created.
|
32
|
-
def initialize root
|
33
|
-
@root = root
|
34
|
-
@compressor = compressor
|
32
|
+
def initialize root, compressor
|
33
|
+
@root = root
|
34
|
+
@compressor = compressor
|
35
35
|
|
36
36
|
yield self if block_given?
|
37
37
|
end
|
38
38
|
|
39
|
-
# Adds a
|
39
|
+
# Adds a word to the trie, without altering the passed word.
|
40
40
|
# @param [String] word the word to add the branch from.
|
41
41
|
# @return [Node] the just added branch's root node.
|
42
42
|
# @raise [InvalidOperation] if the trie is already compressed.
|
43
43
|
# @see RawNode#add
|
44
44
|
# @see CompressedNode#add
|
45
|
-
# @note Avoids
|
45
|
+
# @note Avoids altering the contents of the word variable.
|
46
46
|
def add word
|
47
47
|
root.add word.clone
|
48
48
|
end
|
49
49
|
|
50
|
-
# Compresses the existing tree using redundant node elimination.
|
50
|
+
# Compresses the existing tree using redundant node elimination. Marks
|
51
51
|
# the trie as compressed.
|
52
52
|
# @return [Container] self
|
53
|
-
# @note
|
53
|
+
# @note Only compresses tries that have not already been compressed.
|
54
54
|
def compress!
|
55
55
|
self.root = compressor.compress root unless root.compressed?
|
56
56
|
self
|
@@ -58,25 +58,61 @@ module Rambling
|
|
58
58
|
|
59
59
|
# Checks if a path for a word or partial word exists in the trie.
|
60
60
|
# @param [String] word the word or partial word to look for in the trie.
|
61
|
-
# @return [Boolean] `true` if the word or partial word is found, `false`
|
61
|
+
# @return [Boolean] `true` if the word or partial word is found, `false`
|
62
|
+
# otherwise.
|
63
|
+
# @see RawNode#partial_word?
|
64
|
+
# @see CompressedNode#partial_word?
|
62
65
|
def partial_word? word = ''
|
63
66
|
root.partial_word? word.chars
|
64
67
|
end
|
65
68
|
|
66
69
|
# Checks if a whole word exists in the trie.
|
67
70
|
# @param [String] word the word to look for in the trie.
|
68
|
-
# @return [Boolean] `true` only if the word is found and the last
|
71
|
+
# @return [Boolean] `true` only if the word is found and the last
|
72
|
+
# character corresponds to a terminal node, `false` otherwise.
|
73
|
+
# @see RawNode#word?
|
74
|
+
# @see CompressedNode#word?
|
69
75
|
def word? word = ''
|
70
76
|
root.word? word.chars
|
71
77
|
end
|
72
78
|
|
73
79
|
# Returns all words that start with the specified characters.
|
74
80
|
# @param [String] word the word to look for in the trie.
|
75
|
-
# @return [Array] all the words contained in the trie that start
|
81
|
+
# @return [Array<String>] all the words contained in the trie that start
|
82
|
+
# with the specified characters.
|
83
|
+
# @see RawNode#scan
|
84
|
+
# @see CompressedNode#scan
|
76
85
|
def scan word = ''
|
77
86
|
root.scan(word.chars).to_a
|
78
87
|
end
|
79
88
|
|
89
|
+
# Returns all words within a string that match a word contained in the
|
90
|
+
# trie.
|
91
|
+
# @param [String] phrase the string to look for matching words in.
|
92
|
+
# @return [Enumerator<String>] all the words in the given string that
|
93
|
+
# match a word in the trie.
|
94
|
+
# @yield [String] each word found in phrase.
|
95
|
+
# @see Node#words_within
|
96
|
+
def words_within phrase
|
97
|
+
words_within_root(phrase).to_a
|
98
|
+
end
|
99
|
+
|
100
|
+
# Checks if there are any valid words in a given string.
|
101
|
+
# @param [String] phrase the string to look for matching words in.
|
102
|
+
# @return [Boolean] `true` if any word within phrase is contained in the
|
103
|
+
# trie, `false` otherwise.
|
104
|
+
# @see Container#words_within
|
105
|
+
def words_within? phrase
|
106
|
+
words_within_root(phrase).any?
|
107
|
+
end
|
108
|
+
|
109
|
+
# Compares two trie data structures.
|
110
|
+
# @param [Container] other the trie to compare against.
|
111
|
+
# @return [Boolean] `true` if the tries are equal, `false` otherwise.
|
112
|
+
def == other
|
113
|
+
root == other.root
|
114
|
+
end
|
115
|
+
|
80
116
|
alias_method :include?, :word?
|
81
117
|
alias_method :match?, :partial_word?
|
82
118
|
alias_method :words, :scan
|
@@ -87,12 +123,16 @@ module Rambling
|
|
87
123
|
attr_reader :compressor
|
88
124
|
attr_writer :root
|
89
125
|
|
90
|
-
def
|
91
|
-
|
92
|
-
end
|
126
|
+
def words_within_root phrase
|
127
|
+
return enum_for :words_within_root, phrase unless block_given?
|
93
128
|
|
94
|
-
|
95
|
-
|
129
|
+
chars = phrase.chars
|
130
|
+
0.upto(chars.length - 1).each do |starting_index|
|
131
|
+
new_phrase = chars.slice starting_index..(chars.length - 1)
|
132
|
+
root.match_prefix new_phrase do |word|
|
133
|
+
yield word
|
134
|
+
end
|
135
|
+
end
|
96
136
|
end
|
97
137
|
end
|
98
138
|
end
|
@@ -1,12 +1,17 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# Provides delegation behavior
|
3
|
+
# Provides delegation behavior.
|
4
4
|
module Forwardable
|
5
|
-
|
6
|
-
|
5
|
+
# Custom delegation behavior due to Ruby 2.4 delegation performance
|
6
|
+
# degradation. See {https://bugs.ruby-lang.org/issues/13111 Bug #13111}.
|
7
|
+
# @param [Hash] methods_to_target a Hash consisting of the methods to be
|
8
|
+
# delegated and the target to delegate those methods to.
|
9
|
+
# @return [Hash] the `methods_to_target` parameter.
|
10
|
+
def delegate methods_to_target
|
11
|
+
methods_to_target.each do |methods, target|
|
7
12
|
methods.each do |method|
|
8
13
|
define_method method do |*args|
|
9
|
-
send(
|
14
|
+
send(target).send method, *args
|
10
15
|
end
|
11
16
|
end
|
12
17
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
# Provides pretty printing behavior for the trie data structure.
|
4
|
+
module Inspectable
|
5
|
+
# @return [String] a string representation of the current node.
|
6
|
+
def inspect
|
7
|
+
"#<#{class_name} #{attributes}>"
|
8
|
+
end
|
9
|
+
|
10
|
+
private
|
11
|
+
|
12
|
+
def class_name
|
13
|
+
self.class.name
|
14
|
+
end
|
15
|
+
|
16
|
+
def attributes
|
17
|
+
[
|
18
|
+
letter_inspect,
|
19
|
+
terminal_inspect,
|
20
|
+
children_inspect,
|
21
|
+
].join ', '
|
22
|
+
end
|
23
|
+
|
24
|
+
def letter_inspect
|
25
|
+
"letter: #{letter.inspect}"
|
26
|
+
end
|
27
|
+
|
28
|
+
def terminal_inspect
|
29
|
+
"terminal: #{terminal.inspect}"
|
30
|
+
end
|
31
|
+
|
32
|
+
def children_inspect
|
33
|
+
"children: #{children_tree.keys.inspect}"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# Raised when trying to execute an invalid operation on a
|
3
|
+
# Raised when trying to execute an invalid operation on a trie data
|
4
|
+
# structure.
|
4
5
|
class InvalidOperation < Exception
|
5
|
-
# Creates a new InvalidOperation exception.
|
6
|
+
# Creates a new {InvalidOperation InvalidOperation} exception.
|
6
7
|
# @param [String, nil] message the exception message.
|
7
8
|
def initialize message = nil
|
8
9
|
super
|