rambling-trie 0.9.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/LICENSE +1 -1
- data/README.md +133 -26
- data/Rakefile +1 -2
- data/lib/rambling/trie.rb +53 -9
- data/lib/rambling/trie/comparable.rb +16 -0
- data/lib/rambling/trie/compressable.rb +14 -0
- data/lib/rambling/trie/compressed_node.rb +38 -14
- data/lib/rambling/trie/compressor.rb +14 -10
- data/lib/rambling/trie/configuration.rb +11 -0
- data/lib/rambling/trie/configuration/properties.rb +66 -0
- data/lib/rambling/trie/configuration/provider_collection.rb +101 -0
- data/lib/rambling/trie/container.rb +57 -17
- data/lib/rambling/trie/enumerable.rb +1 -1
- data/lib/rambling/trie/forwardable.rb +9 -4
- data/lib/rambling/trie/inspectable.rb +37 -0
- data/lib/rambling/trie/invalid_operation.rb +3 -2
- data/lib/rambling/trie/missing_node.rb +2 -1
- data/lib/rambling/trie/node.rb +40 -30
- data/lib/rambling/trie/raw_node.rb +29 -13
- data/lib/rambling/trie/readers.rb +11 -0
- data/lib/rambling/trie/readers/plain_text.rb +26 -0
- data/lib/rambling/trie/serializers.rb +11 -0
- data/lib/rambling/trie/serializers/file.rb +25 -0
- data/lib/rambling/trie/serializers/marshal.rb +38 -0
- data/lib/rambling/trie/serializers/yaml.rb +39 -0
- data/lib/rambling/trie/serializers/zip.rb +67 -0
- data/lib/rambling/trie/stringifyable.rb +20 -0
- data/lib/rambling/trie/version.rb +1 -1
- data/rambling-trie.gemspec +2 -2
- data/spec/integration/rambling/trie_spec.rb +45 -49
- data/spec/lib/rambling/trie/comparable_spec.rb +104 -0
- data/spec/lib/rambling/trie/compressed_node_spec.rb +44 -0
- data/spec/lib/rambling/trie/configuration/properties_spec.rb +49 -0
- data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +165 -0
- data/spec/lib/rambling/trie/container_spec.rb +127 -38
- data/spec/lib/rambling/trie/{inspector_spec.rb → inspectable_spec.rb} +7 -5
- data/spec/lib/rambling/trie/raw_node_spec.rb +22 -41
- data/spec/lib/rambling/trie/readers/plain_text_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/file_spec.rb +11 -0
- data/spec/lib/rambling/trie/serializers/marshal_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/yaml_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/zip_spec.rb +30 -0
- data/spec/lib/rambling/trie/stringifyable_spec.rb +82 -0
- data/spec/lib/rambling/trie_spec.rb +120 -7
- data/spec/spec_helper.rb +7 -1
- data/spec/support/config.rb +5 -0
- data/spec/support/shared_examples/a_compressable_trie.rb +26 -0
- data/spec/support/shared_examples/a_serializable_trie.rb +26 -0
- data/spec/support/shared_examples/a_serializer.rb +29 -0
- data/spec/support/shared_examples/a_trie_data_structure.rb +29 -0
- data/spec/tmp/.gitkeep +0 -0
- metadata +51 -24
- data/lib/rambling/trie/compression.rb +0 -13
- data/lib/rambling/trie/inspector.rb +0 -11
- data/lib/rambling/trie/plain_text_reader.rb +0 -23
- data/lib/rambling/trie/tasks/gem.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/path.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/performance_report.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/time.rb +0 -7
- data/lib/rambling/trie/tasks/performance.rb +0 -15
- data/lib/rambling/trie/tasks/performance/all.rb +0 -17
- data/lib/rambling/trie/tasks/performance/benchmark.rb +0 -201
- data/lib/rambling/trie/tasks/performance/directory.rb +0 -11
- data/lib/rambling/trie/tasks/performance/flamegraph.rb +0 -119
- data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +0 -147
- data/lib/rambling/trie/tasks/performance/profile/memory.rb +0 -143
- data/spec/lib/rambling/trie/plain_text_reader_spec.rb +0 -18
data/lib/rambling/trie/node.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# A representation of a node in the
|
3
|
+
# A representation of a node in the trie data structure.
|
4
4
|
class Node
|
5
5
|
extend Rambling::Trie::Forwardable
|
6
|
-
include Rambling::Trie::
|
6
|
+
include Rambling::Trie::Compressable
|
7
7
|
include Rambling::Trie::Enumerable
|
8
|
-
include Rambling::Trie::
|
8
|
+
include Rambling::Trie::Comparable
|
9
|
+
include Rambling::Trie::Stringifyable
|
10
|
+
include Rambling::Trie::Inspectable
|
9
11
|
|
10
12
|
delegate [
|
11
13
|
:[],
|
@@ -14,64 +16,58 @@ module Rambling
|
|
14
16
|
:has_key?
|
15
17
|
] => :children_tree
|
16
18
|
|
17
|
-
#
|
18
|
-
#
|
19
|
+
# @overload letter
|
20
|
+
# Letter(s) corresponding to the current node.
|
21
|
+
# @overload letter=(letter)
|
22
|
+
# Sets the letter(s) corresponding to the current node. Ensures the
|
23
|
+
# {Node#letter #letter} in the {Node#parent #parent}'s
|
24
|
+
# {Node#children_tree #children_tree} is updated.
|
25
|
+
# @param [String, Symbol, nil] letter the new letter value.
|
26
|
+
# @return [Symbol, nil] the corresponding letter(s).
|
19
27
|
attr_reader :letter
|
20
28
|
|
21
|
-
# Children nodes.
|
22
|
-
# @return [Hash] the children_tree hash, consisting of
|
29
|
+
# Children nodes tree.
|
30
|
+
# @return [Hash] the children_tree hash, consisting of `:letter => node`.
|
23
31
|
attr_accessor :children_tree
|
24
32
|
|
25
33
|
# Parent node.
|
26
|
-
# @return [Node, nil] the parent
|
34
|
+
# @return [Node, nil] the parent of the current node.
|
27
35
|
attr_accessor :parent
|
28
36
|
|
29
|
-
# Creates a new
|
30
|
-
# @param [Node, nil] parent the parent of
|
37
|
+
# Creates a new node.
|
38
|
+
# @param [Node, nil] parent the parent of the current node.
|
31
39
|
def initialize parent = nil
|
32
40
|
self.parent = parent
|
33
41
|
self.children_tree = {}
|
34
42
|
end
|
35
43
|
|
36
|
-
#
|
37
|
-
# @return [
|
38
|
-
#
|
39
|
-
def as_word
|
40
|
-
raise InvalidOperation, 'Cannot represent branch as a word' if letter && !terminal?
|
41
|
-
to_s
|
42
|
-
end
|
43
|
-
|
44
|
-
# Children nodes of the current node.
|
45
|
-
# @return [Array] the array of children nodes contained in the current node.
|
44
|
+
# Children nodes.
|
45
|
+
# @return [Array<Node>] the array of children nodes contained in the
|
46
|
+
# current node.
|
46
47
|
def children
|
47
48
|
children_tree.values
|
48
49
|
end
|
49
50
|
|
50
|
-
#
|
51
|
-
# @return [Boolean] `true`
|
51
|
+
# Indicates if the current node is the root node.
|
52
|
+
# @return [Boolean] `true` if the node does not have a parent, `false`
|
53
|
+
# otherwise.
|
52
54
|
def root?
|
53
55
|
!parent
|
54
56
|
end
|
55
57
|
|
56
|
-
#
|
58
|
+
# Indicates if a {Node Node} is terminal or not.
|
57
59
|
# @return [Boolean] `true` for terminal nodes, `false` otherwise.
|
58
60
|
def terminal?
|
59
61
|
!!terminal
|
60
62
|
end
|
61
63
|
|
62
|
-
#
|
64
|
+
# Mark {Node Node} as terminal.
|
63
65
|
# @return [Node] the modified node.
|
64
66
|
def terminal!
|
65
67
|
self.terminal = true
|
66
68
|
self
|
67
69
|
end
|
68
70
|
|
69
|
-
# String representation of the current node.
|
70
|
-
# @return [String] the string representation of the current node.
|
71
|
-
def to_s
|
72
|
-
parent.to_s << letter.to_s
|
73
|
-
end
|
74
|
-
|
75
71
|
def letter= letter
|
76
72
|
if letter
|
77
73
|
@letter = letter.to_sym
|
@@ -79,6 +75,20 @@ module Rambling
|
|
79
75
|
end
|
80
76
|
end
|
81
77
|
|
78
|
+
# Returns all words that match a prefix of any length within chars.
|
79
|
+
# @param [String] chars the chars to base the prefix on.
|
80
|
+
# @return [Enumerator<String>] all the words that match a prefix given by
|
81
|
+
# chars.
|
82
|
+
# @yield [String] each word found.
|
83
|
+
def match_prefix chars
|
84
|
+
return enum_for :match_prefix, chars unless block_given?
|
85
|
+
|
86
|
+
yield as_word if terminal?
|
87
|
+
children_match_prefix chars do |word|
|
88
|
+
yield word
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
82
92
|
private
|
83
93
|
|
84
94
|
attr_accessor :terminal
|
@@ -1,10 +1,10 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# A representation of a node in an uncompressed
|
3
|
+
# A representation of a node in an uncompressed trie data structure.
|
4
4
|
class RawNode < Rambling::Trie::Node
|
5
|
-
# Adds a
|
6
|
-
# @param [String] word the word to add the
|
7
|
-
# @return [
|
5
|
+
# Adds a word to the current raw (uncompressed) trie node.
|
6
|
+
# @param [String] word the word to add to the trie.
|
7
|
+
# @return [RawNode] the added/modified node based on the word added.
|
8
8
|
# @note This method clears the contents of the word variable.
|
9
9
|
def add word
|
10
10
|
if word.empty?
|
@@ -14,9 +14,10 @@ module Rambling
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
-
# Checks if a path for set of characters exists in the trie.
|
18
|
-
# @param [Array] chars the characters to look for in the trie.
|
19
|
-
# @return [Boolean] `true` if the characters are found, `false`
|
17
|
+
# Checks if a path for a set of characters exists in the trie.
|
18
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
19
|
+
# @return [Boolean] `true` if the characters are found, `false`
|
20
|
+
# otherwise.
|
20
21
|
def partial_word? chars = []
|
21
22
|
if chars.empty?
|
22
23
|
true
|
@@ -28,9 +29,9 @@ module Rambling
|
|
28
29
|
end
|
29
30
|
|
30
31
|
# Checks if a path for a set of characters represents a word in the trie.
|
31
|
-
# @param [Array] chars the characters to look for in the trie.
|
32
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
32
33
|
# @return [Boolean] `true` if the characters are found and form a word,
|
33
|
-
#
|
34
|
+
# `false` otherwise.
|
34
35
|
def word? chars = []
|
35
36
|
if chars.empty?
|
36
37
|
terminal?
|
@@ -41,15 +42,16 @@ module Rambling
|
|
41
42
|
end
|
42
43
|
end
|
43
44
|
|
44
|
-
# Returns
|
45
|
-
# @param [Array] chars the characters to look for in the trie.
|
46
|
-
# @return [
|
45
|
+
# Returns the node that starts with the specified characters.
|
46
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
47
|
+
# @return [Node] the node that matches the specified characters.
|
48
|
+
# {MissingNode MissingNode} when not found.
|
47
49
|
def scan chars
|
48
50
|
chars.empty? ? self : closest_node(chars)
|
49
51
|
end
|
50
52
|
|
51
53
|
# Always returns `false` for a raw (uncompressed) node.
|
52
|
-
# @return [Boolean] always false for a raw (uncompressed) node.
|
54
|
+
# @return [Boolean] always `false` for a raw (uncompressed) node.
|
53
55
|
def compressed?
|
54
56
|
false
|
55
57
|
end
|
@@ -75,6 +77,20 @@ module Rambling
|
|
75
77
|
|
76
78
|
child ? child.scan(chars) : Rambling::Trie::MissingNode.new
|
77
79
|
end
|
80
|
+
|
81
|
+
def children_match_prefix chars
|
82
|
+
return enum_for :children_match_prefix, chars unless block_given?
|
83
|
+
|
84
|
+
if !chars.empty?
|
85
|
+
letter = chars.slice!(0).to_sym
|
86
|
+
child = children_tree[letter]
|
87
|
+
if child
|
88
|
+
child.match_prefix chars do |word|
|
89
|
+
yield word
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
78
94
|
end
|
79
95
|
end
|
80
96
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
module Readers
|
4
|
+
# File reader for .txt files.
|
5
|
+
class PlainText
|
6
|
+
# Yields each word read from a .txt file.
|
7
|
+
# @param [String] filepath the full path of the file to load the words
|
8
|
+
# from.
|
9
|
+
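# @yield [String, nil] each line read from the file with its trailing line separator removed; `nil` when the line has no separator to remove (`chomp!` returns `nil` if it changes nothing).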
|
10
|
+
def each_word filepath
|
11
|
+
each_line(filepath) { |line| yield line.chomp! }
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def each_line filepath
|
17
|
+
open(filepath) { |file| file.each_line { |line| yield line } }
|
18
|
+
end
|
19
|
+
|
20
|
+
def open filepath
|
21
|
+
File.open(filepath) { |file| yield file }
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
module Serializers
|
4
|
+
# Basic file serializer. Dumps/loads string contents from files.
|
5
|
+
class File
|
6
|
+
# Loads contents from a specified filepath.
|
7
|
+
# @param [String] filepath the filepath to load contents from.
|
8
|
+
# @return [String] all contents of the file.
|
9
|
+
def load filepath
|
10
|
+
::File.read filepath
|
11
|
+
end
|
12
|
+
|
13
|
+
# Dumps contents into a specified filepath.
|
14
|
+
# @param [String] contents the contents to dump.
|
15
|
+
# @param [String] filepath the filepath to dump the contents to.
|
16
|
+
# @return [Numeric] number of bytes written to disk.
|
17
|
+
def dump contents, filepath
|
18
|
+
::File.open filepath, 'w+' do |f|
|
19
|
+
f.write contents
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
module Serializers
|
4
|
+
# Serializer for Ruby marshal format (.marshal) files.
|
5
|
+
class Marshal
|
6
|
+
# Creates a new Marshal serializer.
|
7
|
+
# @param [Serializer] serializer the serializer responsible for writing
|
8
|
+
# to and reading from disk.
|
9
|
+
def initialize serializer = nil
|
10
|
+
@serializer = serializer || Rambling::Trie::Serializers::File.new
|
11
|
+
end
|
12
|
+
|
13
|
+
# Loads marshaled object from contents in filepath and deserializes it
|
14
|
+
# into a {Node Node}.
|
15
|
+
# @param [String] filepath the full path of the file to load the
|
16
|
+
# marshaled object from.
|
17
|
+
# @return [Node] The deserialized {Node Node}.
|
18
|
+
def load filepath
|
19
|
+
::Marshal.load serializer.load filepath
|
20
|
+
end
|
21
|
+
|
22
|
+
# Serializes a {Node Node} and dumps it as a marshaled object into
|
23
|
+
# filepath.
|
24
|
+
# @param [Node] node the node to serialize
|
25
|
+
# @param [String] filepath the full path of the file to dump the
|
26
|
+
# marshaled object into.
|
27
|
+
# @return [Numeric] number of bytes written to disk.
|
28
|
+
def dump node, filepath
|
29
|
+
serializer.dump ::Marshal.dump(node), filepath
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
attr_reader :serializer
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
module Serializers
|
4
|
+
# Serializer for Ruby yaml format (.yaml) files.
|
5
|
+
class Yaml
|
6
|
+
# Creates a new Yaml serializer.
|
7
|
+
# @param [Serializer] serializer the serializer responsible for writing
|
8
|
+
# to and reading from disk.
|
9
|
+
def initialize serializer = nil
|
10
|
+
@serializer = serializer || Rambling::Trie::Serializers::File.new
|
11
|
+
end
|
12
|
+
|
13
|
+
# Loads serialized object from YAML file in filepath and deserializes
|
14
|
+
# it into a {Node Node}.
|
15
|
+
# @param [String] filepath the full path of the file to load the
|
16
|
+
# serialized YAML object from.
|
17
|
+
# @return [Node] The deserialized {Node Node}.
|
18
|
+
def load filepath
|
19
|
+
require 'yaml'
|
20
|
+
::YAML.load serializer.load filepath
|
21
|
+
end
|
22
|
+
|
23
|
+
# Serializes a {Node Node} and dumps it as a YAML object into filepath.
|
24
|
+
# @param [Node] node the node to serialize
|
25
|
+
# @param [String] filepath the full path of the file to dump the YAML
|
26
|
+
# object into.
|
27
|
+
# @return [Numeric] number of bytes written to disk.
|
28
|
+
def dump node, filepath
|
29
|
+
require 'yaml'
|
30
|
+
serializer.dump ::YAML.dump(node), filepath
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
attr_reader :serializer
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
module Serializers
|
4
|
+
# Zip file serializer. Dumps/loads contents from zip files. Automatically
|
5
|
+
# detects whether the zip file contains a `.marshal` or `.yml` file.
|
6
|
+
class Zip
|
7
|
+
extend Rambling::Trie::Forwardable
|
8
|
+
|
9
|
+
# Creates a new Zip serializer.
|
10
|
+
# @param [Properties] properties the configuration properties set up so
|
11
|
+
# far.
|
12
|
+
def initialize properties
|
13
|
+
@properties = properties
|
14
|
+
end
|
15
|
+
|
16
|
+
# Unzip contents from specified filepath and load in contents from
|
17
|
+
# unzipped files.
|
18
|
+
# @param [String] filepath the filepath to load contents from.
|
19
|
+
# @return [String] all contents of the unzipped loaded file.
|
20
|
+
def load filepath
|
21
|
+
require 'zip'
|
22
|
+
|
23
|
+
::Zip::File.open filepath do |zip|
|
24
|
+
entry = zip.entries.first
|
25
|
+
entry_path = path entry.name
|
26
|
+
entry.extract entry_path
|
27
|
+
|
28
|
+
serializer = serializers.resolve entry.name
|
29
|
+
serializer.load entry_path
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Dumps contents and zips into a specified filepath.
|
34
|
+
# @param [String] contents the contents to dump.
|
35
|
+
# @param [String] filepath the filepath to dump the contents to.
|
36
|
+
# @return [Numeric] number of bytes written to disk.
|
37
|
+
def dump contents, filepath
|
38
|
+
require 'zip'
|
39
|
+
|
40
|
+
::Zip::File.open filepath, ::Zip::File::CREATE do |zip|
|
41
|
+
filename = ::File.basename filepath, '.zip'
|
42
|
+
|
43
|
+
entry_path = path filename
|
44
|
+
serializer = serializers.resolve filename
|
45
|
+
serializer.dump contents, entry_path
|
46
|
+
|
47
|
+
zip.add filename, entry_path
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
attr_reader :properties
|
54
|
+
|
55
|
+
delegate [
|
56
|
+
:serializers,
|
57
|
+
:tmp_path
|
58
|
+
] => :properties
|
59
|
+
|
60
|
+
def path filename
|
61
|
+
require 'securerandom'
|
62
|
+
::File.join tmp_path, "#{SecureRandom.uuid}-#{filename}"
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
# Provides the String representation behavior for the trie data structure.
|
4
|
+
module Stringifyable
|
5
|
+
# String representation of the current node, if it is a terminal node.
|
6
|
+
# @return [String] the string representation of the current node.
|
7
|
+
# @raise [InvalidOperation] if the node has a letter but is not terminal.
|
8
|
+
def as_word
|
9
|
+
raise Rambling::Trie::InvalidOperation, 'Cannot represent branch as a word' if letter && !terminal?
|
10
|
+
to_s
|
11
|
+
end
|
12
|
+
|
13
|
+
# String representation of the current node.
|
14
|
+
# @return [String] the string representation of the current node.
|
15
|
+
def to_s
|
16
|
+
parent.to_s << letter.to_s
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|