rambling-trie 0.9.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/LICENSE +1 -1
- data/README.md +133 -26
- data/Rakefile +1 -2
- data/lib/rambling/trie.rb +53 -9
- data/lib/rambling/trie/comparable.rb +16 -0
- data/lib/rambling/trie/compressable.rb +14 -0
- data/lib/rambling/trie/compressed_node.rb +38 -14
- data/lib/rambling/trie/compressor.rb +14 -10
- data/lib/rambling/trie/configuration.rb +11 -0
- data/lib/rambling/trie/configuration/properties.rb +66 -0
- data/lib/rambling/trie/configuration/provider_collection.rb +101 -0
- data/lib/rambling/trie/container.rb +57 -17
- data/lib/rambling/trie/enumerable.rb +1 -1
- data/lib/rambling/trie/forwardable.rb +9 -4
- data/lib/rambling/trie/inspectable.rb +37 -0
- data/lib/rambling/trie/invalid_operation.rb +3 -2
- data/lib/rambling/trie/missing_node.rb +2 -1
- data/lib/rambling/trie/node.rb +40 -30
- data/lib/rambling/trie/raw_node.rb +29 -13
- data/lib/rambling/trie/readers.rb +11 -0
- data/lib/rambling/trie/readers/plain_text.rb +26 -0
- data/lib/rambling/trie/serializers.rb +11 -0
- data/lib/rambling/trie/serializers/file.rb +25 -0
- data/lib/rambling/trie/serializers/marshal.rb +38 -0
- data/lib/rambling/trie/serializers/yaml.rb +39 -0
- data/lib/rambling/trie/serializers/zip.rb +67 -0
- data/lib/rambling/trie/stringifyable.rb +20 -0
- data/lib/rambling/trie/version.rb +1 -1
- data/rambling-trie.gemspec +2 -2
- data/spec/integration/rambling/trie_spec.rb +45 -49
- data/spec/lib/rambling/trie/comparable_spec.rb +104 -0
- data/spec/lib/rambling/trie/compressed_node_spec.rb +44 -0
- data/spec/lib/rambling/trie/configuration/properties_spec.rb +49 -0
- data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +165 -0
- data/spec/lib/rambling/trie/container_spec.rb +127 -38
- data/spec/lib/rambling/trie/{inspector_spec.rb → inspectable_spec.rb} +7 -5
- data/spec/lib/rambling/trie/raw_node_spec.rb +22 -41
- data/spec/lib/rambling/trie/readers/plain_text_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/file_spec.rb +11 -0
- data/spec/lib/rambling/trie/serializers/marshal_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/yaml_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/zip_spec.rb +30 -0
- data/spec/lib/rambling/trie/stringifyable_spec.rb +82 -0
- data/spec/lib/rambling/trie_spec.rb +120 -7
- data/spec/spec_helper.rb +7 -1
- data/spec/support/config.rb +5 -0
- data/spec/support/shared_examples/a_compressable_trie.rb +26 -0
- data/spec/support/shared_examples/a_serializable_trie.rb +26 -0
- data/spec/support/shared_examples/a_serializer.rb +29 -0
- data/spec/support/shared_examples/a_trie_data_structure.rb +29 -0
- data/spec/tmp/.gitkeep +0 -0
- metadata +51 -24
- data/lib/rambling/trie/compression.rb +0 -13
- data/lib/rambling/trie/inspector.rb +0 -11
- data/lib/rambling/trie/plain_text_reader.rb +0 -23
- data/lib/rambling/trie/tasks/gem.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/path.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/performance_report.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/time.rb +0 -7
- data/lib/rambling/trie/tasks/performance.rb +0 -15
- data/lib/rambling/trie/tasks/performance/all.rb +0 -17
- data/lib/rambling/trie/tasks/performance/benchmark.rb +0 -201
- data/lib/rambling/trie/tasks/performance/directory.rb +0 -11
- data/lib/rambling/trie/tasks/performance/flamegraph.rb +0 -119
- data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +0 -147
- data/lib/rambling/trie/tasks/performance/profile/memory.rb +0 -143
- data/spec/lib/rambling/trie/plain_text_reader_spec.rb +0 -18
data/lib/rambling/trie/node.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# A representation of a node in the
|
3
|
+
# A representation of a node in the trie data structure.
|
4
4
|
class Node
|
5
5
|
extend Rambling::Trie::Forwardable
|
6
|
-
include Rambling::Trie::
|
6
|
+
include Rambling::Trie::Compressable
|
7
7
|
include Rambling::Trie::Enumerable
|
8
|
-
include Rambling::Trie::
|
8
|
+
include Rambling::Trie::Comparable
|
9
|
+
include Rambling::Trie::Stringifyable
|
10
|
+
include Rambling::Trie::Inspectable
|
9
11
|
|
10
12
|
delegate [
|
11
13
|
:[],
|
@@ -14,64 +16,58 @@ module Rambling
|
|
14
16
|
:has_key?
|
15
17
|
] => :children_tree
|
16
18
|
|
17
|
-
#
|
18
|
-
#
|
19
|
+
# @overload letter
|
20
|
+
# Letter(s) corresponding to the current node.
|
21
|
+
# @overload letter=(letter)
|
22
|
+
# Sets the letter(s) corresponding to the current node. Ensures the
|
23
|
+
# {Node#letter #letter} in the {Node#parent #parent}'s
|
24
|
+
# {Node#children_tree #children_tree} is updated.
|
25
|
+
# @param [String, Symbol, nil] letter the new letter value.
|
26
|
+
# @return [Symbol, nil] the corresponding letter(s).
|
19
27
|
attr_reader :letter
|
20
28
|
|
21
|
-
# Children nodes.
|
22
|
-
# @return [Hash] the children_tree hash, consisting of
|
29
|
+
# Children nodes tree.
|
30
|
+
# @return [Hash] the children_tree hash, consisting of `:letter => node`.
|
23
31
|
attr_accessor :children_tree
|
24
32
|
|
25
33
|
# Parent node.
|
26
|
-
# @return [Node, nil] the parent
|
34
|
+
# @return [Node, nil] the parent of the current node.
|
27
35
|
attr_accessor :parent
|
28
36
|
|
29
|
-
# Creates a new
|
30
|
-
# @param [Node, nil] parent the parent of
|
37
|
+
# Creates a new node.
|
38
|
+
# @param [Node, nil] parent the parent of the current node.
|
31
39
|
def initialize(parent = nil)
  # Both values go through the public writers, in the same order as before:
  # the parent link first, then an empty letter => node hash.
  self.parent, self.children_tree = parent, {}
end
|
35
43
|
|
36
|
-
#
|
37
|
-
# @return [
|
38
|
-
#
|
39
|
-
def as_word
|
40
|
-
raise InvalidOperation, 'Cannot represent branch as a word' if letter && !terminal?
|
41
|
-
to_s
|
42
|
-
end
|
43
|
-
|
44
|
-
# Children nodes of the current node.
|
45
|
-
# @return [Array] the array of children nodes contained in the current node.
|
44
|
+
# Children nodes.
|
45
|
+
# @return [Array<Node>] the array of children nodes contained in the
|
46
|
+
# current node.
|
46
47
|
def children
  # The tree is keyed by letter; only the node values are of interest here.
  children_tree.each_value.to_a
end
|
49
50
|
|
50
|
-
#
|
51
|
-
# @return [Boolean] `true`
|
51
|
+
# Indicates if the current node is the root node.
|
52
|
+
# @return [Boolean] `true` if the node does not have a parent, `false`
|
53
|
+
# otherwise.
|
52
54
|
def root?
  # A node with no parent is the root.
  parent ? false : true
end
|
55
57
|
|
56
|
-
#
|
58
|
+
# Indicates if a {Node Node} is terminal or not.
|
57
59
|
# @return [Boolean] `true` for terminal nodes, `false` otherwise.
|
58
60
|
def terminal?
  # Coerce the private flag (possibly nil) into a strict boolean.
  terminal ? true : false
end
|
61
63
|
|
62
|
-
#
|
64
|
+
# Mark {Node Node} as terminal.
|
63
65
|
# @return [Node] the modified node.
|
64
66
|
def terminal!
  # Set the flag and hand back the receiver so calls can be chained.
  tap { self.terminal = true }
end
|
68
70
|
|
69
|
-
# String representation of the current node.
|
70
|
-
# @return [String] the string representation of the current node.
|
71
|
-
def to_s
|
72
|
-
parent.to_s << letter.to_s
|
73
|
-
end
|
74
|
-
|
75
71
|
def letter= letter
|
76
72
|
if letter
|
77
73
|
@letter = letter.to_sym
|
@@ -79,6 +75,20 @@ module Rambling
|
|
79
75
|
end
|
80
76
|
end
|
81
77
|
|
78
|
+
# Returns all words that match a prefix of any length within chars.
|
79
|
+
# @param [String] chars the chars to base the prefix on.
|
80
|
+
# @return [Enumerator<String>] all the words that match a prefix given by
|
81
|
+
# chars.
|
82
|
+
# @yield [String] each word found.
|
83
|
+
def match_prefix(chars)
  if block_given?
    # The current node comes first when it completes a word, followed by
    # whatever the children find further along the given characters.
    yield as_word if terminal?
    children_match_prefix(chars) { |word| yield word }
  else
    enum_for(:match_prefix, chars)
  end
end
|
91
|
+
|
82
92
|
private
|
83
93
|
|
84
94
|
attr_accessor :terminal
|
@@ -1,10 +1,10 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# A representation of a node in an uncompressed
|
3
|
+
# A representation of a node in an uncompressed trie data structure.
|
4
4
|
class RawNode < Rambling::Trie::Node
|
5
|
-
# Adds a
|
6
|
-
# @param [String] word the word to add the
|
7
|
-
# @return [
|
5
|
+
# Adds a word to the current raw (uncompressed) trie node.
|
6
|
+
# @param [String] word the word to add to the trie.
|
7
|
+
# @return [RawNode] the added/modified node based on the word added.
|
8
8
|
# @note This method clears the contents of the word variable.
|
9
9
|
def add word
|
10
10
|
if word.empty?
|
@@ -14,9 +14,10 @@ module Rambling
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
-
# Checks if a path for set of characters exists in the trie.
|
18
|
-
# @param [Array] chars the characters to look for in the trie.
|
19
|
-
# @return [Boolean] `true` if the characters are found, `false`
|
17
|
+
# Checks if a path for a set of characters exists in the trie.
|
18
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
19
|
+
# @return [Boolean] `true` if the characters are found, `false`
|
20
|
+
# otherwise.
|
20
21
|
def partial_word? chars = []
|
21
22
|
if chars.empty?
|
22
23
|
true
|
@@ -28,9 +29,9 @@ module Rambling
|
|
28
29
|
end
|
29
30
|
|
30
31
|
# Checks if a path for a set of characters represents a word in the trie.
|
31
|
-
# @param [Array] chars the characters to look for in the trie.
|
32
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
32
33
|
# @return [Boolean] `true` if the characters are found and form a word,
|
33
|
-
#
|
34
|
+
# `false` otherwise.
|
34
35
|
def word? chars = []
|
35
36
|
if chars.empty?
|
36
37
|
terminal?
|
@@ -41,15 +42,16 @@ module Rambling
|
|
41
42
|
end
|
42
43
|
end
|
43
44
|
|
44
|
-
# Returns
|
45
|
-
# @param [Array] chars the characters to look for in the trie.
|
46
|
-
# @return [
|
45
|
+
# Returns the node that starts with the specified characters.
|
46
|
+
# @param [Array<String>] chars the characters to look for in the trie.
|
47
|
+
# @return [Node] the node that matches the specified characters.
|
48
|
+
# {MissingNode MissingNode} when not found.
|
47
49
|
def scan(chars)
  # Nothing left to match: the current node is the answer.
  return self if chars.empty?

  closest_node chars
end
|
50
52
|
|
51
53
|
# Always return `false` for a raw (uncompressed) node.
|
52
|
-
# @return [Boolean] always false for a raw (uncompressed) node.
|
54
|
+
# @return [Boolean] always `false` for a raw (uncompressed) node.
|
53
55
|
def compressed?
  # A raw node never reports itself as compressed.
  false
end
|
@@ -75,6 +77,20 @@ module Rambling
|
|
75
77
|
|
76
78
|
child ? child.scan(chars) : Rambling::Trie::MissingNode.new
|
77
79
|
end
|
80
|
+
|
81
|
+
# Yields every word found by following the given characters one child at a
# time.
# @param [String, Array<String>] chars the characters to follow; it is not
#   modified.
# @return [Enumerator<String>, nil] an enumerator when no block is given;
#   `nil` when chars is empty or no child matches the first character.
# @yield [String] each word found.
def children_match_prefix chars
  return enum_for :children_match_prefix, chars unless block_given?
  return if chars.empty?

  # Read the head and pass a sliced tail instead of consuming the argument
  # with `slice!`: the caller's value stays intact and an enumerator built
  # from this method gives the same words every time it is iterated.
  child = children_tree[chars[0].to_sym]
  return unless child

  child.match_prefix(chars[1..-1]) { |word| yield word }
end
|
78
94
|
end
|
79
95
|
end
|
80
96
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Rambling
  module Trie
    module Readers
      # File reader for plain text (.txt) files with one word per line.
      class PlainText
        # Yields each word read from a .txt file, with the trailing line
        # terminator removed.
        # @param [String] filepath the full path of the file to load the words
        #   from.
        # @yield [String] each line read from the file.
        # @return [Enumerator<String>, nil] an enumerator over the words when
        #   no block is given, `nil` otherwise.
        def each_word filepath
          return enum_for :each_word, filepath unless block_given?

          # `chomp`, not `chomp!`: the bang version returns nil when there is
          # nothing to strip, which turned a final line without a trailing
          # newline into a nil word.
          ::File.foreach(filepath) { |line| yield line.chomp }
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Rambling
  module Trie
    module Serializers
      # Basic file serializer. Dumps/loads string contents from files.
      class File
        # Loads contents from a specified filepath.
        # @param [String] filepath the filepath to load contents from.
        # @return [String] all contents of the file.
        def load filepath
          ::File.read filepath
        end

        # Dumps contents into a specified filepath, replacing whatever the
        # file held before.
        # @param [String] contents the contents to dump.
        # @param [String] filepath the filepath to dump the contents to.
        # @return [Integer] number of bytes written to disk.
        def dump contents, filepath
          # Write-only is all that is needed here; the file is never read
          # back through this handle.
          ::File.write filepath, contents
        end
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Rambling
  module Trie
    module Serializers
      # Serializer for Ruby marshal format (.marshal) files.
      class Marshal
        # Builds a Marshal serializer on top of a lower-level serializer.
        # @param [Serializer] serializer the object that actually reads from
        #   and writes to disk; a {File File} serializer is used when none is
        #   given.
        def initialize serializer = nil
          @serializer = serializer
          @serializer ||= Rambling::Trie::Serializers::File.new
        end

        # Reads the marshaled contents stored in filepath and turns them back
        # into an object.
        # @param [String] filepath the full path of the file holding the
        #   marshaled object.
        # @return [Node] the deserialized {Node Node}.
        # @note NOTE(review): `Marshal.load` can instantiate arbitrary
        #   objects; only load files from a trusted source.
        def load filepath
          marshaled = serializer.load(filepath)
          ::Marshal.load(marshaled)
        end

        # Marshals a node and hands the result to the underlying serializer
        # to be stored in filepath.
        # @param [Node] node the node to serialize.
        # @param [String] filepath the full path of the file to dump the
        #   marshaled object into.
        # @return whatever the underlying serializer's `dump` returns.
        def dump node, filepath
          marshaled = ::Marshal.dump(node)
          serializer.dump(marshaled, filepath)
        end

        private

        attr_reader :serializer
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Rambling
  module Trie
    module Serializers
      # Serializer for Ruby yaml format (.yaml) files.
      class Yaml
        # Creates a new Yaml serializer.
        # @param [Serializer] serializer the serializer responsible to write to
        #   and read from disk; a {File File} serializer is used when none is
        #   given.
        def initialize serializer = nil
          @serializer = serializer || Rambling::Trie::Serializers::File.new
        end

        # Loads serialized object from YAML file in filepath and deserializes
        # it into a {Node Node}.
        # @param [String] filepath the full path of the file to load the
        #   serialized YAML object from.
        # @return [Node] The deserialized {Node Node}.
        # @note NOTE(review): this instantiates whatever objects the YAML
        #   document describes; only load files from a trusted source.
        def load filepath
          require 'yaml'
          contents = serializer.load filepath

          # Psych 4 (Ruby 3.1+) turned `YAML.load` into a safe load, which
          # refuses to build arbitrary objects, so a dumped node could not be
          # read back. `unsafe_load` keeps the original behavior there; older
          # Psych versions only have `load`.
          if ::YAML.respond_to? :unsafe_load
            ::YAML.unsafe_load contents
          else
            ::YAML.load contents
          end
        end

        # Serializes a {Node Node} and dumps it as a YAML object into filepath.
        # @param [Node] node the node to serialize
        # @param [String] filepath the full path of the file to dump the YAML
        #   object into.
        # @return whatever the underlying serializer's `dump` returns.
        def dump node, filepath
          require 'yaml'
          serializer.dump ::YAML.dump(node), filepath
        end

        private

        attr_reader :serializer
      end
    end
  end
end
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Rambling
  module Trie
    module Serializers
      # Zip file serializer. Dumps/loads contents from zip files. The
      # serializer for the zipped entry is resolved from the entry's filename
      # through the configured serializers.
      class Zip
        extend Rambling::Trie::Forwardable

        # Creates a new Zip serializer.
        # @param [Properties] properties the configuration properties set up so
        #   far.
        def initialize properties
          @properties = properties
        end

        # Unzips the first entry of the specified filepath into a temporary
        # file and loads it with the serializer resolved for that entry.
        # @param [String] filepath the filepath to load contents from.
        # @return whatever the resolved serializer's `load` returns for the
        #   unzipped entry.
        def load filepath
          require 'zip'

          ::Zip::File.open filepath do |zip|
            entry = zip.entries.first
            entry_path = path entry.name

            begin
              entry.extract entry_path
              serializers.resolve(entry.name).load entry_path
            ensure
              # The extracted copy is only needed while loading.
              # NOTE(review): assumes the resolved serializer reads the file
              # eagerly, as the serializers in this gem do.
              remove_tmp_file entry_path
            end
          end
        end

        # Dumps contents into a temporary file with the serializer resolved
        # for the filename (filepath without `.zip`) and zips that file into
        # filepath.
        # @param [String] contents the contents to dump.
        # @param [String] filepath the filepath to dump the contents to.
        # @return the value returned by the `Zip::File.open` block.
        #   NOTE(review): the previous doc claimed a byte count; confirm what
        #   callers expect.
        def dump contents, filepath
          require 'zip'

          filename = ::File.basename filepath, '.zip'
          entry_path = path filename

          begin
            ::Zip::File.open filepath, ::Zip::File::CREATE do |zip|
              serializer = serializers.resolve filename
              serializer.dump contents, entry_path

              zip.add filename, entry_path
            end
          ensure
            # Remove the temporary file only after the zip block has finished,
            # since the archive is written when the block closes.
            remove_tmp_file entry_path
          end
        end

        private

        attr_reader :properties

        delegate [
          :serializers,
          :tmp_path
        ] => :properties

        # Builds a unique path under the configured temporary directory.
        def path filename
          require 'securerandom'
          ::File.join tmp_path, "#{SecureRandom.uuid}-#{filename}"
        end

        # Deletes a temporary file if it was actually created.
        def remove_tmp_file tmp_file
          ::File.delete tmp_file if ::File.exist? tmp_file
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Rambling
  module Trie
    # Provides the String representation behavior for the trie data structure.
    module Stringifyable
      # String representation of the current node, if it is a terminal node.
      # @return [String] the string representation of the current node.
      # @raise [InvalidOperation] if the node has a letter but is not
      #   terminal. A node without a letter (the root) does not raise.
      def as_word
        raise Rambling::Trie::InvalidOperation, 'Cannot represent branch as a word' if letter && !terminal?
        to_s
      end

      # String representation of the current node: the letters of all its
      # ancestors followed by its own.
      # @return [String] the string representation of the current node.
      def to_s
        # Build a new string instead of appending with `<<`: for the root the
        # left side is `nil.to_s`, which is a frozen string on Ruby 2.7+ and
        # cannot be appended to.
        parent.to_s + letter.to_s
      end
    end
  end
end
|