rambling-trie 0.9.3 → 1.0.0

This diff shows the changes between two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/LICENSE +1 -1
  4. data/README.md +133 -26
  5. data/Rakefile +1 -2
  6. data/lib/rambling/trie.rb +53 -9
  7. data/lib/rambling/trie/comparable.rb +16 -0
  8. data/lib/rambling/trie/compressable.rb +14 -0
  9. data/lib/rambling/trie/compressed_node.rb +38 -14
  10. data/lib/rambling/trie/compressor.rb +14 -10
  11. data/lib/rambling/trie/configuration.rb +11 -0
  12. data/lib/rambling/trie/configuration/properties.rb +66 -0
  13. data/lib/rambling/trie/configuration/provider_collection.rb +101 -0
  14. data/lib/rambling/trie/container.rb +57 -17
  15. data/lib/rambling/trie/enumerable.rb +1 -1
  16. data/lib/rambling/trie/forwardable.rb +9 -4
  17. data/lib/rambling/trie/inspectable.rb +37 -0
  18. data/lib/rambling/trie/invalid_operation.rb +3 -2
  19. data/lib/rambling/trie/missing_node.rb +2 -1
  20. data/lib/rambling/trie/node.rb +40 -30
  21. data/lib/rambling/trie/raw_node.rb +29 -13
  22. data/lib/rambling/trie/readers.rb +11 -0
  23. data/lib/rambling/trie/readers/plain_text.rb +26 -0
  24. data/lib/rambling/trie/serializers.rb +11 -0
  25. data/lib/rambling/trie/serializers/file.rb +25 -0
  26. data/lib/rambling/trie/serializers/marshal.rb +38 -0
  27. data/lib/rambling/trie/serializers/yaml.rb +39 -0
  28. data/lib/rambling/trie/serializers/zip.rb +67 -0
  29. data/lib/rambling/trie/stringifyable.rb +20 -0
  30. data/lib/rambling/trie/version.rb +1 -1
  31. data/rambling-trie.gemspec +2 -2
  32. data/spec/integration/rambling/trie_spec.rb +45 -49
  33. data/spec/lib/rambling/trie/comparable_spec.rb +104 -0
  34. data/spec/lib/rambling/trie/compressed_node_spec.rb +44 -0
  35. data/spec/lib/rambling/trie/configuration/properties_spec.rb +49 -0
  36. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +165 -0
  37. data/spec/lib/rambling/trie/container_spec.rb +127 -38
  38. data/spec/lib/rambling/trie/{inspector_spec.rb → inspectable_spec.rb} +7 -5
  39. data/spec/lib/rambling/trie/raw_node_spec.rb +22 -41
  40. data/spec/lib/rambling/trie/readers/plain_text_spec.rb +14 -0
  41. data/spec/lib/rambling/trie/serializers/file_spec.rb +11 -0
  42. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +14 -0
  43. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +14 -0
  44. data/spec/lib/rambling/trie/serializers/zip_spec.rb +30 -0
  45. data/spec/lib/rambling/trie/stringifyable_spec.rb +82 -0
  46. data/spec/lib/rambling/trie_spec.rb +120 -7
  47. data/spec/spec_helper.rb +7 -1
  48. data/spec/support/config.rb +5 -0
  49. data/spec/support/shared_examples/a_compressable_trie.rb +26 -0
  50. data/spec/support/shared_examples/a_serializable_trie.rb +26 -0
  51. data/spec/support/shared_examples/a_serializer.rb +29 -0
  52. data/spec/support/shared_examples/a_trie_data_structure.rb +29 -0
  53. data/spec/tmp/.gitkeep +0 -0
  54. metadata +51 -24
  55. data/lib/rambling/trie/compression.rb +0 -13
  56. data/lib/rambling/trie/inspector.rb +0 -11
  57. data/lib/rambling/trie/plain_text_reader.rb +0 -23
  58. data/lib/rambling/trie/tasks/gem.rb +0 -17
  59. data/lib/rambling/trie/tasks/helpers/path.rb +0 -17
  60. data/lib/rambling/trie/tasks/helpers/performance_report.rb +0 -17
  61. data/lib/rambling/trie/tasks/helpers/time.rb +0 -7
  62. data/lib/rambling/trie/tasks/performance.rb +0 -15
  63. data/lib/rambling/trie/tasks/performance/all.rb +0 -17
  64. data/lib/rambling/trie/tasks/performance/benchmark.rb +0 -201
  65. data/lib/rambling/trie/tasks/performance/directory.rb +0 -11
  66. data/lib/rambling/trie/tasks/performance/flamegraph.rb +0 -119
  67. data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +0 -147
  68. data/lib/rambling/trie/tasks/performance/profile/memory.rb +0 -143
  69. data/spec/lib/rambling/trie/plain_text_reader_spec.rb +0 -18
@@ -1,6 +1,7 @@
1
1
  module Rambling
2
2
  module Trie
3
- # A representation of a missing node in the Trie data structure.
3
+ # A representation of a missing node in the trie data structure. Returned
4
+ # when a node is not found.
4
5
  class MissingNode < Rambling::Trie::Node
5
6
  end
6
7
  end
@@ -1,11 +1,13 @@
1
1
  module Rambling
2
2
  module Trie
3
- # A representation of a node in the Trie data structure.
3
+ # A representation of a node in the trie data structure.
4
4
  class Node
5
5
  extend Rambling::Trie::Forwardable
6
- include Rambling::Trie::Compression
6
+ include Rambling::Trie::Compressable
7
7
  include Rambling::Trie::Enumerable
8
- include Rambling::Trie::Inspector
8
+ include Rambling::Trie::Comparable
9
+ include Rambling::Trie::Stringifyable
10
+ include Rambling::Trie::Inspectable
9
11
 
10
12
  delegate [
11
13
  :[],
@@ -14,64 +16,58 @@ module Rambling
14
16
  :has_key?
15
17
  ] => :children_tree
16
18
 
17
- # Letter or letters corresponding to this node.
18
- # @return [Symbol, nil] the corresponding letter(s) or nil.
19
+ # @overload letter
20
+ # Letter(s) corresponding to the current node.
21
+ # @overload letter=(letter)
22
+ # Sets the letter(s) corresponding to the current node. Ensures the
23
+ # {Node#letter #letter} in the {Node#parent #parent}'s
24
+ # {Node#children_tree #children_tree} is updated.
25
+ # @param [String, Symbol, nil] letter the new letter value.
26
+ # @return [Symbol, nil] the corresponding letter(s).
19
27
  attr_reader :letter
20
28
 
21
- # Children nodes.
22
- # @return [Hash] the children_tree hash, consisting of :letter => node.
29
+ # Children nodes tree.
30
+ # @return [Hash] the children_tree hash, consisting of `:letter => node`.
23
31
  attr_accessor :children_tree
24
32
 
25
33
  # Parent node.
26
- # @return [Node, nil] the parent node or nil for the root element.
34
+ # @return [Node, nil] the parent of the current node.
27
35
  attr_accessor :parent
28
36
 
29
- # Creates a new Node.
30
- # @param [Node, nil] parent the parent of this node.
37
+ # Creates a new node.
38
+ # @param [Node, nil] parent the parent of the current node.
31
39
  def initialize parent = nil
32
40
  self.parent = parent
33
41
  self.children_tree = {}
34
42
  end
35
43
 
36
- # String representation of the current node, if it is a terminal node.
37
- # @return [String] the string representation of the current node.
38
- # @raise [InvalidOperation] if node is not terminal or is root.
39
- def as_word
40
- raise InvalidOperation, 'Cannot represent branch as a word' if letter && !terminal?
41
- to_s
42
- end
43
-
44
- # Children nodes of the current node.
45
- # @return [Array] the array of children nodes contained in the current node.
44
+ # Children nodes.
45
+ # @return [Array<Node>] the array of children nodes contained in the
46
+ # current node.
46
47
  def children
47
48
  children_tree.values
48
49
  end
49
50
 
50
- # If the current node is the root node.
51
- # @return [Boolean] `true` only if the node does not have a parent
51
+ # Indicates if the current node is the root node.
52
+ # @return [Boolean] `true` if the node does not have a parent, `false`
53
+ # otherwise.
52
54
  def root?
53
55
  !parent
54
56
  end
55
57
 
56
- # Flag for terminal nodes.
58
+ # Indicates if a {Node Node} is terminal or not.
57
59
  # @return [Boolean] `true` for terminal nodes, `false` otherwise.
58
60
  def terminal?
59
61
  !!terminal
60
62
  end
61
63
 
62
- # Force [Node] to be `terminal`
64
+ # Mark {Node Node} as terminal.
63
65
  # @return [Node] the modified node.
64
66
  def terminal!
65
67
  self.terminal = true
66
68
  self
67
69
  end
68
70
 
69
- # String representation of the current node.
70
- # @return [String] the string representation of the current node.
71
- def to_s
72
- parent.to_s << letter.to_s
73
- end
74
-
75
71
  def letter= letter
76
72
  if letter
77
73
  @letter = letter.to_sym
@@ -79,6 +75,20 @@ module Rambling
79
75
  end
80
76
  end
81
77
 
78
+ # Returns all words that match a prefix of any length within chars.
79
+ # @param [String] chars the chars to base the prefix on.
80
+ # @return [Enumerator<String>] all the words that match a prefix given by
81
+ # chars.
82
+ # @yield [String] each word found.
83
+ def match_prefix chars
84
+ return enum_for :match_prefix, chars unless block_given?
85
+
86
+ yield as_word if terminal?
87
+ children_match_prefix chars do |word|
88
+ yield word
89
+ end
90
+ end
91
+
82
92
  private
83
93
 
84
94
  attr_accessor :terminal
@@ -1,10 +1,10 @@
1
1
  module Rambling
2
2
  module Trie
3
- # A representation of a node in an uncompressed Trie data structure.
3
+ # A representation of a node in an uncompressed trie data structure.
4
4
  class RawNode < Rambling::Trie::Node
5
- # Adds a branch to the current trie node based on the word
6
- # @param [String] word the word to add the branch from.
7
- # @return [Rambling::Trie::RawNode] the just added branch's root node.
5
+ # Adds a word to the current raw (uncompressed) trie node.
6
+ # @param [String] word the word to add to the trie.
7
+ # @return [RawNode] the added/modified node based on the word added.
8
8
  # @note This method clears the contents of the word variable.
9
9
  def add word
10
10
  if word.empty?
@@ -14,9 +14,10 @@ module Rambling
14
14
  end
15
15
  end
16
16
 
17
- # Checks if a path for set of characters exists in the trie.
18
- # @param [Array] chars the characters to look for in the trie.
19
- # @return [Boolean] `true` if the characters are found, `false` otherwise.
17
+ # Checks if a path for a set of characters exists in the trie.
18
+ # @param [Array<String>] chars the characters to look for in the trie.
19
+ # @return [Boolean] `true` if the characters are found, `false`
20
+ # otherwise.
20
21
  def partial_word? chars = []
21
22
  if chars.empty?
22
23
  true
@@ -28,9 +29,9 @@ module Rambling
28
29
  end
29
30
 
30
31
  # Checks if a path for a set of characters represents a word in the trie.
31
- # @param [Array] chars the characters to look for in the trie.
32
+ # @param [Array<String>] chars the characters to look for in the trie.
32
33
  # @return [Boolean] `true` if the characters are found and form a word,
33
- # `false` otherwise.
34
+ # `false` otherwise.
34
35
  def word? chars = []
35
36
  if chars.empty?
36
37
  terminal?
@@ -41,15 +42,16 @@ module Rambling
41
42
  end
42
43
  end
43
44
 
44
- # Returns all words that start with the specified characters.
45
- # @param [Array] chars the characters to look for in the trie.
46
- # @return [Array] all the words contained in the trie that start with the specified characters.
45
+ # Returns the node that matches the specified characters.
46
+ # @param [Array<String>] chars the characters to look for in the trie.
47
+ # @return [Node] the node that matches the specified characters.
48
+ # {MissingNode MissingNode} when not found.
47
49
  def scan chars
48
50
  chars.empty? ? self : closest_node(chars)
49
51
  end
50
52
 
51
53
  # Always return `false` for a raw (uncompressed) node.
52
- # @return [Boolean] always false for a raw (uncompressed) node.
54
+ # @return [Boolean] always `false` for a raw (uncompressed) node.
53
55
  def compressed?
54
56
  false
55
57
  end
@@ -75,6 +77,20 @@ module Rambling
75
77
 
76
78
  child ? child.scan(chars) : Rambling::Trie::MissingNode.new
77
79
  end
80
+
81
+ def children_match_prefix chars
82
+ return enum_for :children_match_prefix, chars unless block_given?
83
+
84
+ if !chars.empty?
85
+ letter = chars.slice!(0).to_sym
86
+ child = children_tree[letter]
87
+ if child
88
+ child.match_prefix chars do |word|
89
+ yield word
90
+ end
91
+ end
92
+ end
93
+ end
78
94
  end
79
95
  end
80
96
  end
@@ -0,0 +1,11 @@
1
+ %w{plain_text}.each do |file|
2
+ require File.join('rambling', 'trie', 'readers', file)
3
+ end
4
+
5
+ module Rambling
6
+ module Trie
7
+ # Namespace for all readers.
8
+ module Readers
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,26 @@
1
+ module Rambling
2
+ module Trie
3
+ module Readers
4
+ # File reader for .txt files.
5
+ class PlainText
6
+ # Yields each word read from a .txt file.
7
+ # @param [String] filepath the full path of the file to load the words
8
+ # from.
9
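+ # @yield [String, nil] Each line read from the file with its trailing line separator removed; `nil` when a line has no trailing separator, because `chomp!` returns `nil` if nothing was removed.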
10
+ def each_word filepath
11
+ each_line(filepath) { |line| yield line.chomp! }
12
+ end
13
+
14
+ private
15
+
16
+ def each_line filepath
17
+ open(filepath) { |file| file.each_line { |line| yield line } }
18
+ end
19
+
20
+ def open filepath
21
+ File.open(filepath) { |file| yield file }
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,11 @@
1
+ %w{file marshal yaml zip}.each do |file|
2
+ require File.join('rambling', 'trie', 'serializers', file)
3
+ end
4
+
5
+ module Rambling
6
+ module Trie
7
+ # Namespace for all serializers.
8
+ module Serializers
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,25 @@
1
+ module Rambling
2
+ module Trie
3
+ module Serializers
4
+ # Basic file serializer. Dumps/loads string contents from files.
5
+ class File
6
+ # Loads contents from a specified filepath.
7
+ # @param [String] filepath the filepath to load contents from.
8
+ # @return [String] all contents of the file.
9
+ def load filepath
10
+ ::File.read filepath
11
+ end
12
+
13
+ # Dumps contents into a specified filepath.
14
+ # @param [String] contents the contents to dump.
15
+ # @param [String] filepath the filepath to dump the contents to.
16
+ # @return [Numeric] number of bytes written to disk.
17
+ def dump contents, filepath
18
+ ::File.open filepath, 'w+' do |f|
19
+ f.write contents
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,38 @@
1
+ module Rambling
2
+ module Trie
3
+ module Serializers
4
+ # Serializer for Ruby marshal format (.marshal) files.
5
+ class Marshal
6
+ # Creates a new Marshal serializer.
7
+ # @param [Serializer] serializer the serializer responsible for writing
8
+ # to and reading from disk.
9
+ def initialize serializer = nil
10
+ @serializer = serializer || Rambling::Trie::Serializers::File.new
11
+ end
12
+
13
+ # Loads marshaled object from contents in filepath and deserializes it
14
+ # into a {Node Node}.
15
+ # @param [String] filepath the full path of the file to load the
16
+ # marshaled object from.
17
+ # @return [Node] The deserialized {Node Node}.
18
+ def load filepath
19
+ ::Marshal.load serializer.load filepath
20
+ end
21
+
22
+ # Serializes a {Node Node} and dumps it as a marshaled object into
23
+ # filepath.
24
+ # @param [Node] node the node to serialize.
25
+ # @param [String] filepath the full path of the file to dump the
26
+ # marshaled object into.
27
+ # @return [Numeric] number of bytes written to disk.
28
+ def dump node, filepath
29
+ serializer.dump ::Marshal.dump(node), filepath
30
+ end
31
+
32
+ private
33
+
34
+ attr_reader :serializer
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,39 @@
1
+ module Rambling
2
+ module Trie
3
+ module Serializers
4
+ # Serializer for YAML format (.yaml) files.
5
+ class Yaml
6
+ # Creates a new Yaml serializer.
7
+ # @param [Serializer] serializer the serializer responsible for writing
8
+ # to and reading from disk.
9
+ def initialize serializer = nil
10
+ @serializer = serializer || Rambling::Trie::Serializers::File.new
11
+ end
12
+
13
+ # Loads serialized object from YAML file in filepath and deserializes
14
+ # it into a {Node Node}.
15
+ # @param [String] filepath the full path of the file to load the
16
+ # serialized YAML object from.
17
+ # @return [Node] The deserialized {Node Node}.
18
+ def load filepath
19
+ require 'yaml'
20
+ ::YAML.load serializer.load filepath
21
+ end
22
+
23
+ # Serializes a {Node Node} and dumps it as a YAML object into filepath.
24
+ # @param [Node] node the node to serialize.
25
+ # @param [String] filepath the full path of the file to dump the YAML
26
+ # object into.
27
+ # @return [Numeric] number of bytes written to disk.
28
+ def dump node, filepath
29
+ require 'yaml'
30
+ serializer.dump ::YAML.dump(node), filepath
31
+ end
32
+
33
+ private
34
+
35
+ attr_reader :serializer
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,67 @@
1
+ module Rambling
2
+ module Trie
3
+ module Serializers
4
+ # Zip file serializer. Dumps/loads contents from zip files. Automatically
5
+ # detects if zip file contains `.marshal` or `.yml` file
6
+ class Zip
7
+ extend Rambling::Trie::Forwardable
8
+
9
+ # Creates a new Zip serializer.
10
+ # @param [Properties] properties the configuration properties set up so
11
+ # far.
12
+ def initialize properties
13
+ @properties = properties
14
+ end
15
+
16
+ # Unzip contents from specified filepath and load in contents from
17
+ # unzipped files.
18
+ # @param [String] filepath the filepath to load contents from.
19
+ # @return [String] all contents of the unzipped loaded file.
20
+ def load filepath
21
+ require 'zip'
22
+
23
+ ::Zip::File.open filepath do |zip|
24
+ entry = zip.entries.first
25
+ entry_path = path entry.name
26
+ entry.extract entry_path
27
+
28
+ serializer = serializers.resolve entry.name
29
+ serializer.load entry_path
30
+ end
31
+ end
32
+
33
+ # Dumps contents and zips into a specified filepath.
34
+ # @param [String] contents the contents to dump.
35
+ # @param [String] filepath the filepath to dump the contents to.
36
+ # @return [Numeric] number of bytes written to disk.
37
+ def dump contents, filepath
38
+ require 'zip'
39
+
40
+ ::Zip::File.open filepath, ::Zip::File::CREATE do |zip|
41
+ filename = ::File.basename filepath, '.zip'
42
+
43
+ entry_path = path filename
44
+ serializer = serializers.resolve filename
45
+ serializer.dump contents, entry_path
46
+
47
+ zip.add filename, entry_path
48
+ end
49
+ end
50
+
51
+ private
52
+
53
+ attr_reader :properties
54
+
55
+ delegate [
56
+ :serializers,
57
+ :tmp_path
58
+ ] => :properties
59
+
60
+ def path filename
61
+ require 'securerandom'
62
+ ::File.join tmp_path, "#{SecureRandom.uuid}-#{filename}"
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,20 @@
1
+ module Rambling
2
+ module Trie
3
+ # Provides the String representation behavior for the trie data structure.
4
+ module Stringifyable
5
+ # String representation of the current node, if it is a terminal node.
6
+ # @return [String] the string representation of the current node.
7
+ # @raise [InvalidOperation] if the node has a letter and is not terminal.
8
+ def as_word
9
+ raise Rambling::Trie::InvalidOperation, 'Cannot represent branch as a word' if letter && !terminal?
10
+ to_s
11
+ end
12
+
13
+ # String representation of the current node.
14
+ # @return [String] the string representation of the current node.
15
+ def to_s
16
+ parent.to_s << letter.to_s
17
+ end
18
+ end
19
+ end
20
+ end