rambling-trie 0.9.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/LICENSE +1 -1
  4. data/README.md +133 -26
  5. data/Rakefile +1 -2
  6. data/lib/rambling/trie.rb +53 -9
  7. data/lib/rambling/trie/comparable.rb +16 -0
  8. data/lib/rambling/trie/compressable.rb +14 -0
  9. data/lib/rambling/trie/compressed_node.rb +38 -14
  10. data/lib/rambling/trie/compressor.rb +14 -10
  11. data/lib/rambling/trie/configuration.rb +11 -0
  12. data/lib/rambling/trie/configuration/properties.rb +66 -0
  13. data/lib/rambling/trie/configuration/provider_collection.rb +101 -0
  14. data/lib/rambling/trie/container.rb +57 -17
  15. data/lib/rambling/trie/enumerable.rb +1 -1
  16. data/lib/rambling/trie/forwardable.rb +9 -4
  17. data/lib/rambling/trie/inspectable.rb +37 -0
  18. data/lib/rambling/trie/invalid_operation.rb +3 -2
  19. data/lib/rambling/trie/missing_node.rb +2 -1
  20. data/lib/rambling/trie/node.rb +40 -30
  21. data/lib/rambling/trie/raw_node.rb +29 -13
  22. data/lib/rambling/trie/readers.rb +11 -0
  23. data/lib/rambling/trie/readers/plain_text.rb +26 -0
  24. data/lib/rambling/trie/serializers.rb +11 -0
  25. data/lib/rambling/trie/serializers/file.rb +25 -0
  26. data/lib/rambling/trie/serializers/marshal.rb +38 -0
  27. data/lib/rambling/trie/serializers/yaml.rb +39 -0
  28. data/lib/rambling/trie/serializers/zip.rb +67 -0
  29. data/lib/rambling/trie/stringifyable.rb +20 -0
  30. data/lib/rambling/trie/version.rb +1 -1
  31. data/rambling-trie.gemspec +2 -2
  32. data/spec/integration/rambling/trie_spec.rb +45 -49
  33. data/spec/lib/rambling/trie/comparable_spec.rb +104 -0
  34. data/spec/lib/rambling/trie/compressed_node_spec.rb +44 -0
  35. data/spec/lib/rambling/trie/configuration/properties_spec.rb +49 -0
  36. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +165 -0
  37. data/spec/lib/rambling/trie/container_spec.rb +127 -38
  38. data/spec/lib/rambling/trie/{inspector_spec.rb → inspectable_spec.rb} +7 -5
  39. data/spec/lib/rambling/trie/raw_node_spec.rb +22 -41
  40. data/spec/lib/rambling/trie/readers/plain_text_spec.rb +14 -0
  41. data/spec/lib/rambling/trie/serializers/file_spec.rb +11 -0
  42. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +14 -0
  43. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +14 -0
  44. data/spec/lib/rambling/trie/serializers/zip_spec.rb +30 -0
  45. data/spec/lib/rambling/trie/stringifyable_spec.rb +82 -0
  46. data/spec/lib/rambling/trie_spec.rb +120 -7
  47. data/spec/spec_helper.rb +7 -1
  48. data/spec/support/config.rb +5 -0
  49. data/spec/support/shared_examples/a_compressable_trie.rb +26 -0
  50. data/spec/support/shared_examples/a_serializable_trie.rb +26 -0
  51. data/spec/support/shared_examples/a_serializer.rb +29 -0
  52. data/spec/support/shared_examples/a_trie_data_structure.rb +29 -0
  53. data/spec/tmp/.gitkeep +0 -0
  54. metadata +51 -24
  55. data/lib/rambling/trie/compression.rb +0 -13
  56. data/lib/rambling/trie/inspector.rb +0 -11
  57. data/lib/rambling/trie/plain_text_reader.rb +0 -23
  58. data/lib/rambling/trie/tasks/gem.rb +0 -17
  59. data/lib/rambling/trie/tasks/helpers/path.rb +0 -17
  60. data/lib/rambling/trie/tasks/helpers/performance_report.rb +0 -17
  61. data/lib/rambling/trie/tasks/helpers/time.rb +0 -7
  62. data/lib/rambling/trie/tasks/performance.rb +0 -15
  63. data/lib/rambling/trie/tasks/performance/all.rb +0 -17
  64. data/lib/rambling/trie/tasks/performance/benchmark.rb +0 -201
  65. data/lib/rambling/trie/tasks/performance/directory.rb +0 -11
  66. data/lib/rambling/trie/tasks/performance/flamegraph.rb +0 -119
  67. data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +0 -147
  68. data/lib/rambling/trie/tasks/performance/profile/memory.rb +0 -143
  69. data/spec/lib/rambling/trie/plain_text_reader_spec.rb +0 -18
@@ -1,6 +1,7 @@
1
1
  module Rambling
2
2
  module Trie
3
- # A representation of a missing node in the Trie data structure.
3
+ # A representation of a missing node in the trie data structure. Returned
4
+ # when a node is not found.
4
5
  class MissingNode < Rambling::Trie::Node
5
6
  end
6
7
  end
@@ -1,11 +1,13 @@
1
1
  module Rambling
2
2
  module Trie
3
- # A representation of a node in the Trie data structure.
3
+ # A representation of a node in the trie data structure.
4
4
  class Node
5
5
  extend Rambling::Trie::Forwardable
6
- include Rambling::Trie::Compression
6
+ include Rambling::Trie::Compressable
7
7
  include Rambling::Trie::Enumerable
8
- include Rambling::Trie::Inspector
8
+ include Rambling::Trie::Comparable
9
+ include Rambling::Trie::Stringifyable
10
+ include Rambling::Trie::Inspectable
9
11
 
10
12
  delegate [
11
13
  :[],
@@ -14,64 +16,58 @@ module Rambling
14
16
  :has_key?
15
17
  ] => :children_tree
16
18
 
17
- # Letter or letters corresponding to this node.
18
- # @return [Symbol, nil] the corresponding letter(s) or nil.
19
+ # @overload letter
20
+ # Letter(s) corresponding to the current node.
21
+ # @overload letter=(letter)
22
+ # Sets the letter(s) corresponding to the current node. Ensures the
23
+ # {Node#letter #letter} in the {Node#parent #parent}'s
24
+ # {Node#children_tree #children_tree} is updated.
25
+ # @param [String, Symbol, nil] letter the new letter value.
26
+ # @return [Symbol, nil] the corresponding letter(s).
19
27
  attr_reader :letter
20
28
 
21
- # Children nodes.
22
- # @return [Hash] the children_tree hash, consisting of :letter => node.
29
+ # Children nodes tree.
30
+ # @return [Hash] the children_tree hash, consisting of `:letter => node`.
23
31
  attr_accessor :children_tree
24
32
 
25
33
  # Parent node.
26
- # @return [Node, nil] the parent node or nil for the root element.
34
+ # @return [Node, nil] the parent of the current node.
27
35
  attr_accessor :parent
28
36
 
29
- # Creates a new Node.
30
- # @param [Node, nil] parent the parent of this node.
37
+ # Creates a new node.
38
+ # @param [Node, nil] parent the parent of the current node.
31
39
  def initialize parent = nil
32
40
  self.parent = parent
33
41
  self.children_tree = {}
34
42
  end
35
43
 
36
- # String representation of the current node, if it is a terminal node.
37
- # @return [String] the string representation of the current node.
38
- # @raise [InvalidOperation] if node is not terminal or is root.
39
- def as_word
40
- raise InvalidOperation, 'Cannot represent branch as a word' if letter && !terminal?
41
- to_s
42
- end
43
-
44
- # Children nodes of the current node.
45
- # @return [Array] the array of children nodes contained in the current node.
44
+ # Children nodes.
45
+ # @return [Array<Node>] the array of children nodes contained in the
46
+ # current node.
46
47
  def children
47
48
  children_tree.values
48
49
  end
49
50
 
50
- # If the current node is the root node.
51
- # @return [Boolean] `true` only if the node does not have a parent
51
+ # Indicates if the current node is the root node.
52
+ # @return [Boolean] `true` if the node does not have a parent, `false`
53
+ # otherwise.
52
54
  def root?
53
55
  !parent
54
56
  end
55
57
 
56
- # Flag for terminal nodes.
58
+ # Indicates if a {Node Node} is terminal or not.
57
59
  # @return [Boolean] `true` for terminal nodes, `false` otherwise.
58
60
  def terminal?
59
61
  !!terminal
60
62
  end
61
63
 
62
- # Force [Node] to be `terminal`
64
+ # Mark {Node Node} as terminal.
63
65
  # @return [Node] the modified node.
64
66
  def terminal!
65
67
  self.terminal = true
66
68
  self
67
69
  end
68
70
 
69
- # String representation of the current node.
70
- # @return [String] the string representation of the current node.
71
- def to_s
72
- parent.to_s << letter.to_s
73
- end
74
-
75
71
  def letter= letter
76
72
  if letter
77
73
  @letter = letter.to_sym
@@ -79,6 +75,20 @@ module Rambling
79
75
  end
80
76
  end
81
77
 
78
+ # Returns all words that match a prefix of any length within chars.
79
+ # @param [String] chars the chars to base the prefix on.
80
+ # @return [Enumerator<String>] all the words that match a prefix given by
81
+ # chars.
82
+ # @yield [String] each word found.
83
+ def match_prefix chars
84
+ return enum_for :match_prefix, chars unless block_given?
85
+
86
+ yield as_word if terminal?
87
+ children_match_prefix chars do |word|
88
+ yield word
89
+ end
90
+ end
91
+
82
92
  private
83
93
 
84
94
  attr_accessor :terminal
@@ -1,10 +1,10 @@
1
1
  module Rambling
2
2
  module Trie
3
- # A representation of a node in an uncompressed Trie data structure.
3
+ # A representation of a node in an uncompressed trie data structure.
4
4
  class RawNode < Rambling::Trie::Node
5
- # Adds a branch to the current trie node based on the word
6
- # @param [String] word the word to add the branch from.
7
- # @return [Rambling::Trie::RawNode] the just added branch's root node.
5
+ # Adds a word to the current raw (uncompressed) trie node.
6
+ # @param [String] word the word to add to the trie.
7
+ # @return [RawNode] the added/modified node based on the word added.
8
8
  # @note This method clears the contents of the word variable.
9
9
  def add word
10
10
  if word.empty?
@@ -14,9 +14,10 @@ module Rambling
14
14
  end
15
15
  end
16
16
 
17
- # Checks if a path for set of characters exists in the trie.
18
- # @param [Array] chars the characters to look for in the trie.
19
- # @return [Boolean] `true` if the characters are found, `false` otherwise.
17
+ # Checks if a path for a set of characters exists in the trie.
18
+ # @param [Array<String>] chars the characters to look for in the trie.
19
+ # @return [Boolean] `true` if the characters are found, `false`
20
+ # otherwise.
20
21
  def partial_word? chars = []
21
22
  if chars.empty?
22
23
  true
@@ -28,9 +29,9 @@ module Rambling
28
29
  end
29
30
 
30
31
  # Checks if a path for a set of characters represents a word in the trie.
31
- # @param [Array] chars the characters to look for in the trie.
32
+ # @param [Array<String>] chars the characters to look for in the trie.
32
33
  # @return [Boolean] `true` if the characters are found and form a word,
33
- # `false` otherwise.
34
+ # `false` otherwise.
34
35
  def word? chars = []
35
36
  if chars.empty?
36
37
  terminal?
@@ -41,15 +42,16 @@ module Rambling
41
42
  end
42
43
  end
43
44
 
44
- # Returns all words that start with the specified characters.
45
- # @param [Array] chars the characters to look for in the trie.
46
- # @return [Array] all the words contained in the trie that start with the specified characters.
45
+ # Returns the node that starts with the specified characters.
46
+ # @param [Array<String>] chars the characters to look for in the trie.
47
+ # @return [Node] the node that matches the specified characters.
48
+ # {MissingNode MissingNode} when not found.
47
49
  def scan chars
48
50
  chars.empty? ? self : closest_node(chars)
49
51
  end
50
52
 
51
53
  # Always return `false` for a raw (uncompressed) node.
52
- # @return [Boolean] always false for a raw (uncompressed) node.
54
+ # @return [Boolean] always `false` for a raw (uncompressed) node.
53
55
  def compressed?
54
56
  false
55
57
  end
@@ -75,6 +77,20 @@ module Rambling
75
77
 
76
78
  child ? child.scan(chars) : Rambling::Trie::MissingNode.new
77
79
  end
80
+
81
+ def children_match_prefix chars
82
+ return enum_for :children_match_prefix, chars unless block_given?
83
+
84
+ if !chars.empty?
85
+ letter = chars.slice!(0).to_sym
86
+ child = children_tree[letter]
87
+ if child
88
+ child.match_prefix chars do |word|
89
+ yield word
90
+ end
91
+ end
92
+ end
93
+ end
78
94
  end
79
95
  end
80
96
  end
@@ -0,0 +1,11 @@
1
+ %w{plain_text}.each do |file|
2
+ require File.join('rambling', 'trie', 'readers', file)
3
+ end
4
+
5
+ module Rambling
6
+ module Trie
7
+ # Namespace for all readers.
8
+ module Readers
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,26 @@
1
+ module Rambling
2
+ module Trie
3
+ module Readers
4
+ # File reader for .txt files.
5
+ class PlainText
6
+ # Yields each word read from a .txt file.
7
+ # @param [String] filepath the full path of the file to load the words
8
+ # from.
9
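+ # @yield [String, nil] each line read from the file with its line terminator removed; `nil` when a line has no terminator (`chomp!` returns `nil` if nothing was removed).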
10
+ def each_word filepath
11
+ each_line(filepath) { |line| yield line.chomp! }
12
+ end
13
+
14
+ private
15
+
16
+ def each_line filepath
17
+ open(filepath) { |file| file.each_line { |line| yield line } }
18
+ end
19
+
20
+ def open filepath
21
+ File.open(filepath) { |file| yield file }
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,11 @@
1
+ %w{file marshal yaml zip}.each do |file|
2
+ require File.join('rambling', 'trie', 'serializers', file)
3
+ end
4
+
5
+ module Rambling
6
+ module Trie
7
+ # Namespace for all serializers.
8
+ module Serializers
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,25 @@
1
+ module Rambling
2
+ module Trie
3
+ module Serializers
4
+ # Basic file serializer. Dumps/loads string contents from files.
5
+ class File
6
+ # Loads contents from a specified filepath.
7
+ # @param [String] filepath the filepath to load contents from.
8
+ # @return [String] all contents of the file.
9
+ def load filepath
10
+ ::File.read filepath
11
+ end
12
+
13
+ # Dumps contents into a specified filepath.
14
+ # @param [String] contents the contents to dump.
15
+ # @param [String] filepath the filepath to dump the contents to.
16
+ # @return [Numeric] number of bytes written to disk.
17
+ def dump contents, filepath
18
+ ::File.open filepath, 'w+' do |f|
19
+ f.write contents
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,38 @@
1
+ module Rambling
2
+ module Trie
3
+ module Serializers
4
+ # Serializer for Ruby marshal format (.marshal) files.
5
+ class Marshal
6
+ # Creates a new Marshal serializer.
7
+ # @param [Serializer] serializer the serializer responsible for writing
8
+ # to and reading from disk.
9
+ def initialize serializer = nil
10
+ @serializer = serializer || Rambling::Trie::Serializers::File.new
11
+ end
12
+
13
+ # Loads marshaled object from contents in filepath and deserializes it
14
+ # into a {Node Node}.
15
+ # @param [String] filepath the full path of the file to load the
16
+ # marshaled object from.
17
+ # @return [Node] The deserialized {Node Node}.
18
+ def load filepath
19
+ ::Marshal.load serializer.load filepath
20
+ end
21
+
22
+ # Serializes a {Node Node} and dumps it as a marshaled object into
23
+ # filepath.
24
+ # @param [Node] node the node to serialize
25
+ # @param [String] filepath the full path of the file to dump the
26
+ # marshaled object into.
27
+ # @return [Numeric] number of bytes written to disk.
28
+ def dump node, filepath
29
+ serializer.dump ::Marshal.dump(node), filepath
30
+ end
31
+
32
+ private
33
+
34
+ attr_reader :serializer
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,39 @@
1
+ module Rambling
2
+ module Trie
3
+ module Serializers
4
+ # Serializer for Ruby YAML format (.yaml) files.
5
+ class Yaml
6
+ # Creates a new Yaml serializer.
7
+ # @param [Serializer] serializer the serializer responsible for writing
8
+ # to and reading from disk.
9
+ def initialize serializer = nil
10
+ @serializer = serializer || Rambling::Trie::Serializers::File.new
11
+ end
12
+
13
+ # Loads serialized object from YAML file in filepath and deserializes
14
+ # it into a {Node Node}.
15
+ # @param [String] filepath the full path of the file to load the
16
+ # serialized YAML object from.
17
+ # @return [Node] The deserialized {Node Node}.
18
+ def load filepath
19
+ require 'yaml'
20
+ ::YAML.load serializer.load filepath
21
+ end
22
+
23
+ # Serializes a {Node Node} and dumps it as a YAML object into filepath.
24
+ # @param [Node] node the node to serialize
25
+ # @param [String] filepath the full path of the file to dump the YAML
26
+ # object into.
27
+ # @return [Numeric] number of bytes written to disk.
28
+ def dump node, filepath
29
+ require 'yaml'
30
+ serializer.dump ::YAML.dump(node), filepath
31
+ end
32
+
33
+ private
34
+
35
+ attr_reader :serializer
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,67 @@
1
+ module Rambling
2
+ module Trie
3
+ module Serializers
4
+ # Zip file serializer. Dumps/loads contents from zip files. Automatically
5
+ # detects whether the zip file contains a `.marshal` or `.yml` file.
6
+ class Zip
7
+ extend Rambling::Trie::Forwardable
8
+
9
+ # Creates a new Zip serializer.
10
+ # @param [Properties] properties the configuration properties set up so
11
+ # far.
12
+ def initialize properties
13
+ @properties = properties
14
+ end
15
+
16
+ # Unzip contents from specified filepath and load in contents from
17
+ # unzipped files.
18
+ # @param [String] filepath the filepath to load contents from.
19
+ # @return [String] all contents of the unzipped loaded file.
20
+ def load filepath
21
+ require 'zip'
22
+
23
+ ::Zip::File.open filepath do |zip|
24
+ entry = zip.entries.first
25
+ entry_path = path entry.name
26
+ entry.extract entry_path
27
+
28
+ serializer = serializers.resolve entry.name
29
+ serializer.load entry_path
30
+ end
31
+ end
32
+
33
+ # Dumps contents and zips into a specified filepath.
34
+ # @param [String] contents the contents to dump.
35
+ # @param [String] filepath the filepath to dump the contents to.
36
+ # @return [Numeric] number of bytes written to disk.
37
+ def dump contents, filepath
38
+ require 'zip'
39
+
40
+ ::Zip::File.open filepath, ::Zip::File::CREATE do |zip|
41
+ filename = ::File.basename filepath, '.zip'
42
+
43
+ entry_path = path filename
44
+ serializer = serializers.resolve filename
45
+ serializer.dump contents, entry_path
46
+
47
+ zip.add filename, entry_path
48
+ end
49
+ end
50
+
51
+ private
52
+
53
+ attr_reader :properties
54
+
55
+ delegate [
56
+ :serializers,
57
+ :tmp_path
58
+ ] => :properties
59
+
60
+ def path filename
61
+ require 'securerandom'
62
+ ::File.join tmp_path, "#{SecureRandom.uuid}-#{filename}"
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,20 @@
1
+ module Rambling
2
+ module Trie
3
+ # Provides the String representation behavior for the trie data structure.
4
+ module Stringifyable
5
+ # String representation of the current node, if it is a terminal node.
6
+ # @return [String] the string representation of the current node.
7
+ # @raise [InvalidOperation] if the node has a letter and is not terminal.
8
+ def as_word
9
+ raise Rambling::Trie::InvalidOperation, 'Cannot represent branch as a word' if letter && !terminal?
10
+ to_s
11
+ end
12
+
13
+ # String representation of the current node.
14
+ # @return [String] the string representation of the current node.
15
+ def to_s
16
+ parent.to_s << letter.to_s
17
+ end
18
+ end
19
+ end
20
+ end