rambling-trie 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +1 -1
  3. data/lib/rambling/trie.rb +21 -9
  4. data/lib/rambling/trie/compressed_node.rb +112 -0
  5. data/lib/rambling/trie/compression.rb +13 -0
  6. data/lib/rambling/trie/compressor.rb +30 -31
  7. data/lib/rambling/trie/{root.rb → container.rb} +41 -38
  8. data/lib/rambling/trie/enumerable.rb +11 -7
  9. data/lib/rambling/trie/missing_node.rb +1 -1
  10. data/lib/rambling/trie/node.rb +25 -22
  11. data/lib/rambling/trie/plain_text_reader.rb +1 -1
  12. data/lib/rambling/trie/raw_node.rb +90 -0
  13. data/lib/rambling/trie/tasks/helpers/path.rb +13 -0
  14. data/lib/rambling/trie/tasks/helpers/time.rb +7 -0
  15. data/lib/rambling/trie/tasks/performance.rb +10 -91
  16. data/lib/rambling/trie/tasks/performance/all.rb +4 -0
  17. data/lib/rambling/trie/tasks/performance/benchmark.rb +172 -0
  18. data/lib/rambling/trie/tasks/performance/directory.rb +11 -0
  19. data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +132 -0
  20. data/lib/rambling/trie/tasks/performance/profile/memory.rb +116 -0
  21. data/lib/rambling/trie/version.rb +1 -1
  22. data/rambling-trie.gemspec +6 -4
  23. data/spec/integration/rambling/trie_spec.rb +63 -9
  24. data/spec/lib/rambling/trie/compressed_node_spec.rb +35 -0
  25. data/spec/lib/rambling/trie/compressor_spec.rb +31 -0
  26. data/spec/lib/rambling/trie/container_spec.rb +470 -0
  27. data/spec/lib/rambling/trie/enumerable_spec.rb +2 -2
  28. data/spec/lib/rambling/trie/inspector_spec.rb +21 -14
  29. data/spec/lib/rambling/trie/node_spec.rb +72 -209
  30. data/spec/lib/rambling/trie/raw_node_spec.rb +377 -0
  31. data/spec/lib/rambling/trie_spec.rb +46 -25
  32. metadata +57 -16
  33. data/lib/rambling/trie/branches.rb +0 -149
  34. data/spec/lib/rambling/trie/branches_spec.rb +0 -52
  35. data/spec/lib/rambling/trie/root_spec.rb +0 -376
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c7aefe84824bbbfa09b239caca901b1e8022e33a
4
- data.tar.gz: e7e22e2ac50d2f8be672063efb5bcae8342369b7
3
+ metadata.gz: 3bb17c6b1df5c473eb696bc484ff0e3b46517867
4
+ data.tar.gz: 0b6daafa67a20409389b80aaf9ac5d1bc6694cd2
5
5
  SHA512:
6
- metadata.gz: 6d15a5de1d200e156fae33ca82e25b5be1d43d4916be1e88761ce0142c6c7002985164c2e2b95bf1c95f0e04b108f9f11671d3e50028e397d5f9520f996a8efe
7
- data.tar.gz: ea776aaf5b0a0ecefc1ba7626a5d2676b04499f268e5acf39c7bd8b45f17160acd170358d7b2b52eb8c6022431696fc04dcaa2ab8f016cb4a2b31224ec824191
6
+ metadata.gz: ecaecb91b920793208c878134881f08a7718d9f9771857a49cb15f8059e35273e29f81ce21a497de37566cfae8273a13ff611b43f0d049694b0207c65efa35bd
7
+ data.tar.gz: 810db517696450d411d75ad59df68e410e0c1109ad1133bc7aa95363609520f8db51031a72e1b3392475524f8d10b28ca1e827bd5dbb9c3fba0aaa2417eeef41
data/Rakefile CHANGED
@@ -3,6 +3,6 @@ require 'rspec/core/rake_task'
3
3
  require 'rambling-trie'
4
4
  require 'rambling/trie/tasks/performance'
5
5
 
6
- RSpec::Core::RakeTask.new(:spec)
6
+ RSpec::Core::RakeTask.new :spec
7
7
 
8
8
  task default: :spec
data/lib/rambling/trie.rb CHANGED
@@ -1,8 +1,7 @@
1
1
  require 'forwardable'
2
2
  %w{
3
- branches compressor enumerable inspector
4
- invalid_operation node missing_node
5
- plain_text_reader root version
3
+ compression compressor inspector container enumerable invalid_operation
4
+ plain_text_reader node missing_node compressed_node raw_node version
6
5
  }.each do |file|
7
6
  require File.join('rambling', 'trie', file)
8
7
  end
@@ -14,14 +13,27 @@ module Rambling
14
13
  class << self
15
14
  # Creates a new Trie. Entry point for the Rambling::Trie API.
16
15
  # @param [String, nil] filepath the file to load the words from.
17
- # @return [Root] the trie just created.
18
- # @yield [Root] the trie just created.
19
- def create filepath = nil, reader = PlainTextReader.new
20
- Root.new do |root|
21
- reader.each_word(filepath) { |word| root << word } if filepath
22
- yield root if block_given?
16
+ # @return [Container] the trie just created.
17
+ # @yield [Container] the trie just created.
18
+ def create filepath = nil, reader = nil
19
+ reader ||= default_reader
20
+
21
+ Rambling::Trie::Container.new do |container|
22
+ if filepath
23
+ reader.each_word filepath do |word|
24
+ container << word
25
+ end
26
+ end
27
+
28
+ yield container if block_given?
23
29
  end
24
30
  end
31
+
32
+ private
33
+
34
+ def default_reader
35
+ Rambling::Trie::PlainTextReader.new
36
+ end
25
37
  end
26
38
  end
27
39
  end
@@ -0,0 +1,112 @@
1
+ module Rambling
2
+ module Trie
3
+ # A representation of a node in a compressed Trie data structure.
4
+ class CompressedNode < Rambling::Trie::Node
5
+ # Always raises [Rambling::Trie::InvalidOperation] when trying to add a
6
+ # branch to the current trie node based on the word
7
+ # @param [String] word the word to add the branch from.
8
+ # @raise [InvalidOperation] if the trie is already compressed.
9
+ def add word
10
+ raise Rambling::Trie::InvalidOperation, 'Cannot add branch to compressed trie'
11
+ end
12
+
13
+ # Checks if a path for a set of characters exists in the trie.
14
+ # @param [Array] chars the characters to look for in the trie.
15
+ # @return [Boolean] `true` if the characters are found, `false` otherwise.
16
+ def partial_word? chars
17
+ chars.empty? || has_partial_word?(chars)
18
+ end
19
+
20
+ # Checks if a path for a set of characters represents a word in the trie.
21
+ # @param [Array] chars the characters to look for in the trie.
22
+ # @return [Boolean] `true` if the characters are found and form a word,
23
+ # `false` otherwise.
24
+ def word? chars
25
+ if chars.empty?
26
+ terminal?
27
+ else
28
+ has_word? chars
29
+ end
30
+ end
31
+
32
+ # Returns all words that start with the specified characters.
33
+ # @param [Array] chars the characters to look for in the trie.
34
+ # @return [Array] all the words contained in the trie that start with the specified characters.
35
+ def scan chars
36
+ closest_node(chars).to_a
37
+ end
38
+
39
+ # Always returns `true` for a compressed node.
40
+ # @return [Boolean] always `true` for a compressed node.
41
+ def compressed?
42
+ true
43
+ end
44
+
45
+ protected
46
+
47
+ def closest_node chars
48
+ if chars.empty?
49
+ self
50
+ else
51
+ current_length = 0
52
+ current_key, current_key_string = current_key chars.slice!(0)
53
+
54
+ begin
55
+ current_length += 1
56
+
57
+ if current_key_string.length == current_length || chars.empty?
58
+ return children_tree[current_key].closest_node chars
59
+ end
60
+ end while current_key_string[current_length] == chars.slice!(0)
61
+
62
+ Rambling::Trie::MissingNode.new
63
+ end
64
+ end
65
+
66
+ private
67
+
68
+ def has_partial_word? chars
69
+ current_length = 0
70
+ current_key, current_key_string = current_key chars.slice!(0)
71
+
72
+ begin
73
+ current_length += 1
74
+
75
+ if current_key_string.length == current_length || chars.empty?
76
+ return children_tree[current_key].partial_word? chars
77
+ end
78
+ end while current_key_string[current_length] == chars.slice!(0)
79
+
80
+ false
81
+ end
82
+
83
+ def has_word? chars
84
+ current_key_string = ''
85
+
86
+ while !chars.empty?
87
+ current_key_string << chars.slice!(0)
88
+ current_key = current_key_string.to_sym
89
+ child = children_tree[current_key]
90
+ return child.word? chars if child
91
+ end
92
+
93
+ false
94
+ end
95
+
96
+ def current_key letter
97
+ current_key_string = current_key = ''
98
+
99
+ children_tree.keys.each do |key|
100
+ key_string = key.to_s
101
+ if key_string.start_with? letter
102
+ current_key = key
103
+ current_key_string = key_string
104
+ break
105
+ end
106
+ end
107
+
108
+ [current_key, current_key_string]
109
+ end
110
+ end
111
+ end
112
+ end
@@ -0,0 +1,13 @@
1
+ module Rambling
2
+ module Trie
3
+ # Provides the compressing behavior for the Trie data structure.
4
+ module Compression
5
+ # Indicates if the current [Rambling::Trie::Node] can be compressed.
6
+ # @return [Boolean] `true` for non-terminal nodes with one child,
7
+ # `false` otherwise.
8
+ def compressable?
9
+ !(root? || terminal?) && children_tree.size == 1
10
+ end
11
+ end
12
+ end
13
+ end
@@ -1,47 +1,46 @@
1
1
  module Rambling
2
2
  module Trie
3
- # Provides the compressing behavior for the Trie data structure.
4
- module Compressor
5
- # Flag for compressed tries.
6
- # @return [Boolean] `true` for compressed tries, `false` otherwise.
7
- def compressed?
8
- parent && parent.compressed?
3
+ # Responsible for the compression process of a Trie data structure.
4
+ class Compressor
5
+ # Compresses a node from a Trie data structure.
6
+ # @param [RawNode] node the node to compress
7
+ # @return [CompressedNode] node the compressed version of the node
8
+ def compress node, parent = nil
9
+ if node.compressable?
10
+ merge_node_with_compressed_child node, parent
11
+ else
12
+ copy_node_and_compress_children node, parent
13
+ end
9
14
  end
10
15
 
11
- # Compress the current node using redundant node elimination.
12
- # @return [Root, Node] the compressed node.
13
- def compress_tree!
14
- if compressable?
15
- merge_with! children.first
16
- compress_tree!
17
- end
16
+ private
18
17
 
19
- children.each &:compress_tree!
18
+ def merge_node_with_compressed_child node, parent
19
+ compressed_child = compress node.children.first
20
20
 
21
- self
22
- end
21
+ new_node = Rambling::Trie::CompressedNode.new parent
22
+ new_node.letter = node.letter.to_s << compressed_child.letter.to_s
23
+ new_node.terminal! if compressed_child.terminal?
24
+ new_node.children_tree = compressed_child.children_tree
23
25
 
24
- private
26
+ new_node.children.each do |child|
27
+ child.parent = new_node
28
+ end
25
29
 
26
- def compressable?
27
- !(root? || terminal?) && children_tree.size == 1
30
+ new_node
28
31
  end
29
32
 
30
- def merge_with! child
31
- delete_old_key_on_parent!
32
- redefine_self! child
33
+ def copy_node_and_compress_children node, parent
34
+ new_node = Rambling::Trie::CompressedNode.new parent
33
35
 
34
- children.each { |node| node.parent = self }
35
- end
36
+ new_node.letter = node.letter
37
+ new_node.terminal! if node.terminal?
36
38
 
37
- def delete_old_key_on_parent!
38
- parent.delete letter if parent
39
- end
39
+ node.children.map do |child|
40
+ compress child, new_node
41
+ end
40
42
 
41
- def redefine_self! merged_node
42
- self.letter = letter.to_s << merged_node.letter.to_s
43
- self.children_tree = merged_node.children_tree
44
- self.terminal = merged_node.terminal?
43
+ new_node
45
44
  end
46
45
  end
47
46
  end
@@ -1,12 +1,27 @@
1
1
  module Rambling
2
2
  module Trie
3
- # A representation of the root node in the Trie data structure.
4
- class Root < Node
3
+ # Wrapper on top of Trie data structure.
4
+ class Container
5
+ extend ::Forwardable
6
+
7
+ include ::Enumerable
8
+
9
+ delegate [
10
+ :each,
11
+ :compressed?,
12
+ :[],
13
+ :letter,
14
+ :inspect
15
+ ] => :root
16
+
5
17
  # Creates a new Trie.
6
- # @yield [Root] the trie just created.
7
- def initialize
8
- super
9
- self.compressed = false
18
+ # @param [Node] root the root node for the trie
19
+ # @param [Compressor] compressor responsible for compressing the trie
20
+ # @yield [Container] the trie just created.
21
+ def initialize root = nil, compressor = nil
22
+ @root = root || default_root
23
+ @compressor = compressor || default_compressor
24
+
10
25
  yield self if block_given?
11
26
  end
12
27
 
@@ -14,71 +29,59 @@ module Rambling
14
29
  # @param [String] word the word to add the branch from.
15
30
  # @return [Node] the just added branch's root node.
16
31
  # @raise [InvalidOperation] if the trie is already compressed.
17
- # @see Branches#add
32
+ # @see RawNode#add
33
+ # @see CompressedNode#add
18
34
  # @note Avoids clearing the contents of the word variable.
19
35
  def add word
20
- super word.clone
36
+ root.add word.clone
21
37
  end
22
38
 
23
- # Compresses the existing tree using redundant node elimination. Flags the trie as compressed.
24
- # @return [Root] self
39
+ # Compresses the existing tree using redundant node elimination. Flags
40
+ # the trie as compressed.
41
+ # @return [Container] self
42
+ # @note Avoids compressing again if the trie has already been compressed.
25
43
  def compress!
26
- self.compressed = compressed? || !!compress_tree!
44
+ self.root = compressor.compress root unless root.compressed?
27
45
  self
28
46
  end
29
47
 
30
- # Flag for compressed tries. Overrides {Compressor#compressed?}.
31
- # @return [Boolean] `true` for compressed tries, `false` otherwise.
32
- def compressed?
33
- !!compressed
34
- end
35
-
36
48
  # Checks if a path for a word or partial word exists in the trie.
37
49
  # @param [String] word the word or partial word to look for in the trie.
38
50
  # @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
39
51
  def partial_word? word = ''
40
- is? :partial_word, word
41
- end
42
-
43
- alias_method :match?, :partial_word?
44
-
45
- # If the current node is the root node.
46
- # @return [Boolean] `true`
47
- def root?
48
- true
52
+ root.partial_word? word.chars.to_a
49
53
  end
50
54
 
51
55
  # Checks if a whole word exists in the trie.
52
56
  # @param [String] word the word to look for in the trie.
53
57
  # @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
54
58
  def word? word = ''
55
- is? :word, word
59
+ root.word? word.chars.to_a
56
60
  end
57
61
 
58
- alias_method :include?, :word?
59
-
60
62
  # Returns all words that start with the specified characters.
61
63
  # @param [String] word the word to look for in the trie.
62
64
  # @return [Array] all the words contained in the trie that start with the specified characters.
63
65
  def scan word = ''
64
- closest_node(word).to_a
66
+ root.scan(word.chars).to_a
65
67
  end
66
68
 
69
+ alias_method :include?, :word?
70
+ alias_method :match?, :partial_word?
67
71
  alias_method :words, :scan
72
+ alias_method :<<, :add
68
73
 
69
74
  private
70
75
 
71
- attr_accessor :compressed
76
+ attr_reader :compressor
77
+ attr_accessor :root
72
78
 
73
- def is? method, word
74
- method = compressed? ? "#{method}_when_compressed?" : "#{method}_when_uncompressed?"
75
- send method, word.chars.to_a
79
+ def default_root
80
+ Rambling::Trie::RawNode.new
76
81
  end
77
82
 
78
- def closest_node word
79
- method = compressed? ? :closest_node_when_compressed : :closest_node_when_uncompressed
80
-
81
- send method, word.chars.to_a
83
+ def default_compressor
84
+ Rambling::Trie::Compressor.new
82
85
  end
83
86
  end
84
87
  end
@@ -6,14 +6,18 @@ module Rambling
6
6
 
7
7
  alias_method :size, :count
8
8
 
9
- # Calls block once for each of the words contained in the trie. If no block given, an Enumerator is returned.
10
- def each &block
11
- enumerator = Enumerator.new do |words|
12
- words << as_word if terminal?
13
- children.each { |child| child.each { |word| words << word } }
14
- end
9
+ # Iterates over the words contained in the trie.
10
+ # @yield [String] the words contained in this trie node.
11
+ def each
12
+ return enum_for :each unless block_given?
13
+
14
+ yield as_word if terminal?
15
15
 
16
- block.nil? ? enumerator : enumerator.each(&block)
16
+ children.each do |child|
17
+ child.each do |word|
18
+ yield word
19
+ end
20
+ end
17
21
  end
18
22
  end
19
23
  end
@@ -1,7 +1,7 @@
1
1
  module Rambling
2
2
  module Trie
3
3
  # A representation of a missing node in the Trie data structure.
4
- class MissingNode < Node
4
+ class MissingNode < Rambling::Trie::Node
5
5
  end
6
6
  end
7
7
  end
@@ -2,14 +2,18 @@ module Rambling
2
2
  module Trie
3
3
  # A representation of a node in the Trie data structure.
4
4
  class Node
5
- extend Forwardable
5
+ extend ::Forwardable
6
6
 
7
- delegate [:[], :[]=, :delete, :has_key?] => :children_tree
7
+ delegate [
8
+ :[],
9
+ :[]=,
10
+ :delete,
11
+ :has_key?
12
+ ] => :children_tree
8
13
 
9
- include Compressor
10
- include Branches
11
- include Enumerable
12
- include Inspector
14
+ include Rambling::Trie::Compression
15
+ include Rambling::Trie::Enumerable
16
+ include Rambling::Trie::Inspector
13
17
 
14
18
  # Letter or letters corresponding to this node.
15
19
  # @return [Symbol, nil] the corresponding letter(s) or nil.
@@ -17,24 +21,17 @@ module Rambling
17
21
 
18
22
  # Children nodes.
19
23
  # @return [Hash] the children_tree hash, consisting of :letter => node.
20
- attr_reader :children_tree
24
+ attr_accessor :children_tree
21
25
 
22
26
  # Parent node.
23
27
  # @return [Node, nil] the parent node or nil for the root element.
24
28
  attr_accessor :parent
25
29
 
26
30
  # Creates a new Node.
27
- # @param [String, nil] word the word from which to create this Node and his branch.
28
31
  # @param [Node, nil] parent the parent of this node.
29
- def initialize word = nil, parent = nil
32
+ def initialize parent = nil
30
33
  self.parent = parent
31
34
  self.children_tree = {}
32
-
33
- unless word.nil? || word.empty?
34
- self.letter = word.slice! 0
35
- self.terminal = word.empty?
36
- self << word
37
- end
38
35
  end
39
36
 
40
37
  # String representation of the current node, if it is a terminal node.
@@ -52,9 +49,9 @@ module Rambling
52
49
  end
53
50
 
54
51
  # If the current node is the root node.
55
- # @return [Boolean] `false`
52
+ # @return [Boolean] `true` only if the node does not have a parent
56
53
  def root?
57
- false
54
+ !parent
58
55
  end
59
56
 
60
57
  # Flag for terminal nodes.
@@ -63,23 +60,29 @@ module Rambling
63
60
  !!terminal
64
61
  end
65
62
 
63
+ # Force [Node] to be `terminal`
64
+ # @return [Node] the modified node.
65
+ def terminal!
66
+ self.terminal = true
67
+ self
68
+ end
69
+
66
70
  # String representation of the current node.
67
71
  # @return [String] the string representation of the current node.
68
72
  def to_s
69
73
  parent.to_s << letter.to_s
70
74
  end
71
75
 
72
- protected
73
-
74
- attr_writer :children_tree
75
- attr_accessor :terminal
76
-
77
76
  def letter= new_letter
78
77
  if new_letter
79
78
  @letter = new_letter.to_sym
80
79
  parent[letter] = self if parent
81
80
  end
82
81
  end
82
+
83
+ private
84
+
85
+ attr_accessor :terminal
83
86
  end
84
87
  end
85
88
  end