rambling-trie 0.8.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +1 -1
  3. data/lib/rambling/trie.rb +21 -9
  4. data/lib/rambling/trie/compressed_node.rb +112 -0
  5. data/lib/rambling/trie/compression.rb +13 -0
  6. data/lib/rambling/trie/compressor.rb +30 -31
  7. data/lib/rambling/trie/{root.rb → container.rb} +41 -38
  8. data/lib/rambling/trie/enumerable.rb +11 -7
  9. data/lib/rambling/trie/missing_node.rb +1 -1
  10. data/lib/rambling/trie/node.rb +25 -22
  11. data/lib/rambling/trie/plain_text_reader.rb +1 -1
  12. data/lib/rambling/trie/raw_node.rb +90 -0
  13. data/lib/rambling/trie/tasks/helpers/path.rb +13 -0
  14. data/lib/rambling/trie/tasks/helpers/time.rb +7 -0
  15. data/lib/rambling/trie/tasks/performance.rb +10 -91
  16. data/lib/rambling/trie/tasks/performance/all.rb +4 -0
  17. data/lib/rambling/trie/tasks/performance/benchmark.rb +172 -0
  18. data/lib/rambling/trie/tasks/performance/directory.rb +11 -0
  19. data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +132 -0
  20. data/lib/rambling/trie/tasks/performance/profile/memory.rb +116 -0
  21. data/lib/rambling/trie/version.rb +1 -1
  22. data/rambling-trie.gemspec +6 -4
  23. data/spec/integration/rambling/trie_spec.rb +63 -9
  24. data/spec/lib/rambling/trie/compressed_node_spec.rb +35 -0
  25. data/spec/lib/rambling/trie/compressor_spec.rb +31 -0
  26. data/spec/lib/rambling/trie/container_spec.rb +470 -0
  27. data/spec/lib/rambling/trie/enumerable_spec.rb +2 -2
  28. data/spec/lib/rambling/trie/inspector_spec.rb +21 -14
  29. data/spec/lib/rambling/trie/node_spec.rb +72 -209
  30. data/spec/lib/rambling/trie/raw_node_spec.rb +377 -0
  31. data/spec/lib/rambling/trie_spec.rb +46 -25
  32. metadata +57 -16
  33. data/lib/rambling/trie/branches.rb +0 -149
  34. data/spec/lib/rambling/trie/branches_spec.rb +0 -52
  35. data/spec/lib/rambling/trie/root_spec.rb +0 -376
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c7aefe84824bbbfa09b239caca901b1e8022e33a
4
- data.tar.gz: e7e22e2ac50d2f8be672063efb5bcae8342369b7
3
+ metadata.gz: 3bb17c6b1df5c473eb696bc484ff0e3b46517867
4
+ data.tar.gz: 0b6daafa67a20409389b80aaf9ac5d1bc6694cd2
5
5
  SHA512:
6
- metadata.gz: 6d15a5de1d200e156fae33ca82e25b5be1d43d4916be1e88761ce0142c6c7002985164c2e2b95bf1c95f0e04b108f9f11671d3e50028e397d5f9520f996a8efe
7
- data.tar.gz: ea776aaf5b0a0ecefc1ba7626a5d2676b04499f268e5acf39c7bd8b45f17160acd170358d7b2b52eb8c6022431696fc04dcaa2ab8f016cb4a2b31224ec824191
6
+ metadata.gz: ecaecb91b920793208c878134881f08a7718d9f9771857a49cb15f8059e35273e29f81ce21a497de37566cfae8273a13ff611b43f0d049694b0207c65efa35bd
7
+ data.tar.gz: 810db517696450d411d75ad59df68e410e0c1109ad1133bc7aa95363609520f8db51031a72e1b3392475524f8d10b28ca1e827bd5dbb9c3fba0aaa2417eeef41
data/Rakefile CHANGED
@@ -3,6 +3,6 @@ require 'rspec/core/rake_task'
3
3
  require 'rambling-trie'
4
4
  require 'rambling/trie/tasks/performance'
5
5
 
6
- RSpec::Core::RakeTask.new(:spec)
6
+ RSpec::Core::RakeTask.new :spec
7
7
 
8
8
  task default: :spec
data/lib/rambling/trie.rb CHANGED
@@ -1,8 +1,7 @@
1
1
  require 'forwardable'
2
2
  %w{
3
- branches compressor enumerable inspector
4
- invalid_operation node missing_node
5
- plain_text_reader root version
3
+ compression compressor inspector container enumerable invalid_operation
4
+ plain_text_reader node missing_node compressed_node raw_node version
6
5
  }.each do |file|
7
6
  require File.join('rambling', 'trie', file)
8
7
  end
@@ -14,14 +13,27 @@ module Rambling
14
13
  class << self
15
14
  # Creates a new Trie. Entry point for the Rambling::Trie API.
16
15
  # @param [String, nil] filepath the file to load the words from.
17
- # @return [Root] the trie just created.
18
- # @yield [Root] the trie just created.
19
- def create filepath = nil, reader = PlainTextReader.new
20
- Root.new do |root|
21
- reader.each_word(filepath) { |word| root << word } if filepath
22
- yield root if block_given?
16
+ # @return [Container] the trie just created.
17
+ # @yield [Container] the trie just created.
18
+ def create filepath = nil, reader = nil
19
+ reader ||= default_reader
20
+
21
+ Rambling::Trie::Container.new do |container|
22
+ if filepath
23
+ reader.each_word filepath do |word|
24
+ container << word
25
+ end
26
+ end
27
+
28
+ yield container if block_given?
23
29
  end
24
30
  end
31
+
32
+ private
33
+
34
+ def default_reader
35
+ Rambling::Trie::PlainTextReader.new
36
+ end
25
37
  end
26
38
  end
27
39
  end
@@ -0,0 +1,112 @@
1
+ module Rambling
2
+ module Trie
3
+ # A representation of a node in a compressed Trie data structure.
4
+ class CompressedNode < Rambling::Trie::Node
5
+ # Always raises [Rambling::Trie::InvalidOperation] when trying to add a
6
+ # branch to the current trie node based on the word.
7
+ # @param [String] word the word to add the branch from.
8
+ # @raise [InvalidOperation] if the trie is already compressed.
9
+ def add word
10
+ raise Rambling::Trie::InvalidOperation, 'Cannot add branch to compressed trie'
11
+ end
12
+
13
+ # Checks if a path for a set of characters exists in the trie.
14
+ # @param [Array] chars the characters to look for in the trie.
15
+ # @return [Boolean] `true` if the characters are found, `false` otherwise.
16
+ def partial_word? chars
17
+ chars.empty? || has_partial_word?(chars)
18
+ end
19
+
20
+ # Checks if a path for a set of characters represents a word in the trie.
21
+ # @param [Array] chars the characters to look for in the trie.
22
+ # @return [Boolean] `true` if the characters are found and form a word,
23
+ # `false` otherwise.
24
+ def word? chars
25
+ if chars.empty?
26
+ terminal?
27
+ else
28
+ has_word? chars
29
+ end
30
+ end
31
+
32
+ # Returns all words that start with the specified characters.
33
+ # @param [Array] chars the characters to look for in the trie.
34
+ # @return [Array] all the words contained in the trie that start with the specified characters.
35
+ def scan chars
36
+ closest_node(chars).to_a
37
+ end
38
+
39
+ # Always returns `true` for a compressed node.
40
+ # @return [Boolean] always `true` for a compressed node.
41
+ def compressed?
42
+ true
43
+ end
44
+
45
+ protected
46
+
47
+ def closest_node chars
48
+ if chars.empty?
49
+ self
50
+ else
51
+ current_length = 0
52
+ current_key, current_key_string = current_key chars.slice!(0)
53
+
54
+ begin
55
+ current_length += 1
56
+
57
+ if current_key_string.length == current_length || chars.empty?
58
+ return children_tree[current_key].closest_node chars
59
+ end
60
+ end while current_key_string[current_length] == chars.slice!(0)
61
+
62
+ Rambling::Trie::MissingNode.new
63
+ end
64
+ end
65
+
66
+ private
67
+
68
+ def has_partial_word? chars
69
+ current_length = 0
70
+ current_key, current_key_string = current_key chars.slice!(0)
71
+
72
+ begin
73
+ current_length += 1
74
+
75
+ if current_key_string.length == current_length || chars.empty?
76
+ return children_tree[current_key].partial_word? chars
77
+ end
78
+ end while current_key_string[current_length] == chars.slice!(0)
79
+
80
+ false
81
+ end
82
+
83
+ def has_word? chars
84
+ current_key_string = ''
85
+
86
+ while !chars.empty?
87
+ current_key_string << chars.slice!(0)
88
+ current_key = current_key_string.to_sym
89
+ child = children_tree[current_key]
90
+ return child.word? chars if child
91
+ end
92
+
93
+ false
94
+ end
95
+
96
+ def current_key letter
97
+ current_key_string = current_key = ''
98
+
99
+ children_tree.keys.each do |key|
100
+ key_string = key.to_s
101
+ if key_string.start_with? letter
102
+ current_key = key
103
+ current_key_string = key_string
104
+ break
105
+ end
106
+ end
107
+
108
+ [current_key, current_key_string]
109
+ end
110
+ end
111
+ end
112
+ end
@@ -0,0 +1,13 @@
1
+ module Rambling
2
+ module Trie
3
+ # Provides the compressing behavior for the Trie data structure.
4
+ module Compression
5
+ # Indicates if the current [Rambling::Trie::Node] can be compressed.
6
+ # @return [Boolean] `true` for non-terminal nodes with one child,
7
+ # `false` otherwise.
8
+ def compressable?
9
+ !(root? || terminal?) && children_tree.size == 1
10
+ end
11
+ end
12
+ end
13
+ end
@@ -1,47 +1,46 @@
1
1
  module Rambling
2
2
  module Trie
3
- # Provides the compressing behavior for the Trie data structure.
4
- module Compressor
5
- # Flag for compressed tries.
6
- # @return [Boolean] `true` for compressed tries, `false` otherwise.
7
- def compressed?
8
- parent && parent.compressed?
3
+ # Responsible for the compression process of a Trie data structure.
4
+ class Compressor
5
+ # Compresses a node from a Trie data structure.
6
+ # @param [RawNode] node the node to compress
7
+ # @return [CompressedNode] the compressed version of the node
8
+ def compress node, parent = nil
9
+ if node.compressable?
10
+ merge_node_with_compressed_child node, parent
11
+ else
12
+ copy_node_and_compress_children node, parent
13
+ end
9
14
  end
10
15
 
11
- # Compress the current node using redundant node elimination.
12
- # @return [Root, Node] the compressed node.
13
- def compress_tree!
14
- if compressable?
15
- merge_with! children.first
16
- compress_tree!
17
- end
16
+ private
18
17
 
19
- children.each &:compress_tree!
18
+ def merge_node_with_compressed_child node, parent
19
+ compressed_child = compress node.children.first
20
20
 
21
- self
22
- end
21
+ new_node = Rambling::Trie::CompressedNode.new parent
22
+ new_node.letter = node.letter.to_s << compressed_child.letter.to_s
23
+ new_node.terminal! if compressed_child.terminal?
24
+ new_node.children_tree = compressed_child.children_tree
23
25
 
24
- private
26
+ new_node.children.each do |child|
27
+ child.parent = new_node
28
+ end
25
29
 
26
- def compressable?
27
- !(root? || terminal?) && children_tree.size == 1
30
+ new_node
28
31
  end
29
32
 
30
- def merge_with! child
31
- delete_old_key_on_parent!
32
- redefine_self! child
33
+ def copy_node_and_compress_children node, parent
34
+ new_node = Rambling::Trie::CompressedNode.new parent
33
35
 
34
- children.each { |node| node.parent = self }
35
- end
36
+ new_node.letter = node.letter
37
+ new_node.terminal! if node.terminal?
36
38
 
37
- def delete_old_key_on_parent!
38
- parent.delete letter if parent
39
- end
39
+ node.children.map do |child|
40
+ compress child, new_node
41
+ end
40
42
 
41
- def redefine_self! merged_node
42
- self.letter = letter.to_s << merged_node.letter.to_s
43
- self.children_tree = merged_node.children_tree
44
- self.terminal = merged_node.terminal?
43
+ new_node
45
44
  end
46
45
  end
47
46
  end
@@ -1,12 +1,27 @@
1
1
  module Rambling
2
2
  module Trie
3
- # A representation of the root node in the Trie data structure.
4
- class Root < Node
3
+ # Wrapper on top of Trie data structure.
4
+ class Container
5
+ extend ::Forwardable
6
+
7
+ include ::Enumerable
8
+
9
+ delegate [
10
+ :each,
11
+ :compressed?,
12
+ :[],
13
+ :letter,
14
+ :inspect
15
+ ] => :root
16
+
5
17
  # Creates a new Trie.
6
- # @yield [Root] the trie just created.
7
- def initialize
8
- super
9
- self.compressed = false
18
+ # @param [Node] root the root node for the trie
19
+ # @param [Compressor] compressor responsible for compressing the trie
20
+ # @yield [Container] the trie just created.
21
+ def initialize root = nil, compressor = nil
22
+ @root = root || default_root
23
+ @compressor = compressor || default_compressor
24
+
10
25
  yield self if block_given?
11
26
  end
12
27
 
@@ -14,71 +29,59 @@ module Rambling
14
29
  # @param [String] word the word to add the branch from.
15
30
  # @return [Node] the just added branch's root node.
16
31
  # @raise [InvalidOperation] if the trie is already compressed.
17
- # @see Branches#add
32
+ # @see RawNode#add
33
+ # @see CompressedNode#add
18
34
  # @note Avoids clearing the contents of the word variable.
19
35
  def add word
20
- super word.clone
36
+ root.add word.clone
21
37
  end
22
38
 
23
- # Compresses the existing tree using redundant node elimination. Flags the trie as compressed.
24
- # @return [Root] self
39
+ # Compresses the existing tree using redundant node elimination. Flags
40
+ # the trie as compressed.
41
+ # @return [Container] self
42
+ # @note Avoids compressing again if the trie has already been compressed.
25
43
  def compress!
26
- self.compressed = compressed? || !!compress_tree!
44
+ self.root = compressor.compress root unless root.compressed?
27
45
  self
28
46
  end
29
47
 
30
- # Flag for compressed tries. Overrides {Compressor#compressed?}.
31
- # @return [Boolean] `true` for compressed tries, `false` otherwise.
32
- def compressed?
33
- !!compressed
34
- end
35
-
36
48
  # Checks if a path for a word or partial word exists in the trie.
37
49
  # @param [String] word the word or partial word to look for in the trie.
38
50
  # @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
39
51
  def partial_word? word = ''
40
- is? :partial_word, word
41
- end
42
-
43
- alias_method :match?, :partial_word?
44
-
45
- # If the current node is the root node.
46
- # @return [Boolean] `true`
47
- def root?
48
- true
52
+ root.partial_word? word.chars.to_a
49
53
  end
50
54
 
51
55
  # Checks if a whole word exists in the trie.
52
56
  # @param [String] word the word to look for in the trie.
53
57
  # @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
54
58
  def word? word = ''
55
- is? :word, word
59
+ root.word? word.chars.to_a
56
60
  end
57
61
 
58
- alias_method :include?, :word?
59
-
60
62
  # Returns all words that start with the specified characters.
61
63
  # @param [String] word the word to look for in the trie.
62
64
  # @return [Array] all the words contained in the trie that start with the specified characters.
63
65
  def scan word = ''
64
- closest_node(word).to_a
66
+ root.scan(word.chars).to_a
65
67
  end
66
68
 
69
+ alias_method :include?, :word?
70
+ alias_method :match?, :partial_word?
67
71
  alias_method :words, :scan
72
+ alias_method :<<, :add
68
73
 
69
74
  private
70
75
 
71
- attr_accessor :compressed
76
+ attr_reader :compressor
77
+ attr_accessor :root
72
78
 
73
- def is? method, word
74
- method = compressed? ? "#{method}_when_compressed?" : "#{method}_when_uncompressed?"
75
- send method, word.chars.to_a
79
+ def default_root
80
+ Rambling::Trie::RawNode.new
76
81
  end
77
82
 
78
- def closest_node word
79
- method = compressed? ? :closest_node_when_compressed : :closest_node_when_uncompressed
80
-
81
- send method, word.chars.to_a
83
+ def default_compressor
84
+ Rambling::Trie::Compressor.new
82
85
  end
83
86
  end
84
87
  end
@@ -6,14 +6,18 @@ module Rambling
6
6
 
7
7
  alias_method :size, :count
8
8
 
9
- # Calls block once for each of the words contained in the trie. If no block given, an Enumerator is returned.
10
- def each &block
11
- enumerator = Enumerator.new do |words|
12
- words << as_word if terminal?
13
- children.each { |child| child.each { |word| words << word } }
14
- end
9
+ # Iterates over the words contained in the trie.
10
+ # @yield [String] the words contained in this trie node.
11
+ def each
12
+ return enum_for :each unless block_given?
13
+
14
+ yield as_word if terminal?
15
15
 
16
- block.nil? ? enumerator : enumerator.each(&block)
16
+ children.each do |child|
17
+ child.each do |word|
18
+ yield word
19
+ end
20
+ end
17
21
  end
18
22
  end
19
23
  end
@@ -1,7 +1,7 @@
1
1
  module Rambling
2
2
  module Trie
3
3
  # A representation of a missing node in the Trie data structure.
4
- class MissingNode < Node
4
+ class MissingNode < Rambling::Trie::Node
5
5
  end
6
6
  end
7
7
  end
@@ -2,14 +2,18 @@ module Rambling
2
2
  module Trie
3
3
  # A representation of a node in the Trie data structure.
4
4
  class Node
5
- extend Forwardable
5
+ extend ::Forwardable
6
6
 
7
- delegate [:[], :[]=, :delete, :has_key?] => :children_tree
7
+ delegate [
8
+ :[],
9
+ :[]=,
10
+ :delete,
11
+ :has_key?
12
+ ] => :children_tree
8
13
 
9
- include Compressor
10
- include Branches
11
- include Enumerable
12
- include Inspector
14
+ include Rambling::Trie::Compression
15
+ include Rambling::Trie::Enumerable
16
+ include Rambling::Trie::Inspector
13
17
 
14
18
  # Letter or letters corresponding to this node.
15
19
  # @return [Symbol, nil] the corresponding letter(s) or nil.
@@ -17,24 +21,17 @@ module Rambling
17
21
 
18
22
  # Children nodes.
19
23
  # @return [Hash] the children_tree hash, consisting of :letter => node.
20
- attr_reader :children_tree
24
+ attr_accessor :children_tree
21
25
 
22
26
  # Parent node.
23
27
  # @return [Node, nil] the parent node or nil for the root element.
24
28
  attr_accessor :parent
25
29
 
26
30
  # Creates a new Node.
27
- # @param [String, nil] word the word from which to create this Node and his branch.
28
31
  # @param [Node, nil] parent the parent of this node.
29
- def initialize word = nil, parent = nil
32
+ def initialize parent = nil
30
33
  self.parent = parent
31
34
  self.children_tree = {}
32
-
33
- unless word.nil? || word.empty?
34
- self.letter = word.slice! 0
35
- self.terminal = word.empty?
36
- self << word
37
- end
38
35
  end
39
36
 
40
37
  # String representation of the current node, if it is a terminal node.
@@ -52,9 +49,9 @@ module Rambling
52
49
  end
53
50
 
54
51
  # If the current node is the root node.
55
- # @return [Boolean] `false`
52
+ # @return [Boolean] `true` only if the node does not have a parent
56
53
  def root?
57
- false
54
+ !parent
58
55
  end
59
56
 
60
57
  # Flag for terminal nodes.
@@ -63,23 +60,29 @@ module Rambling
63
60
  !!terminal
64
61
  end
65
62
 
63
+ # Force [Node] to be `terminal`
64
+ # @return [Node] the modified node.
65
+ def terminal!
66
+ self.terminal = true
67
+ self
68
+ end
69
+
66
70
  # String representation of the current node.
67
71
  # @return [String] the string representation of the current node.
68
72
  def to_s
69
73
  parent.to_s << letter.to_s
70
74
  end
71
75
 
72
- protected
73
-
74
- attr_writer :children_tree
75
- attr_accessor :terminal
76
-
77
76
  def letter= new_letter
78
77
  if new_letter
79
78
  @letter = new_letter.to_sym
80
79
  parent[letter] = self if parent
81
80
  end
82
81
  end
82
+
83
+ private
84
+
85
+ attr_accessor :terminal
83
86
  end
84
87
  end
85
88
  end