rambling-trie 0.3.2 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +9 -5
- data/lib/children_hash_deferer.rb +14 -0
- data/lib/invalid_trie_operation.rb +1 -0
- data/lib/rambling-trie.rb +2 -1
- data/lib/rambling-trie/tasks/gem.rb +17 -0
- data/lib/rambling-trie/tasks/performance.rb +79 -0
- data/lib/rambling-trie/version.rb +8 -0
- data/lib/rambling.rb +3 -0
- data/lib/trie.rb +17 -2
- data/lib/trie_branches.rb +53 -28
- data/lib/trie_compressor.rb +4 -5
- data/lib/trie_node.rb +24 -5
- metadata +44 -6
data/README.markdown
CHANGED
@@ -37,25 +37,25 @@ trie = Rambling::Trie.new
|
|
37
37
|
You can also provide a file which contains all the words to be added to the trie, and it will read the file and create the structure for you, like this:
|
38
38
|
|
39
39
|
``` ruby
|
40
|
-
trie = Rambling::Trie.new
|
40
|
+
trie = Rambling::Trie.new '/path/to/file'
|
41
41
|
```
|
42
42
|
|
43
43
|
To add new words to the trie, use `add_branch_from`:
|
44
44
|
|
45
45
|
``` ruby
|
46
|
-
trie.add_branch_from
|
46
|
+
trie.add_branch_from 'word'
|
47
47
|
```
|
48
48
|
|
49
49
|
And to find out if a word already exists in the trie, use `is_word?`:
|
50
50
|
|
51
51
|
``` ruby
|
52
|
-
trie.is_word?
|
52
|
+
trie.is_word? 'word'
|
53
53
|
```
|
54
54
|
|
55
|
-
If you wish to find if part of a word exists in the `Rambling::Trie` instance, you should call `
|
55
|
+
If you wish to find if part of a word exists in the `Rambling::Trie` instance, you should call `has_branch_for?`:
|
56
56
|
|
57
57
|
``` ruby
|
58
|
-
trie.
|
58
|
+
trie.has_branch_for? 'partial_word'
|
59
59
|
```
|
60
60
|
|
61
61
|
### Compression
|
@@ -80,6 +80,10 @@ You can find out if a `Rambling::Trie` instance is compressed by calling the `co
|
|
80
80
|
trie.compressed?
|
81
81
|
```
|
82
82
|
|
83
|
+
## Further Documentation
|
84
|
+
|
85
|
+
You can find further API documentation in the autogenerated pages on [RubyDoc.info](http://rubydoc.info/gems/rambling-trie/0.3.3/Rambling).
|
86
|
+
|
83
87
|
## Compatible Ruby and Rails versions
|
84
88
|
|
85
89
|
The Rambling Trie has been tested with the following Ruby versions:
|
@@ -1,17 +1,31 @@
|
|
1
1
|
module Rambling
|
2
|
+
# Provides some proxy methods to the children's hash for readability.
|
2
3
|
module ChildrenHashDeferer
|
4
|
+
# Proxies to @children[key]
|
5
|
+
# @param [Symbol] key the key to look for in the children's hash.
|
6
|
+
# @return [TrieNode, nil] the child node with that key or nil.
|
3
7
|
def [](key)
|
4
8
|
@children[key]
|
5
9
|
end
|
6
10
|
|
11
|
+
# Proxies to @children[key] = value.
|
12
|
+
# @param [Symbol] key the key to add or change the value for.
|
13
|
+
# @param [TrieNode] value the node to add to the children's hash.
|
14
|
+
# @return [TrieNode] the node just assigned to that key.
|
7
15
|
def []=(key, value)
|
8
16
|
@children[key] = value
|
9
17
|
end
|
10
18
|
|
19
|
+
# Proxies to @children.delete(key)
|
20
|
+
# @param [Symbol] key the key to delete in the children's hash.
|
21
|
+
# @return [TrieNode, nil] the child node corresponding to the key just deleted or nil.
|
11
22
|
def delete(key)
|
12
23
|
@children.delete(key)
|
13
24
|
end
|
14
25
|
|
26
|
+
# Proxies to @children.has_key?(key)
|
27
|
+
# @param [Symbol] key the key to look for in the children's hash.
|
28
|
+
# @return [Boolean] `true` if the key exists in the children's hash, `false` otherwise.
|
15
29
|
def has_key?(key)
|
16
30
|
@children.has_key?(key)
|
17
31
|
end
|
data/lib/rambling-trie.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'rambling.rb')
|
1
2
|
require File.join(File.dirname(__FILE__), 'invalid_trie_operation.rb')
|
2
3
|
require File.join(File.dirname(__FILE__), 'children_hash_deferer.rb')
|
3
4
|
require File.join(File.dirname(__FILE__), 'trie_compressor.rb')
|
4
5
|
require File.join(File.dirname(__FILE__), 'trie_branches.rb')
|
5
6
|
require File.join(File.dirname(__FILE__), 'trie_node.rb')
|
6
7
|
require File.join(File.dirname(__FILE__), 'trie.rb')
|
7
|
-
|
8
|
+
require File.join(File.dirname(__FILE__), 'rambling-trie', 'version.rb')
|
@@ -0,0 +1,17 @@
|
|
1
|
+
namespace :gem do
|
2
|
+
task :build do
|
3
|
+
desc 'Build the rambling-trie gem'
|
4
|
+
system 'gem build rambling-trie.gemspec'
|
5
|
+
end
|
6
|
+
|
7
|
+
task release: :build do
|
8
|
+
desc 'Push the latest version of the rambling-trie gem'
|
9
|
+
system "gem push rambling-trie-#{Rambling::Trie::VERSION}.gem"
|
10
|
+
end
|
11
|
+
|
12
|
+
task :version do
|
13
|
+
desc 'Output the current rambling-trie version'
|
14
|
+
puts "rambling-trie #{Rambling::Trie::VERSION}"
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require 'ruby-prof'
|
3
|
+
|
4
|
+
namespace :performance do
|
5
|
+
def report(name, trie, output)
|
6
|
+
words = ['hi', 'help', 'beautiful', 'impressionism', 'anthropological']
|
7
|
+
methods = [:is_word?, :has_branch_for?]
|
8
|
+
|
9
|
+
output.puts "==> #{name}"
|
10
|
+
methods.each do |method|
|
11
|
+
output.puts "`#{method}`:"
|
12
|
+
words.each do |word|
|
13
|
+
output.print "#{word} - #{trie.send(method, word)}".ljust(30)
|
14
|
+
output.puts Benchmark.measure { 200_000.times {trie.send(method, word) }}
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def generate_report(filename = nil)
|
20
|
+
output = filename.nil? ? $stdout : File.open(filename, 'a+')
|
21
|
+
|
22
|
+
trie = Rambling::Trie.new(get_path('assets', 'dictionaries', 'words_with_friends.txt'))
|
23
|
+
|
24
|
+
output.puts "\nReport for rambling-trie version #{Rambling::Trie::VERSION}"
|
25
|
+
report('Uncompressed', trie, output)
|
26
|
+
|
27
|
+
return unless trie.respond_to?(:compress!)
|
28
|
+
|
29
|
+
trie.compress!
|
30
|
+
report('Compressed', trie, output)
|
31
|
+
|
32
|
+
output.close
|
33
|
+
end
|
34
|
+
|
35
|
+
def get_path(*filename)
|
36
|
+
File.join(File.dirname(__FILE__), '..', '..', '..', *filename)
|
37
|
+
end
|
38
|
+
|
39
|
+
task :report do
|
40
|
+
puts 'Generating performance report...'
|
41
|
+
generate_report
|
42
|
+
end
|
43
|
+
|
44
|
+
namespace :report do
|
45
|
+
task :save do
|
46
|
+
puts 'Generating performance report...'
|
47
|
+
generate_report(get_path('reports', 'performance'))
|
48
|
+
puts 'Report has been saved to reports/performance'
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
task :profile do
|
53
|
+
puts 'Generating profiling reports...'
|
54
|
+
|
55
|
+
rambling_trie = Rambling::Trie.new(get_path('assets', 'dictionaries', 'words_with_friends.txt'))
|
56
|
+
words = ['hi', 'help', 'beautiful', 'impressionism', 'anthropological']
|
57
|
+
methods = [:has_branch_for?]#, :is_word?]
|
58
|
+
tries = [lambda {rambling_trie.compress!}]#lambda {rambling_trie}, lambda {rambling_trie.compress!}]
|
59
|
+
|
60
|
+
methods.each do |method|
|
61
|
+
tries.each do |trie_generator|
|
62
|
+
trie = trie_generator.call
|
63
|
+
result = RubyProf.profile do
|
64
|
+
words.each do |word|
|
65
|
+
200_000.times { trie.send(method, word) }
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
File.open get_path('reports', "profile-#{trie.compressed? ? 'compressed' : 'uncompressed'}-#{method.to_s.sub(/\?/, '')}-#{Time.now.to_i}"), 'w' do |file|
|
70
|
+
RubyProf::CallTreePrinter.new(result).print(file)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
puts 'Done'
|
76
|
+
end
|
77
|
+
|
78
|
+
task all: [:profile, :report]
|
79
|
+
end
|
data/lib/rambling.rb
ADDED
data/lib/trie.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
module Rambling
|
2
|
+
# A representation of the root node in the Trie data structure.
|
2
3
|
class Trie < TrieNode
|
4
|
+
# Creates a new Trie.
|
5
|
+
# @param [String, nil] filename the file to load the words from (defaults to nil).
|
3
6
|
def initialize(filename = nil)
|
4
7
|
super(nil)
|
5
8
|
|
@@ -8,6 +11,8 @@ module Rambling
|
|
8
11
|
add_all_nodes if filename
|
9
12
|
end
|
10
13
|
|
14
|
+
# Compresses the existing tree using redundant node elimination. Flags the trie as compressed.
|
15
|
+
# @return [Trie] same object
|
11
16
|
def compress!
|
12
17
|
unless compressed?
|
13
18
|
compress_own_tree!
|
@@ -17,16 +22,26 @@ module Rambling
|
|
17
22
|
self
|
18
23
|
end
|
19
24
|
|
25
|
+
# Flag for compressed tries. Overrides {TrieCompressor#compressed?}.
|
26
|
+
# @return [Boolean] `true` for compressed tries, `false` otherwise.
|
20
27
|
def compressed?
|
21
28
|
@is_compressed = @is_compressed.nil? ? false : @is_compressed
|
22
29
|
end
|
23
30
|
|
31
|
+
# Checks if a path for a word or partial word exists in the trie.
|
32
|
+
# @param [String] word the word or partial word to look for in the trie.
|
33
|
+
# @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
|
24
34
|
def has_branch_for?(word = '')
|
25
|
-
|
35
|
+
chars = word.chars.to_a
|
36
|
+
compressed? ? has_compressed_branch_for?(chars) : has_uncompressed_branch_for?(chars)
|
26
37
|
end
|
27
38
|
|
39
|
+
# Checks if a whole word exists in the trie.
|
40
|
+
# @param [String] word the word to look for in the trie.
|
41
|
+
# @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
|
28
42
|
def is_word?(word = '')
|
29
|
-
|
43
|
+
chars = word.chars.to_a
|
44
|
+
compressed? ? is_compressed_word?(chars) : is_uncompressed_word?(chars)
|
30
45
|
end
|
31
46
|
|
32
47
|
private
|
data/lib/trie_branches.rb
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
module Rambling
|
2
|
+
# Provides the branching behavior for the Trie data structure.
|
2
3
|
module TrieBranches
|
4
|
+
# Adds a branch to the trie based on the word.
|
5
|
+
# @param [String] word the word to add the branch from.
|
6
|
+
# @return [TrieNode] the just added branch's root node.
|
7
|
+
# @raise [InvalidTrieOperation] if the trie is already compressed.
|
3
8
|
def add_branch_from(word)
|
4
9
|
raise InvalidTrieOperation.new('Cannot add branch to compressed trie') if compressed?
|
5
10
|
if word.empty?
|
@@ -11,7 +16,9 @@ module Rambling
|
|
11
16
|
|
12
17
|
if @children.has_key?(first_letter)
|
13
18
|
word.slice!(0)
|
14
|
-
@children[first_letter]
|
19
|
+
child = @children[first_letter]
|
20
|
+
child.add_branch_from(word)
|
21
|
+
child
|
15
22
|
else
|
16
23
|
@children[first_letter] = TrieNode.new(word, self)
|
17
24
|
end
|
@@ -19,47 +26,65 @@ module Rambling
|
|
19
26
|
|
20
27
|
protected
|
21
28
|
|
22
|
-
def has_uncompressed_branch_for?(
|
23
|
-
|
29
|
+
def has_uncompressed_branch_for?(chars)
|
30
|
+
chars.empty? or fulfills_uncompressed_condition?(:has_uncompressed_branch_for?, chars)
|
24
31
|
end
|
25
32
|
|
26
|
-
def
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
33
|
+
def has_compressed_branch_for?(chars)
|
34
|
+
return true if chars.empty?
|
35
|
+
|
36
|
+
length = chars.length
|
37
|
+
first_letter = chars.slice!(0)
|
38
|
+
key = nil
|
39
|
+
@children.keys.each do |x|
|
40
|
+
x = x.to_s
|
41
|
+
if x.start_with?(first_letter)
|
42
|
+
key = x
|
43
|
+
break
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
unless key.nil?
|
48
|
+
sym_key = key.to_sym
|
49
|
+
return @children[sym_key].has_compressed_branch_for?(chars) if key.length == first_letter.length
|
50
|
+
|
51
|
+
while not chars.empty?
|
52
|
+
first_letter += chars.slice!(0)
|
53
|
+
|
54
|
+
break unless key.start_with?(first_letter)
|
55
|
+
return true if chars.empty?
|
56
|
+
return @children[sym_key].has_compressed_branch_for?(chars) if key.length == first_letter.length
|
57
|
+
end
|
32
58
|
end
|
33
59
|
|
34
60
|
false
|
35
61
|
end
|
36
62
|
|
37
|
-
def
|
38
|
-
|
39
|
-
|
40
|
-
keys = @children.keys.map { |x| x.to_s }
|
41
|
-
return true if keys.include?(word)
|
63
|
+
def is_uncompressed_word?(chars)
|
64
|
+
(chars.empty? and terminal?) or fulfills_uncompressed_condition?(:is_uncompressed_word?, chars)
|
65
|
+
end
|
42
66
|
|
43
|
-
|
44
|
-
return true
|
67
|
+
def is_compressed_word?(chars)
|
68
|
+
return true if chars.empty? and terminal?
|
45
69
|
|
46
|
-
|
47
|
-
|
70
|
+
length = chars.length
|
71
|
+
first_letter = ''
|
72
|
+
while not chars.empty?
|
73
|
+
first_letter += chars.slice!(0)
|
74
|
+
key = first_letter.to_sym
|
75
|
+
return @children[key].is_compressed_word?(chars) if @children.has_key?(key)
|
76
|
+
end
|
48
77
|
|
49
78
|
false
|
50
79
|
end
|
51
80
|
|
52
|
-
|
53
|
-
(word.empty? and terminal?) or fulfills_uncompressed_condition?(:is_uncompressed_word?, word)
|
54
|
-
end
|
81
|
+
private
|
55
82
|
|
56
|
-
def
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
key = word.slice(0..index).to_sym
|
62
|
-
return @children[key].is_compressed_word?(word.slice((index + 1)...length)) if @children.has_key?(key)
|
83
|
+
def fulfills_uncompressed_condition?(method, chars)
|
84
|
+
first_letter = chars.slice!(0)
|
85
|
+
unless first_letter.nil?
|
86
|
+
first_letter_sym = first_letter.to_sym
|
87
|
+
return @children[first_letter_sym].send(method, chars) if @children.has_key?(first_letter_sym)
|
63
88
|
end
|
64
89
|
|
65
90
|
false
|
data/lib/trie_compressor.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
module Rambling
|
2
|
+
# Provides the compressing behavior for the Trie data structure.
|
2
3
|
module TrieCompressor
|
4
|
+
# Flag for compressed tries.
|
5
|
+
# @return [Boolean] `true` for compressed tries, `false` otherwise.
|
3
6
|
def compressed?
|
4
|
-
|
5
|
-
@is_compressed
|
6
|
-
else
|
7
|
-
@parent.nil? ? false : @parent.compressed?
|
8
|
-
end
|
7
|
+
@parent.nil? ? false : @parent.compressed?
|
9
8
|
end
|
10
9
|
|
11
10
|
protected
|
data/lib/trie_node.rb
CHANGED
@@ -1,11 +1,25 @@
|
|
1
1
|
module Rambling
|
2
|
+
# A representation of a node in the Trie data structure.
|
2
3
|
class TrieNode
|
3
4
|
include ChildrenHashDeferer
|
4
5
|
include TrieCompressor
|
5
6
|
include TrieBranches
|
6
7
|
|
7
|
-
|
8
|
+
# Letter or letters corresponding to this node.
|
9
|
+
# @return [Symbol, nil] the corresponding letter(s) or nil.
|
10
|
+
attr_reader :letter
|
8
11
|
|
12
|
+
# Children nodes.
|
13
|
+
# @return [Hash] the children hash, consisting of :letter => node.
|
14
|
+
attr_reader :children
|
15
|
+
|
16
|
+
# Parent node.
|
17
|
+
# @return [TrieNode, nil] the parent node or nil for the root element.
|
18
|
+
attr_reader :parent
|
19
|
+
|
20
|
+
# Creates a new TrieNode.
|
21
|
+
# @param [String] word the word from which to create this TrieNode and its branch.
|
22
|
+
# @param [TrieNode] parent the parent of this node.
|
9
23
|
def initialize(word, parent = nil)
|
10
24
|
@letter = nil
|
11
25
|
@parent = parent
|
@@ -20,14 +34,15 @@ module Rambling
|
|
20
34
|
end
|
21
35
|
end
|
22
36
|
|
23
|
-
|
24
|
-
|
25
|
-
end
|
26
|
-
|
37
|
+
# Flag for terminal nodes.
|
38
|
+
# @return [Boolean] `true` for terminal nodes, `false` otherwise.
|
27
39
|
def terminal?
|
28
40
|
@is_terminal
|
29
41
|
end
|
30
42
|
|
43
|
+
# String representation of the current node, if it is a terminal node.
|
44
|
+
# @return [String] the string representation of the current node.
|
45
|
+
# @raise [InvalidTrieOperation] if the node has a letter but is not terminal.
|
31
46
|
def as_word
|
32
47
|
raise InvalidTrieOperation.new() unless @letter.nil? or terminal?
|
33
48
|
get_letter_string
|
@@ -41,5 +56,9 @@ module Rambling
|
|
41
56
|
def parent=(parent)
|
42
57
|
@parent = parent
|
43
58
|
end
|
59
|
+
|
60
|
+
def terminal=(terminal)
|
61
|
+
@is_terminal = terminal
|
62
|
+
end
|
44
63
|
end
|
45
64
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rambling-trie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &17006080 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.0.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *17006080
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rake
|
27
|
-
requirement: &
|
27
|
+
requirement: &17018000 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,40 @@ dependencies:
|
|
32
32
|
version: 0.9.2
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *17018000
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: ruby-prof
|
38
|
+
requirement: &17031020 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 0.10.8
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *17031020
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: yard
|
49
|
+
requirement: &16618180 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.7.5
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *16618180
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: redcarpet
|
60
|
+
requirement: &16616900 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: 2.1.0
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *16616900
|
36
69
|
description: The Rambling Trie is a custom implementation of the Trie data structure
|
37
70
|
with Ruby, which includes compression abilities and is designed to be very fast
|
38
71
|
to traverse.
|
@@ -43,7 +76,11 @@ extra_rdoc_files: []
|
|
43
76
|
files:
|
44
77
|
- ./lib/children_hash_deferer.rb
|
45
78
|
- ./lib/invalid_trie_operation.rb
|
79
|
+
- ./lib/rambling-trie/tasks/gem.rb
|
80
|
+
- ./lib/rambling-trie/tasks/performance.rb
|
81
|
+
- ./lib/rambling-trie/version.rb
|
46
82
|
- ./lib/rambling-trie.rb
|
83
|
+
- ./lib/rambling.rb
|
47
84
|
- ./lib/trie.rb
|
48
85
|
- ./lib/trie_branches.rb
|
49
86
|
- ./lib/trie_compressor.rb
|
@@ -75,3 +112,4 @@ signing_key:
|
|
75
112
|
specification_version: 3
|
76
113
|
summary: A custom implementation of the trie data structure.
|
77
114
|
test_files: []
|
115
|
+
has_rdoc:
|