rambling-trie 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +9 -5
- data/lib/children_hash_deferer.rb +14 -0
- data/lib/invalid_trie_operation.rb +1 -0
- data/lib/rambling-trie.rb +2 -1
- data/lib/rambling-trie/tasks/gem.rb +17 -0
- data/lib/rambling-trie/tasks/performance.rb +79 -0
- data/lib/rambling-trie/version.rb +8 -0
- data/lib/rambling.rb +3 -0
- data/lib/trie.rb +17 -2
- data/lib/trie_branches.rb +53 -28
- data/lib/trie_compressor.rb +4 -5
- data/lib/trie_node.rb +24 -5
- metadata +44 -6
data/README.markdown
CHANGED
@@ -37,25 +37,25 @@ trie = Rambling::Trie.new
|
|
37
37
|
You can also provide a file which contains all the words to be added to the trie, and it will read the file and create the structure for you, like this:
|
38
38
|
|
39
39
|
``` ruby
|
40
|
-
trie = Rambling::Trie.new
|
40
|
+
trie = Rambling::Trie.new '/path/to/file'
|
41
41
|
```
|
42
42
|
|
43
43
|
To add new words to the trie, use `add_branch_from`:
|
44
44
|
|
45
45
|
``` ruby
|
46
|
-
trie.add_branch_from
|
46
|
+
trie.add_branch_from 'word'
|
47
47
|
```
|
48
48
|
|
49
49
|
And to find out if a word already exists in the trie, use `is_word?`:
|
50
50
|
|
51
51
|
``` ruby
|
52
|
-
trie.is_word?
|
52
|
+
trie.is_word? 'word'
|
53
53
|
```
|
54
54
|
|
55
|
-
If you wish to find if part of a word exists in the `Rambling::Trie` instance, you should call `
|
55
|
+
If you wish to find if part of a word exists in the `Rambling::Trie` instance, you should call `has_branch_for?`:
|
56
56
|
|
57
57
|
``` ruby
|
58
|
-
trie.
|
58
|
+
trie.has_branch_for? 'partial_word'
|
59
59
|
```
|
60
60
|
|
61
61
|
### Compression
|
@@ -80,6 +80,10 @@ You can find out if a `Rambling::Trie` instance is compressed by calling the `co
|
|
80
80
|
trie.compressed?
|
81
81
|
```
|
82
82
|
|
83
|
+
## Further Documentation
|
84
|
+
|
85
|
+
You can find further API documentation on the autogenerated [RubyDoc.info](http://rubydoc.info/gems/rambling-trie/0.3.3/Rambling)
|
86
|
+
|
83
87
|
## Compatible Ruby and Rails versions
|
84
88
|
|
85
89
|
The Rambling Trie has been tested with the following Ruby versions:
|
@@ -1,17 +1,31 @@
|
|
1
1
|
module Rambling
|
2
|
+
# Provides some proxy methods to the children's hash for readability.
|
2
3
|
module ChildrenHashDeferer
|
4
|
+
# Proxies to @children[key]
|
5
|
+
# @param [Symbol] key the key to look for in the children's hash.
|
6
|
+
# @return [TrieNode, nil] the child node with that key or nil.
|
3
7
|
def [](key)
|
4
8
|
@children[key]
|
5
9
|
end
|
6
10
|
|
11
|
+
# Proxies to @children[key] = value.
|
12
|
+
# @param [Symbol] key the key to add or change the value for.
|
13
|
+
# @param [TrieNode] value the node to add to the children's hash.
|
14
|
+
# @return [TrieNode] the node that was just assigned to that key.
|
7
15
|
def []=(key, value)
|
8
16
|
@children[key] = value
|
9
17
|
end
|
10
18
|
|
19
|
+
# Proxies to @children.delete(key)
|
20
|
+
# @param [Symbol] key the key to delete in the children's hash.
|
21
|
+
# @return [TrieNode, nil] the child node corresponding to the key just deleted or nil.
|
11
22
|
def delete(key)
|
12
23
|
@children.delete(key)
|
13
24
|
end
|
14
25
|
|
26
|
+
# Proxies to @children.has_key?(key)
|
27
|
+
# @param [Symbol] key the key to look for in the children's hash.
|
28
|
+
# @return [Boolean] `true` for the keys that exist in the children's hash, false otherwise.
|
15
29
|
def has_key?(key)
|
16
30
|
@children.has_key?(key)
|
17
31
|
end
|
data/lib/rambling-trie.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'rambling.rb')
|
1
2
|
require File.join(File.dirname(__FILE__), 'invalid_trie_operation.rb')
|
2
3
|
require File.join(File.dirname(__FILE__), 'children_hash_deferer.rb')
|
3
4
|
require File.join(File.dirname(__FILE__), 'trie_compressor.rb')
|
4
5
|
require File.join(File.dirname(__FILE__), 'trie_branches.rb')
|
5
6
|
require File.join(File.dirname(__FILE__), 'trie_node.rb')
|
6
7
|
require File.join(File.dirname(__FILE__), 'trie.rb')
|
7
|
-
|
8
|
+
require File.join(File.dirname(__FILE__), 'rambling-trie', 'version.rb')
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Rake tasks for building, publishing and inspecting the rambling-trie gem.
#
# Each `desc` is declared immediately before the task it describes: Rake
# attaches the most recent description to the next task that gets defined,
# so a `desc` placed inside a task's action block never labels that task.
namespace :gem do
  desc 'Build the rambling-trie gem'
  task :build do
    system 'gem build rambling-trie.gemspec'
  end

  desc 'Push the latest version of the rambling-trie gem'
  task release: :build do
    system "gem push rambling-trie-#{Rambling::Trie::VERSION}.gem"
  end

  desc 'Output the current rambling-trie version'
  task :version do
    puts "rambling-trie #{Rambling::Trie::VERSION}"
  end
end
|
17
|
+
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require 'ruby-prof'
|
3
|
+
|
4
|
+
namespace :performance do
|
5
|
+
def report(name, trie, output)
|
6
|
+
words = ['hi', 'help', 'beautiful', 'impressionism', 'anthropological']
|
7
|
+
methods = [:is_word?, :has_branch_for?]
|
8
|
+
|
9
|
+
output.puts "==> #{name}"
|
10
|
+
methods.each do |method|
|
11
|
+
output.puts "`#{method}`:"
|
12
|
+
words.each do |word|
|
13
|
+
output.print "#{word} - #{trie.send(method, word)}".ljust(30)
|
14
|
+
output.puts Benchmark.measure { 200_000.times {trie.send(method, word) }}
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Generates the performance report for the trie built from the bundled
# dictionary, first uncompressed and then (when supported) compressed.
#
# @param [String, nil] filename file to append the report to; when nil the
#   report is printed to $stdout.
# @return [nil]
def generate_report(filename = nil)
  if filename.nil?
    # Never close $stdout: the calling rake task keeps printing afterwards.
    write_performance_report($stdout)
  else
    # The block form closes the file even on an early return or an exception.
    File.open(filename, 'a+') { |file| write_performance_report(file) }
  end

  nil
end

# Writes the uncompressed and, when the trie supports it, compressed reports.
#
# @param [IO] output the stream the report lines are written to.
def write_performance_report(output)
  trie = Rambling::Trie.new(get_path('assets', 'dictionaries', 'words_with_friends.txt'))

  output.puts "\nReport for rambling-trie version #{Rambling::Trie::VERSION}"
  report('Uncompressed', trie, output)

  return unless trie.respond_to?(:compress!)

  trie.compress!
  report('Compressed', trie, output)
end
|
34
|
+
|
35
|
+
def get_path(*filename)
|
36
|
+
File.join(File.dirname(__FILE__), '..', '..', '..', *filename)
|
37
|
+
end
|
38
|
+
|
39
|
+
task :report do
|
40
|
+
puts 'Generating performance report...'
|
41
|
+
generate_report
|
42
|
+
end
|
43
|
+
|
44
|
+
namespace :report do
|
45
|
+
task :save do
|
46
|
+
puts 'Generating performance report...'
|
47
|
+
generate_report(get_path('reports', 'performance'))
|
48
|
+
puts 'Report has been saved to reports/performance'
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
task :profile do
|
53
|
+
puts 'Generating profiling reports...'
|
54
|
+
|
55
|
+
rambling_trie = Rambling::Trie.new(get_path('assets', 'dictionaries', 'words_with_friends.txt'))
|
56
|
+
words = ['hi', 'help', 'beautiful', 'impressionism', 'anthropological']
|
57
|
+
methods = [:has_branch_for?]#, :is_word?]
|
58
|
+
tries = [lambda {rambling_trie.compress!}]#lambda {rambling_trie}, lambda {rambling_trie.compress!}]
|
59
|
+
|
60
|
+
methods.each do |method|
|
61
|
+
tries.each do |trie_generator|
|
62
|
+
trie = trie_generator.call
|
63
|
+
result = RubyProf.profile do
|
64
|
+
words.each do |word|
|
65
|
+
200_000.times { trie.send(method, word) }
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
File.open get_path('reports', "profile-#{trie.compressed? ? 'compressed' : 'uncompressed'}-#{method.to_s.sub(/\?/, '')}-#{Time.now.to_i}"), 'w' do |file|
|
70
|
+
RubyProf::CallTreePrinter.new(result).print(file)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
puts 'Done'
|
76
|
+
end
|
77
|
+
|
78
|
+
task all: [:profile, :report]
|
79
|
+
end
|
data/lib/rambling.rb
ADDED
data/lib/trie.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
module Rambling
|
2
|
+
# A representation of the root node in the Trie data structure.
|
2
3
|
class Trie < TrieNode
|
4
|
+
# Creates a new Trie.
|
5
|
+
# @param [String, nil] filename the file to load the words from (defaults to nil).
|
3
6
|
def initialize(filename = nil)
|
4
7
|
super(nil)
|
5
8
|
|
@@ -8,6 +11,8 @@ module Rambling
|
|
8
11
|
add_all_nodes if filename
|
9
12
|
end
|
10
13
|
|
14
|
+
# Compresses the existing tree using redundant node elimination. Flags the trie as compressed.
|
15
|
+
# @return [Trie] same object
|
11
16
|
def compress!
|
12
17
|
unless compressed?
|
13
18
|
compress_own_tree!
|
@@ -17,16 +22,26 @@ module Rambling
|
|
17
22
|
self
|
18
23
|
end
|
19
24
|
|
25
|
+
# Flag for compressed tries. Overrides {TrieCompressor#compressed?}.
|
26
|
+
# @return [Boolean] `true` for compressed tries, `false` otherwise.
|
20
27
|
def compressed?
  # A flag that was never set (nil) is stored and reported as false;
  # any value already stored is returned unchanged.
  @is_compressed ||= false
end
|
23
30
|
|
31
|
+
# Checks if a path for a word or partial word exists in the trie.
|
32
|
+
# @param [String] word the word or partial word to look for in the trie.
|
33
|
+
# @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
|
24
34
|
def has_branch_for?(word = '')
|
25
|
-
|
35
|
+
chars = word.chars.to_a
|
36
|
+
compressed? ? has_compressed_branch_for?(chars) : has_uncompressed_branch_for?(chars)
|
26
37
|
end
|
27
38
|
|
39
|
+
# Checks if a whole word exists in the trie.
|
40
|
+
# @param [String] word the word to look for in the trie.
|
41
|
+
# @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
|
28
42
|
def is_word?(word = '')
|
29
|
-
|
43
|
+
chars = word.chars.to_a
|
44
|
+
compressed? ? is_compressed_word?(chars) : is_uncompressed_word?(chars)
|
30
45
|
end
|
31
46
|
|
32
47
|
private
|
data/lib/trie_branches.rb
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
module Rambling
|
2
|
+
# Provides the branching behavior for the Trie data structure.
|
2
3
|
module TrieBranches
|
4
|
+
# Adds a branch to the trie based on the word.
|
5
|
+
# @param [String] word the word to add the branch from.
|
6
|
+
# @return [TrieNode] the just added branch's root node.
|
7
|
+
# @raise [InvalidTrieOperation] if the trie is already compressed.
|
3
8
|
def add_branch_from(word)
|
4
9
|
raise InvalidTrieOperation.new('Cannot add branch to compressed trie') if compressed?
|
5
10
|
if word.empty?
|
@@ -11,7 +16,9 @@ module Rambling
|
|
11
16
|
|
12
17
|
if @children.has_key?(first_letter)
|
13
18
|
word.slice!(0)
|
14
|
-
@children[first_letter]
|
19
|
+
child = @children[first_letter]
|
20
|
+
child.add_branch_from(word)
|
21
|
+
child
|
15
22
|
else
|
16
23
|
@children[first_letter] = TrieNode.new(word, self)
|
17
24
|
end
|
@@ -19,47 +26,65 @@ module Rambling
|
|
19
26
|
|
20
27
|
protected
|
21
28
|
|
22
|
-
def has_uncompressed_branch_for?(
|
23
|
-
|
29
|
+
def has_uncompressed_branch_for?(chars)
|
30
|
+
chars.empty? or fulfills_uncompressed_condition?(:has_uncompressed_branch_for?, chars)
|
24
31
|
end
|
25
32
|
|
26
|
-
def
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
33
|
+
# Checks whether the given characters follow a path in this node's children,
# whose keys may span several letters.
#
# The characters may stop partway through a multi-letter key: 'h' or 'he' is
# a valid branch when a child key is :hello.
#
# @param [Array<String>] chars the remaining characters to match; this array
#   is consumed (mutated) while matching.
# @return [Boolean] `true` if the characters follow an existing path,
#   `false` otherwise.
def has_compressed_branch_for?(chars)
  return true if chars.empty?

  prefix = chars.slice!(0)
  matching_key = @children.keys.find { |candidate| candidate.to_s.start_with?(prefix) }
  return false if matching_key.nil?

  key = matching_key.to_s
  child = @children[matching_key]

  loop do
    # The whole key has been matched: the child handles whatever is left.
    return child.has_compressed_branch_for?(chars) if key.length == prefix.length
    # Input ran out partway through the key, so it is still a valid partial branch.
    return true if chars.empty?

    prefix += chars.slice!(0)
    return false unless key.start_with?(prefix)
  end
end
|
36
62
|
|
37
|
-
def
|
38
|
-
|
39
|
-
|
40
|
-
keys = @children.keys.map { |x| x.to_s }
|
41
|
-
return true if keys.include?(word)
|
63
|
+
def is_uncompressed_word?(chars)
|
64
|
+
(chars.empty? and terminal?) or fulfills_uncompressed_condition?(:is_uncompressed_word?, chars)
|
65
|
+
end
|
42
66
|
|
43
|
-
|
44
|
-
return true
|
67
|
+
# Checks whether the given characters spell a complete word along this node's
# children, whose keys may span several letters.
#
# @param [Array<String>] chars the remaining characters to match; this array
#   is consumed (mutated) while matching.
# @return [Boolean] `true` only if every character is consumed on a terminal
#   node, `false` otherwise.
def is_compressed_word?(chars)
  return true if chars.empty? && terminal?

  prefix = ''
  until chars.empty?
    # Grow the candidate key one character at a time until it names a child.
    prefix += chars.slice!(0)
    key = prefix.to_sym
    return @children[key].is_compressed_word?(chars) if @children.key?(key)
  end

  false
end
|
51
80
|
|
52
|
-
|
53
|
-
(word.empty? and terminal?) or fulfills_uncompressed_condition?(:is_uncompressed_word?, word)
|
54
|
-
end
|
81
|
+
private
|
55
82
|
|
56
|
-
def
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
key = word.slice(0..index).to_sym
|
62
|
-
return @children[key].is_compressed_word?(word.slice((index + 1)...length)) if @children.has_key?(key)
|
83
|
+
def fulfills_uncompressed_condition?(method, chars)
|
84
|
+
first_letter = chars.slice!(0)
|
85
|
+
unless first_letter.nil?
|
86
|
+
first_letter_sym = first_letter.to_sym
|
87
|
+
return @children[first_letter_sym].send(method, chars) if @children.has_key?(first_letter_sym)
|
63
88
|
end
|
64
89
|
|
65
90
|
false
|
data/lib/trie_compressor.rb
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
module Rambling
|
2
|
+
# Provides the compressing behavior for the Trie data structure.
|
2
3
|
module TrieCompressor
|
4
|
+
# Flag for compressed tries.
|
5
|
+
# @return [Boolean] `true` for compressed tries, `false` otherwise.
|
3
6
|
def compressed?
|
4
|
-
|
5
|
-
@is_compressed
|
6
|
-
else
|
7
|
-
@parent.nil? ? false : @parent.compressed?
|
8
|
-
end
|
7
|
+
@parent.nil? ? false : @parent.compressed?
|
9
8
|
end
|
10
9
|
|
11
10
|
protected
|
data/lib/trie_node.rb
CHANGED
@@ -1,11 +1,25 @@
|
|
1
1
|
module Rambling
|
2
|
+
# A representation of a node in the Trie data structure.
|
2
3
|
class TrieNode
|
3
4
|
include ChildrenHashDeferer
|
4
5
|
include TrieCompressor
|
5
6
|
include TrieBranches
|
6
7
|
|
7
|
-
|
8
|
+
# Letter or letters corresponding to this node.
|
9
|
+
# @return [Symbol, nil] the corresponding letter(s) or nil.
|
10
|
+
attr_reader :letter
|
8
11
|
|
12
|
+
# Children nodes.
|
13
|
+
# @return [Hash] the children hash, consisting of :letter => node.
|
14
|
+
attr_reader :children
|
15
|
+
|
16
|
+
# Parent node.
|
17
|
+
# @return [TrieNode, nil] the parent node or nil for the root element.
|
18
|
+
attr_reader :parent
|
19
|
+
|
20
|
+
# Creates a new TrieNode.
|
21
|
+
# @param [String] word the word from which to create this TrieNode and its branch.
|
22
|
+
# @param [TrieNode] parent the parent of this node.
|
9
23
|
def initialize(word, parent = nil)
|
10
24
|
@letter = nil
|
11
25
|
@parent = parent
|
@@ -20,14 +34,15 @@ module Rambling
|
|
20
34
|
end
|
21
35
|
end
|
22
36
|
|
23
|
-
|
24
|
-
|
25
|
-
end
|
26
|
-
|
37
|
+
# Flag for terminal nodes.
|
38
|
+
# @return [Boolean] `true` for terminal nodes, `false` otherwise.
|
27
39
|
def terminal?
|
28
40
|
@is_terminal
|
29
41
|
end
|
30
42
|
|
43
|
+
# String representation of the current node, if it is a terminal node.
|
44
|
+
# @return [String] the string representation of the current node.
|
45
|
+
# @raise [InvalidTrieOperation] if node is not terminal or is root.
|
31
46
|
def as_word
|
32
47
|
raise InvalidTrieOperation.new() unless @letter.nil? or terminal?
|
33
48
|
get_letter_string
|
@@ -41,5 +56,9 @@ module Rambling
|
|
41
56
|
def parent=(parent)
|
42
57
|
@parent = parent
|
43
58
|
end
|
59
|
+
|
60
|
+
def terminal=(terminal)
|
61
|
+
@is_terminal = terminal
|
62
|
+
end
|
44
63
|
end
|
45
64
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rambling-trie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-02-
|
12
|
+
date: 2012-02-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &17006080 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.0.0
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *17006080
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rake
|
27
|
-
requirement: &
|
27
|
+
requirement: &17018000 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,7 +32,40 @@ dependencies:
|
|
32
32
|
version: 0.9.2
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *17018000
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: ruby-prof
|
38
|
+
requirement: &17031020 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 0.10.8
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *17031020
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: yard
|
49
|
+
requirement: &16618180 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.7.5
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *16618180
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: redcarpet
|
60
|
+
requirement: &16616900 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: 2.1.0
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *16616900
|
36
69
|
description: The Rambling Trie is a custom implementation of the Trie data structure
|
37
70
|
with Ruby, which includes compression abilities and is designed to be very fast
|
38
71
|
to traverse.
|
@@ -43,7 +76,11 @@ extra_rdoc_files: []
|
|
43
76
|
files:
|
44
77
|
- ./lib/children_hash_deferer.rb
|
45
78
|
- ./lib/invalid_trie_operation.rb
|
79
|
+
- ./lib/rambling-trie/tasks/gem.rb
|
80
|
+
- ./lib/rambling-trie/tasks/performance.rb
|
81
|
+
- ./lib/rambling-trie/version.rb
|
46
82
|
- ./lib/rambling-trie.rb
|
83
|
+
- ./lib/rambling.rb
|
47
84
|
- ./lib/trie.rb
|
48
85
|
- ./lib/trie_branches.rb
|
49
86
|
- ./lib/trie_compressor.rb
|
@@ -75,3 +112,4 @@ signing_key:
|
|
75
112
|
specification_version: 3
|
76
113
|
summary: A custom implementation of the trie data structure.
|
77
114
|
test_files: []
|
115
|
+
has_rdoc:
|