rambling-trie 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +32 -10
- data/lib/rambling/trie.rb +6 -10
- data/lib/rambling/trie/branches.rb +9 -9
- data/lib/rambling/trie/children_hash_deferer.rb +4 -4
- data/lib/rambling/trie/compressor.rb +13 -13
- data/lib/rambling/trie/inspector.rb +1 -0
- data/lib/rambling/trie/invalid_operation.rb +1 -1
- data/lib/rambling/trie/node.rb +13 -8
- data/lib/rambling/trie/plain_text_reader.rb +23 -0
- data/lib/rambling/trie/root.rb +8 -7
- data/lib/rambling/trie/version.rb +1 -1
- data/spec/integration/rambling/trie_spec.rb +20 -0
- data/spec/lib/rambling/trie/plain_text_reader_spec.rb +18 -0
- data/spec/lib/rambling/trie_spec.rb +10 -4
- metadata +9 -4
data/README.markdown
CHANGED
@@ -28,33 +28,55 @@ gem 'rambling-trie'
|
|
28
28
|
|
29
29
|
## How to use the Rambling Trie
|
30
30
|
|
31
|
+
To create the trie, initialize it like this:
|
32
|
+
|
33
|
+
``` ruby
|
34
|
+
trie = Rambling::Trie.create
|
35
|
+
```
|
36
|
+
|
31
37
|
- - -
|
32
|
-
|
38
|
+
|
39
|
+
#### Deprecation warnings
|
33
40
|
|
34
41
|
* Starting from version 0.4.0, `Rambling::Trie.new` is deprecated. Please use `Rambling::Trie.create` instead.
|
35
|
-
|
42
|
+
|
36
43
|
- - -
|
37
44
|
|
38
|
-
|
45
|
+
You can also provide a block and the created instance will be yielded for you to perform any operation on it:
|
39
46
|
|
40
47
|
``` ruby
|
41
|
-
|
48
|
+
Rambling::Trie.create do |trie|
|
49
|
+
trie << 'word'
|
50
|
+
end
|
42
51
|
```
|
43
52
|
|
44
|
-
|
53
|
+
Additionally, you can provide the path to a file that contains all the words to be added to the trie, and it will read the file and create the complete structure for you, like this:
|
45
54
|
|
46
55
|
``` ruby
|
47
56
|
trie = Rambling::Trie.create '/path/to/file'
|
48
57
|
```
|
49
58
|
|
50
|
-
|
59
|
+
By default, a plain text file with the following format will be expected:
|
51
60
|
|
52
|
-
```
|
53
|
-
|
54
|
-
|
55
|
-
|
61
|
+
``` text
|
62
|
+
some
|
63
|
+
words
|
64
|
+
to
|
65
|
+
populate
|
66
|
+
the
|
67
|
+
trie
|
56
68
|
```
|
57
69
|
|
70
|
+
If you want to use a custom file format, you will need to provide a custom file reader that defines an `each_word` method, which yields each word contained in the file. Look at the `Rambling::Trie::PlainTextReader` class for an example.
|
71
|
+
|
72
|
+
- - -
|
73
|
+
|
74
|
+
#### Deprecation warnings
|
75
|
+
|
76
|
+
* Starting from version 0.5.0, the `has_branch_for?`, `is_word?` and `add_branch_from` methods are deprecated. The methods `branch?`, `word?` and `add` should be used respectively.
|
77
|
+
|
78
|
+
- - -
|
79
|
+
|
58
80
|
To add new words to the trie, use `add` or `<<`:
|
59
81
|
|
60
82
|
``` ruby
|
data/lib/rambling/trie.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
%w{
|
2
|
-
|
3
|
-
|
2
|
+
branches children_hash_deferer compressor enumerable
|
3
|
+
plain_text_reader inspector invalid_operation node
|
4
|
+
root version
|
4
5
|
}.map { |file| File.join 'rambling', 'trie', file }.each &method(:require)
|
5
6
|
|
6
7
|
# General namespace for all Rambling gems.
|
@@ -9,20 +10,15 @@ module Rambling
|
|
9
10
|
module Trie
|
10
11
|
class << self
|
11
12
|
# Creates a new Trie. Entry point for the Rambling::Trie API.
|
12
|
-
# @param [String, nil]
|
13
|
+
# @param [String, nil] filepath the file to load the words from.
|
13
14
|
# @return [Root] the trie just created.
|
14
15
|
# @yield [Root] the trie just created.
|
15
|
-
def create(
|
16
|
+
def create(filepath = nil, reader = PlainTextReader.new)
|
16
17
|
Root.new do |root|
|
17
|
-
|
18
|
+
reader.each_word(filepath) { |word| root << word } if filepath
|
18
19
|
yield root if block_given?
|
19
20
|
end
|
20
21
|
end
|
21
|
-
|
22
|
-
private
|
23
|
-
def words_from(filename)
|
24
|
-
File.open(filename) { |file| file.each_line { |line| yield line.chomp } }
|
25
|
-
end
|
26
22
|
end
|
27
23
|
end
|
28
24
|
end
|
@@ -10,19 +10,19 @@ module Rambling
|
|
10
10
|
def add(word)
|
11
11
|
raise InvalidOperation, 'Cannot add branch to compressed trie' if compressed?
|
12
12
|
if word.empty?
|
13
|
-
|
13
|
+
self.terminal = true
|
14
14
|
return
|
15
15
|
end
|
16
16
|
|
17
17
|
first_letter = word.slice(0).to_sym
|
18
18
|
|
19
|
-
if
|
19
|
+
if children.has_key? first_letter
|
20
20
|
word.slice! 0
|
21
|
-
child =
|
21
|
+
child = children[first_letter]
|
22
22
|
child << word
|
23
23
|
child
|
24
24
|
else
|
25
|
-
|
25
|
+
children[first_letter] = Node.new word, self
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
@@ -41,7 +41,7 @@ module Rambling
|
|
41
41
|
current_key, current_key_string = current_key first_letter
|
42
42
|
|
43
43
|
unless current_key.nil?
|
44
|
-
return
|
44
|
+
return children[current_key].branch_when_compressed?(chars) if current_key_string.length == first_letter.length
|
45
45
|
|
46
46
|
while not chars.empty?
|
47
47
|
char = chars.slice! 0
|
@@ -50,7 +50,7 @@ module Rambling
|
|
50
50
|
|
51
51
|
return true if chars.empty?
|
52
52
|
first_letter << char
|
53
|
-
return
|
53
|
+
return children[current_key].branch_when_compressed?(chars) if current_key_string.length == first_letter.length
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
@@ -68,7 +68,7 @@ module Rambling
|
|
68
68
|
while not chars.empty?
|
69
69
|
first_letter << chars.slice!(0)
|
70
70
|
key = first_letter.to_sym
|
71
|
-
return
|
71
|
+
return children[key].word_when_compressed?(chars) if children.has_key? key
|
72
72
|
end
|
73
73
|
|
74
74
|
false
|
@@ -79,7 +79,7 @@ module Rambling
|
|
79
79
|
def current_key(letter)
|
80
80
|
current_key_string = current_key = nil
|
81
81
|
|
82
|
-
|
82
|
+
children.keys.each do |key|
|
83
83
|
key_string = key.to_s
|
84
84
|
if key_string.start_with? letter
|
85
85
|
current_key = key
|
@@ -95,7 +95,7 @@ module Rambling
|
|
95
95
|
first_letter = chars.slice! 0
|
96
96
|
unless first_letter.nil?
|
97
97
|
first_letter_sym = first_letter.to_sym
|
98
|
-
return
|
98
|
+
return children[first_letter_sym].send(method, chars) if children.has_key? first_letter_sym
|
99
99
|
end
|
100
100
|
|
101
101
|
false
|
@@ -6,7 +6,7 @@ module Rambling
|
|
6
6
|
# @param [Symbol] key the key to look for in the children's hash.
|
7
7
|
# @return [Node, nil] the child node with that key or nil.
|
8
8
|
def [](key)
|
9
|
-
|
9
|
+
children[key]
|
10
10
|
end
|
11
11
|
|
12
12
|
# Proxies to @children[key] = value.
|
@@ -14,21 +14,21 @@ module Rambling
|
|
14
14
|
# @param [Node] value the node to add to the children's hash.
|
15
15
|
# @return [Node, nil] the child node with that key or nil.
|
16
16
|
def []=(key, value)
|
17
|
-
|
17
|
+
children[key] = value
|
18
18
|
end
|
19
19
|
|
20
20
|
# Proxies to @children.delete(key)
|
21
21
|
# @param [Symbol] key the key to delete in the children's hash.
|
22
22
|
# @return [Node, nil] the child node corresponding to the key just deleted or nil.
|
23
23
|
def delete(key)
|
24
|
-
|
24
|
+
children.delete(key)
|
25
25
|
end
|
26
26
|
|
27
27
|
# Proxies to @children.has_key?(key)
|
28
28
|
# @param [Symbol] key the key to look for in the children's hash.
|
29
29
|
# @return [Boolean] `true` if the key exists in the children's hash, `false` otherwise.
|
30
30
|
def has_key?(key)
|
31
|
-
|
31
|
+
children.has_key?(key)
|
32
32
|
end
|
33
33
|
end
|
34
34
|
end
|
@@ -5,18 +5,18 @@ module Rambling
|
|
5
5
|
# Flag for compressed tries.
|
6
6
|
# @return [Boolean] `true` for compressed tries, `false` otherwise.
|
7
7
|
def compressed?
|
8
|
-
|
8
|
+
parent and parent.compressed?
|
9
9
|
end
|
10
10
|
|
11
11
|
# Compress the current node using redundant node elimination.
|
12
12
|
# @return [Root, Node] the compressed node.
|
13
13
|
def compress_tree!
|
14
|
-
if
|
15
|
-
merge_with!
|
14
|
+
if children.size == 1 and not terminal? and letter
|
15
|
+
merge_with! children.values.first
|
16
16
|
compress_tree!
|
17
17
|
end
|
18
18
|
|
19
|
-
|
19
|
+
children.values.each &:compress_tree!
|
20
20
|
|
21
21
|
self
|
22
22
|
end
|
@@ -24,25 +24,25 @@ module Rambling
|
|
24
24
|
private
|
25
25
|
|
26
26
|
def merge_with!(child)
|
27
|
-
new_letter = (
|
27
|
+
new_letter = (letter.to_s << child.letter.to_s).to_sym
|
28
28
|
|
29
|
-
rehash_on_parent!
|
29
|
+
rehash_on_parent! letter, new_letter
|
30
30
|
redefine_self! new_letter, child
|
31
31
|
|
32
|
-
|
32
|
+
children.values.each { |node| node.parent = self }
|
33
33
|
end
|
34
34
|
|
35
35
|
def rehash_on_parent!(old_letter, new_letter)
|
36
|
-
return if
|
36
|
+
return if parent.nil?
|
37
37
|
|
38
|
-
|
39
|
-
|
38
|
+
parent.delete old_letter
|
39
|
+
parent[new_letter] = self
|
40
40
|
end
|
41
41
|
|
42
42
|
def redefine_self!(new_letter, merged_node)
|
43
|
-
|
44
|
-
|
45
|
-
|
43
|
+
self.letter = new_letter
|
44
|
+
self.children = merged_node.children
|
45
|
+
self.terminal = merged_node.terminal?
|
46
46
|
end
|
47
47
|
end
|
48
48
|
end
|
@@ -2,6 +2,7 @@ module Rambling
|
|
2
2
|
module Trie
|
3
3
|
# Provides pretty printing behavior for the Trie data structure.
|
4
4
|
module Inspector
|
5
|
+
# @return [String] a string representation of the current node.
|
5
6
|
def inspect
|
6
7
|
"#<#{self.class.name} letter: #{letter.inspect or 'nil'}, children: #{children.keys}>"
|
7
8
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Rambling
|
2
2
|
module Trie
|
3
|
-
# Raised when trying to execute an invalid operation
|
3
|
+
# Raised when trying to execute an invalid operation on a Trie data structure.
|
4
4
|
class InvalidOperation < Exception
|
5
5
|
def initialize(message = nil)
|
6
6
|
super
|
data/lib/rambling/trie/node.rb
CHANGED
@@ -24,12 +24,13 @@ module Rambling
|
|
24
24
|
# @param [String, nil] word the word from which to create this Node and its branch.
|
25
25
|
# @param [Node, nil] parent the parent of this node.
|
26
26
|
def initialize(word = nil, parent = nil)
|
27
|
-
|
27
|
+
self.parent = parent
|
28
|
+
self.children = {}
|
28
29
|
|
29
30
|
unless word.nil? or word.empty?
|
30
31
|
letter = word.slice! 0
|
31
|
-
|
32
|
-
|
32
|
+
self.letter = letter.to_sym if letter
|
33
|
+
self.terminal = word.empty?
|
33
34
|
self << word
|
34
35
|
end
|
35
36
|
end
|
@@ -37,20 +38,24 @@ module Rambling
|
|
37
38
|
# Flag for terminal nodes.
|
38
39
|
# @return [Boolean] `true` for terminal nodes, `false` otherwise.
|
39
40
|
def terminal?
|
40
|
-
|
41
|
+
!!terminal
|
41
42
|
end
|
42
43
|
|
43
44
|
# String representation of the current node, if it is a terminal node.
|
44
45
|
# @return [String] the string representation of the current node.
|
45
46
|
# @raise [InvalidOperation] if node is not terminal or is root.
|
46
47
|
def as_word
|
47
|
-
raise InvalidOperation, 'Cannot represent branch as a word' unless
|
48
|
-
|
48
|
+
raise InvalidOperation, 'Cannot represent branch as a word' unless letter.nil? or terminal?
|
49
|
+
letter_string
|
49
50
|
end
|
50
51
|
|
51
52
|
protected
|
52
|
-
|
53
|
-
|
53
|
+
|
54
|
+
attr_writer :letter, :children
|
55
|
+
attr_accessor :terminal
|
56
|
+
|
57
|
+
def letter_string
|
58
|
+
(parent ? parent.letter_string : '') << letter.to_s
|
54
59
|
end
|
55
60
|
end
|
56
61
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Rambling
|
2
|
+
module Trie
|
3
|
+
# File reader for .txt files
|
4
|
+
class PlainTextReader
|
5
|
+
# Yields each word read from a .txt file
|
6
|
+
# @param [String] filepath the full path of the file to load the words from.
|
7
|
+
# @yield [String] Each line read from the file.
|
8
|
+
def each_word(filepath)
|
9
|
+
each_line(filepath) { |line| yield line.chomp }
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def each_line(filepath)
|
15
|
+
open(filepath) { |file| file.each_line { |line| yield line } }
|
16
|
+
end
|
17
|
+
|
18
|
+
def open(filepath)
|
19
|
+
File.open(filepath) { |file| yield file }
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/lib/rambling/trie/root.rb
CHANGED
@@ -6,35 +6,35 @@ module Rambling
|
|
6
6
|
# @yield [Root] the trie just created.
|
7
7
|
def initialize
|
8
8
|
super
|
9
|
-
|
9
|
+
self.compressed = false
|
10
10
|
yield self if block_given?
|
11
11
|
end
|
12
12
|
|
13
13
|
# Compresses the existing tree using redundant node elimination. Flags the trie as compressed.
|
14
14
|
# @return [Root] self
|
15
15
|
def compress!
|
16
|
-
|
16
|
+
self.compressed = (compressed? or !!compress_tree!)
|
17
17
|
self
|
18
18
|
end
|
19
19
|
|
20
20
|
# Flag for compressed tries. Overrides {Compressor#compressed?}.
|
21
21
|
# @return [Boolean] `true` for compressed tries, `false` otherwise.
|
22
22
|
def compressed?
|
23
|
-
|
23
|
+
!!compressed
|
24
24
|
end
|
25
25
|
|
26
26
|
# Checks if a path for a word or partial word exists in the trie.
|
27
27
|
# @param [String] word the word or partial word to look for in the trie.
|
28
28
|
# @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
|
29
29
|
def branch?(word = '')
|
30
|
-
|
30
|
+
is? :branch, word
|
31
31
|
end
|
32
32
|
|
33
33
|
# Checks if a whole word exists in the trie.
|
34
34
|
# @param [String] word the word to look for in the trie.
|
35
35
|
# @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
|
36
36
|
def word?(word = '')
|
37
|
-
|
37
|
+
is? :word, word
|
38
38
|
end
|
39
39
|
|
40
40
|
alias_method :include?, :word?
|
@@ -53,8 +53,9 @@ module Rambling
|
|
53
53
|
|
54
54
|
private
|
55
55
|
|
56
|
-
|
57
|
-
|
56
|
+
attr_accessor :compressed
|
57
|
+
|
58
|
+
def is?(method, word)
|
58
59
|
method = compressed? ? "#{method}_when_compressed?" : "#{method}_when_uncompressed?"
|
59
60
|
send method, word.chars.to_a
|
60
61
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Rambling::Trie do
|
4
|
+
describe 'when a filepath is provided' do
|
5
|
+
let(:filepath) { File.join(::SPEC_ROOT, 'assets', 'test_words.txt') }
|
6
|
+
let(:words) { File.readlines(filepath).map &:chomp }
|
7
|
+
subject { Rambling::Trie.create filepath }
|
8
|
+
|
9
|
+
it 'contains all the words from the file' do
|
10
|
+
words.each { |word| expect(subject).to include(word) }
|
11
|
+
end
|
12
|
+
|
13
|
+
describe 'and the trie is compressed' do
|
14
|
+
it 'still contains all the words from the file' do
|
15
|
+
subject.compress!
|
16
|
+
words.each { |word| expect(subject).to include(word) }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Rambling
|
4
|
+
module Trie
|
5
|
+
describe PlainTextReader do
|
6
|
+
describe '#each_word' do
|
7
|
+
let(:filepath) { File.join(::SPEC_ROOT, 'assets', 'test_words.txt') }
|
8
|
+
let(:words) { File.readlines(filepath).map &:chomp }
|
9
|
+
|
10
|
+
it 'yields every word yielded by the file' do
|
11
|
+
yielded_words = []
|
12
|
+
subject.each_word(filepath) { |word| yielded_words << word }
|
13
|
+
expect(yielded_words).to eq(words)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -19,14 +19,20 @@ module Rambling
|
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
|
-
context 'with a
|
23
|
-
let(:
|
24
|
-
let(:
|
22
|
+
context 'with a filepath' do
|
23
|
+
let(:filepath) { 'test_words.txt' }
|
24
|
+
let(:reader) { double(Trie::PlainTextReader) }
|
25
|
+
let(:words) { %w(a couple of test words over here) }
|
26
|
+
|
27
|
+
before do
|
28
|
+
yielder = reader.stub(:each_word)
|
29
|
+
words.each { |word| yielder = yielder.and_yield(word) }
|
30
|
+
end
|
25
31
|
|
26
32
|
it 'loads every word' do
|
27
33
|
words.each { |word| root.should_receive(:<<).with(word) }
|
28
34
|
|
29
|
-
Trie.create
|
35
|
+
Trie.create filepath, reader
|
30
36
|
end
|
31
37
|
end
|
32
38
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rambling-trie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -118,6 +118,7 @@ files:
|
|
118
118
|
- lib/rambling/trie/inspector.rb
|
119
119
|
- lib/rambling/trie/invalid_operation.rb
|
120
120
|
- lib/rambling/trie/node.rb
|
121
|
+
- lib/rambling/trie/plain_text_reader.rb
|
121
122
|
- lib/rambling/trie/root.rb
|
122
123
|
- lib/rambling/trie/tasks/gem.rb
|
123
124
|
- lib/rambling/trie/tasks/performance.rb
|
@@ -125,11 +126,13 @@ files:
|
|
125
126
|
- rambling-trie.gemspec
|
126
127
|
- reports/performance
|
127
128
|
- spec/assets/test_words.txt
|
129
|
+
- spec/integration/rambling/trie_spec.rb
|
128
130
|
- spec/lib/rambling/trie/branches_spec.rb
|
129
131
|
- spec/lib/rambling/trie/children_hash_deferer_spec.rb
|
130
132
|
- spec/lib/rambling/trie/enumerable_spec.rb
|
131
133
|
- spec/lib/rambling/trie/inspector_spec.rb
|
132
134
|
- spec/lib/rambling/trie/node_spec.rb
|
135
|
+
- spec/lib/rambling/trie/plain_text_reader_spec.rb
|
133
136
|
- spec/lib/rambling/trie/root_spec.rb
|
134
137
|
- spec/lib/rambling/trie_spec.rb
|
135
138
|
- spec/spec_helper.rb
|
@@ -147,7 +150,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
147
150
|
version: '0'
|
148
151
|
segments:
|
149
152
|
- 0
|
150
|
-
hash:
|
153
|
+
hash: -2285345975138821646
|
151
154
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
155
|
none: false
|
153
156
|
requirements:
|
@@ -156,7 +159,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
156
159
|
version: '0'
|
157
160
|
segments:
|
158
161
|
- 0
|
159
|
-
hash:
|
162
|
+
hash: -2285345975138821646
|
160
163
|
requirements: []
|
161
164
|
rubyforge_project:
|
162
165
|
rubygems_version: 1.8.24
|
@@ -165,11 +168,13 @@ specification_version: 3
|
|
165
168
|
summary: A custom implementation of the trie data structure.
|
166
169
|
test_files:
|
167
170
|
- spec/assets/test_words.txt
|
171
|
+
- spec/integration/rambling/trie_spec.rb
|
168
172
|
- spec/lib/rambling/trie/branches_spec.rb
|
169
173
|
- spec/lib/rambling/trie/children_hash_deferer_spec.rb
|
170
174
|
- spec/lib/rambling/trie/enumerable_spec.rb
|
171
175
|
- spec/lib/rambling/trie/inspector_spec.rb
|
172
176
|
- spec/lib/rambling/trie/node_spec.rb
|
177
|
+
- spec/lib/rambling/trie/plain_text_reader_spec.rb
|
173
178
|
- spec/lib/rambling/trie/root_spec.rb
|
174
179
|
- spec/lib/rambling/trie_spec.rb
|
175
180
|
- spec/spec_helper.rb
|