rambling-trie 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rambling-trie.rb CHANGED
@@ -1,12 +1,22 @@
1
1
  [
2
2
  'rambling',
3
- 'invalid_trie_operation',
4
- 'children_hash_deferer',
5
- 'trie_compressor',
6
- 'trie_branches',
7
- 'trie_node',
8
- 'trie',
3
+ 'rambling-trie/invalid_operation',
4
+ 'rambling-trie/children_hash_deferer',
5
+ 'rambling-trie/compressor',
6
+ 'rambling-trie/branches',
7
+ 'rambling-trie/node',
8
+ 'rambling-trie/root',
9
9
  'rambling-trie/version'
10
10
  ].each do |file|
11
11
  require File.join File.dirname(__FILE__), file
12
12
  end
13
+
14
+ module Rambling
15
+ module Trie
16
+ class << self
17
+ def create(*params)
18
+ Root.new *params
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,95 @@
1
+ module Rambling
2
+ module Trie
3
+ # Provides the branching behavior for the Trie data structure.
4
+ module Branches
5
+ # Adds a branch to the trie based on the word.
6
+ # @param [String] word the word to add the branch from.
7
+ # @return [Node] the just added branch's root node.
8
+ # @raise [InvalidOperation] if the trie is already compressed.
9
+ def add_branch_from(word)
10
+ raise InvalidOperation.new('Cannot add branch to compressed trie') if compressed?
11
+ if word.empty?
12
+ @is_terminal = true
13
+ return
14
+ end
15
+
16
+ first_letter = word.slice(0).to_sym
17
+
18
+ if @children.has_key?(first_letter)
19
+ word.slice!(0)
20
+ child = @children[first_letter]
21
+ child.add_branch_from(word)
22
+ child
23
+ else
24
+ @children[first_letter] = Node.new word, self
25
+ end
26
+ end
27
+
28
+ protected
29
+
30
+ def has_uncompressed_branch_for?(chars)
31
+ chars.empty? or fulfills_uncompressed_condition?(:has_uncompressed_branch_for?, chars)
32
+ end
33
+
34
+ def has_compressed_branch_for?(chars)
35
+ return true if chars.empty?
36
+
37
+ first_letter = chars.slice!(0)
38
+ key = nil
39
+ @children.keys.each do |x|
40
+ x = x.to_s
41
+ if x.start_with?(first_letter)
42
+ key = x
43
+ break
44
+ end
45
+ end
46
+
47
+ unless key.nil?
48
+ sym_key = key.to_sym
49
+ return @children[sym_key].has_compressed_branch_for?(chars) if key.length == first_letter.length
50
+
51
+ while not chars.empty?
52
+ char = chars.slice!(0)
53
+
54
+ break unless key[first_letter.length] == char
55
+
56
+ first_letter += char
57
+ return true if chars.empty?
58
+ return @children[sym_key].has_compressed_branch_for?(chars) if key.length == first_letter.length
59
+ end
60
+ end
61
+
62
+ false
63
+ end
64
+
65
+ def is_uncompressed_word?(chars)
66
+ (chars.empty? and terminal?) or fulfills_uncompressed_condition?(:is_uncompressed_word?, chars)
67
+ end
68
+
69
+ def is_compressed_word?(chars)
70
+ return true if chars.empty? and terminal?
71
+
72
+ first_letter = ''
73
+ while not chars.empty?
74
+ first_letter += chars.slice!(0)
75
+ key = first_letter.to_sym
76
+ return @children[key].is_compressed_word?(chars) if @children.has_key?(key)
77
+ end
78
+
79
+ false
80
+ end
81
+
82
+ private
83
+
84
+ def fulfills_uncompressed_condition?(method, chars)
85
+ first_letter = chars.slice!(0)
86
+ unless first_letter.nil?
87
+ first_letter_sym = first_letter.to_sym
88
+ return @children[first_letter_sym].send(method, chars) if @children.has_key?(first_letter_sym)
89
+ end
90
+
91
+ false
92
+ end
93
+ end
94
+ end
95
+ end
@@ -0,0 +1,49 @@
1
+ module Rambling
2
+ module Trie
3
+ # Provides the compressing behavior for the Trie data structure.
4
+ module Compressor
5
+ # Flag for compressed tries.
6
+ # @return [Boolean] `true` for compressed tries, `false` otherwise.
7
+ def compressed?
8
+ @parent.nil? ? false : @parent.compressed?
9
+ end
10
+
11
+ protected
12
+
13
+ def compress_own_tree!
14
+ if @children.size == 1 and not terminal? and not @letter.nil?
15
+ merge_with!(@children.values.first)
16
+ compress_own_tree!
17
+ end
18
+
19
+ @children.values.each { |node| node.compress_own_tree! }
20
+
21
+ self
22
+ end
23
+
24
+ private
25
+
26
+ def merge_with!(child)
27
+ new_letter = (@letter.to_s + child.letter.to_s).to_sym
28
+
29
+ rehash_on_parent!(@letter, new_letter)
30
+ redefine_self!(new_letter, child)
31
+
32
+ @children.values.each { |node| node.parent = self }
33
+ end
34
+
35
+ def rehash_on_parent!(old_letter, new_letter)
36
+ return if @parent.nil?
37
+
38
+ @parent.delete(old_letter)
39
+ @parent[new_letter] = self
40
+ end
41
+
42
+ def redefine_self!(new_letter, merged_node)
43
+ @letter = new_letter
44
+ @children = merged_node.children
45
+ @is_terminal = merged_node.terminal?
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,10 @@
1
+ module Rambling
2
+ module Trie
3
+ # Raised when trying to execute an invalid operation for this Trie data structure.
4
+ class InvalidOperation < Exception
5
+ def initialize(message = nil)
6
+ super
7
+ end
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,58 @@
1
+ module Rambling
2
+ module Trie
3
+ # A representation of a node in the Trie data structure.
4
+ class Node
5
+ include ChildrenHashDeferer
6
+ include Compressor
7
+ include Branches
8
+
9
+ # Letter or letters corresponding to this node.
10
+ # @return [Symbol, nil] the corresponding letter(s) or nil.
11
+ attr_reader :letter
12
+
13
+ # Children nodes.
14
+ # @return [Hash] the children hash, consisting of :letter => node.
15
+ attr_reader :children
16
+
17
+ # Parent node.
18
+ # @return [Node, nil] the parent node or nil for the root element.
19
+ attr_accessor :parent
20
+
21
+ # Creates a new Node.
22
+ # @param [String] word the word from which to create this Node and its branch.
23
+ # @param [Node, nil] parent the parent of this node (defaults to nil).
24
+ def initialize(word, parent = nil)
25
+ @letter = nil
26
+ @parent = parent
27
+ @is_terminal = false
28
+ @children = {}
29
+
30
+ unless word.nil? or word.empty?
31
+ letter = word.slice!(0)
32
+ @letter = letter.to_sym unless letter.nil?
33
+ @is_terminal = word.empty?
34
+ add_branch_from(word)
35
+ end
36
+ end
37
+
38
+ # Flag for terminal nodes.
39
+ # @return [Boolean] `true` for terminal nodes, `false` otherwise.
40
+ def terminal?
41
+ @is_terminal
42
+ end
43
+
44
+ # String representation of the current node, if it is a terminal node.
45
+ # @return [String] the string representation of the current node.
46
+ # @raise [InvalidOperation] if the node has a letter and is not terminal.
47
+ def as_word
48
+ raise InvalidOperation.new() unless @letter.nil? or terminal?
49
+ get_letter_string
50
+ end
51
+
52
+ protected
53
+ def get_letter_string
54
+ (@parent.nil? ? '' : @parent.get_letter_string) + @letter.to_s
55
+ end
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,58 @@
1
+ module Rambling
2
+ module Trie
3
+ # A representation of the root node in the Trie data structure.
4
+ class Root < Node
5
+ # Creates a new Trie.
6
+ # @param [String, nil] filename the file to load the words from (defaults to nil).
7
+ def initialize(filename = nil)
8
+ super(nil)
9
+
10
+ @filename = filename
11
+ @is_compressed = false
12
+ add_all_nodes if filename
13
+ end
14
+
15
+ # Compresses the existing tree using redundant node elimination. Flags the trie as compressed.
16
+ # @return [Root] same object
17
+ def compress!
18
+ unless compressed?
19
+ compress_own_tree!
20
+ @is_compressed = true
21
+ end
22
+
23
+ self
24
+ end
25
+
26
+ # Flag for compressed tries. Overrides {Compressor#compressed?}.
27
+ # @return [Boolean] `true` for compressed tries, `false` otherwise.
28
+ def compressed?
29
+ @is_compressed = @is_compressed.nil? ? false : @is_compressed
30
+ end
31
+
32
+ # Checks if a path for a word or partial word exists in the trie.
33
+ # @param [String] word the word or partial word to look for in the trie.
34
+ # @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
35
+ def has_branch_for?(word = '')
36
+ chars = word.chars.to_a
37
+ compressed? ? has_compressed_branch_for?(chars) : has_uncompressed_branch_for?(chars)
38
+ end
39
+
40
+ # Checks if a whole word exists in the trie.
41
+ # @param [String] word the word to look for in the trie.
42
+ # @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
43
+ def is_word?(word = '')
44
+ chars = word.chars.to_a
45
+ compressed? ? is_compressed_word?(chars) : is_uncompressed_word?(chars)
46
+ end
47
+
48
+ private
49
+ def add_all_nodes
50
+ File.open(@filename) do |file|
51
+ while word = file.gets
52
+ add_branch_from(word.chomp)
53
+ end
54
+ end
55
+ end
56
+ end
57
+ end
58
+ end
@@ -1,5 +1,4 @@
1
1
  require 'benchmark'
2
- require 'ruby-prof'
3
2
 
4
3
  namespace :performance do
5
4
  def report(name, trie, output)
@@ -53,6 +52,8 @@ namespace :performance do
53
52
 
54
53
  desc 'Generate application profiling reports'
55
54
  task :profile do
55
+ require 'ruby-prof'
56
+
56
57
  puts 'Generating profiling reports...'
57
58
 
58
59
  rambling_trie = Rambling::Trie.new(get_path('assets', 'dictionaries', 'words_with_friends.txt'))
@@ -78,6 +79,31 @@ namespace :performance do
78
79
  puts 'Done'
79
80
  end
80
81
 
82
+ desc 'Generate CPU profiling reports'
83
+ task :cpu_profile do
84
+ require 'perftools'
85
+
86
+ puts 'Generating cpu profiling reports...'
87
+
88
+ rambling_trie = Rambling::Trie.new(get_path('assets', 'dictionaries', 'words_with_friends.txt'))
89
+ words = ['hi', 'help', 'beautiful', 'impressionism', 'anthropological']
90
+ methods = [:has_branch_for?, :is_word?]
91
+ tries = [lambda {rambling_trie.clone}, lambda {rambling_trie.clone.compress!}]
92
+
93
+ methods.each do |method|
94
+ tries.each do |trie_generator|
95
+ trie = trie_generator.call
96
+ result = PerfTools::CpuProfiler.start get_path('reports', "cpu_profile-#{trie.compressed? ? 'compressed' : 'uncompressed'}-#{method.to_s.sub(/\?/, '')}-#{Time.now.to_i}") do
97
+ words.each do |word|
98
+ 200_000.times { trie.send(method, word) }
99
+ end
100
+ end
101
+ end
102
+ end
103
+
104
+ puts 'Done'
105
+ end
106
+
81
107
  desc 'Generate profiling and performance reports'
82
108
  task all: [:profile, :report]
83
109
  end
@@ -1,6 +1,6 @@
1
1
  module Rambling
2
- unless defined?(Rambling::Trie::VERSION)
2
+ module Trie
3
3
  # Current version of the rambling-trie.
4
- Rambling::Trie.const_set(:VERSION, '0.3.4')
4
+ VERSION = '0.4.0'
5
5
  end
6
6
  end
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require 'rambling-trie/version'
4
+
5
+ Gem::Specification.new do |gem|
6
+ gem.authors = ['Rambling Labs']
7
+ gem.email = ['development@ramblinglabs.com']
8
+ gem.description = 'The Rambling Trie is a custom implementation of the Trie data structure with Ruby, which includes compression abilities and is designed to be very fast to traverse.'
9
+ gem.summary = 'A custom implementation of the trie data structure.'
10
+ gem.homepage = 'http://github.com/ramblinglabs/rambling-trie'
11
+ gem.date = Time.now.strftime('%Y-%m-%d')
12
+
13
+ gem.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
14
+ gem.files = `git ls-files`.split("\n")
15
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
16
+ gem.require_paths = ['lib']
17
+
18
+ gem.name = 'rambling-trie'
19
+ gem.version = Rambling::Trie::VERSION
20
+ gem.platform = Gem::Platform::RUBY
21
+
22
+ gem.add_development_dependency 'rspec', '>=2.0.0'
23
+ gem.add_development_dependency 'rake', '>=0.9.2'
24
+ gem.add_development_dependency 'ruby-prof', '>=0.10.8'
25
+ gem.add_development_dependency 'yard', '>=0.7.5'
26
+ gem.add_development_dependency 'redcarpet', '>=2.1.0'
27
+ end
@@ -0,0 +1,23 @@
1
+ are
2
+ beautiful
3
+ course
4
+ false
5
+ hello
6
+ hi
7
+ is
8
+ it
9
+ mine
10
+ my
11
+ no
12
+ of
13
+ ours
14
+ today
15
+ true
16
+ truth
17
+ whatever
18
+ where
19
+ who
20
+ yeah
21
+ you
22
+ your
23
+ yours
@@ -0,0 +1,202 @@
1
+ require 'spec_helper'
2
+
3
+ module Rambling
4
+ module Trie
5
+ describe Node do
6
+ describe '.new' do
7
+ context 'with no letters' do
8
+ let(:node) { Node.new '' }
9
+
10
+ it 'does not have any letter' do
11
+ node.letter.should be_nil
12
+ end
13
+
14
+ it 'includes no children' do
15
+ node.should have(0).children
16
+ end
17
+
18
+ it 'is not a terminal node' do
19
+ node.should_not be_terminal
20
+ end
21
+
22
+ it 'returns empty string as its word' do
23
+ node.as_word.should be_empty
24
+ end
25
+
26
+ it 'is not compressed' do
27
+ node.should_not be_compressed
28
+ end
29
+ end
30
+
31
+ context 'with one letter' do
32
+ let(:node) { Node.new 'a' }
33
+
34
+ it 'makes it the node letter' do
35
+ node.letter.should == :a
36
+ end
37
+
38
+ it 'includes no children' do
39
+ node.should have(0).children
40
+ end
41
+
42
+ it 'is a terminal node' do
43
+ node.should be_terminal
44
+ end
45
+ end
46
+
47
+ context 'with two letters' do
48
+ let(:node) { Node.new 'ba' }
49
+
50
+ it 'takes the first as the node letter' do
51
+ node.letter.should == :b
52
+ end
53
+
54
+ it 'includes one child' do
55
+ node.should have(1).children
56
+ end
57
+
58
+ it 'includes a child with the expected letter' do
59
+ node.children.values.first.letter.should == :a
60
+ end
61
+
62
+ it 'has the expected letter as a key' do
63
+ node.has_key?(:a).should be_true
64
+ end
65
+
66
+ it 'returns the child corresponding to the key' do
67
+ node[:a].should == node.children[:a]
68
+ end
69
+
70
+ it 'does not mark itself as a terminal node' do
71
+ node.should_not be_terminal
72
+ end
73
+
74
+ it 'marks the first child as a terminal node' do
75
+ node[:a].should be_terminal
76
+ end
77
+ end
78
+
79
+ context 'with a large word' do
80
+ let(:node) { Node.new 'spaghetti' }
81
+
82
+ it 'marks the last letter as terminal node' do
83
+ node[:p][:a][:g][:h][:e][:t][:t][:i].should be_terminal
84
+ end
85
+
86
+ it 'does not mark any other letter as terminal node' do
87
+ node[:p][:a][:g][:h][:e][:t][:t].should_not be_terminal
88
+ node[:p][:a][:g][:h][:e][:t].should_not be_terminal
89
+ node[:p][:a][:g][:h][:e].should_not be_terminal
90
+ node[:p][:a][:g][:h].should_not be_terminal
91
+ node[:p][:a][:g].should_not be_terminal
92
+ node[:p][:a].should_not be_terminal
93
+ node[:p].should_not be_terminal
94
+ end
95
+ end
96
+ end
97
+
98
+ describe '#add_branch_from' do
99
+ context 'new word for existing branch' do
100
+ let(:node) { Node.new 'back' }
101
+
102
+ before :each do
103
+ node.add_branch_from 'a'
104
+ end
105
+
106
+ it 'does not increment the child count' do
107
+ node.should have(1).children
108
+ end
109
+
110
+ it 'marks it as terminal' do
111
+ node[:a].should be_terminal
112
+ end
113
+ end
114
+
115
+ context 'old word for existing branch' do
116
+ let(:node) { Node.new 'back' }
117
+
118
+ before :each do
119
+ node.add_branch_from 'ack'
120
+ end
121
+
122
+ it 'does not increment any child count' do
123
+ node.should have(1).children
124
+ node[:a].should have(1).children
125
+ node[:a][:c].should have(1).children
126
+ node[:a][:c][:k].should have(0).children
127
+ end
128
+ end
129
+ end
130
+
131
+ describe '#as_word' do
132
+ context 'for an empty node' do
133
+ let(:node) { Node.new '' }
134
+
135
+ it 'returns nil' do
136
+ node.as_word.should be_empty
137
+ end
138
+ end
139
+
140
+ context 'for one letter' do
141
+ let(:node) { Node.new 'a' }
142
+
143
+ it 'returns the expected one letter word' do
144
+ node.as_word.should == 'a'
145
+ end
146
+ end
147
+
148
+ context 'for a small word' do
149
+ let(:node) { Node.new 'all' }
150
+
151
+ it 'returns the expected small word' do
152
+ node[:l][:l].as_word.should == 'all'
153
+ end
154
+
155
+ it 'raises an error for a non terminal node' do
156
+ lambda { node[:l].as_word }.should raise_error(InvalidOperation)
157
+ end
158
+ end
159
+
160
+ context 'for a long word' do
161
+ let(:node) { Node.new 'beautiful' }
162
+
163
+ it 'returns the expected long word' do
164
+ node[:e][:a][:u][:t][:i][:f][:u][:l].as_word.should == 'beautiful'
165
+ end
166
+ end
167
+
168
+ context 'for a node with nil letter' do
169
+ let(:node) { Node.new nil }
170
+ it 'returns nil' do
171
+ node.as_word.should be_empty
172
+ end
173
+ end
174
+ end
175
+
176
+ describe '#compressed?' do
177
+ let(:root) { double('Root') }
178
+ let(:node) { Node.new '', root }
179
+
180
+ context 'parent is compressed' do
181
+ before :each do
182
+ root.stub(:compressed?).and_return true
183
+ end
184
+
185
+ it 'returns true' do
186
+ node.should be_compressed
187
+ end
188
+ end
189
+
190
+ context 'parent is not compressed' do
191
+ before :each do
192
+ root.stub(:compressed?).and_return false
193
+ end
194
+
195
+ it 'returns false' do
196
+ node.compressed?.should be_false
197
+ end
198
+ end
199
+ end
200
+ end
201
+ end
202
+ end