rambling-trie 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +1 -1
- data/lib/rambling/trie.rb +21 -9
- data/lib/rambling/trie/compressed_node.rb +112 -0
- data/lib/rambling/trie/compression.rb +13 -0
- data/lib/rambling/trie/compressor.rb +30 -31
- data/lib/rambling/trie/{root.rb → container.rb} +41 -38
- data/lib/rambling/trie/enumerable.rb +11 -7
- data/lib/rambling/trie/missing_node.rb +1 -1
- data/lib/rambling/trie/node.rb +25 -22
- data/lib/rambling/trie/plain_text_reader.rb +1 -1
- data/lib/rambling/trie/raw_node.rb +90 -0
- data/lib/rambling/trie/tasks/helpers/path.rb +13 -0
- data/lib/rambling/trie/tasks/helpers/time.rb +7 -0
- data/lib/rambling/trie/tasks/performance.rb +10 -91
- data/lib/rambling/trie/tasks/performance/all.rb +4 -0
- data/lib/rambling/trie/tasks/performance/benchmark.rb +172 -0
- data/lib/rambling/trie/tasks/performance/directory.rb +11 -0
- data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +132 -0
- data/lib/rambling/trie/tasks/performance/profile/memory.rb +116 -0
- data/lib/rambling/trie/version.rb +1 -1
- data/rambling-trie.gemspec +6 -4
- data/spec/integration/rambling/trie_spec.rb +63 -9
- data/spec/lib/rambling/trie/compressed_node_spec.rb +35 -0
- data/spec/lib/rambling/trie/compressor_spec.rb +31 -0
- data/spec/lib/rambling/trie/container_spec.rb +470 -0
- data/spec/lib/rambling/trie/enumerable_spec.rb +2 -2
- data/spec/lib/rambling/trie/inspector_spec.rb +21 -14
- data/spec/lib/rambling/trie/node_spec.rb +72 -209
- data/spec/lib/rambling/trie/raw_node_spec.rb +377 -0
- data/spec/lib/rambling/trie_spec.rb +46 -25
- metadata +57 -16
- data/lib/rambling/trie/branches.rb +0 -149
- data/spec/lib/rambling/trie/branches_spec.rb +0 -52
- data/spec/lib/rambling/trie/root_spec.rb +0 -376
@@ -6,7 +6,7 @@ module Rambling
|
|
6
6
|
# @param [String] filepath the full path of the file to load the words from.
|
7
7
|
# @yield [String] Each line read from the file.
|
8
8
|
def each_word filepath
  each_line(filepath) do |line|
    # `chomp!` strips the line terminator in place but returns nil when there
    # is nothing to strip (typically the last line of a file), so yield the
    # line itself rather than `chomp!`'s return value.
    line.chomp!
    yield line
  end
end
|
11
11
|
|
12
12
|
private
|
@@ -0,0 +1,90 @@
|
|
1
|
+
module Rambling
  module Trie
    # Node of a trie that has not been compressed. Children are looked up in
    # `children_tree` by the symbol of a single letter.
    class RawNode < Rambling::Trie::Node
      # Inserts the given letters below this node, one level per letter.
      # @param [String] word the letters still to be inserted.
      # @return [Rambling::Trie::RawNode] the direct child the word was added
      #   under when the word is not empty; otherwise whatever `terminal!`
      #   returns.
      # @note Letters are removed from `word` with `slice!` as they are
      #   consumed, so the string passed in ends up empty.
      def add word
        return terminal! if word.empty?

        add_to_children_tree word
      end

      # Tells whether the given characters form a path starting at this node.
      # @param [Array] chars the characters to follow; consumed with `slice!`.
      # @return [Boolean] `true` when every character leads to a child node,
      #   `false` as soon as one does not.
      def partial_word? chars = []
        return true if chars.empty?

        child = children_tree[chars.slice!(0).to_sym]
        child ? child.partial_word?(chars) : false
      end

      # Tells whether the given characters form a path that ends on a
      # terminal node.
      # @param [Array] chars the characters to follow; consumed with `slice!`.
      # @return [Boolean] the `terminal?` value of the node reached, or
      #   `false` when the path does not exist.
      def word? chars = []
        return terminal? if chars.empty?

        child = children_tree[chars.slice!(0).to_sym]
        child ? child.word?(chars) : false
      end

      # Finds the node from which every word starting with the given
      # characters hangs.
      # @param [Array] chars the characters to follow; consumed with `slice!`.
      # @return [Rambling::Trie::RawNode, Rambling::Trie::MissingNode] the
      #   node reached by following the characters, or a missing node when
      #   the path does not exist.
      def scan chars
        closest_node chars
      end

      # A raw node is never compressed.
      # @return [Boolean] always `false`.
      def compressed?
        false
      end

      protected

      # Walks down the children one character at a time.
      # Protected so that a node can call it on its own children.
      def closest_node chars
        return self if chars.empty?

        child = children_tree[chars.slice!(0).to_sym]
        return Rambling::Trie::MissingNode.new unless child

        child.closest_node chars
      end

      private

      # Takes the first letter off the word, finds or creates the child for
      # it, and hands the rest of the word to that child.
      def add_to_children_tree word
        letter = word.slice!(0).to_sym
        (children_tree[letter] || new_node(letter)).tap { |child| child.add word }
      end

      # Builds a child node for the letter and registers it in `children_tree`.
      def new_node letter
        Rambling::Trie::RawNode.new(self).tap do |node|
          node.letter = letter
          children_tree[letter] = node
        end
      end
    end
  end
end
|
@@ -1,92 +1,11 @@
|
|
1
|
+
require 'fileutils'
|
1
2
|
require 'benchmark'
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
words.each do |word|
|
12
|
-
output.print "#{word} - #{trie.send method, word}".ljust 30
|
13
|
-
output.puts Benchmark.measure { 200_000.times { trie.send method, word }}
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def generate_report filename = nil
|
19
|
-
output = filename.nil? ? $stdout : File.open(filename, 'a+')
|
20
|
-
|
21
|
-
output.puts "\nReport for rambling-trie version #{Rambling::Trie::VERSION}"
|
22
|
-
|
23
|
-
trie = nil
|
24
|
-
measure = Benchmark.measure { trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt') }
|
25
|
-
|
26
|
-
if ENV['profile_creation']
|
27
|
-
output.puts '==> Creation'
|
28
|
-
output.print 'Rambling::Trie.create'.ljust 30
|
29
|
-
output.puts measure
|
30
|
-
end
|
31
|
-
|
32
|
-
report 'Uncompressed', trie, output
|
33
|
-
|
34
|
-
return unless trie.respond_to? :compress!
|
35
|
-
|
36
|
-
trie.compress!
|
37
|
-
report 'Compressed', trie, output
|
38
|
-
|
39
|
-
output.close
|
40
|
-
end
|
41
|
-
|
42
|
-
def path *filename
|
43
|
-
File.join File.dirname(__FILE__), '..', '..', '..', '..', *filename
|
44
|
-
end
|
45
|
-
|
46
|
-
desc 'Generate performance report'
|
47
|
-
task :report do
|
48
|
-
puts 'Generating performance report...'
|
49
|
-
generate_report
|
50
|
-
end
|
51
|
-
|
52
|
-
namespace :report do
|
53
|
-
desc 'Generate performance report and append result to reports/performance'
|
54
|
-
task :save do
|
55
|
-
puts 'Generating performance report...'
|
56
|
-
generate_report path('reports', 'performance')
|
57
|
-
puts 'Report has been saved to reports/performance'
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
desc 'Generate application profiling reports'
|
62
|
-
task :profile do
|
63
|
-
require 'ruby-prof'
|
64
|
-
|
65
|
-
puts 'Generating profiling reports...'
|
66
|
-
|
67
|
-
rambling_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
|
68
|
-
words = %w(hi help beautiful impressionism anthropological)
|
69
|
-
methods = [:word?, :partial_word?]
|
70
|
-
tries = [lambda {rambling_trie.clone}, lambda {rambling_trie.clone.compress!}]
|
71
|
-
|
72
|
-
methods.each do |method|
|
73
|
-
tries.each do |trie_generator|
|
74
|
-
trie = trie_generator.call
|
75
|
-
result = RubyProf.profile do
|
76
|
-
words.each do |word|
|
77
|
-
200_000.times { trie.send method, word }
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
File.open path('reports', "profile-#{trie.compressed? ? 'compressed' : 'uncompressed'}-#{method.to_s.sub(/\?/, '')}-#{Time.now.to_i}"), 'w' do |file|
|
82
|
-
RubyProf::CallTreePrinter.new(result).print file
|
83
|
-
end
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
puts 'Done'
|
88
|
-
end
|
89
|
-
|
90
|
-
desc 'Generate profiling and performance reports'
|
91
|
-
task all: [:profile, :report]
|
92
|
-
end
|
3
|
+
require 'ruby-prof'
|
4
|
+
require 'memory_profiler'
|
5
|
+
require 'benchmark/ips'
|
6
|
+
require_relative 'helpers/path'
|
7
|
+
require_relative 'performance/directory'
|
8
|
+
require_relative 'performance/benchmark'
|
9
|
+
require_relative 'performance/profile/call_tree'
|
10
|
+
require_relative 'performance/profile/memory'
|
11
|
+
require_relative 'performance/all'
|
@@ -0,0 +1,172 @@
|
|
1
|
+
require_relative '../helpers/path'
|
2
|
+
|
3
|
+
namespace :performance do
|
4
|
+
include Helpers::Path
|
5
|
+
|
6
|
+
# Runs a block repeatedly under `Benchmark.measure` and writes one report
# line per parameter (parameter, last block result, timing) to an output.
class BenchmarkMeasurement
  # @param [#print, #puts] output the stream the report is written to.
  def initialize output
    @output = output
  end

  # Measures the given block once for every parameter.
  # @param [Integer] times how many times the block runs per parameter.
  # @param [Object, Array, nil] params a single parameter or a list of
  #   them; when none is given the block runs with `nil`.
  # @yield [Object, nil] the parameter currently being measured.
  def perform times, params = nil
    params = Array params
    # Check for emptiness instead of `any?` (which is false for lists that
    # only hold nil/false) and build a new list instead of appending, since
    # `Array(params)` returns the caller's own array when given one.
    params = [nil] if params.empty?

    params.each do |param|
      output.print param.to_s.ljust(20)
      measure(times, param) { |current| yield current }
    end
  end

  # Prints the report header with the gem version being benchmarked.
  def banner
    output.puts "\nBenchmark for rambling-trie version #{Rambling::Trie::VERSION}"
  end

  private

  attr_reader :output

  # Times `times` calls of the block and prints the result of the last
  # call followed by the measurement.
  def measure times, param = nil
    result = nil

    timing = Benchmark.measure do
      times.times { result = yield param }
    end

    output.print result.to_s.ljust(10)
    output.puts timing
  end
end
|
45
|
+
|
46
|
+
# Yields the stream a report should be written to.
# @param [String, nil] filename path of the file to append to; when nil the
#   report goes to standard output.
# @yield [IO] the stream to write to.
# @return [nil]
def with_file filename = nil
  if filename.nil?
    # Use the existing $stdout and leave it open: closing a new IO wrapper
    # around file descriptor 1 would close the process's standard output
    # for everything that runs afterwards.
    yield $stdout
  else
    # The block form closes the file even when the block raises.
    File.open(filename, 'a+') { |file| yield file }
  end

  nil
end
|
53
|
+
|
54
|
+
# Benchmarks `word?` and `partial_word?` on the words_with_friends
# dictionary, first on the trie as created and then on a compressed clone.
# @param [String, nil] filename passed on to `with_file` to pick the output.
def generate_lookups_benchmark filename = nil
  with_file filename do |output|
    measure = BenchmarkMeasurement.new output
    measure.banner

    raw_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')

    [ raw_trie, raw_trie.clone.compress! ].each do |candidate|
      state = candidate.compressed? ? 'Compressed' : 'Uncompressed'
      output.puts "==> #{state}"

      # A fresh word list is built for every trie, as the lookups receive
      # these very strings.
      words = %w(hi help beautiful impressionism anthropological)

      output.puts '`word?`'
      measure.perform(200_000, words) { |word| candidate.word? word }

      output.puts '`partial_word?`'
      measure.perform(200_000, words) { |word| candidate.partial_word? word }
    end
  end
end
|
76
|
+
|
77
|
+
# Benchmarks `scan` on the words_with_friends dictionary, first on the trie
# as created and then on a compressed clone.
# @param [String, nil] filename passed on to `with_file` to pick the output.
def generate_scans_benchmark filename = nil
  with_file filename do |output|
    measure = BenchmarkMeasurement.new output
    measure.banner

    # Prefix to scan for => number of times the scan is repeated.
    repetitions = {
      hi: 1_000,
      help: 100_000,
      beautiful: 100_000,
      impressionism: 200_000,
      anthropological: 200_000,
    }
    raw_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')

    [ raw_trie, raw_trie.clone.compress! ].each do |candidate|
      state = candidate.compressed? ? 'Compressed' : 'Uncompressed'
      output.puts "==> #{state}"
      output.puts '`scan`'

      repetitions.each do |prefix, times|
        measure.perform(times, prefix.to_s) { |word| candidate.scan(word).size }
      end
    end
  end
end
|
102
|
+
|
103
|
+
# Rake tasks that print (or save) the benchmark reports.
namespace :benchmark do
  desc 'Generate lookups performance benchmark report'
  task(:lookups) { generate_lookups_benchmark }

  desc 'Generate scans performance benchmark report'
  task(:scans) { generate_scans_benchmark }

  namespace :lookups do
    desc 'Generate performance benchmark report store results in reports/'
    task save: ['performance:directory'] do
      version = Rambling::Trie::VERSION

      puts 'Generating performance benchmark report for lookups...'
      generate_lookups_benchmark path('reports', version, 'benchmark')
      puts "Benchmarks have been saved to reports/#{version}/benchmark"
    end
  end

  # Times building the trie from the dictionary file, five times over.
  task :creation do
    with_file do |output|
      measure = BenchmarkMeasurement.new output
      measure.banner

      output.puts '==> Creation'
      output.puts '`Rambling::Trie.create`'
      measure.perform(5) do
        Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
      end
    end
  end

  # Times compressing a clone of the trie, five times over.
  task :compression do
    with_file do |output|
      measure = BenchmarkMeasurement.new output
      measure.banner

      output.puts '==> Compression'
      output.puts '`compress!`'

      raw_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
      measure.perform(5) { raw_trie.clone.compress! }
    end
  end

  task all: %w(creation compression lookups scans).map { |name| "performance:benchmark:#{name}" }

  # Compares `Hash#has_key?` against `Hash#[]` for a presence check.
  task :compare do
    Benchmark.ips do |bench|
      lookup = { 'thing' => 'gniht' }

      bench.report('has_key?') { lookup.has_key? 'thing' }
      bench.report('[]') { !!lookup['thing'] }
    end
  end
end
|
172
|
+
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
require_relative '../../helpers/path'
|
2
|
+
require_relative '../../helpers/time'
|
3
|
+
|
4
|
+
namespace :performance do
|
5
|
+
namespace :profile do
|
6
|
+
include Helpers::Path
|
7
|
+
include Helpers::Time
|
8
|
+
|
9
|
+
# Profiles the given block with RubyProf and prints the call tree report.
# @param [Integer] times how many times the block runs per parameter.
# @param [Object, Array, nil] params a single parameter or a list of them;
#   when none is given the block runs with `nil`.
# @param [String] path passed to the call tree printer as its `path` option.
# @yield [Object, nil] the parameter currently being profiled.
def profile times, params, path
  params = Array params
  # Check for emptiness instead of `any?` (which is false for lists that
  # only hold nil/false) and build a new list instead of appending, since
  # `Array(params)` returns the caller's own array when given one.
  params = [nil] if params.empty?

  result = RubyProf.profile merge_fibers: true do
    params.each do |param|
      times.times { yield param }
    end
  end

  printer = RubyProf::CallTreePrinter.new result
  printer.print path: path
end
|
24
|
+
|
25
|
+
# Writes call tree profiles of `word?` and `partial_word?` for the trie as
# created and for a compressed clone. Each profile gets its own directory
# under reports/<version>/call-tree/<time>/.
def generate_lookups_call_tree
  puts 'Generating call tree profiling reports for lookups...'

  puts "\nCall Tree profile for rambling-trie version #{Rambling::Trie::VERSION}"
  raw_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
  tries = [ raw_trie, raw_trie.clone.compress! ]

  words = %w(hi help beautiful impressionism anthropological)

  tries.each do |trie|
    state = trie.compressed? ? 'compressed' : 'uncompressed'

    # The directory is kept in a local that is not called `path`, so the
    # `path` helper stays callable without ambiguity.
    word_dir = path('reports', Rambling::Trie::VERSION, 'call-tree', time, "profile-#{state}-word")
    FileUtils.mkdir_p word_dir

    # The block must declare `word`: `profile` yields the current word, and
    # without the parameter the lookup referenced an undefined name.
    profile(200_000, words, word_dir) { |word| trie.word? word }

    partial_dir = path('reports', Rambling::Trie::VERSION, 'call-tree', time, "profile-#{state}-partial-word")
    FileUtils.mkdir_p partial_dir

    profile(200_000, words, partial_dir) { |word| trie.partial_word? word }
  end

  puts 'Done'
end
|
54
|
+
|
55
|
+
# Writes call tree profiles of `scan` for the trie as created and for a
# compressed clone. Each trie gets one directory under
# reports/<version>/call-tree/<time>/.
def generate_scans_call_tree
  puts 'Generating call tree profiling reports for scans...'

  puts "\nCall Tree profile for rambling-trie version #{Rambling::Trie::VERSION}"
  raw_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
  tries = [ raw_trie, raw_trie.clone.compress! ]

  # Prefix to scan for => number of times the scan is repeated.
  repetitions = {
    hi: 1_000,
    help: 100_000,
    beautiful: 100_000,
    impressionism: 200_000,
    anthropological: 200_000,
  }

  tries.each do |candidate|
    state = candidate.compressed? ? 'compressed' : 'uncompressed'
    report_dir = path('reports', Rambling::Trie::VERSION, 'call-tree', time, "profile-#{state}-scan")
    FileUtils.mkdir_p report_dir

    repetitions.each do |prefix, times|
      profile(times, prefix.to_s, report_dir) { |word| candidate.scan(word).size }
    end
  end

  puts 'Done'
end
|
84
|
+
|
85
|
+
# Rake tasks that write the call tree profiling reports.
namespace :call_tree do
  desc 'Generate call tree profiling reports for creation'
  task creation: ['performance:directory'] do
    puts 'Generating call tree profiling reports for creation...'
    puts "\nCall Tree profile for rambling-trie version #{Rambling::Trie::VERSION}"
    report_dir = path('reports', Rambling::Trie::VERSION, 'call-tree', time, 'profile-new-trie')
    FileUtils.mkdir_p report_dir

    # Profiles building the trie from the dictionary file, five times over.
    profile(5, nil, report_dir) do
      Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
    end
  end

  desc 'Generate call tree profiling reports for compression'
  task compression: ['performance:directory'] do
    puts 'Generating call tree profiling reports for compression...'
    puts "\nCall Tree profile for rambling-trie version #{Rambling::Trie::VERSION}"

    report_dir = path('reports', Rambling::Trie::VERSION, 'call-tree', time, 'profile-compressed-trie')
    FileUtils.mkdir_p report_dir

    # The trie is built outside the profiled block; only cloning and
    # compressing are profiled.
    raw_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
    profile(5, nil, report_dir) { raw_trie.clone.compress! }
  end

  desc 'Generate call tree profiling reports for lookups'
  task lookups: ['performance:directory'] do
    generate_lookups_call_tree
  end

  desc 'Generate call tree profiling reports for scans'
  task scans: ['performance:directory'] do
    generate_scans_call_tree
  end

  task all: %w(creation compression lookups scans).map { |name| "performance:profile:call_tree:#{name}" }
end
|
131
|
+
end
|
132
|
+
end
|