rambling-trie 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rambling/trie/compressed_node.rb +30 -45
- data/lib/rambling/trie/compressor.rb +17 -19
- data/lib/rambling/trie/container.rb +2 -2
- data/lib/rambling/trie/node.rb +3 -3
- data/lib/rambling/trie/tasks/helpers/path.rb +4 -0
- data/lib/rambling/trie/tasks/performance/all.rb +15 -2
- data/lib/rambling/trie/tasks/performance/benchmark.rb +125 -73
- data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +20 -14
- data/lib/rambling/trie/tasks/performance/profile/memory.rb +17 -16
- data/lib/rambling/trie/version.rb +1 -1
- data/reports/0.0.1/benchmark +15 -0
- data/reports/0.0.2/benchmark +15 -0
- data/reports/0.1.0/benchmark +15 -0
- data/reports/0.2.0/benchmark +29 -0
- data/reports/0.3.0/benchmark +29 -0
- data/reports/0.3.2/benchmark +29 -0
- data/reports/0.3.3/benchmark +29 -0
- data/reports/0.3.4/benchmark +28 -0
- data/reports/0.4.0/benchmark +28 -0
- data/reports/0.4.1/benchmark +29 -0
- data/reports/0.5.0/benchmark +28 -0
- data/reports/0.5.1/benchmark +28 -0
- data/reports/0.5.2/benchmark +28 -0
- data/reports/0.6.0/benchmark +29 -0
- data/reports/0.6.1/benchmark +28 -0
- data/reports/0.7.0/benchmark +28 -0
- data/reports/0.8.0/benchmark +54 -0
- data/reports/0.8.1/benchmark +54 -0
- data/reports/0.9.0/benchmark +54 -0
- data/reports/0.9.1/benchmark +58 -0
- data/spec/integration/rambling/trie_spec.rb +1 -1
- metadata +22 -3
- data/reports/performance +0 -498
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b23cfe0d2b236a7970f4666f0df6a7cc4226d22
|
4
|
+
data.tar.gz: 5147da530a25b386fdafd9cc59ccb1c35b2a2efc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 71405a6dd24e710629d9e8cb8d307b2faa5664f029bcdedd749063d106e6706bce5fb07fb6ecbf776098668d0b5ab3826e3f3d6d9f75e96e516c2cbce07de2ab
|
7
|
+
data.tar.gz: 6d5a292f676c979dbf2e3b7cc25d8a932cf087c14325df539b2ccafd872083d9ca1140224eb4edbc93c4fb0e920bd7f75374ba0bbd260d911fdf057fdb5cdd0d
|
@@ -22,18 +22,15 @@ module Rambling
|
|
22
22
|
# @return [Boolean] `true` if the characters are found and form a word,
|
23
23
|
# `false` otherwise.
|
24
24
|
def word? chars
|
25
|
-
|
26
|
-
terminal?
|
27
|
-
else
|
28
|
-
has_word? chars
|
29
|
-
end
|
25
|
+
chars.empty? ? terminal? : has_word?(chars)
|
30
26
|
end
|
31
27
|
|
32
28
|
# Returns all words that start with the specified characters.
|
33
29
|
# @param [Array] chars the characters to look for in the trie.
|
34
30
|
# @return [Array] all the words contained in the trie that start with the specified characters.
|
35
31
|
def scan chars
|
36
|
-
closest_node(chars)
|
32
|
+
node = chars.empty? ? self : closest_node(chars)
|
33
|
+
node.to_a
|
37
34
|
end
|
38
35
|
|
39
36
|
# Always return `true` for a raw (compressed) node.
|
@@ -42,70 +39,58 @@ module Rambling
|
|
42
39
|
true
|
43
40
|
end
|
44
41
|
|
45
|
-
|
42
|
+
private
|
46
43
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
else
|
51
|
-
current_length = 0
|
52
|
-
current_key, current_key_string = current_key chars.slice!(0)
|
44
|
+
def has_partial_word? chars
|
45
|
+
recursive_get(:partial_word?, chars) || false
|
46
|
+
end
|
53
47
|
|
54
|
-
|
55
|
-
|
48
|
+
def has_word? chars
|
49
|
+
current_key = nil
|
56
50
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
51
|
+
while !chars.empty?
|
52
|
+
if current_key
|
53
|
+
current_key << chars.slice!(0)
|
54
|
+
else
|
55
|
+
current_key = chars.slice!(0)
|
56
|
+
end
|
61
57
|
|
62
|
-
|
58
|
+
child = children_tree[current_key.to_sym]
|
59
|
+
return child.word? chars if child
|
63
60
|
end
|
61
|
+
|
62
|
+
false
|
64
63
|
end
|
65
64
|
|
66
|
-
|
65
|
+
def closest_node chars
|
66
|
+
recursive_get(:scan, chars) || Rambling::Trie::MissingNode.new
|
67
|
+
end
|
67
68
|
|
68
|
-
def
|
69
|
+
def recursive_get method, chars
|
69
70
|
current_length = 0
|
70
|
-
current_key
|
71
|
+
current_key = current_key chars.slice!(0)
|
71
72
|
|
72
73
|
begin
|
73
74
|
current_length += 1
|
74
75
|
|
75
|
-
if
|
76
|
-
return children_tree[current_key].
|
76
|
+
if (current_key && current_key.length == current_length) || chars.empty?
|
77
|
+
return children_tree[current_key.to_sym].send method, chars
|
77
78
|
end
|
78
|
-
end while
|
79
|
-
|
80
|
-
false
|
81
|
-
end
|
82
|
-
|
83
|
-
def has_word? chars
|
84
|
-
current_key_string = ''
|
85
|
-
|
86
|
-
while !chars.empty?
|
87
|
-
current_key_string << chars.slice!(0)
|
88
|
-
current_key = current_key_string.to_sym
|
89
|
-
child = children_tree[current_key]
|
90
|
-
return child.word? chars if child
|
91
|
-
end
|
92
|
-
|
93
|
-
false
|
79
|
+
end while current_key && current_key[current_length] == chars.slice!(0)
|
94
80
|
end
|
95
81
|
|
96
82
|
def current_key letter
|
97
|
-
|
83
|
+
current_key = nil
|
98
84
|
|
99
85
|
children_tree.keys.each do |key|
|
100
86
|
key_string = key.to_s
|
101
87
|
if key_string.start_with? letter
|
102
|
-
current_key =
|
103
|
-
current_key_string = key_string
|
88
|
+
current_key = key_string
|
104
89
|
break
|
105
90
|
end
|
106
91
|
end
|
107
92
|
|
108
|
-
|
93
|
+
current_key
|
109
94
|
end
|
110
95
|
end
|
111
96
|
end
|
@@ -5,39 +5,37 @@ module Rambling
|
|
5
5
|
# Compresses a node from a Trie data structure.
|
6
6
|
# @param [RawNode] node the node to compress
|
7
7
|
# @return [CompressedNode] node the compressed version of the node
|
8
|
-
def compress node
|
8
|
+
def compress node
|
9
9
|
if node.compressable?
|
10
|
-
|
10
|
+
merge_with_child_and_compress node
|
11
11
|
else
|
12
|
-
copy_node_and_compress_children node
|
12
|
+
copy_node_and_compress_children node
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
16
|
private
|
17
17
|
|
18
|
-
def
|
19
|
-
|
18
|
+
def merge_with_child_and_compress node
|
19
|
+
child = node.children.first
|
20
20
|
|
21
|
-
new_node = Rambling::Trie::CompressedNode.new parent
|
22
|
-
new_node.letter = node.letter.to_s <<
|
23
|
-
new_node.terminal! if
|
24
|
-
new_node.children_tree =
|
21
|
+
new_node = Rambling::Trie::CompressedNode.new node.parent
|
22
|
+
new_node.letter = node.letter.to_s << child.letter.to_s
|
23
|
+
new_node.terminal! if child.terminal?
|
24
|
+
new_node.children_tree = child.children_tree
|
25
25
|
|
26
|
-
new_node
|
27
|
-
child.parent = new_node
|
28
|
-
end
|
29
|
-
|
30
|
-
new_node
|
26
|
+
compress new_node
|
31
27
|
end
|
32
28
|
|
33
|
-
def copy_node_and_compress_children node
|
34
|
-
new_node = Rambling::Trie::CompressedNode.new parent
|
35
|
-
|
29
|
+
def copy_node_and_compress_children node
|
30
|
+
new_node = Rambling::Trie::CompressedNode.new node.parent
|
36
31
|
new_node.letter = node.letter
|
37
32
|
new_node.terminal! if node.terminal?
|
38
33
|
|
39
|
-
node.children.
|
40
|
-
compress child
|
34
|
+
node.children.each do |child|
|
35
|
+
compressed_child = compress child
|
36
|
+
|
37
|
+
compressed_child.parent = new_node
|
38
|
+
new_node[compressed_child.letter] = compressed_child
|
41
39
|
end
|
42
40
|
|
43
41
|
new_node
|
@@ -49,14 +49,14 @@ module Rambling
|
|
49
49
|
# @param [String] word the word or partial word to look for in the trie.
|
50
50
|
# @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
|
51
51
|
def partial_word? word = ''
|
52
|
-
root.partial_word? word.chars
|
52
|
+
root.partial_word? word.chars
|
53
53
|
end
|
54
54
|
|
55
55
|
# Checks if a whole word exists in the trie.
|
56
56
|
# @param [String] word the word to look for in the trie.
|
57
57
|
# @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
|
58
58
|
def word? word = ''
|
59
|
-
root.word? word.chars
|
59
|
+
root.word? word.chars
|
60
60
|
end
|
61
61
|
|
62
62
|
# Returns all words that start with the specified characters.
|
data/lib/rambling/trie/node.rb
CHANGED
@@ -1,4 +1,17 @@
|
|
1
1
|
namespace :performance do
|
2
|
-
desc 'Generate profiling and performance reports'
|
3
|
-
task all: [
|
2
|
+
desc 'Generate all profiling and performance reports'
|
3
|
+
task all: [
|
4
|
+
'benchmark:all',
|
5
|
+
'profile:call_tree:all',
|
6
|
+
'profile:memory:all',
|
7
|
+
]
|
8
|
+
|
9
|
+
namespace :all do
|
10
|
+
desc 'Generate and store all profiling and performance reports'
|
11
|
+
task save: [
|
12
|
+
'benchmark:all:save',
|
13
|
+
'profile:call_tree:all',
|
14
|
+
'profile:memory:all',
|
15
|
+
]
|
16
|
+
end
|
4
17
|
end
|
@@ -3,17 +3,38 @@ require_relative '../helpers/path'
|
|
3
3
|
namespace :performance do
|
4
4
|
include Helpers::Path
|
5
5
|
|
6
|
+
class BenchmarkReport
|
7
|
+
attr_reader :output
|
8
|
+
|
9
|
+
def initialize output
|
10
|
+
@output = output
|
11
|
+
end
|
12
|
+
|
13
|
+
def finish
|
14
|
+
output.close
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
6
18
|
class BenchmarkMeasurement
|
7
19
|
def initialize output
|
8
20
|
@output = output
|
9
21
|
end
|
10
22
|
|
23
|
+
def param_to_s param
|
24
|
+
case param
|
25
|
+
when Rambling::Trie::Container
|
26
|
+
''
|
27
|
+
else
|
28
|
+
param.to_s
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
11
32
|
def perform times, params = nil
|
12
33
|
params = Array params
|
13
34
|
params << nil unless params.any?
|
14
35
|
|
15
36
|
params.each do |param|
|
16
|
-
output.print
|
37
|
+
output.print param_to_s(param).ljust 20
|
17
38
|
|
18
39
|
measure times, param do |param|
|
19
40
|
yield param
|
@@ -43,64 +64,88 @@ namespace :performance do
|
|
43
64
|
end
|
44
65
|
end
|
45
66
|
|
46
|
-
def
|
47
|
-
|
67
|
+
def benchmark_report= benchmark_report
|
68
|
+
@benchmark_report = benchmark_report
|
69
|
+
end
|
48
70
|
|
49
|
-
|
71
|
+
def benchmark_report
|
72
|
+
Rake::Task['performance:benchmark:output:stdout'].invoke unless @benchmark_report
|
50
73
|
|
51
|
-
|
74
|
+
@benchmark_report
|
52
75
|
end
|
53
76
|
|
54
|
-
def
|
55
|
-
|
56
|
-
|
57
|
-
measure.banner
|
58
|
-
|
59
|
-
trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
|
60
|
-
[ trie, trie.clone.compress! ].each do |trie|
|
61
|
-
output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
|
62
|
-
words = %w(hi help beautiful impressionism anthropological)
|
77
|
+
def output
|
78
|
+
benchmark_report.output
|
79
|
+
end
|
63
80
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
81
|
+
def generate_lookups_benchmark filename = nil
|
82
|
+
measure = BenchmarkMeasurement.new output
|
83
|
+
measure.banner
|
84
|
+
|
85
|
+
trie = Rambling::Trie.create dictionary
|
86
|
+
compressed_trie = Rambling::Trie.create(dictionary).compress!
|
87
|
+
[ trie, compressed_trie ].each do |trie|
|
88
|
+
output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
|
89
|
+
words = %w(hi help beautiful impressionism anthropological)
|
90
|
+
|
91
|
+
output.puts '`word?`'
|
92
|
+
measure.perform 200_000, words do |word|
|
93
|
+
trie.word? word
|
94
|
+
end
|
68
95
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
end
|
96
|
+
output.puts '`partial_word?`'
|
97
|
+
measure.perform 200_000, words do |word|
|
98
|
+
trie.partial_word? word
|
73
99
|
end
|
74
100
|
end
|
75
101
|
end
|
76
102
|
|
77
103
|
def generate_scans_benchmark filename = nil
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
104
|
+
measure = BenchmarkMeasurement.new output
|
105
|
+
measure.banner
|
106
|
+
|
107
|
+
words = {
|
108
|
+
hi: 1_000,
|
109
|
+
help: 100_000,
|
110
|
+
beautiful: 100_000,
|
111
|
+
impressionism: 200_000,
|
112
|
+
anthropological: 200_000,
|
113
|
+
}
|
114
|
+
|
115
|
+
trie = Rambling::Trie.create dictionary
|
116
|
+
compressed_trie = Rambling::Trie.create(dictionary).compress!
|
117
|
+
|
118
|
+
[ trie, compressed_trie ].each do |trie|
|
119
|
+
output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
|
120
|
+
output.puts "`scan`"
|
121
|
+
words.each do |word, times|
|
122
|
+
measure.perform times, word.to_s do |word|
|
123
|
+
trie.scan(word).size
|
98
124
|
end
|
99
125
|
end
|
100
126
|
end
|
101
127
|
end
|
102
128
|
|
103
129
|
namespace :benchmark do
|
130
|
+
namespace :output do
|
131
|
+
desc 'Set task reporting output to stdout'
|
132
|
+
task :stdout do
|
133
|
+
self.benchmark_report = BenchmarkReport.new IO.new(1)
|
134
|
+
end
|
135
|
+
|
136
|
+
desc 'Set task reporting output to file'
|
137
|
+
task file: ['performance:directory'] do
|
138
|
+
path = path 'reports', Rambling::Trie::VERSION, 'benchmark'
|
139
|
+
file = File.open path, 'a+'
|
140
|
+
self.benchmark_report = BenchmarkReport.new file
|
141
|
+
end
|
142
|
+
|
143
|
+
desc 'Close output stream'
|
144
|
+
task :close do
|
145
|
+
benchmark_report.finish unless benchmark_report.nil?
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
104
149
|
desc 'Generate lookups performance benchmark report'
|
105
150
|
task :lookups do
|
106
151
|
generate_lookups_benchmark
|
@@ -111,50 +156,53 @@ namespace :performance do
|
|
111
156
|
generate_scans_benchmark
|
112
157
|
end
|
113
158
|
|
114
|
-
|
115
|
-
desc 'Generate performance benchmark report store results in reports/'
|
116
|
-
task save: ['performance:directory'] do
|
117
|
-
puts 'Generating performance benchmark report for lookups...'
|
118
|
-
generate_lookups_benchmark path('reports', Rambling::Trie::VERSION, 'benchmark')
|
119
|
-
puts "Benchmarks have been saved to reports/#{Rambling::Trie::VERSION}/benchmark"
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
159
|
+
desc 'Generate creation performance benchmark report'
|
123
160
|
task :creation do
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
end
|
161
|
+
measure = BenchmarkMeasurement.new output
|
162
|
+
measure.banner
|
163
|
+
|
164
|
+
output.puts '==> Creation'
|
165
|
+
output.puts '`Rambling::Trie.create`'
|
166
|
+
measure.perform 5 do
|
167
|
+
trie = Rambling::Trie.create dictionary
|
168
|
+
nil
|
133
169
|
end
|
134
170
|
end
|
135
171
|
|
172
|
+
desc 'Generate compression performance benchmark report'
|
136
173
|
task :compression do
|
137
|
-
|
138
|
-
|
139
|
-
measure.banner
|
174
|
+
measure = BenchmarkMeasurement.new output
|
175
|
+
measure.banner
|
140
176
|
|
141
|
-
|
142
|
-
|
177
|
+
output.puts '==> Compression'
|
178
|
+
output.puts '`compress!`'
|
143
179
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
180
|
+
tries = []
|
181
|
+
5.times { tries << Rambling::Trie.create(dictionary) }
|
182
|
+
|
183
|
+
measure.perform 5, tries do |trie|
|
184
|
+
trie.compress!
|
185
|
+
nil
|
148
186
|
end
|
149
187
|
end
|
150
188
|
|
189
|
+
desc 'Generate all performance benchmark reports'
|
151
190
|
task all: [
|
152
|
-
'
|
153
|
-
'
|
154
|
-
'
|
155
|
-
'
|
191
|
+
'creation',
|
192
|
+
'compression',
|
193
|
+
'lookups',
|
194
|
+
'scans',
|
156
195
|
]
|
157
196
|
|
197
|
+
namespace :all do
|
198
|
+
desc "Generate and store performance benchmark report in reports/#{Rambling::Trie::VERSION}"
|
199
|
+
task save: [
|
200
|
+
'output:file',
|
201
|
+
'all'
|
202
|
+
]
|
203
|
+
end
|
204
|
+
|
205
|
+
desc 'Compare ips for different implementations (changes over time)'
|
158
206
|
task :compare do
|
159
207
|
Benchmark.ips do |b|
|
160
208
|
hash = { 'thing' => 'gniht' }
|
@@ -170,3 +218,7 @@ namespace :performance do
|
|
170
218
|
end
|
171
219
|
end
|
172
220
|
end
|
221
|
+
|
222
|
+
current_tasks = Rake.application.top_level_tasks
|
223
|
+
current_tasks << 'performance:benchmark:output:close'
|
224
|
+
Rake.application.instance_variable_set :@top_level_tasks, current_tasks
|