rambling-trie 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rambling/trie/compressed_node.rb +30 -45
- data/lib/rambling/trie/compressor.rb +17 -19
- data/lib/rambling/trie/container.rb +2 -2
- data/lib/rambling/trie/node.rb +3 -3
- data/lib/rambling/trie/tasks/helpers/path.rb +4 -0
- data/lib/rambling/trie/tasks/performance/all.rb +15 -2
- data/lib/rambling/trie/tasks/performance/benchmark.rb +125 -73
- data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +20 -14
- data/lib/rambling/trie/tasks/performance/profile/memory.rb +17 -16
- data/lib/rambling/trie/version.rb +1 -1
- data/reports/0.0.1/benchmark +15 -0
- data/reports/0.0.2/benchmark +15 -0
- data/reports/0.1.0/benchmark +15 -0
- data/reports/0.2.0/benchmark +29 -0
- data/reports/0.3.0/benchmark +29 -0
- data/reports/0.3.2/benchmark +29 -0
- data/reports/0.3.3/benchmark +29 -0
- data/reports/0.3.4/benchmark +28 -0
- data/reports/0.4.0/benchmark +28 -0
- data/reports/0.4.1/benchmark +29 -0
- data/reports/0.5.0/benchmark +28 -0
- data/reports/0.5.1/benchmark +28 -0
- data/reports/0.5.2/benchmark +28 -0
- data/reports/0.6.0/benchmark +29 -0
- data/reports/0.6.1/benchmark +28 -0
- data/reports/0.7.0/benchmark +28 -0
- data/reports/0.8.0/benchmark +54 -0
- data/reports/0.8.1/benchmark +54 -0
- data/reports/0.9.0/benchmark +54 -0
- data/reports/0.9.1/benchmark +58 -0
- data/spec/integration/rambling/trie_spec.rb +1 -1
- metadata +22 -3
- data/reports/performance +0 -498
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9b23cfe0d2b236a7970f4666f0df6a7cc4226d22
|
4
|
+
data.tar.gz: 5147da530a25b386fdafd9cc59ccb1c35b2a2efc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 71405a6dd24e710629d9e8cb8d307b2faa5664f029bcdedd749063d106e6706bce5fb07fb6ecbf776098668d0b5ab3826e3f3d6d9f75e96e516c2cbce07de2ab
|
7
|
+
data.tar.gz: 6d5a292f676c979dbf2e3b7cc25d8a932cf087c14325df539b2ccafd872083d9ca1140224eb4edbc93c4fb0e920bd7f75374ba0bbd260d911fdf057fdb5cdd0d
|
@@ -22,18 +22,15 @@ module Rambling
|
|
22
22
|
# @return [Boolean] `true` if the characters are found and form a word,
|
23
23
|
# `false` otherwise.
|
24
24
|
def word? chars
|
25
|
-
|
26
|
-
terminal?
|
27
|
-
else
|
28
|
-
has_word? chars
|
29
|
-
end
|
25
|
+
chars.empty? ? terminal? : has_word?(chars)
|
30
26
|
end
|
31
27
|
|
32
28
|
# Returns all words that start with the specified characters.
|
33
29
|
# @param [Array] chars the characters to look for in the trie.
|
34
30
|
# @return [Array] all the words contained in the trie that start with the specified characters.
|
35
31
|
def scan chars
|
36
|
-
closest_node(chars)
|
32
|
+
node = chars.empty? ? self : closest_node(chars)
|
33
|
+
node.to_a
|
37
34
|
end
|
38
35
|
|
39
36
|
# Always return `true` for a raw (compressed) node.
|
@@ -42,70 +39,58 @@ module Rambling
|
|
42
39
|
true
|
43
40
|
end
|
44
41
|
|
45
|
-
|
42
|
+
private
|
46
43
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
else
|
51
|
-
current_length = 0
|
52
|
-
current_key, current_key_string = current_key chars.slice!(0)
|
44
|
+
def has_partial_word? chars
|
45
|
+
recursive_get(:partial_word?, chars) || false
|
46
|
+
end
|
53
47
|
|
54
|
-
|
55
|
-
|
48
|
+
def has_word? chars
|
49
|
+
current_key = nil
|
56
50
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
51
|
+
while !chars.empty?
|
52
|
+
if current_key
|
53
|
+
current_key << chars.slice!(0)
|
54
|
+
else
|
55
|
+
current_key = chars.slice!(0)
|
56
|
+
end
|
61
57
|
|
62
|
-
|
58
|
+
child = children_tree[current_key.to_sym]
|
59
|
+
return child.word? chars if child
|
63
60
|
end
|
61
|
+
|
62
|
+
false
|
64
63
|
end
|
65
64
|
|
66
|
-
|
65
|
+
def closest_node chars
|
66
|
+
recursive_get(:scan, chars) || Rambling::Trie::MissingNode.new
|
67
|
+
end
|
67
68
|
|
68
|
-
def
|
69
|
+
def recursive_get method, chars
|
69
70
|
current_length = 0
|
70
|
-
current_key
|
71
|
+
current_key = current_key chars.slice!(0)
|
71
72
|
|
72
73
|
begin
|
73
74
|
current_length += 1
|
74
75
|
|
75
|
-
if
|
76
|
-
return children_tree[current_key].
|
76
|
+
if (current_key && current_key.length == current_length) || chars.empty?
|
77
|
+
return children_tree[current_key.to_sym].send method, chars
|
77
78
|
end
|
78
|
-
end while
|
79
|
-
|
80
|
-
false
|
81
|
-
end
|
82
|
-
|
83
|
-
def has_word? chars
|
84
|
-
current_key_string = ''
|
85
|
-
|
86
|
-
while !chars.empty?
|
87
|
-
current_key_string << chars.slice!(0)
|
88
|
-
current_key = current_key_string.to_sym
|
89
|
-
child = children_tree[current_key]
|
90
|
-
return child.word? chars if child
|
91
|
-
end
|
92
|
-
|
93
|
-
false
|
79
|
+
end while current_key && current_key[current_length] == chars.slice!(0)
|
94
80
|
end
|
95
81
|
|
96
82
|
def current_key letter
|
97
|
-
|
83
|
+
current_key = nil
|
98
84
|
|
99
85
|
children_tree.keys.each do |key|
|
100
86
|
key_string = key.to_s
|
101
87
|
if key_string.start_with? letter
|
102
|
-
current_key =
|
103
|
-
current_key_string = key_string
|
88
|
+
current_key = key_string
|
104
89
|
break
|
105
90
|
end
|
106
91
|
end
|
107
92
|
|
108
|
-
|
93
|
+
current_key
|
109
94
|
end
|
110
95
|
end
|
111
96
|
end
|
@@ -5,39 +5,37 @@ module Rambling
|
|
5
5
|
# Compresses a node from a Trie data structure.
|
6
6
|
# @param [RawNode] node the node to compress
|
7
7
|
# @return [CompressedNode] node the compressed version of the node
|
8
|
-
def compress node
|
8
|
+
def compress node
|
9
9
|
if node.compressable?
|
10
|
-
|
10
|
+
merge_with_child_and_compress node
|
11
11
|
else
|
12
|
-
copy_node_and_compress_children node
|
12
|
+
copy_node_and_compress_children node
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
16
|
private
|
17
17
|
|
18
|
-
def
|
19
|
-
|
18
|
+
def merge_with_child_and_compress node
|
19
|
+
child = node.children.first
|
20
20
|
|
21
|
-
new_node = Rambling::Trie::CompressedNode.new parent
|
22
|
-
new_node.letter = node.letter.to_s <<
|
23
|
-
new_node.terminal! if
|
24
|
-
new_node.children_tree =
|
21
|
+
new_node = Rambling::Trie::CompressedNode.new node.parent
|
22
|
+
new_node.letter = node.letter.to_s << child.letter.to_s
|
23
|
+
new_node.terminal! if child.terminal?
|
24
|
+
new_node.children_tree = child.children_tree
|
25
25
|
|
26
|
-
new_node
|
27
|
-
child.parent = new_node
|
28
|
-
end
|
29
|
-
|
30
|
-
new_node
|
26
|
+
compress new_node
|
31
27
|
end
|
32
28
|
|
33
|
-
def copy_node_and_compress_children node
|
34
|
-
new_node = Rambling::Trie::CompressedNode.new parent
|
35
|
-
|
29
|
+
def copy_node_and_compress_children node
|
30
|
+
new_node = Rambling::Trie::CompressedNode.new node.parent
|
36
31
|
new_node.letter = node.letter
|
37
32
|
new_node.terminal! if node.terminal?
|
38
33
|
|
39
|
-
node.children.
|
40
|
-
compress child
|
34
|
+
node.children.each do |child|
|
35
|
+
compressed_child = compress child
|
36
|
+
|
37
|
+
compressed_child.parent = new_node
|
38
|
+
new_node[compressed_child.letter] = compressed_child
|
41
39
|
end
|
42
40
|
|
43
41
|
new_node
|
@@ -49,14 +49,14 @@ module Rambling
|
|
49
49
|
# @param [String] word the word or partial word to look for in the trie.
|
50
50
|
# @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
|
51
51
|
def partial_word? word = ''
|
52
|
-
root.partial_word? word.chars
|
52
|
+
root.partial_word? word.chars
|
53
53
|
end
|
54
54
|
|
55
55
|
# Checks if a whole word exists in the trie.
|
56
56
|
# @param [String] word the word to look for in the trie.
|
57
57
|
# @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
|
58
58
|
def word? word = ''
|
59
|
-
root.word? word.chars
|
59
|
+
root.word? word.chars
|
60
60
|
end
|
61
61
|
|
62
62
|
# Returns all words that start with the specified characters.
|
data/lib/rambling/trie/node.rb
CHANGED
@@ -1,4 +1,17 @@
|
|
1
1
|
namespace :performance do
|
2
|
-
desc 'Generate profiling and performance reports'
|
3
|
-
task all: [
|
2
|
+
desc 'Generate all profiling and performance reports'
|
3
|
+
task all: [
|
4
|
+
'benchmark:all',
|
5
|
+
'profile:call_tree:all',
|
6
|
+
'profile:memory:all',
|
7
|
+
]
|
8
|
+
|
9
|
+
namespace :all do
|
10
|
+
desc 'Generate and store all profiling and performance reports'
|
11
|
+
task save: [
|
12
|
+
'benchmark:all:save',
|
13
|
+
'profile:call_tree:all',
|
14
|
+
'profile:memory:all',
|
15
|
+
]
|
16
|
+
end
|
4
17
|
end
|
@@ -3,17 +3,38 @@ require_relative '../helpers/path'
|
|
3
3
|
namespace :performance do
|
4
4
|
include Helpers::Path
|
5
5
|
|
6
|
+
class BenchmarkReport
|
7
|
+
attr_reader :output
|
8
|
+
|
9
|
+
def initialize output
|
10
|
+
@output = output
|
11
|
+
end
|
12
|
+
|
13
|
+
def finish
|
14
|
+
output.close
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
6
18
|
class BenchmarkMeasurement
|
7
19
|
def initialize output
|
8
20
|
@output = output
|
9
21
|
end
|
10
22
|
|
23
|
+
def param_to_s param
|
24
|
+
case param
|
25
|
+
when Rambling::Trie::Container
|
26
|
+
''
|
27
|
+
else
|
28
|
+
param.to_s
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
11
32
|
def perform times, params = nil
|
12
33
|
params = Array params
|
13
34
|
params << nil unless params.any?
|
14
35
|
|
15
36
|
params.each do |param|
|
16
|
-
output.print
|
37
|
+
output.print param_to_s(param).ljust 20
|
17
38
|
|
18
39
|
measure times, param do |param|
|
19
40
|
yield param
|
@@ -43,64 +64,88 @@ namespace :performance do
|
|
43
64
|
end
|
44
65
|
end
|
45
66
|
|
46
|
-
def
|
47
|
-
|
67
|
+
def benchmark_report= benchmark_report
|
68
|
+
@benchmark_report = benchmark_report
|
69
|
+
end
|
48
70
|
|
49
|
-
|
71
|
+
def benchmark_report
|
72
|
+
Rake::Task['performance:benchmark:output:stdout'].invoke unless @benchmark_report
|
50
73
|
|
51
|
-
|
74
|
+
@benchmark_report
|
52
75
|
end
|
53
76
|
|
54
|
-
def
|
55
|
-
|
56
|
-
|
57
|
-
measure.banner
|
58
|
-
|
59
|
-
trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
|
60
|
-
[ trie, trie.clone.compress! ].each do |trie|
|
61
|
-
output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
|
62
|
-
words = %w(hi help beautiful impressionism anthropological)
|
77
|
+
def output
|
78
|
+
benchmark_report.output
|
79
|
+
end
|
63
80
|
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
81
|
+
def generate_lookups_benchmark filename = nil
|
82
|
+
measure = BenchmarkMeasurement.new output
|
83
|
+
measure.banner
|
84
|
+
|
85
|
+
trie = Rambling::Trie.create dictionary
|
86
|
+
compressed_trie = Rambling::Trie.create(dictionary).compress!
|
87
|
+
[ trie, compressed_trie ].each do |trie|
|
88
|
+
output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
|
89
|
+
words = %w(hi help beautiful impressionism anthropological)
|
90
|
+
|
91
|
+
output.puts '`word?`'
|
92
|
+
measure.perform 200_000, words do |word|
|
93
|
+
trie.word? word
|
94
|
+
end
|
68
95
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
end
|
96
|
+
output.puts '`partial_word?`'
|
97
|
+
measure.perform 200_000, words do |word|
|
98
|
+
trie.partial_word? word
|
73
99
|
end
|
74
100
|
end
|
75
101
|
end
|
76
102
|
|
77
103
|
def generate_scans_benchmark filename = nil
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
104
|
+
measure = BenchmarkMeasurement.new output
|
105
|
+
measure.banner
|
106
|
+
|
107
|
+
words = {
|
108
|
+
hi: 1_000,
|
109
|
+
help: 100_000,
|
110
|
+
beautiful: 100_000,
|
111
|
+
impressionism: 200_000,
|
112
|
+
anthropological: 200_000,
|
113
|
+
}
|
114
|
+
|
115
|
+
trie = Rambling::Trie.create dictionary
|
116
|
+
compressed_trie = Rambling::Trie.create(dictionary).compress!
|
117
|
+
|
118
|
+
[ trie, compressed_trie ].each do |trie|
|
119
|
+
output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
|
120
|
+
output.puts "`scan`"
|
121
|
+
words.each do |word, times|
|
122
|
+
measure.perform times, word.to_s do |word|
|
123
|
+
trie.scan(word).size
|
98
124
|
end
|
99
125
|
end
|
100
126
|
end
|
101
127
|
end
|
102
128
|
|
103
129
|
namespace :benchmark do
|
130
|
+
namespace :output do
|
131
|
+
desc 'Set task reporting output to stdout'
|
132
|
+
task :stdout do
|
133
|
+
self.benchmark_report = BenchmarkReport.new IO.new(1)
|
134
|
+
end
|
135
|
+
|
136
|
+
desc 'Set task reporting output to file'
|
137
|
+
task file: ['performance:directory'] do
|
138
|
+
path = path 'reports', Rambling::Trie::VERSION, 'benchmark'
|
139
|
+
file = File.open path, 'a+'
|
140
|
+
self.benchmark_report = BenchmarkReport.new file
|
141
|
+
end
|
142
|
+
|
143
|
+
desc 'Close output stream'
|
144
|
+
task :close do
|
145
|
+
benchmark_report.finish unless benchmark_report.nil?
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
104
149
|
desc 'Generate lookups performance benchmark report'
|
105
150
|
task :lookups do
|
106
151
|
generate_lookups_benchmark
|
@@ -111,50 +156,53 @@ namespace :performance do
|
|
111
156
|
generate_scans_benchmark
|
112
157
|
end
|
113
158
|
|
114
|
-
|
115
|
-
desc 'Generate performance benchmark report store results in reports/'
|
116
|
-
task save: ['performance:directory'] do
|
117
|
-
puts 'Generating performance benchmark report for lookups...'
|
118
|
-
generate_lookups_benchmark path('reports', Rambling::Trie::VERSION, 'benchmark')
|
119
|
-
puts "Benchmarks have been saved to reports/#{Rambling::Trie::VERSION}/benchmark"
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
159
|
+
desc 'Generate creation performance benchmark report'
|
123
160
|
task :creation do
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
end
|
161
|
+
measure = BenchmarkMeasurement.new output
|
162
|
+
measure.banner
|
163
|
+
|
164
|
+
output.puts '==> Creation'
|
165
|
+
output.puts '`Rambling::Trie.create`'
|
166
|
+
measure.perform 5 do
|
167
|
+
trie = Rambling::Trie.create dictionary
|
168
|
+
nil
|
133
169
|
end
|
134
170
|
end
|
135
171
|
|
172
|
+
desc 'Generate compression performance benchmark report'
|
136
173
|
task :compression do
|
137
|
-
|
138
|
-
|
139
|
-
measure.banner
|
174
|
+
measure = BenchmarkMeasurement.new output
|
175
|
+
measure.banner
|
140
176
|
|
141
|
-
|
142
|
-
|
177
|
+
output.puts '==> Compression'
|
178
|
+
output.puts '`compress!`'
|
143
179
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
180
|
+
tries = []
|
181
|
+
5.times { tries << Rambling::Trie.create(dictionary) }
|
182
|
+
|
183
|
+
measure.perform 5, tries do |trie|
|
184
|
+
trie.compress!
|
185
|
+
nil
|
148
186
|
end
|
149
187
|
end
|
150
188
|
|
189
|
+
desc 'Generate all performance benchmark reports'
|
151
190
|
task all: [
|
152
|
-
'
|
153
|
-
'
|
154
|
-
'
|
155
|
-
'
|
191
|
+
'creation',
|
192
|
+
'compression',
|
193
|
+
'lookups',
|
194
|
+
'scans',
|
156
195
|
]
|
157
196
|
|
197
|
+
namespace :all do
|
198
|
+
desc "Generate and store performance benchmark report in reports/#{Rambling::Trie::VERSION}"
|
199
|
+
task save: [
|
200
|
+
'output:file',
|
201
|
+
'all'
|
202
|
+
]
|
203
|
+
end
|
204
|
+
|
205
|
+
desc 'Compare ips for different implementations (changes over time)'
|
158
206
|
task :compare do
|
159
207
|
Benchmark.ips do |b|
|
160
208
|
hash = { 'thing' => 'gniht' }
|
@@ -170,3 +218,7 @@ namespace :performance do
|
|
170
218
|
end
|
171
219
|
end
|
172
220
|
end
|
221
|
+
|
222
|
+
current_tasks = Rake.application.top_level_tasks
|
223
|
+
current_tasks << 'performance:benchmark:output:close'
|
224
|
+
Rake.application.instance_variable_set :@top_level_tasks, current_tasks
|