rambling-trie 0.9.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/LICENSE +1 -1
- data/README.md +133 -26
- data/Rakefile +1 -2
- data/lib/rambling/trie.rb +53 -9
- data/lib/rambling/trie/comparable.rb +16 -0
- data/lib/rambling/trie/compressable.rb +14 -0
- data/lib/rambling/trie/compressed_node.rb +38 -14
- data/lib/rambling/trie/compressor.rb +14 -10
- data/lib/rambling/trie/configuration.rb +11 -0
- data/lib/rambling/trie/configuration/properties.rb +66 -0
- data/lib/rambling/trie/configuration/provider_collection.rb +101 -0
- data/lib/rambling/trie/container.rb +57 -17
- data/lib/rambling/trie/enumerable.rb +1 -1
- data/lib/rambling/trie/forwardable.rb +9 -4
- data/lib/rambling/trie/inspectable.rb +37 -0
- data/lib/rambling/trie/invalid_operation.rb +3 -2
- data/lib/rambling/trie/missing_node.rb +2 -1
- data/lib/rambling/trie/node.rb +40 -30
- data/lib/rambling/trie/raw_node.rb +29 -13
- data/lib/rambling/trie/readers.rb +11 -0
- data/lib/rambling/trie/readers/plain_text.rb +26 -0
- data/lib/rambling/trie/serializers.rb +11 -0
- data/lib/rambling/trie/serializers/file.rb +25 -0
- data/lib/rambling/trie/serializers/marshal.rb +38 -0
- data/lib/rambling/trie/serializers/yaml.rb +39 -0
- data/lib/rambling/trie/serializers/zip.rb +67 -0
- data/lib/rambling/trie/stringifyable.rb +20 -0
- data/lib/rambling/trie/version.rb +1 -1
- data/rambling-trie.gemspec +2 -2
- data/spec/integration/rambling/trie_spec.rb +45 -49
- data/spec/lib/rambling/trie/comparable_spec.rb +104 -0
- data/spec/lib/rambling/trie/compressed_node_spec.rb +44 -0
- data/spec/lib/rambling/trie/configuration/properties_spec.rb +49 -0
- data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +165 -0
- data/spec/lib/rambling/trie/container_spec.rb +127 -38
- data/spec/lib/rambling/trie/{inspector_spec.rb → inspectable_spec.rb} +7 -5
- data/spec/lib/rambling/trie/raw_node_spec.rb +22 -41
- data/spec/lib/rambling/trie/readers/plain_text_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/file_spec.rb +11 -0
- data/spec/lib/rambling/trie/serializers/marshal_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/yaml_spec.rb +14 -0
- data/spec/lib/rambling/trie/serializers/zip_spec.rb +30 -0
- data/spec/lib/rambling/trie/stringifyable_spec.rb +82 -0
- data/spec/lib/rambling/trie_spec.rb +120 -7
- data/spec/spec_helper.rb +7 -1
- data/spec/support/config.rb +5 -0
- data/spec/support/shared_examples/a_compressable_trie.rb +26 -0
- data/spec/support/shared_examples/a_serializable_trie.rb +26 -0
- data/spec/support/shared_examples/a_serializer.rb +29 -0
- data/spec/support/shared_examples/a_trie_data_structure.rb +29 -0
- data/spec/tmp/.gitkeep +0 -0
- metadata +51 -24
- data/lib/rambling/trie/compression.rb +0 -13
- data/lib/rambling/trie/inspector.rb +0 -11
- data/lib/rambling/trie/plain_text_reader.rb +0 -23
- data/lib/rambling/trie/tasks/gem.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/path.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/performance_report.rb +0 -17
- data/lib/rambling/trie/tasks/helpers/time.rb +0 -7
- data/lib/rambling/trie/tasks/performance.rb +0 -15
- data/lib/rambling/trie/tasks/performance/all.rb +0 -17
- data/lib/rambling/trie/tasks/performance/benchmark.rb +0 -201
- data/lib/rambling/trie/tasks/performance/directory.rb +0 -11
- data/lib/rambling/trie/tasks/performance/flamegraph.rb +0 -119
- data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +0 -147
- data/lib/rambling/trie/tasks/performance/profile/memory.rb +0 -143
- data/spec/lib/rambling/trie/plain_text_reader_spec.rb +0 -18
@@ -1,11 +0,0 @@
|
|
1
|
-
module Rambling
|
2
|
-
module Trie
|
3
|
-
# Provides pretty printing behavior for the Trie data structure.
|
4
|
-
module Inspector
|
5
|
-
# @return [String] a string representation of the current node.
|
6
|
-
def inspect
|
7
|
-
"#<#{self.class.name} letter: #{letter.inspect || 'nil'}, children: #{children_tree.keys}>"
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
11
|
-
end
|
@@ -1,23 +0,0 @@
|
|
1
|
-
module Rambling
|
2
|
-
module Trie
|
3
|
-
# File reader for .txt files
|
4
|
-
class PlainTextReader
|
5
|
-
# Yields each word read from a .txt file
|
6
|
-
# @param [String] filepath the full path of the file to load the words from.
|
7
|
-
# @yield [String] Each line read from the file.
|
8
|
-
def each_word filepath
|
9
|
-
each_line(filepath) { |line| yield line.chomp! }
|
10
|
-
end
|
11
|
-
|
12
|
-
private
|
13
|
-
|
14
|
-
def each_line filepath
|
15
|
-
open(filepath) { |file| file.each_line { |line| yield line } }
|
16
|
-
end
|
17
|
-
|
18
|
-
def open filepath
|
19
|
-
File.open(filepath) { |file| yield file }
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
namespace :gem do
|
2
|
-
desc 'Build the rambling-trie gem'
|
3
|
-
task :build do
|
4
|
-
system 'gem build rambling-trie.gemspec'
|
5
|
-
end
|
6
|
-
|
7
|
-
desc 'Push the latest version of the rambling-trie gem'
|
8
|
-
task release: :build do
|
9
|
-
system "gem push rambling-trie-#{Rambling::Trie::VERSION}.gem"
|
10
|
-
end
|
11
|
-
|
12
|
-
desc 'Output the current rambling-trie version'
|
13
|
-
task :version do
|
14
|
-
puts "rambling-trie #{Rambling::Trie::VERSION}"
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
@@ -1,17 +0,0 @@
|
|
1
|
-
module Helpers
|
2
|
-
module Path
|
3
|
-
def path *filename
|
4
|
-
Pathname.new(full_path *filename).cleanpath
|
5
|
-
end
|
6
|
-
|
7
|
-
def dictionary
|
8
|
-
path 'assets', 'dictionaries', 'words_with_friends.txt'
|
9
|
-
end
|
10
|
-
|
11
|
-
private
|
12
|
-
|
13
|
-
def full_path *filename
|
14
|
-
full_path = File.join File.dirname(__FILE__), '..', '..', '..', '..', '..', *filename
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
class PerformanceReport
|
2
|
-
attr_reader :output
|
3
|
-
|
4
|
-
def initialize output = $stdout.dup
|
5
|
-
@output = output
|
6
|
-
end
|
7
|
-
|
8
|
-
def start name
|
9
|
-
output.puts
|
10
|
-
output.puts "#{name} for rambling-trie version #{Rambling::Trie::VERSION}"
|
11
|
-
output.puts
|
12
|
-
end
|
13
|
-
|
14
|
-
def finish
|
15
|
-
output.close
|
16
|
-
end
|
17
|
-
end
|
@@ -1,15 +0,0 @@
|
|
1
|
-
require 'fileutils'
|
2
|
-
require 'benchmark'
|
3
|
-
require 'ruby-prof'
|
4
|
-
require 'memory_profiler'
|
5
|
-
require 'benchmark/ips'
|
6
|
-
require 'flamegraph'
|
7
|
-
require_relative 'helpers/path'
|
8
|
-
require_relative 'helpers/time'
|
9
|
-
require_relative 'helpers/performance_report'
|
10
|
-
require_relative 'performance/directory'
|
11
|
-
require_relative 'performance/benchmark'
|
12
|
-
require_relative 'performance/flamegraph'
|
13
|
-
require_relative 'performance/profile/call_tree'
|
14
|
-
require_relative 'performance/profile/memory'
|
15
|
-
require_relative 'performance/all'
|
@@ -1,17 +0,0 @@
|
|
1
|
-
namespace :performance do
|
2
|
-
desc 'Generate all profiling and performance reports'
|
3
|
-
task all: [
|
4
|
-
'benchmark:all',
|
5
|
-
'profile:call_tree:all',
|
6
|
-
'profile:memory:all',
|
7
|
-
]
|
8
|
-
|
9
|
-
namespace :all do
|
10
|
-
desc 'Generate and store all profiling and performance reports'
|
11
|
-
task save: [
|
12
|
-
'benchmark:all:save',
|
13
|
-
'profile:call_tree:all',
|
14
|
-
'profile:memory:all',
|
15
|
-
]
|
16
|
-
end
|
17
|
-
end
|
@@ -1,201 +0,0 @@
|
|
1
|
-
namespace :performance do
|
2
|
-
include Helpers::Path
|
3
|
-
|
4
|
-
class BenchmarkMeasurement
|
5
|
-
def initialize output
|
6
|
-
@output = output
|
7
|
-
end
|
8
|
-
|
9
|
-
def param_to_s param
|
10
|
-
case param
|
11
|
-
when Rambling::Trie::Container
|
12
|
-
''
|
13
|
-
else
|
14
|
-
param.to_s
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def perform times, params = nil
|
19
|
-
params = Array params
|
20
|
-
params << nil unless params.any?
|
21
|
-
|
22
|
-
params.each do |param|
|
23
|
-
output.print param_to_s(param).ljust 20
|
24
|
-
|
25
|
-
measure times, param do |param|
|
26
|
-
yield param
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
private
|
32
|
-
|
33
|
-
attr_reader :output
|
34
|
-
|
35
|
-
def measure times, param = nil
|
36
|
-
result = nil
|
37
|
-
|
38
|
-
measure = Benchmark.measure do
|
39
|
-
times.times do
|
40
|
-
result = yield param
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
output.print "#{result}".ljust 10
|
45
|
-
output.puts measure
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def performance_report= performance_report
|
50
|
-
@performance_report = performance_report
|
51
|
-
end
|
52
|
-
|
53
|
-
def performance_report
|
54
|
-
@performance_report ||= PerformanceReport.new
|
55
|
-
end
|
56
|
-
|
57
|
-
def output
|
58
|
-
performance_report.output
|
59
|
-
end
|
60
|
-
|
61
|
-
def generate_lookups_benchmark filename = nil
|
62
|
-
measure = BenchmarkMeasurement.new output
|
63
|
-
|
64
|
-
trie = Rambling::Trie.create dictionary
|
65
|
-
compressed_trie = Rambling::Trie.create(dictionary).compress!
|
66
|
-
[ trie, compressed_trie ].each do |trie|
|
67
|
-
output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
|
68
|
-
words = %w(hi help beautiful impressionism anthropological)
|
69
|
-
|
70
|
-
output.puts '`word?`'
|
71
|
-
measure.perform 200_000, words do |word|
|
72
|
-
trie.word? word
|
73
|
-
end
|
74
|
-
|
75
|
-
output.puts '`partial_word?`'
|
76
|
-
measure.perform 200_000, words do |word|
|
77
|
-
trie.partial_word? word
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
def generate_scans_benchmark filename = nil
|
83
|
-
measure = BenchmarkMeasurement.new output
|
84
|
-
|
85
|
-
words = {
|
86
|
-
hi: 1_000,
|
87
|
-
help: 100_000,
|
88
|
-
beautiful: 100_000,
|
89
|
-
impressionism: 200_000,
|
90
|
-
anthropological: 200_000,
|
91
|
-
}
|
92
|
-
|
93
|
-
trie = Rambling::Trie.create dictionary
|
94
|
-
compressed_trie = Rambling::Trie.create(dictionary).compress!
|
95
|
-
|
96
|
-
[ trie, compressed_trie ].each do |trie|
|
97
|
-
output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
|
98
|
-
output.puts "`scan`"
|
99
|
-
words.each do |word, times|
|
100
|
-
measure.perform times, word.to_s do |word|
|
101
|
-
trie.scan(word).size
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
namespace :benchmark do
|
108
|
-
namespace :output do
|
109
|
-
desc 'Set task reporting output to file'
|
110
|
-
task file: ['performance:directory'] do
|
111
|
-
path = path 'reports', Rambling::Trie::VERSION, 'benchmark'
|
112
|
-
file = File.open path, 'a+'
|
113
|
-
self.performance_report = PerformanceReport.new file
|
114
|
-
end
|
115
|
-
|
116
|
-
desc 'Close output stream'
|
117
|
-
task :close do
|
118
|
-
performance_report.finish
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
desc 'Output banner'
|
123
|
-
task :banner do
|
124
|
-
performance_report.start 'Benchmark'
|
125
|
-
end
|
126
|
-
|
127
|
-
desc 'Generate lookups performance benchmark report'
|
128
|
-
task lookups: :banner do
|
129
|
-
generate_lookups_benchmark
|
130
|
-
end
|
131
|
-
|
132
|
-
desc 'Generate scans performance benchmark report'
|
133
|
-
task scans: :banner do
|
134
|
-
generate_scans_benchmark
|
135
|
-
end
|
136
|
-
|
137
|
-
desc 'Generate creation performance benchmark report'
|
138
|
-
task creation: :banner do
|
139
|
-
measure = BenchmarkMeasurement.new output
|
140
|
-
|
141
|
-
output.puts '==> Creation'
|
142
|
-
output.puts '`Rambling::Trie.create`'
|
143
|
-
measure.perform 5 do
|
144
|
-
trie = Rambling::Trie.create dictionary
|
145
|
-
nil
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
desc 'Generate compression performance benchmark report'
|
150
|
-
task compression: :banner do
|
151
|
-
measure = BenchmarkMeasurement.new output
|
152
|
-
|
153
|
-
output.puts '==> Compression'
|
154
|
-
output.puts '`compress!`'
|
155
|
-
|
156
|
-
tries = []
|
157
|
-
5.times { tries << Rambling::Trie.create(dictionary) }
|
158
|
-
|
159
|
-
measure.perform 5, tries do |trie|
|
160
|
-
trie.compress!
|
161
|
-
nil
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
|
-
desc 'Generate all performance benchmark reports'
|
166
|
-
task all: [
|
167
|
-
:creation,
|
168
|
-
:compression,
|
169
|
-
:lookups,
|
170
|
-
:scans,
|
171
|
-
]
|
172
|
-
|
173
|
-
namespace :all do
|
174
|
-
desc "Generate and store performance benchmark report in reports/#{Rambling::Trie::VERSION}"
|
175
|
-
task save: [
|
176
|
-
'output:file',
|
177
|
-
:all
|
178
|
-
]
|
179
|
-
end
|
180
|
-
|
181
|
-
desc 'Compare ips for different implementations (changes over time)'
|
182
|
-
task :compare do
|
183
|
-
require 'benchmark/ips'
|
184
|
-
Benchmark.ips do |b|
|
185
|
-
hash = { 'thing' => 'gniht' }
|
186
|
-
|
187
|
-
b.report 'has_key?' do
|
188
|
-
hash.has_key? 'thing'
|
189
|
-
end
|
190
|
-
|
191
|
-
b.report '[]' do
|
192
|
-
!!hash['thing']
|
193
|
-
end
|
194
|
-
end
|
195
|
-
end
|
196
|
-
end
|
197
|
-
end
|
198
|
-
|
199
|
-
current_tasks = Rake.application.top_level_tasks
|
200
|
-
current_tasks << 'performance:benchmark:output:close'
|
201
|
-
Rake.application.instance_variable_set :@top_level_tasks, current_tasks
|
@@ -1,119 +0,0 @@
|
|
1
|
-
namespace :performance do
|
2
|
-
include Helpers::Path
|
3
|
-
include Helpers::Time
|
4
|
-
|
5
|
-
def performance_report
|
6
|
-
@performance_report ||= PerformanceReport.new
|
7
|
-
end
|
8
|
-
|
9
|
-
def output
|
10
|
-
performance_report.output
|
11
|
-
end
|
12
|
-
|
13
|
-
class FlamegraphProfile
|
14
|
-
def initialize filename
|
15
|
-
@filename = filename
|
16
|
-
end
|
17
|
-
|
18
|
-
def perform times, params = nil
|
19
|
-
params = Array params
|
20
|
-
params << nil unless params.any?
|
21
|
-
|
22
|
-
dirname = path 'reports', Rambling::Trie::VERSION, 'flamegraph', time
|
23
|
-
FileUtils.mkdir_p dirname
|
24
|
-
path = File.join dirname, "#{filename}.html"
|
25
|
-
|
26
|
-
result = Flamegraph.generate path do
|
27
|
-
params.each do |param|
|
28
|
-
times.times do
|
29
|
-
yield param
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
private
|
36
|
-
|
37
|
-
attr_reader :filename
|
38
|
-
end
|
39
|
-
|
40
|
-
namespace :flamegraph do
|
41
|
-
desc 'Output banner'
|
42
|
-
task :banner do
|
43
|
-
performance_report.start 'Flamegraph'
|
44
|
-
end
|
45
|
-
|
46
|
-
desc 'Generate flamegraph reports for creation'
|
47
|
-
task creation: ['performance:directory', :banner] do
|
48
|
-
output.puts 'Generating flamegraph reports for creation...'
|
49
|
-
|
50
|
-
flamegraph = FlamegraphProfile.new 'new-trie'
|
51
|
-
flamegraph.perform 1 do
|
52
|
-
trie = Rambling::Trie.create dictionary
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
desc 'Generate flamegraph reports for compression'
|
57
|
-
task compression: ['performance:directory', :banner] do
|
58
|
-
output.puts 'Generating flamegraph reports for compression...'
|
59
|
-
|
60
|
-
tries = [ Rambling::Trie.create(dictionary) ]
|
61
|
-
|
62
|
-
flamegraph = FlamegraphProfile.new 'compressed-trie'
|
63
|
-
flamegraph.perform 1, tries do |trie|
|
64
|
-
trie.compress!
|
65
|
-
nil
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
desc 'Generate flamegraph reports for lookups'
|
70
|
-
task lookups: ['performance:directory', :banner] do
|
71
|
-
output.puts 'Generating flamegraph reports for lookups...'
|
72
|
-
|
73
|
-
words = %w(hi help beautiful impressionism anthropological)
|
74
|
-
|
75
|
-
trie = Rambling::Trie.create dictionary
|
76
|
-
compressed_trie = Rambling::Trie.create(dictionary).compress!
|
77
|
-
|
78
|
-
[ trie, compressed_trie ].each do |trie|
|
79
|
-
prefix = "#{trie.compressed? ? 'compressed' : 'uncompressed'}-trie"
|
80
|
-
|
81
|
-
flamegraph = FlamegraphProfile.new "#{prefix}-word"
|
82
|
-
flamegraph.perform 1, words do |word|
|
83
|
-
trie.word? word
|
84
|
-
end
|
85
|
-
|
86
|
-
flamegraph = FlamegraphProfile.new "#{prefix}-partial-word"
|
87
|
-
flamegraph.perform 1, words do |word|
|
88
|
-
trie.partial_word? word
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
desc 'Generate flamegraph reports for scans'
|
94
|
-
task scans: ['performance:directory', :banner] do
|
95
|
-
output.puts 'Generating flamegraph reports for scans...'
|
96
|
-
|
97
|
-
words = %w(hi help beautiful impressionism anthropological)
|
98
|
-
|
99
|
-
trie = Rambling::Trie.create dictionary
|
100
|
-
compressed_trie = Rambling::Trie.create(dictionary).compress!
|
101
|
-
|
102
|
-
[ trie, compressed_trie ].each do |trie|
|
103
|
-
prefix = "#{trie.compressed? ? 'compressed' : 'uncompressed'}-trie"
|
104
|
-
flamegraph = FlamegraphProfile.new "#{prefix}-scan"
|
105
|
-
flamegraph.perform 1, words do |word|
|
106
|
-
trie.scan(word).size
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
desc 'Generate all flamegraph reports'
|
112
|
-
task all: [
|
113
|
-
:creation,
|
114
|
-
:compression,
|
115
|
-
:lookups,
|
116
|
-
:scans,
|
117
|
-
]
|
118
|
-
end
|
119
|
-
end
|