rambling-trie 0.9.3 → 1.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/LICENSE +1 -1
  4. data/README.md +133 -26
  5. data/Rakefile +1 -2
  6. data/lib/rambling/trie.rb +53 -9
  7. data/lib/rambling/trie/comparable.rb +16 -0
  8. data/lib/rambling/trie/compressable.rb +14 -0
  9. data/lib/rambling/trie/compressed_node.rb +38 -14
  10. data/lib/rambling/trie/compressor.rb +14 -10
  11. data/lib/rambling/trie/configuration.rb +11 -0
  12. data/lib/rambling/trie/configuration/properties.rb +66 -0
  13. data/lib/rambling/trie/configuration/provider_collection.rb +101 -0
  14. data/lib/rambling/trie/container.rb +57 -17
  15. data/lib/rambling/trie/enumerable.rb +1 -1
  16. data/lib/rambling/trie/forwardable.rb +9 -4
  17. data/lib/rambling/trie/inspectable.rb +37 -0
  18. data/lib/rambling/trie/invalid_operation.rb +3 -2
  19. data/lib/rambling/trie/missing_node.rb +2 -1
  20. data/lib/rambling/trie/node.rb +40 -30
  21. data/lib/rambling/trie/raw_node.rb +29 -13
  22. data/lib/rambling/trie/readers.rb +11 -0
  23. data/lib/rambling/trie/readers/plain_text.rb +26 -0
  24. data/lib/rambling/trie/serializers.rb +11 -0
  25. data/lib/rambling/trie/serializers/file.rb +25 -0
  26. data/lib/rambling/trie/serializers/marshal.rb +38 -0
  27. data/lib/rambling/trie/serializers/yaml.rb +39 -0
  28. data/lib/rambling/trie/serializers/zip.rb +67 -0
  29. data/lib/rambling/trie/stringifyable.rb +20 -0
  30. data/lib/rambling/trie/version.rb +1 -1
  31. data/rambling-trie.gemspec +2 -2
  32. data/spec/integration/rambling/trie_spec.rb +45 -49
  33. data/spec/lib/rambling/trie/comparable_spec.rb +104 -0
  34. data/spec/lib/rambling/trie/compressed_node_spec.rb +44 -0
  35. data/spec/lib/rambling/trie/configuration/properties_spec.rb +49 -0
  36. data/spec/lib/rambling/trie/configuration/provider_collection_spec.rb +165 -0
  37. data/spec/lib/rambling/trie/container_spec.rb +127 -38
  38. data/spec/lib/rambling/trie/{inspector_spec.rb → inspectable_spec.rb} +7 -5
  39. data/spec/lib/rambling/trie/raw_node_spec.rb +22 -41
  40. data/spec/lib/rambling/trie/readers/plain_text_spec.rb +14 -0
  41. data/spec/lib/rambling/trie/serializers/file_spec.rb +11 -0
  42. data/spec/lib/rambling/trie/serializers/marshal_spec.rb +14 -0
  43. data/spec/lib/rambling/trie/serializers/yaml_spec.rb +14 -0
  44. data/spec/lib/rambling/trie/serializers/zip_spec.rb +30 -0
  45. data/spec/lib/rambling/trie/stringifyable_spec.rb +82 -0
  46. data/spec/lib/rambling/trie_spec.rb +120 -7
  47. data/spec/spec_helper.rb +7 -1
  48. data/spec/support/config.rb +5 -0
  49. data/spec/support/shared_examples/a_compressable_trie.rb +26 -0
  50. data/spec/support/shared_examples/a_serializable_trie.rb +26 -0
  51. data/spec/support/shared_examples/a_serializer.rb +29 -0
  52. data/spec/support/shared_examples/a_trie_data_structure.rb +29 -0
  53. data/spec/tmp/.gitkeep +0 -0
  54. metadata +51 -24
  55. data/lib/rambling/trie/compression.rb +0 -13
  56. data/lib/rambling/trie/inspector.rb +0 -11
  57. data/lib/rambling/trie/plain_text_reader.rb +0 -23
  58. data/lib/rambling/trie/tasks/gem.rb +0 -17
  59. data/lib/rambling/trie/tasks/helpers/path.rb +0 -17
  60. data/lib/rambling/trie/tasks/helpers/performance_report.rb +0 -17
  61. data/lib/rambling/trie/tasks/helpers/time.rb +0 -7
  62. data/lib/rambling/trie/tasks/performance.rb +0 -15
  63. data/lib/rambling/trie/tasks/performance/all.rb +0 -17
  64. data/lib/rambling/trie/tasks/performance/benchmark.rb +0 -201
  65. data/lib/rambling/trie/tasks/performance/directory.rb +0 -11
  66. data/lib/rambling/trie/tasks/performance/flamegraph.rb +0 -119
  67. data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +0 -147
  68. data/lib/rambling/trie/tasks/performance/profile/memory.rb +0 -143
  69. data/spec/lib/rambling/trie/plain_text_reader_spec.rb +0 -18
@@ -1,11 +0,0 @@
1
- module Rambling
2
- module Trie
3
- # Provides pretty printing behavior for the Trie data structure.
4
- module Inspector
5
- # @return [String] a string representation of the current node.
6
- def inspect
7
- "#<#{self.class.name} letter: #{letter.inspect || 'nil'}, children: #{children_tree.keys}>"
8
- end
9
- end
10
- end
11
- end
@@ -1,23 +0,0 @@
1
- module Rambling
2
- module Trie
3
- # File reader for .txt files
4
- class PlainTextReader
5
- # Yields each word read from a .txt file
6
- # @param [String] filepath the full path of the file to load the words from.
7
- # @yield [String] Each line read from the file.
8
- def each_word filepath
9
- each_line(filepath) { |line| yield line.chomp! }
10
- end
11
-
12
- private
13
-
14
- def each_line filepath
15
- open(filepath) { |file| file.each_line { |line| yield line } }
16
- end
17
-
18
- def open filepath
19
- File.open(filepath) { |file| yield file }
20
- end
21
- end
22
- end
23
- end
@@ -1,17 +0,0 @@
1
- namespace :gem do
2
- desc 'Build the rambling-trie gem'
3
- task :build do
4
- system 'gem build rambling-trie.gemspec'
5
- end
6
-
7
- desc 'Push the latest version of the rambling-trie gem'
8
- task release: :build do
9
- system "gem push rambling-trie-#{Rambling::Trie::VERSION}.gem"
10
- end
11
-
12
- desc 'Output the current rambling-trie version'
13
- task :version do
14
- puts "rambling-trie #{Rambling::Trie::VERSION}"
15
- end
16
- end
17
-
@@ -1,17 +0,0 @@
1
- module Helpers
2
- module Path
3
- def path *filename
4
- Pathname.new(full_path *filename).cleanpath
5
- end
6
-
7
- def dictionary
8
- path 'assets', 'dictionaries', 'words_with_friends.txt'
9
- end
10
-
11
- private
12
-
13
- def full_path *filename
14
- full_path = File.join File.dirname(__FILE__), '..', '..', '..', '..', '..', *filename
15
- end
16
- end
17
- end
@@ -1,17 +0,0 @@
1
- class PerformanceReport
2
- attr_reader :output
3
-
4
- def initialize output = $stdout.dup
5
- @output = output
6
- end
7
-
8
- def start name
9
- output.puts
10
- output.puts "#{name} for rambling-trie version #{Rambling::Trie::VERSION}"
11
- output.puts
12
- end
13
-
14
- def finish
15
- output.close
16
- end
17
- end
@@ -1,7 +0,0 @@
1
- module Helpers
2
- module Time
3
- def time
4
- @time ||= ::Time.now.strftime '%Y-%m-%d %H.%M.%S.%L'
5
- end
6
- end
7
- end
@@ -1,15 +0,0 @@
1
- require 'fileutils'
2
- require 'benchmark'
3
- require 'ruby-prof'
4
- require 'memory_profiler'
5
- require 'benchmark/ips'
6
- require 'flamegraph'
7
- require_relative 'helpers/path'
8
- require_relative 'helpers/time'
9
- require_relative 'helpers/performance_report'
10
- require_relative 'performance/directory'
11
- require_relative 'performance/benchmark'
12
- require_relative 'performance/flamegraph'
13
- require_relative 'performance/profile/call_tree'
14
- require_relative 'performance/profile/memory'
15
- require_relative 'performance/all'
@@ -1,17 +0,0 @@
1
- namespace :performance do
2
- desc 'Generate all profiling and performance reports'
3
- task all: [
4
- 'benchmark:all',
5
- 'profile:call_tree:all',
6
- 'profile:memory:all',
7
- ]
8
-
9
- namespace :all do
10
- desc 'Generate and store all profiling and performance reports'
11
- task save: [
12
- 'benchmark:all:save',
13
- 'profile:call_tree:all',
14
- 'profile:memory:all',
15
- ]
16
- end
17
- end
@@ -1,201 +0,0 @@
1
- namespace :performance do
2
- include Helpers::Path
3
-
4
- class BenchmarkMeasurement
5
- def initialize output
6
- @output = output
7
- end
8
-
9
- def param_to_s param
10
- case param
11
- when Rambling::Trie::Container
12
- ''
13
- else
14
- param.to_s
15
- end
16
- end
17
-
18
- def perform times, params = nil
19
- params = Array params
20
- params << nil unless params.any?
21
-
22
- params.each do |param|
23
- output.print param_to_s(param).ljust 20
24
-
25
- measure times, param do |param|
26
- yield param
27
- end
28
- end
29
- end
30
-
31
- private
32
-
33
- attr_reader :output
34
-
35
- def measure times, param = nil
36
- result = nil
37
-
38
- measure = Benchmark.measure do
39
- times.times do
40
- result = yield param
41
- end
42
- end
43
-
44
- output.print "#{result}".ljust 10
45
- output.puts measure
46
- end
47
- end
48
-
49
- def performance_report= performance_report
50
- @performance_report = performance_report
51
- end
52
-
53
- def performance_report
54
- @performance_report ||= PerformanceReport.new
55
- end
56
-
57
- def output
58
- performance_report.output
59
- end
60
-
61
- def generate_lookups_benchmark filename = nil
62
- measure = BenchmarkMeasurement.new output
63
-
64
- trie = Rambling::Trie.create dictionary
65
- compressed_trie = Rambling::Trie.create(dictionary).compress!
66
- [ trie, compressed_trie ].each do |trie|
67
- output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
68
- words = %w(hi help beautiful impressionism anthropological)
69
-
70
- output.puts '`word?`'
71
- measure.perform 200_000, words do |word|
72
- trie.word? word
73
- end
74
-
75
- output.puts '`partial_word?`'
76
- measure.perform 200_000, words do |word|
77
- trie.partial_word? word
78
- end
79
- end
80
- end
81
-
82
- def generate_scans_benchmark filename = nil
83
- measure = BenchmarkMeasurement.new output
84
-
85
- words = {
86
- hi: 1_000,
87
- help: 100_000,
88
- beautiful: 100_000,
89
- impressionism: 200_000,
90
- anthropological: 200_000,
91
- }
92
-
93
- trie = Rambling::Trie.create dictionary
94
- compressed_trie = Rambling::Trie.create(dictionary).compress!
95
-
96
- [ trie, compressed_trie ].each do |trie|
97
- output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
98
- output.puts "`scan`"
99
- words.each do |word, times|
100
- measure.perform times, word.to_s do |word|
101
- trie.scan(word).size
102
- end
103
- end
104
- end
105
- end
106
-
107
- namespace :benchmark do
108
- namespace :output do
109
- desc 'Set task reporting output to file'
110
- task file: ['performance:directory'] do
111
- path = path 'reports', Rambling::Trie::VERSION, 'benchmark'
112
- file = File.open path, 'a+'
113
- self.performance_report = PerformanceReport.new file
114
- end
115
-
116
- desc 'Close output stream'
117
- task :close do
118
- performance_report.finish
119
- end
120
- end
121
-
122
- desc 'Output banner'
123
- task :banner do
124
- performance_report.start 'Benchmark'
125
- end
126
-
127
- desc 'Generate lookups performance benchmark report'
128
- task lookups: :banner do
129
- generate_lookups_benchmark
130
- end
131
-
132
- desc 'Generate scans performance benchmark report'
133
- task scans: :banner do
134
- generate_scans_benchmark
135
- end
136
-
137
- desc 'Generate creation performance benchmark report'
138
- task creation: :banner do
139
- measure = BenchmarkMeasurement.new output
140
-
141
- output.puts '==> Creation'
142
- output.puts '`Rambling::Trie.create`'
143
- measure.perform 5 do
144
- trie = Rambling::Trie.create dictionary
145
- nil
146
- end
147
- end
148
-
149
- desc 'Generate compression performance benchmark report'
150
- task compression: :banner do
151
- measure = BenchmarkMeasurement.new output
152
-
153
- output.puts '==> Compression'
154
- output.puts '`compress!`'
155
-
156
- tries = []
157
- 5.times { tries << Rambling::Trie.create(dictionary) }
158
-
159
- measure.perform 5, tries do |trie|
160
- trie.compress!
161
- nil
162
- end
163
- end
164
-
165
- desc 'Generate all performance benchmark reports'
166
- task all: [
167
- :creation,
168
- :compression,
169
- :lookups,
170
- :scans,
171
- ]
172
-
173
- namespace :all do
174
- desc "Generate and store performance benchmark report in reports/#{Rambling::Trie::VERSION}"
175
- task save: [
176
- 'output:file',
177
- :all
178
- ]
179
- end
180
-
181
- desc 'Compare ips for different implementations (changes over time)'
182
- task :compare do
183
- require 'benchmark/ips'
184
- Benchmark.ips do |b|
185
- hash = { 'thing' => 'gniht' }
186
-
187
- b.report 'has_key?' do
188
- hash.has_key? 'thing'
189
- end
190
-
191
- b.report '[]' do
192
- !!hash['thing']
193
- end
194
- end
195
- end
196
- end
197
- end
198
-
199
- current_tasks = Rake.application.top_level_tasks
200
- current_tasks << 'performance:benchmark:output:close'
201
- Rake.application.instance_variable_set :@top_level_tasks, current_tasks
@@ -1,11 +0,0 @@
1
- require 'fileutils'
2
- require_relative '../helpers/path'
3
-
4
- namespace :performance do
5
- include Helpers::Path
6
-
7
- desc 'Create report dir'
8
- task :directory do
9
- FileUtils.mkdir_p path('reports', Rambling::Trie::VERSION)
10
- end
11
- end
@@ -1,119 +0,0 @@
1
- namespace :performance do
2
- include Helpers::Path
3
- include Helpers::Time
4
-
5
- def performance_report
6
- @performance_report ||= PerformanceReport.new
7
- end
8
-
9
- def output
10
- performance_report.output
11
- end
12
-
13
- class FlamegraphProfile
14
- def initialize filename
15
- @filename = filename
16
- end
17
-
18
- def perform times, params = nil
19
- params = Array params
20
- params << nil unless params.any?
21
-
22
- dirname = path 'reports', Rambling::Trie::VERSION, 'flamegraph', time
23
- FileUtils.mkdir_p dirname
24
- path = File.join dirname, "#{filename}.html"
25
-
26
- result = Flamegraph.generate path do
27
- params.each do |param|
28
- times.times do
29
- yield param
30
- end
31
- end
32
- end
33
- end
34
-
35
- private
36
-
37
- attr_reader :filename
38
- end
39
-
40
- namespace :flamegraph do
41
- desc 'Output banner'
42
- task :banner do
43
- performance_report.start 'Flamegraph'
44
- end
45
-
46
- desc 'Generate flamegraph reports for creation'
47
- task creation: ['performance:directory', :banner] do
48
- output.puts 'Generating flamegraph reports for creation...'
49
-
50
- flamegraph = FlamegraphProfile.new 'new-trie'
51
- flamegraph.perform 1 do
52
- trie = Rambling::Trie.create dictionary
53
- end
54
- end
55
-
56
- desc 'Generate flamegraph reports for compression'
57
- task compression: ['performance:directory', :banner] do
58
- output.puts 'Generating flamegraph reports for compression...'
59
-
60
- tries = [ Rambling::Trie.create(dictionary) ]
61
-
62
- flamegraph = FlamegraphProfile.new 'compressed-trie'
63
- flamegraph.perform 1, tries do |trie|
64
- trie.compress!
65
- nil
66
- end
67
- end
68
-
69
- desc 'Generate flamegraph reports for lookups'
70
- task lookups: ['performance:directory', :banner] do
71
- output.puts 'Generating flamegraph reports for lookups...'
72
-
73
- words = %w(hi help beautiful impressionism anthropological)
74
-
75
- trie = Rambling::Trie.create dictionary
76
- compressed_trie = Rambling::Trie.create(dictionary).compress!
77
-
78
- [ trie, compressed_trie ].each do |trie|
79
- prefix = "#{trie.compressed? ? 'compressed' : 'uncompressed'}-trie"
80
-
81
- flamegraph = FlamegraphProfile.new "#{prefix}-word"
82
- flamegraph.perform 1, words do |word|
83
- trie.word? word
84
- end
85
-
86
- flamegraph = FlamegraphProfile.new "#{prefix}-partial-word"
87
- flamegraph.perform 1, words do |word|
88
- trie.partial_word? word
89
- end
90
- end
91
- end
92
-
93
- desc 'Generate flamegraph reports for scans'
94
- task scans: ['performance:directory', :banner] do
95
- output.puts 'Generating flamegraph reports for scans...'
96
-
97
- words = %w(hi help beautiful impressionism anthropological)
98
-
99
- trie = Rambling::Trie.create dictionary
100
- compressed_trie = Rambling::Trie.create(dictionary).compress!
101
-
102
- [ trie, compressed_trie ].each do |trie|
103
- prefix = "#{trie.compressed? ? 'compressed' : 'uncompressed'}-trie"
104
- flamegraph = FlamegraphProfile.new "#{prefix}-scan"
105
- flamegraph.perform 1, words do |word|
106
- trie.scan(word).size
107
- end
108
- end
109
- end
110
-
111
- desc 'Generate all flamegraph reports'
112
- task all: [
113
- :creation,
114
- :compression,
115
- :lookups,
116
- :scans,
117
- ]
118
- end
119
- end