rambling-trie 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/Rakefile +1 -1
  3. data/lib/rambling/trie.rb +21 -9
  4. data/lib/rambling/trie/compressed_node.rb +112 -0
  5. data/lib/rambling/trie/compression.rb +13 -0
  6. data/lib/rambling/trie/compressor.rb +30 -31
  7. data/lib/rambling/trie/{root.rb → container.rb} +41 -38
  8. data/lib/rambling/trie/enumerable.rb +11 -7
  9. data/lib/rambling/trie/missing_node.rb +1 -1
  10. data/lib/rambling/trie/node.rb +25 -22
  11. data/lib/rambling/trie/plain_text_reader.rb +1 -1
  12. data/lib/rambling/trie/raw_node.rb +90 -0
  13. data/lib/rambling/trie/tasks/helpers/path.rb +13 -0
  14. data/lib/rambling/trie/tasks/helpers/time.rb +7 -0
  15. data/lib/rambling/trie/tasks/performance.rb +10 -91
  16. data/lib/rambling/trie/tasks/performance/all.rb +4 -0
  17. data/lib/rambling/trie/tasks/performance/benchmark.rb +172 -0
  18. data/lib/rambling/trie/tasks/performance/directory.rb +11 -0
  19. data/lib/rambling/trie/tasks/performance/profile/call_tree.rb +132 -0
  20. data/lib/rambling/trie/tasks/performance/profile/memory.rb +116 -0
  21. data/lib/rambling/trie/version.rb +1 -1
  22. data/rambling-trie.gemspec +6 -4
  23. data/spec/integration/rambling/trie_spec.rb +63 -9
  24. data/spec/lib/rambling/trie/compressed_node_spec.rb +35 -0
  25. data/spec/lib/rambling/trie/compressor_spec.rb +31 -0
  26. data/spec/lib/rambling/trie/container_spec.rb +470 -0
  27. data/spec/lib/rambling/trie/enumerable_spec.rb +2 -2
  28. data/spec/lib/rambling/trie/inspector_spec.rb +21 -14
  29. data/spec/lib/rambling/trie/node_spec.rb +72 -209
  30. data/spec/lib/rambling/trie/raw_node_spec.rb +377 -0
  31. data/spec/lib/rambling/trie_spec.rb +46 -25
  32. metadata +57 -16
  33. data/lib/rambling/trie/branches.rb +0 -149
  34. data/spec/lib/rambling/trie/branches_spec.rb +0 -52
  35. data/spec/lib/rambling/trie/root_spec.rb +0 -376
@@ -6,7 +6,7 @@ module Rambling
6
6
  # @param [String] filepath the full path of the file to load the words from.
7
7
  # @yield [String] Each line read from the file.
8
8
  def each_word filepath
9
- each_line(filepath) { |line| yield line.chomp }
9
+ each_line(filepath) { |line| yield line.chomp! }
10
10
  end
11
11
 
12
12
  private
@@ -0,0 +1,90 @@
1
module Rambling
  module Trie
    # Node of an uncompressed trie: each child is stored in `children_tree`
    # under a symbol built from a single character of the word.
    class RawNode < Rambling::Trie::Node
      # Inserts a word below this node, consuming one character per level.
      # @param [String] word the characters still to be inserted.
      # @return [Rambling::Trie::RawNode] the child the word was added under
      #   (or the result of `terminal!` when the word is empty).
      # @note The word is consumed: characters are removed with `slice!`.
      def add word
        return terminal! if word.empty?

        add_to_children_tree word
      end

      # Tells whether the given characters form a path starting at this node.
      # @param [Array] chars the characters to follow; consumed by the lookup.
      # @return [Boolean] `true` when every character has a matching child,
      #   `false` otherwise.
      def partial_word? chars = []
        return true if chars.empty?

        branch = children_tree[chars.slice!(0).to_sym]
        branch ? branch.partial_word?(chars) : false
      end

      # Tells whether the given characters lead to a terminal node.
      # @param [Array] chars the characters to follow; consumed by the lookup.
      # @return [Boolean] `true` when the path exists and ends on a word,
      #   `false` otherwise.
      def word? chars = []
        return terminal? if chars.empty?

        branch = children_tree[chars.slice!(0).to_sym]
        branch ? branch.word?(chars) : false
      end

      # Finds the node reached by following the given characters.
      # @param [Array] chars the characters to follow; consumed by the lookup.
      # @return [Rambling::Trie::Node] the node at the end of the path, or a
      #   `Rambling::Trie::MissingNode` when the path does not exist.
      def scan chars
        closest_node chars
      end

      # A raw node is never compressed.
      # @return [Boolean] always `false`.
      def compressed?
        false
      end

      protected

      # Walks down one character at a time until `chars` is exhausted.
      def closest_node chars
        return self if chars.empty?

        branch = children_tree[chars.slice!(0).to_sym]
        branch ? branch.closest_node(chars) : Rambling::Trie::MissingNode.new
      end

      private

      # Pops the first character off `word`, finds or creates its child and
      # hands the remaining characters to that child.
      def add_to_children_tree word
        letter = word.slice!(0).to_sym
        branch = children_tree[letter]
        branch ||= new_node letter
        branch.add word
        branch
      end

      # Creates a child of this node for `letter` and registers it.
      def new_node letter
        Rambling::Trie::RawNode.new(self).tap do |created|
          created.letter = letter
          children_tree[letter] = created
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
require 'pathname'

# Helpers shared by the performance rake tasks.
module Helpers
  # Builds paths relative to the directory five levels above this file.
  module Path
    # Joins the given segments onto that base directory.
    # @param [Array<String>] filename the path segments to append.
    # @return [Pathname] the resulting path with `..` segments cleaned up.
    def path *filename
      Pathname.new(full_path(*filename)).cleanpath
    end

    private

    # @return [String] the un-normalized path (still contains the `..` hops).
    def full_path *filename
      File.join File.dirname(__FILE__), '..', '..', '..', '..', '..', *filename
    end
  end
end
@@ -0,0 +1,7 @@
1
# Helpers shared by the performance rake tasks.
module Helpers
  # Provides a timestamp that is computed once per including object.
  module Time
    # @return [String] the time of the first call, formatted as
    #   `YYYY-MM-DD HH.MM.SS.mmm`; later calls return the same String.
    def time
      @time || (@time = ::Time.now.strftime('%Y-%m-%d %H.%M.%S.%L'))
    end
  end
end
@@ -1,92 +1,11 @@
1
+ require 'fileutils'
1
2
  require 'benchmark'
2
-
3
- namespace :performance do
4
- def report name, trie, output
5
- words = %w(hi help beautiful impressionism anthropological)
6
- methods = [:word?, :partial_word?]
7
-
8
- output.puts "==> #{name}"
9
- methods.each do |method|
10
- output.puts "`#{method}`:"
11
- words.each do |word|
12
- output.print "#{word} - #{trie.send method, word}".ljust 30
13
- output.puts Benchmark.measure { 200_000.times { trie.send method, word }}
14
- end
15
- end
16
- end
17
-
18
- def generate_report filename = nil
19
- output = filename.nil? ? $stdout : File.open(filename, 'a+')
20
-
21
- output.puts "\nReport for rambling-trie version #{Rambling::Trie::VERSION}"
22
-
23
- trie = nil
24
- measure = Benchmark.measure { trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt') }
25
-
26
- if ENV['profile_creation']
27
- output.puts '==> Creation'
28
- output.print 'Rambling::Trie.create'.ljust 30
29
- output.puts measure
30
- end
31
-
32
- report 'Uncompressed', trie, output
33
-
34
- return unless trie.respond_to? :compress!
35
-
36
- trie.compress!
37
- report 'Compressed', trie, output
38
-
39
- output.close
40
- end
41
-
42
- def path *filename
43
- File.join File.dirname(__FILE__), '..', '..', '..', '..', *filename
44
- end
45
-
46
- desc 'Generate performance report'
47
- task :report do
48
- puts 'Generating performance report...'
49
- generate_report
50
- end
51
-
52
- namespace :report do
53
- desc 'Generate performance report and append result to reports/performance'
54
- task :save do
55
- puts 'Generating performance report...'
56
- generate_report path('reports', 'performance')
57
- puts 'Report has been saved to reports/performance'
58
- end
59
- end
60
-
61
- desc 'Generate application profiling reports'
62
- task :profile do
63
- require 'ruby-prof'
64
-
65
- puts 'Generating profiling reports...'
66
-
67
- rambling_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
68
- words = %w(hi help beautiful impressionism anthropological)
69
- methods = [:word?, :partial_word?]
70
- tries = [lambda {rambling_trie.clone}, lambda {rambling_trie.clone.compress!}]
71
-
72
- methods.each do |method|
73
- tries.each do |trie_generator|
74
- trie = trie_generator.call
75
- result = RubyProf.profile do
76
- words.each do |word|
77
- 200_000.times { trie.send method, word }
78
- end
79
- end
80
-
81
- File.open path('reports', "profile-#{trie.compressed? ? 'compressed' : 'uncompressed'}-#{method.to_s.sub(/\?/, '')}-#{Time.now.to_i}"), 'w' do |file|
82
- RubyProf::CallTreePrinter.new(result).print file
83
- end
84
- end
85
- end
86
-
87
- puts 'Done'
88
- end
89
-
90
- desc 'Generate profiling and performance reports'
91
- task all: [:profile, :report]
92
- end
3
+ require 'ruby-prof'
4
+ require 'memory_profiler'
5
+ require 'benchmark/ips'
6
+ require_relative 'helpers/path'
7
+ require_relative 'performance/directory'
8
+ require_relative 'performance/benchmark'
9
+ require_relative 'performance/profile/call_tree'
10
+ require_relative 'performance/profile/memory'
11
+ require_relative 'performance/all'
@@ -0,0 +1,4 @@
1
namespace :performance do
  # NOTE(review): confirm both prerequisites resolve to real tasks -
  # `profile:call_tree` looks like a namespace name rather than a task name.
  prerequisites = ['profile:call_tree', :report]

  desc 'Generate profiling and performance reports'
  task all: prerequisites
end
@@ -0,0 +1,172 @@
1
+ require_relative '../helpers/path'
2
+
3
+ namespace :performance do
4
+ include Helpers::Path
5
+
6
# Runs a block repeatedly and prints one timing row per parameter.
class BenchmarkMeasurement
  # @param [#print, #puts] output where the rows are written.
  def initialize output
    @output = output
  end

  # Measures the block once per parameter, `times` iterations each.
  # @param [Integer] times how many times the block runs per parameter.
  # @param [Object, Array, nil] params the value(s) yielded to the block;
  #   when none are given the block runs with `nil`.
  # @yield [param] the code being measured.
  def perform times, params = nil
    params = Array params
    # `Array()` hands back the caller's own array, so build a fresh list
    # instead of appending to it.
    params = [nil] if params.empty?

    params.each do |param|
      output.print param.to_s.ljust(20)

      measure(times, param) { |value| yield value }
    end
  end

  # Prints the report header with the library version.
  def banner
    output.puts "\nBenchmark for rambling-trie version #{Rambling::Trie::VERSION}"
  end

  private

  attr_reader :output

  # Prints the last block result followed by the Benchmark timings.
  def measure times, param = nil
    result = nil

    timing = Benchmark.measure do
      times.times { result = yield param }
    end

    output.print result.to_s.ljust(10)
    output.puts timing
  end
end
45
+
46
# Yields an output stream: standard output when no filename is given,
# otherwise the file opened in append mode.
# @param [String, nil] filename the report file to append to.
# @yield [IO] the stream to write to.
# @return [nil]
def with_file filename = nil
  if filename.nil?
    # Use the shared $stdout and leave it open: wrapping descriptor 1 in a
    # new IO and closing it would close standard output for the whole process.
    yield $stdout
    $stdout.flush
  else
    # The block form closes the file even when the caller's block raises.
    File.open(filename, 'a+') { |output| yield output }
  end

  nil
end
53
+
54
# Benchmarks `word?` and `partial_word?` on the uncompressed trie and on a
# compressed clone of it.
# @param [String, nil] filename report file to append to; nil is handed to
#   `with_file` unchanged.
def generate_lookups_benchmark filename = nil
  with_file filename do |output|
    measure = BenchmarkMeasurement.new output
    measure.banner

    raw_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
    compressed_trie = raw_trie.clone.compress!

    [raw_trie, compressed_trie].each do |current|
      label = current.compressed? ? 'Compressed' : 'Uncompressed'
      output.puts "==> #{label}"
      words = %w(hi help beautiful impressionism anthropological)

      output.puts '`word?`'
      measure.perform(200_000, words) { |word| current.word? word }

      output.puts '`partial_word?`'
      measure.perform(200_000, words) { |word| current.partial_word? word }
    end
  end
end
76
+
77
# Benchmarks `scan` on the uncompressed trie and on a compressed clone of it,
# using a per-word iteration count.
# @param [String, nil] filename report file to append to; nil is handed to
#   `with_file` unchanged.
def generate_scans_benchmark filename = nil
  with_file filename do |output|
    measure = BenchmarkMeasurement.new output
    measure.banner

    # word to scan for => number of iterations
    iterations_by_word = {
      hi: 1_000,
      help: 100_000,
      beautiful: 100_000,
      impressionism: 200_000,
      anthropological: 200_000,
    }
    raw_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
    compressed_trie = raw_trie.clone.compress!

    [raw_trie, compressed_trie].each do |current|
      label = current.compressed? ? 'Compressed' : 'Uncompressed'
      output.puts "==> #{label}"
      output.puts "`scan`"

      iterations_by_word.each do |prefix, iterations|
        measure.perform(iterations, prefix.to_s) { |word| current.scan(word).size }
      end
    end
  end
end
102
+
103
# Rake tasks that print (or save) the benchmark reports.
namespace :benchmark do
  desc 'Generate lookups performance benchmark report'
  task(:lookups) { generate_lookups_benchmark }

  desc 'Generate scans performance benchmark report'
  task(:scans) { generate_scans_benchmark }

  namespace :lookups do
    desc 'Generate performance benchmark report store results in reports/'
    task save: ['performance:directory'] do
      puts 'Generating performance benchmark report for lookups...'
      report_file = path 'reports', Rambling::Trie::VERSION, 'benchmark'
      generate_lookups_benchmark report_file
      puts "Benchmarks have been saved to reports/#{Rambling::Trie::VERSION}/benchmark"
    end
  end

  # Times building the trie from the dictionary file.
  task :creation do
    with_file do |output|
      measure = BenchmarkMeasurement.new output
      measure.banner

      output.puts '==> Creation'
      output.puts '`Rambling::Trie.create`'
      measure.perform(5) do
        Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
      end
    end
  end

  # Times compressing a clone of the trie.
  task :compression do
    with_file do |output|
      measure = BenchmarkMeasurement.new output
      measure.banner

      output.puts '==> Compression'
      output.puts '`compress!`'

      source_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
      measure.perform(5) { source_trie.clone.compress! }
    end
  end

  task all: [
    'performance:benchmark:creation',
    'performance:benchmark:compression',
    'performance:benchmark:lookups',
    'performance:benchmark:scans',
  ]

  # Compares two ways of testing for a Hash key with benchmark-ips.
  task :compare do
    Benchmark.ips do |bench|
      lookup = { 'thing' => 'gniht' }

      bench.report('has_key?') { lookup.has_key? 'thing' }

      bench.report('[]') { !!lookup['thing'] }
    end
  end
end
172
+ end
@@ -0,0 +1,11 @@
1
+ require 'fileutils'
2
+ require_relative '../helpers/path'
3
+
4
namespace :performance do
  include Helpers::Path

  desc 'Create report dir'
  task :directory do
    # One reports directory per library version.
    report_dir = path 'reports', Rambling::Trie::VERSION
    FileUtils.mkdir_p report_dir
  end
end
@@ -0,0 +1,132 @@
1
+ require_relative '../../helpers/path'
2
+ require_relative '../../helpers/time'
3
+
4
+ namespace :performance do
5
+ namespace :profile do
6
+ include Helpers::Path
7
+ include Helpers::Time
8
+
9
# Profiles the block with ruby-prof and prints a call tree report.
# @param [Integer] times how many times the block runs per parameter.
# @param [Object, Array, nil] params the value(s) yielded to the block;
#   when none are given the block runs with `nil`.
# @param [String, Pathname] path passed to the printer as its `path:` option.
# @yield [param] the code being profiled.
def profile times, params, path
  params = Array params
  # `Array()` hands back the caller's own array, so build a fresh list
  # instead of appending to it.
  params = [nil] if params.empty?

  result = RubyProf.profile merge_fibers: true do
    params.each do |param|
      times.times { yield param }
    end
  end

  RubyProf::CallTreePrinter.new(result).print path: path
end
24
+
25
# Writes call tree profiles of `word?` and `partial_word?` for the
# uncompressed trie and for a compressed clone of it.
def generate_lookups_call_tree
  puts 'Generating call tree profiling reports for lookups...'

  puts "\nCall Tree profile for rambling-trie version #{Rambling::Trie::VERSION}"
  trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
  tries = [ trie, trie.clone.compress! ]

  words = %w(hi help beautiful impressionism anthropological)

  tries.each do |current|
    kind = current.compressed? ? 'compressed' : 'uncompressed'

    # The directory locals are deliberately not called `path`, so they do not
    # shadow the `path` helper.
    word_dir = path 'reports', Rambling::Trie::VERSION, 'call-tree', time, "profile-#{kind}-word"
    FileUtils.mkdir_p word_dir

    # The block must declare `word`: `profile` yields each word to it.
    profile(200_000, words, word_dir) { |word| current.word? word }

    partial_dir = path 'reports', Rambling::Trie::VERSION, 'call-tree', time, "profile-#{kind}-partial-word"
    FileUtils.mkdir_p partial_dir

    profile(200_000, words, partial_dir) { |word| current.partial_word? word }
  end

  puts 'Done'
end
54
+
55
# Writes call tree profiles of `scan` for the uncompressed trie and for a
# compressed clone of it, using a per-word iteration count.
def generate_scans_call_tree
  puts 'Generating call tree profiling reports for scans...'

  puts "\nCall Tree profile for rambling-trie version #{Rambling::Trie::VERSION}"
  raw_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
  tries = [raw_trie, raw_trie.clone.compress!]

  # word to scan for => number of iterations
  iterations_by_word = {
    hi: 1_000,
    help: 100_000,
    beautiful: 100_000,
    impressionism: 200_000,
    anthropological: 200_000,
  }

  tries.each do |current|
    kind = current.compressed? ? 'compressed' : 'uncompressed'
    report_dir = path 'reports', Rambling::Trie::VERSION, 'call-tree', time, "profile-#{kind}-scan"
    FileUtils.mkdir_p report_dir

    # Every word is profiled into the same report directory.
    iterations_by_word.each do |prefix, iterations|
      profile(iterations, prefix.to_s, report_dir) { |word| current.scan(word).size }
    end
  end

  puts 'Done'
end
84
+
85
# Rake tasks that write call tree profiles under reports/<version>/call-tree.
namespace :call_tree do
  desc 'Generate call tree profiling reports for creation'
  task creation: ['performance:directory'] do
    puts 'Generating call tree profiling reports for creation...'
    puts "\nCall Tree profile for rambling-trie version #{Rambling::Trie::VERSION}"
    report_dir = path 'reports', Rambling::Trie::VERSION, 'call-tree', time, 'profile-new-trie'
    FileUtils.mkdir_p report_dir

    profile(5, nil, report_dir) do
      Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
    end
  end

  desc 'Generate call tree profiling reports for compression'
  task compression: ['performance:directory'] do
    puts 'Generating call tree profiling reports for compression...'
    puts "\nCall Tree profile for rambling-trie version #{Rambling::Trie::VERSION}"

    report_dir = path 'reports', Rambling::Trie::VERSION, 'call-tree', time, 'profile-compressed-trie'
    FileUtils.mkdir_p report_dir

    # The trie is built outside the profiled block; only clone + compress!
    # are profiled.
    source_trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
    profile(5, nil, report_dir) { source_trie.clone.compress! }
  end

  desc 'Generate call tree profiling reports for lookups'
  task(lookups: ['performance:directory']) { generate_lookups_call_tree }

  desc 'Generate call tree profiling reports for scans'
  task(scans: ['performance:directory']) { generate_scans_call_tree }

  task all: [
    'performance:profile:call_tree:creation',
    'performance:profile:call_tree:compression',
    'performance:profile:call_tree:lookups',
    'performance:profile:call_tree:scans',
  ]
end
131
+ end
132
+ end