rambling-trie 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3bb17c6b1df5c473eb696bc484ff0e3b46517867
4
- data.tar.gz: 0b6daafa67a20409389b80aaf9ac5d1bc6694cd2
3
+ metadata.gz: 9b23cfe0d2b236a7970f4666f0df6a7cc4226d22
4
+ data.tar.gz: 5147da530a25b386fdafd9cc59ccb1c35b2a2efc
5
5
  SHA512:
6
- metadata.gz: ecaecb91b920793208c878134881f08a7718d9f9771857a49cb15f8059e35273e29f81ce21a497de37566cfae8273a13ff611b43f0d049694b0207c65efa35bd
7
- data.tar.gz: 810db517696450d411d75ad59df68e410e0c1109ad1133bc7aa95363609520f8db51031a72e1b3392475524f8d10b28ca1e827bd5dbb9c3fba0aaa2417eeef41
6
+ metadata.gz: 71405a6dd24e710629d9e8cb8d307b2faa5664f029bcdedd749063d106e6706bce5fb07fb6ecbf776098668d0b5ab3826e3f3d6d9f75e96e516c2cbce07de2ab
7
+ data.tar.gz: 6d5a292f676c979dbf2e3b7cc25d8a932cf087c14325df539b2ccafd872083d9ca1140224eb4edbc93c4fb0e920bd7f75374ba0bbd260d911fdf057fdb5cdd0d
@@ -22,18 +22,15 @@ module Rambling
22
22
  # @return [Boolean] `true` if the characters are found and form a word,
23
23
  # `false` otherwise.
24
24
  def word? chars
25
- if chars.empty?
26
- terminal?
27
- else
28
- has_word? chars
29
- end
25
+ chars.empty? ? terminal? : has_word?(chars)
30
26
  end
31
27
 
32
28
  # Returns all words that start with the specified characters.
33
29
  # @param [Array] chars the characters to look for in the trie.
34
30
  # @return [Array] all the words contained in the trie that start with the specified characters.
35
31
  def scan chars
36
- closest_node(chars).to_a
32
+ node = chars.empty? ? self : closest_node(chars)
33
+ node.to_a
37
34
  end
38
35
 
39
36
  # Always return `true` for a raw (compressed) node.
@@ -42,70 +39,58 @@ module Rambling
42
39
  true
43
40
  end
44
41
 
45
- protected
42
+ private
46
43
 
47
- def closest_node chars
48
- if chars.empty?
49
- self
50
- else
51
- current_length = 0
52
- current_key, current_key_string = current_key chars.slice!(0)
44
+ def has_partial_word? chars
45
+ recursive_get(:partial_word?, chars) || false
46
+ end
53
47
 
54
- begin
55
- current_length += 1
48
+ def has_word? chars
49
+ current_key = nil
56
50
 
57
- if current_key_string.length == current_length || chars.empty?
58
- return children_tree[current_key].closest_node chars
59
- end
60
- end while current_key_string[current_length] == chars.slice!(0)
51
+ while !chars.empty?
52
+ if current_key
53
+ current_key << chars.slice!(0)
54
+ else
55
+ current_key = chars.slice!(0)
56
+ end
61
57
 
62
- Rambling::Trie::MissingNode.new
58
+ child = children_tree[current_key.to_sym]
59
+ return child.word? chars if child
63
60
  end
61
+
62
+ false
64
63
  end
65
64
 
66
- private
65
+ def closest_node chars
66
+ recursive_get(:scan, chars) || Rambling::Trie::MissingNode.new
67
+ end
67
68
 
68
- def has_partial_word? chars
69
+ def recursive_get method, chars
69
70
  current_length = 0
70
- current_key, current_key_string = current_key chars.slice!(0)
71
+ current_key = current_key chars.slice!(0)
71
72
 
72
73
  begin
73
74
  current_length += 1
74
75
 
75
- if current_key_string.length == current_length || chars.empty?
76
- return children_tree[current_key].partial_word? chars
76
+ if (current_key && current_key.length == current_length) || chars.empty?
77
+ return children_tree[current_key.to_sym].send method, chars
77
78
  end
78
- end while current_key_string[current_length] == chars.slice!(0)
79
-
80
- false
81
- end
82
-
83
- def has_word? chars
84
- current_key_string = ''
85
-
86
- while !chars.empty?
87
- current_key_string << chars.slice!(0)
88
- current_key = current_key_string.to_sym
89
- child = children_tree[current_key]
90
- return child.word? chars if child
91
- end
92
-
93
- false
79
+ end while current_key && current_key[current_length] == chars.slice!(0)
94
80
  end
95
81
 
96
82
  def current_key letter
97
- current_key_string = current_key = ''
83
+ current_key = nil
98
84
 
99
85
  children_tree.keys.each do |key|
100
86
  key_string = key.to_s
101
87
  if key_string.start_with? letter
102
- current_key = key
103
- current_key_string = key_string
88
+ current_key = key_string
104
89
  break
105
90
  end
106
91
  end
107
92
 
108
- [current_key, current_key_string]
93
+ current_key
109
94
  end
110
95
  end
111
96
  end
@@ -5,39 +5,37 @@ module Rambling
5
5
  # Compresses a node from a Trie data structure.
6
6
  # @param [RawNode] node the node to compress
7
7
  # @return [CompressedNode] node the compressed version of the node
8
- def compress node, parent = nil
8
+ def compress node
9
9
  if node.compressable?
10
- merge_node_with_compressed_child node, parent
10
+ merge_with_child_and_compress node
11
11
  else
12
- copy_node_and_compress_children node, parent
12
+ copy_node_and_compress_children node
13
13
  end
14
14
  end
15
15
 
16
16
  private
17
17
 
18
- def merge_node_with_compressed_child node, parent
19
- compressed_child = compress node.children.first
18
+ def merge_with_child_and_compress node
19
+ child = node.children.first
20
20
 
21
- new_node = Rambling::Trie::CompressedNode.new parent
22
- new_node.letter = node.letter.to_s << compressed_child.letter.to_s
23
- new_node.terminal! if compressed_child.terminal?
24
- new_node.children_tree = compressed_child.children_tree
21
+ new_node = Rambling::Trie::CompressedNode.new node.parent
22
+ new_node.letter = node.letter.to_s << child.letter.to_s
23
+ new_node.terminal! if child.terminal?
24
+ new_node.children_tree = child.children_tree
25
25
 
26
- new_node.children.each do |child|
27
- child.parent = new_node
28
- end
29
-
30
- new_node
26
+ compress new_node
31
27
  end
32
28
 
33
- def copy_node_and_compress_children node, parent
34
- new_node = Rambling::Trie::CompressedNode.new parent
35
-
29
+ def copy_node_and_compress_children node
30
+ new_node = Rambling::Trie::CompressedNode.new node.parent
36
31
  new_node.letter = node.letter
37
32
  new_node.terminal! if node.terminal?
38
33
 
39
- node.children.map do |child|
40
- compress child, new_node
34
+ node.children.each do |child|
35
+ compressed_child = compress child
36
+
37
+ compressed_child.parent = new_node
38
+ new_node[compressed_child.letter] = compressed_child
41
39
  end
42
40
 
43
41
  new_node
@@ -49,14 +49,14 @@ module Rambling
49
49
  # @param [String] word the word or partial word to look for in the trie.
50
50
  # @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
51
51
  def partial_word? word = ''
52
- root.partial_word? word.chars.to_a
52
+ root.partial_word? word.chars
53
53
  end
54
54
 
55
55
  # Checks if a whole word exists in the trie.
56
56
  # @param [String] word the word to look for in the trie.
57
57
  # @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
58
58
  def word? word = ''
59
- root.word? word.chars.to_a
59
+ root.word? word.chars
60
60
  end
61
61
 
62
62
  # Returns all words that start with the specified characters.
@@ -73,9 +73,9 @@ module Rambling
73
73
  parent.to_s << letter.to_s
74
74
  end
75
75
 
76
- def letter= new_letter
77
- if new_letter
78
- @letter = new_letter.to_sym
76
+ def letter= letter
77
+ if letter
78
+ @letter = letter.to_sym
79
79
  parent[letter] = self if parent
80
80
  end
81
81
  end
@@ -4,6 +4,10 @@ module Helpers
4
4
  Pathname.new(full_path *filename).cleanpath
5
5
  end
6
6
 
7
+ def dictionary
8
+ path 'assets', 'dictionaries', 'words_with_friends.txt'
9
+ end
10
+
7
11
  private
8
12
 
9
13
  def full_path *filename
@@ -1,4 +1,17 @@
1
1
  namespace :performance do
2
- desc 'Generate profiling and performance reports'
3
- task all: ['profile:call_tree', :report]
2
+ desc 'Generate all profiling and performance reports'
3
+ task all: [
4
+ 'benchmark:all',
5
+ 'profile:call_tree:all',
6
+ 'profile:memory:all',
7
+ ]
8
+
9
+ namespace :all do
10
+ desc 'Generate and store all profiling and performance reports'
11
+ task save: [
12
+ 'benchmark:all:save',
13
+ 'profile:call_tree:all',
14
+ 'profile:memory:all',
15
+ ]
16
+ end
4
17
  end
@@ -3,17 +3,38 @@ require_relative '../helpers/path'
3
3
  namespace :performance do
4
4
  include Helpers::Path
5
5
 
6
+ class BenchmarkReport
7
+ attr_reader :output
8
+
9
+ def initialize output
10
+ @output = output
11
+ end
12
+
13
+ def finish
14
+ output.close
15
+ end
16
+ end
17
+
6
18
  class BenchmarkMeasurement
7
19
  def initialize output
8
20
  @output = output
9
21
  end
10
22
 
23
+ def param_to_s param
24
+ case param
25
+ when Rambling::Trie::Container
26
+ ''
27
+ else
28
+ param.to_s
29
+ end
30
+ end
31
+
11
32
  def perform times, params = nil
12
33
  params = Array params
13
34
  params << nil unless params.any?
14
35
 
15
36
  params.each do |param|
16
- output.print "#{param}".ljust 20
37
+ output.print param_to_s(param).ljust 20
17
38
 
18
39
  measure times, param do |param|
19
40
  yield param
@@ -43,64 +64,88 @@ namespace :performance do
43
64
  end
44
65
  end
45
66
 
46
- def with_file filename = nil
47
- output = filename.nil? ? IO.new(1) : File.open(filename, 'a+')
67
+ def benchmark_report= benchmark_report
68
+ @benchmark_report = benchmark_report
69
+ end
48
70
 
49
- yield output
71
+ def benchmark_report
72
+ Rake::Task['performance:benchmark:output:stdout'].invoke unless @benchmark_report
50
73
 
51
- output.close
74
+ @benchmark_report
52
75
  end
53
76
 
54
- def generate_lookups_benchmark filename = nil
55
- with_file filename do |output|
56
- measure = BenchmarkMeasurement.new output
57
- measure.banner
58
-
59
- trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
60
- [ trie, trie.clone.compress! ].each do |trie|
61
- output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
62
- words = %w(hi help beautiful impressionism anthropological)
77
+ def output
78
+ benchmark_report.output
79
+ end
63
80
 
64
- output.puts '`word?`'
65
- measure.perform 200_000, words do |word|
66
- trie.word? word
67
- end
81
+ def generate_lookups_benchmark filename = nil
82
+ measure = BenchmarkMeasurement.new output
83
+ measure.banner
84
+
85
+ trie = Rambling::Trie.create dictionary
86
+ compressed_trie = Rambling::Trie.create(dictionary).compress!
87
+ [ trie, compressed_trie ].each do |trie|
88
+ output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
89
+ words = %w(hi help beautiful impressionism anthropological)
90
+
91
+ output.puts '`word?`'
92
+ measure.perform 200_000, words do |word|
93
+ trie.word? word
94
+ end
68
95
 
69
- output.puts '`partial_word?`'
70
- measure.perform 200_000, words do |word|
71
- trie.partial_word? word
72
- end
96
+ output.puts '`partial_word?`'
97
+ measure.perform 200_000, words do |word|
98
+ trie.partial_word? word
73
99
  end
74
100
  end
75
101
  end
76
102
 
77
103
  def generate_scans_benchmark filename = nil
78
- with_file filename do |output|
79
- measure = BenchmarkMeasurement.new output
80
- measure.banner
81
-
82
- words = {
83
- hi: 1_000,
84
- help: 100_000,
85
- beautiful: 100_000,
86
- impressionism: 200_000,
87
- anthropological: 200_000,
88
- }
89
- trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
90
-
91
- [ trie, trie.clone.compress! ].each do |trie|
92
- output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
93
- output.puts "`scan`"
94
- words.each do |word, times|
95
- measure.perform times, word.to_s do |word|
96
- trie.scan(word).size
97
- end
104
+ measure = BenchmarkMeasurement.new output
105
+ measure.banner
106
+
107
+ words = {
108
+ hi: 1_000,
109
+ help: 100_000,
110
+ beautiful: 100_000,
111
+ impressionism: 200_000,
112
+ anthropological: 200_000,
113
+ }
114
+
115
+ trie = Rambling::Trie.create dictionary
116
+ compressed_trie = Rambling::Trie.create(dictionary).compress!
117
+
118
+ [ trie, compressed_trie ].each do |trie|
119
+ output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
120
+ output.puts "`scan`"
121
+ words.each do |word, times|
122
+ measure.perform times, word.to_s do |word|
123
+ trie.scan(word).size
98
124
  end
99
125
  end
100
126
  end
101
127
  end
102
128
 
103
129
  namespace :benchmark do
130
+ namespace :output do
131
+ desc 'Set task reporting output to stdout'
132
+ task :stdout do
133
+ self.benchmark_report = BenchmarkReport.new IO.new(1)
134
+ end
135
+
136
+ desc 'Set task reporting output to file'
137
+ task file: ['performance:directory'] do
138
+ path = path 'reports', Rambling::Trie::VERSION, 'benchmark'
139
+ file = File.open path, 'a+'
140
+ self.benchmark_report = BenchmarkReport.new file
141
+ end
142
+
143
+ desc 'Close output stream'
144
+ task :close do
145
+ benchmark_report.finish unless benchmark_report.nil?
146
+ end
147
+ end
148
+
104
149
  desc 'Generate lookups performance benchmark report'
105
150
  task :lookups do
106
151
  generate_lookups_benchmark
@@ -111,50 +156,53 @@ namespace :performance do
111
156
  generate_scans_benchmark
112
157
  end
113
158
 
114
- namespace :lookups do
115
- desc 'Generate performance benchmark report store results in reports/'
116
- task save: ['performance:directory'] do
117
- puts 'Generating performance benchmark report for lookups...'
118
- generate_lookups_benchmark path('reports', Rambling::Trie::VERSION, 'benchmark')
119
- puts "Benchmarks have been saved to reports/#{Rambling::Trie::VERSION}/benchmark"
120
- end
121
- end
122
-
159
+ desc 'Generate creation performance benchmark report'
123
160
  task :creation do
124
- with_file do |output|
125
- measure = BenchmarkMeasurement.new output
126
- measure.banner
127
-
128
- output.puts '==> Creation'
129
- output.puts '`Rambling::Trie.create`'
130
- measure.perform 5 do
131
- trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
132
- end
161
+ measure = BenchmarkMeasurement.new output
162
+ measure.banner
163
+
164
+ output.puts '==> Creation'
165
+ output.puts '`Rambling::Trie.create`'
166
+ measure.perform 5 do
167
+ trie = Rambling::Trie.create dictionary
168
+ nil
133
169
  end
134
170
  end
135
171
 
172
+ desc 'Generate compression performance benchmark report'
136
173
  task :compression do
137
- with_file do |output|
138
- measure = BenchmarkMeasurement.new output
139
- measure.banner
174
+ measure = BenchmarkMeasurement.new output
175
+ measure.banner
140
176
 
141
- output.puts '==> Compression'
142
- output.puts '`compress!`'
177
+ output.puts '==> Compression'
178
+ output.puts '`compress!`'
143
179
 
144
- trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
145
- measure.perform 5 do
146
- trie.clone.compress!
147
- end
180
+ tries = []
181
+ 5.times { tries << Rambling::Trie.create(dictionary) }
182
+
183
+ measure.perform 5, tries do |trie|
184
+ trie.compress!
185
+ nil
148
186
  end
149
187
  end
150
188
 
189
+ desc 'Generate all performance benchmark reports'
151
190
  task all: [
152
- 'performance:benchmark:creation',
153
- 'performance:benchmark:compression',
154
- 'performance:benchmark:lookups',
155
- 'performance:benchmark:scans',
191
+ 'creation',
192
+ 'compression',
193
+ 'lookups',
194
+ 'scans',
156
195
  ]
157
196
 
197
+ namespace :all do
198
+ desc "Generate and store performance benchmark report in reports/#{Rambling::Trie::VERSION}"
199
+ task save: [
200
+ 'output:file',
201
+ 'all'
202
+ ]
203
+ end
204
+
205
+ desc 'Compare ips for different implementations (changes over time)'
158
206
  task :compare do
159
207
  Benchmark.ips do |b|
160
208
  hash = { 'thing' => 'gniht' }
@@ -170,3 +218,7 @@ namespace :performance do
170
218
  end
171
219
  end
172
220
  end
221
+
222
+ current_tasks = Rake.application.top_level_tasks
223
+ current_tasks << 'performance:benchmark:output:close'
224
+ Rake.application.instance_variable_set :@top_level_tasks, current_tasks