rambling-trie 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3bb17c6b1df5c473eb696bc484ff0e3b46517867
4
- data.tar.gz: 0b6daafa67a20409389b80aaf9ac5d1bc6694cd2
3
+ metadata.gz: 9b23cfe0d2b236a7970f4666f0df6a7cc4226d22
4
+ data.tar.gz: 5147da530a25b386fdafd9cc59ccb1c35b2a2efc
5
5
  SHA512:
6
- metadata.gz: ecaecb91b920793208c878134881f08a7718d9f9771857a49cb15f8059e35273e29f81ce21a497de37566cfae8273a13ff611b43f0d049694b0207c65efa35bd
7
- data.tar.gz: 810db517696450d411d75ad59df68e410e0c1109ad1133bc7aa95363609520f8db51031a72e1b3392475524f8d10b28ca1e827bd5dbb9c3fba0aaa2417eeef41
6
+ metadata.gz: 71405a6dd24e710629d9e8cb8d307b2faa5664f029bcdedd749063d106e6706bce5fb07fb6ecbf776098668d0b5ab3826e3f3d6d9f75e96e516c2cbce07de2ab
7
+ data.tar.gz: 6d5a292f676c979dbf2e3b7cc25d8a932cf087c14325df539b2ccafd872083d9ca1140224eb4edbc93c4fb0e920bd7f75374ba0bbd260d911fdf057fdb5cdd0d
@@ -22,18 +22,15 @@ module Rambling
22
22
  # @return [Boolean] `true` if the characters are found and form a word,
23
23
  # `false` otherwise.
24
24
  def word? chars
25
- if chars.empty?
26
- terminal?
27
- else
28
- has_word? chars
29
- end
25
+ chars.empty? ? terminal? : has_word?(chars)
30
26
  end
31
27
 
32
28
  # Returns all words that start with the specified characters.
33
29
  # @param [Array] chars the characters to look for in the trie.
34
30
  # @return [Array] all the words contained in the trie that start with the specified characters.
35
31
  def scan chars
36
- closest_node(chars).to_a
32
+ node = chars.empty? ? self : closest_node(chars)
33
+ node.to_a
37
34
  end
38
35
 
39
36
  # Always return `true` for a raw (compressed) node.
@@ -42,70 +39,58 @@ module Rambling
42
39
  true
43
40
  end
44
41
 
45
- protected
42
+ private
46
43
 
47
- def closest_node chars
48
- if chars.empty?
49
- self
50
- else
51
- current_length = 0
52
- current_key, current_key_string = current_key chars.slice!(0)
44
+ def has_partial_word? chars
45
+ recursive_get(:partial_word?, chars) || false
46
+ end
53
47
 
54
- begin
55
- current_length += 1
48
+ def has_word? chars
49
+ current_key = nil
56
50
 
57
- if current_key_string.length == current_length || chars.empty?
58
- return children_tree[current_key].closest_node chars
59
- end
60
- end while current_key_string[current_length] == chars.slice!(0)
51
+ while !chars.empty?
52
+ if current_key
53
+ current_key << chars.slice!(0)
54
+ else
55
+ current_key = chars.slice!(0)
56
+ end
61
57
 
62
- Rambling::Trie::MissingNode.new
58
+ child = children_tree[current_key.to_sym]
59
+ return child.word? chars if child
63
60
  end
61
+
62
+ false
64
63
  end
65
64
 
66
- private
65
+ def closest_node chars
66
+ recursive_get(:scan, chars) || Rambling::Trie::MissingNode.new
67
+ end
67
68
 
68
- def has_partial_word? chars
69
+ def recursive_get method, chars
69
70
  current_length = 0
70
- current_key, current_key_string = current_key chars.slice!(0)
71
+ current_key = current_key chars.slice!(0)
71
72
 
72
73
  begin
73
74
  current_length += 1
74
75
 
75
- if current_key_string.length == current_length || chars.empty?
76
- return children_tree[current_key].partial_word? chars
76
+ if (current_key && current_key.length == current_length) || chars.empty?
77
+ return children_tree[current_key.to_sym].send method, chars
77
78
  end
78
- end while current_key_string[current_length] == chars.slice!(0)
79
-
80
- false
81
- end
82
-
83
- def has_word? chars
84
- current_key_string = ''
85
-
86
- while !chars.empty?
87
- current_key_string << chars.slice!(0)
88
- current_key = current_key_string.to_sym
89
- child = children_tree[current_key]
90
- return child.word? chars if child
91
- end
92
-
93
- false
79
+ end while current_key && current_key[current_length] == chars.slice!(0)
94
80
  end
95
81
 
96
82
  def current_key letter
97
- current_key_string = current_key = ''
83
+ current_key = nil
98
84
 
99
85
  children_tree.keys.each do |key|
100
86
  key_string = key.to_s
101
87
  if key_string.start_with? letter
102
- current_key = key
103
- current_key_string = key_string
88
+ current_key = key_string
104
89
  break
105
90
  end
106
91
  end
107
92
 
108
- [current_key, current_key_string]
93
+ current_key
109
94
  end
110
95
  end
111
96
  end
@@ -5,39 +5,37 @@ module Rambling
5
5
  # Compresses a node from a Trie data structure.
6
6
  # @param [RawNode] node the node to compress
7
7
  # @return [CompressedNode] node the compressed version of the node
8
- def compress node, parent = nil
8
+ def compress node
9
9
  if node.compressable?
10
- merge_node_with_compressed_child node, parent
10
+ merge_with_child_and_compress node
11
11
  else
12
- copy_node_and_compress_children node, parent
12
+ copy_node_and_compress_children node
13
13
  end
14
14
  end
15
15
 
16
16
  private
17
17
 
18
- def merge_node_with_compressed_child node, parent
19
- compressed_child = compress node.children.first
18
+ def merge_with_child_and_compress node
19
+ child = node.children.first
20
20
 
21
- new_node = Rambling::Trie::CompressedNode.new parent
22
- new_node.letter = node.letter.to_s << compressed_child.letter.to_s
23
- new_node.terminal! if compressed_child.terminal?
24
- new_node.children_tree = compressed_child.children_tree
21
+ new_node = Rambling::Trie::CompressedNode.new node.parent
22
+ new_node.letter = node.letter.to_s << child.letter.to_s
23
+ new_node.terminal! if child.terminal?
24
+ new_node.children_tree = child.children_tree
25
25
 
26
- new_node.children.each do |child|
27
- child.parent = new_node
28
- end
29
-
30
- new_node
26
+ compress new_node
31
27
  end
32
28
 
33
- def copy_node_and_compress_children node, parent
34
- new_node = Rambling::Trie::CompressedNode.new parent
35
-
29
+ def copy_node_and_compress_children node
30
+ new_node = Rambling::Trie::CompressedNode.new node.parent
36
31
  new_node.letter = node.letter
37
32
  new_node.terminal! if node.terminal?
38
33
 
39
- node.children.map do |child|
40
- compress child, new_node
34
+ node.children.each do |child|
35
+ compressed_child = compress child
36
+
37
+ compressed_child.parent = new_node
38
+ new_node[compressed_child.letter] = compressed_child
41
39
  end
42
40
 
43
41
  new_node
@@ -49,14 +49,14 @@ module Rambling
49
49
  # @param [String] word the word or partial word to look for in the trie.
50
50
  # @return [Boolean] `true` if the word or partial word is found, `false` otherwise.
51
51
  def partial_word? word = ''
52
- root.partial_word? word.chars.to_a
52
+ root.partial_word? word.chars
53
53
  end
54
54
 
55
55
  # Checks if a whole word exists in the trie.
56
56
  # @param [String] word the word to look for in the trie.
57
57
  # @return [Boolean] `true` only if the word is found and the last character corresponds to a terminal node.
58
58
  def word? word = ''
59
- root.word? word.chars.to_a
59
+ root.word? word.chars
60
60
  end
61
61
 
62
62
  # Returns all words that start with the specified characters.
@@ -73,9 +73,9 @@ module Rambling
73
73
  parent.to_s << letter.to_s
74
74
  end
75
75
 
76
- def letter= new_letter
77
- if new_letter
78
- @letter = new_letter.to_sym
76
+ def letter= letter
77
+ if letter
78
+ @letter = letter.to_sym
79
79
  parent[letter] = self if parent
80
80
  end
81
81
  end
@@ -4,6 +4,10 @@ module Helpers
4
4
  Pathname.new(full_path *filename).cleanpath
5
5
  end
6
6
 
7
+ def dictionary
8
+ path 'assets', 'dictionaries', 'words_with_friends.txt'
9
+ end
10
+
7
11
  private
8
12
 
9
13
  def full_path *filename
@@ -1,4 +1,17 @@
1
1
  namespace :performance do
2
- desc 'Generate profiling and performance reports'
3
- task all: ['profile:call_tree', :report]
2
+ desc 'Generate all profiling and performance reports'
3
+ task all: [
4
+ 'benchmark:all',
5
+ 'profile:call_tree:all',
6
+ 'profile:memory:all',
7
+ ]
8
+
9
+ namespace :all do
10
+ desc 'Generate and store all profiling and performance reports'
11
+ task save: [
12
+ 'benchmark:all:save',
13
+ 'profile:call_tree:all',
14
+ 'profile:memory:all',
15
+ ]
16
+ end
4
17
  end
@@ -3,17 +3,38 @@ require_relative '../helpers/path'
3
3
  namespace :performance do
4
4
  include Helpers::Path
5
5
 
6
+ class BenchmarkReport
7
+ attr_reader :output
8
+
9
+ def initialize output
10
+ @output = output
11
+ end
12
+
13
+ def finish
14
+ output.close
15
+ end
16
+ end
17
+
6
18
  class BenchmarkMeasurement
7
19
  def initialize output
8
20
  @output = output
9
21
  end
10
22
 
23
+ def param_to_s param
24
+ case param
25
+ when Rambling::Trie::Container
26
+ ''
27
+ else
28
+ param.to_s
29
+ end
30
+ end
31
+
11
32
  def perform times, params = nil
12
33
  params = Array params
13
34
  params << nil unless params.any?
14
35
 
15
36
  params.each do |param|
16
- output.print "#{param}".ljust 20
37
+ output.print param_to_s(param).ljust 20
17
38
 
18
39
  measure times, param do |param|
19
40
  yield param
@@ -43,64 +64,88 @@ namespace :performance do
43
64
  end
44
65
  end
45
66
 
46
- def with_file filename = nil
47
- output = filename.nil? ? IO.new(1) : File.open(filename, 'a+')
67
+ def benchmark_report= benchmark_report
68
+ @benchmark_report = benchmark_report
69
+ end
48
70
 
49
- yield output
71
+ def benchmark_report
72
+ Rake::Task['performance:benchmark:output:stdout'].invoke unless @benchmark_report
50
73
 
51
- output.close
74
+ @benchmark_report
52
75
  end
53
76
 
54
- def generate_lookups_benchmark filename = nil
55
- with_file filename do |output|
56
- measure = BenchmarkMeasurement.new output
57
- measure.banner
58
-
59
- trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
60
- [ trie, trie.clone.compress! ].each do |trie|
61
- output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
62
- words = %w(hi help beautiful impressionism anthropological)
77
+ def output
78
+ benchmark_report.output
79
+ end
63
80
 
64
- output.puts '`word?`'
65
- measure.perform 200_000, words do |word|
66
- trie.word? word
67
- end
81
+ def generate_lookups_benchmark filename = nil
82
+ measure = BenchmarkMeasurement.new output
83
+ measure.banner
84
+
85
+ trie = Rambling::Trie.create dictionary
86
+ compressed_trie = Rambling::Trie.create(dictionary).compress!
87
+ [ trie, compressed_trie ].each do |trie|
88
+ output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
89
+ words = %w(hi help beautiful impressionism anthropological)
90
+
91
+ output.puts '`word?`'
92
+ measure.perform 200_000, words do |word|
93
+ trie.word? word
94
+ end
68
95
 
69
- output.puts '`partial_word?`'
70
- measure.perform 200_000, words do |word|
71
- trie.partial_word? word
72
- end
96
+ output.puts '`partial_word?`'
97
+ measure.perform 200_000, words do |word|
98
+ trie.partial_word? word
73
99
  end
74
100
  end
75
101
  end
76
102
 
77
103
  def generate_scans_benchmark filename = nil
78
- with_file filename do |output|
79
- measure = BenchmarkMeasurement.new output
80
- measure.banner
81
-
82
- words = {
83
- hi: 1_000,
84
- help: 100_000,
85
- beautiful: 100_000,
86
- impressionism: 200_000,
87
- anthropological: 200_000,
88
- }
89
- trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
90
-
91
- [ trie, trie.clone.compress! ].each do |trie|
92
- output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
93
- output.puts "`scan`"
94
- words.each do |word, times|
95
- measure.perform times, word.to_s do |word|
96
- trie.scan(word).size
97
- end
104
+ measure = BenchmarkMeasurement.new output
105
+ measure.banner
106
+
107
+ words = {
108
+ hi: 1_000,
109
+ help: 100_000,
110
+ beautiful: 100_000,
111
+ impressionism: 200_000,
112
+ anthropological: 200_000,
113
+ }
114
+
115
+ trie = Rambling::Trie.create dictionary
116
+ compressed_trie = Rambling::Trie.create(dictionary).compress!
117
+
118
+ [ trie, compressed_trie ].each do |trie|
119
+ output.puts "==> #{trie.compressed? ? 'Compressed' : 'Uncompressed'}"
120
+ output.puts "`scan`"
121
+ words.each do |word, times|
122
+ measure.perform times, word.to_s do |word|
123
+ trie.scan(word).size
98
124
  end
99
125
  end
100
126
  end
101
127
  end
102
128
 
103
129
  namespace :benchmark do
130
+ namespace :output do
131
+ desc 'Set task reporting output to stdout'
132
+ task :stdout do
133
+ self.benchmark_report = BenchmarkReport.new IO.new(1)
134
+ end
135
+
136
+ desc 'Set task reporting output to file'
137
+ task file: ['performance:directory'] do
138
+ path = path 'reports', Rambling::Trie::VERSION, 'benchmark'
139
+ file = File.open path, 'a+'
140
+ self.benchmark_report = BenchmarkReport.new file
141
+ end
142
+
143
+ desc 'Close output stream'
144
+ task :close do
145
+ benchmark_report.finish unless benchmark_report.nil?
146
+ end
147
+ end
148
+
104
149
  desc 'Generate lookups performance benchmark report'
105
150
  task :lookups do
106
151
  generate_lookups_benchmark
@@ -111,50 +156,53 @@ namespace :performance do
111
156
  generate_scans_benchmark
112
157
  end
113
158
 
114
- namespace :lookups do
115
- desc 'Generate performance benchmark report store results in reports/'
116
- task save: ['performance:directory'] do
117
- puts 'Generating performance benchmark report for lookups...'
118
- generate_lookups_benchmark path('reports', Rambling::Trie::VERSION, 'benchmark')
119
- puts "Benchmarks have been saved to reports/#{Rambling::Trie::VERSION}/benchmark"
120
- end
121
- end
122
-
159
+ desc 'Generate creation performance benchmark report'
123
160
  task :creation do
124
- with_file do |output|
125
- measure = BenchmarkMeasurement.new output
126
- measure.banner
127
-
128
- output.puts '==> Creation'
129
- output.puts '`Rambling::Trie.create`'
130
- measure.perform 5 do
131
- trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
132
- end
161
+ measure = BenchmarkMeasurement.new output
162
+ measure.banner
163
+
164
+ output.puts '==> Creation'
165
+ output.puts '`Rambling::Trie.create`'
166
+ measure.perform 5 do
167
+ trie = Rambling::Trie.create dictionary
168
+ nil
133
169
  end
134
170
  end
135
171
 
172
+ desc 'Generate compression performance benchmark report'
136
173
  task :compression do
137
- with_file do |output|
138
- measure = BenchmarkMeasurement.new output
139
- measure.banner
174
+ measure = BenchmarkMeasurement.new output
175
+ measure.banner
140
176
 
141
- output.puts '==> Compression'
142
- output.puts '`compress!`'
177
+ output.puts '==> Compression'
178
+ output.puts '`compress!`'
143
179
 
144
- trie = Rambling::Trie.create path('assets', 'dictionaries', 'words_with_friends.txt')
145
- measure.perform 5 do
146
- trie.clone.compress!
147
- end
180
+ tries = []
181
+ 5.times { tries << Rambling::Trie.create(dictionary) }
182
+
183
+ measure.perform 5, tries do |trie|
184
+ trie.compress!
185
+ nil
148
186
  end
149
187
  end
150
188
 
189
+ desc 'Generate all performance benchmark reports'
151
190
  task all: [
152
- 'performance:benchmark:creation',
153
- 'performance:benchmark:compression',
154
- 'performance:benchmark:lookups',
155
- 'performance:benchmark:scans',
191
+ 'creation',
192
+ 'compression',
193
+ 'lookups',
194
+ 'scans',
156
195
  ]
157
196
 
197
+ namespace :all do
198
+ desc "Generate and store performance benchmark report in reports/#{Rambling::Trie::VERSION}"
199
+ task save: [
200
+ 'output:file',
201
+ 'all'
202
+ ]
203
+ end
204
+
205
+ desc 'Compare ips for different implementations (changes over time)'
158
206
  task :compare do
159
207
  Benchmark.ips do |b|
160
208
  hash = { 'thing' => 'gniht' }
@@ -170,3 +218,7 @@ namespace :performance do
170
218
  end
171
219
  end
172
220
  end
221
+
222
+ current_tasks = Rake.application.top_level_tasks
223
+ current_tasks << 'performance:benchmark:output:close'
224
+ Rake.application.instance_variable_set :@top_level_tasks, current_tasks