d_heap 0.1.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,168 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DHeap::Benchmarks
4
+
5
+ # base class for example priority queues
6
# Common base for the example priority queues.
#
# Holds the backing array (+@a+, exposed through +a+) and the operations
# that are the same for every example: +clear+, +empty?+ and +dbg+.
class ExamplePriorityQueue
  attr_reader :a

  def initialize
    @a = []
  end

  # Removes every element from the backing array.
  def clear
    @a.clear
  end

  # True when the backing array holds no elements.
  def empty?
    @a.empty?
  end

  # +dbg+ is chosen once, when the class body is evaluated: it only prints
  # when LOG_LEVEL=debug was set at load time, otherwise it is a no-op.
  if ENV["LOG_LEVEL"] == "debug"
    # Prints +msg+, the first element, and the remaining elements in pairs.
    def dbg(msg)
      head = @a.first
      rest_in_pairs = Array(@a[1..-1]).each_slice(2).to_a
      puts format("%20s: %p, %p", msg, head, rest_in_pairs)
    end
  else
    def dbg(msg); end
  end

end
30
+
31
+ # The most naive approach--completely unsorted!--is ironically not the worst.
32
class FindMin < ExamplePriorityQueue

  # O(1): appends to the unsorted backing array.
  # Raises ArgumentError when +score+ is nil or false.
  def <<(score)
    raise ArgumentError unless score
    @a << score
  end

  # O(n): scans for the minimum, deletes its last occurrence, and returns it.
  # Returns nil when the queue is empty.
  def pop
    lowest = @a.min
    return unless lowest
    @a.delete_at(@a.rindex(lowest))
    lowest
  end

end
49
+
50
+ # Re-sorting after each insert: this is both naive and performs the worst.
51
class Sorting < ExamplePriorityQueue

  # O(n log n): appends, then re-sorts the whole backing array in place.
  # Raises ArgumentError when +score+ is nil or false.
  def <<(score)
    raise ArgumentError unless score
    @a.push(score).sort!
  end

  # O(1): the array is kept ascending, so the minimum is the first element.
  def pop
    @a.shift
  end

end
66
+
67
+ # A very simple example priority queue that is implemented with a sorted array.
68
+ #
69
+ # It uses Array#bsearch_index + Array#insert to push new values, and Array#pop to pop
70
+ # the min value.
71
class BSearch < ExamplePriorityQueue

  # Inserts +score+ so that the backing array stays sorted from largest to
  # smallest (the minimum is always the last element).
  #
  # Array#bsearch_index is O(log n) and Array#insert is O(n), so this
  # should be O(n) overall.
  #
  # In practice though, memcpy has a *very* small constant factor.
  # And bsearch_index uses *exactly* (log n / log 2) comparisons.
  #
  # Raises ArgumentError when +score+ is nil or false.
  def <<(score)
    raise ArgumentError unless score
    # first position holding something smaller than score; nil => append
    slot = @a.bsearch_index {|other| score > other }
    @a.insert(slot || @a.length, score)
  end

  # Array#pop is O(1). It updates length without changing capacity or
  # contents, and no comparisons are necessary.
  #
  # shift is usually also O(1) and could be used if it were sorted normally.
  def pop
    @a.pop
  end

end
96
+
97
+ # a very simple pure ruby binary heap
98
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
99
# A binary min-heap kept in the inherited +@a+ array: the children of the
# element at index +i+ live at +2i + 1+ and +2i + 2+.
class RbHeap < ExamplePriorityQueue

  # Pushes +score+ and restores the heap property by sifting it up.
  # Returns self. Raises ArgumentError when +score+ is nil or false.
  def <<(score)
    raise ArgumentError unless score
    @a.push(score)
    # sift up: slide larger parents down into the hole, and write score
    # only once, when its final position is known
    index = @a.size - 1
    while 0 < index # rubocop:disable Style/NumericPredicate
      parent_index = (index - 1) / 2
      parent = @a[parent_index]
      break if parent <= score
      @a[index] = parent
      index = parent_index
    end
    @a[index] = score
    # check_heap!(0)
    self
  end

  # Removes and returns the minimum (the root), or nil when empty.
  def pop
    return if @a.empty?
    popped = @a.first
    shifting = @a.pop # the last leaf will be re-seated from the root
    return popped if @a.empty?
    # sift down: pull the smaller child up into the hole until the leaf fits
    index = 0
    last_index = @a.size - 1
    while (child_index = index * 2 + 1) <= last_index
      # select min child
      if child_index < last_index && @a[child_index + 1] < @a[child_index]
        child_index += 1
      end
      child = @a[child_index]
      break if shifting <= child
      @a[index] = child
      index = child_index
    end
    @a[index] = shifting
    popped
  end

  private

  # Debugging aid: recursively verifies that no element in the subtree rooted
  # at +idx+ is smaller than its parent, looking no further than index
  # +last+. Raises RuntimeError on the first violation, otherwise returns
  # the score stored at +idx+. Each child is visited exactly once.
  def check_heap!(idx, last = @a.size - 1)
    pscore = @a[idx]
    left = idx * 2 + 1
    [left, left + 1].each do |child|
      next if last < child
      cscore = check_heap!(child, last)
      raise "#{pscore} > #{cscore}" if pscore > cscore
    end
    pscore
  end

end
157
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
158
+
159
+ # Different duck-typed priority queue implementations
160
# Each entry pairs a display name with the class that implements the queue.
IMPLEMENTATIONS = [
  [" push and resort", Sorting],
  [" find min + del", FindMin],
  ["bsearch + insert", BSearch],
  ["ruby binary heap", RbHeap],
  ["quaternary DHeap", DHeap],
].map {|label, queue_class|
  OpenStruct.new(name: label, klass: queue_class).freeze
}.freeze
167
+
168
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "d_heap/benchmarks"
4
+
5
+ require "ruby-prof"
6
+
7
+ module DHeap::Benchmarks
8
+ # Profiles different implementations with different sizes
9
# Runs every implementation in IMPLEMENTATIONS under RubyProf for one or
# more queue sizes and prints a flat profile for each.
class Profiler
  include Randomness
  include Scenarios

  # Default queue sizes to profile.
  N_COUNTS = [
    5,      # 1 + 4
    1365,   # 1 + 4 + 16 + 64 + 256 + 1024
    87_381, # 1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384 + 65536
  ].freeze

  # Profiles all implementations.
  #
  # queue_size:: a single size to profile (Integer or numeric String);
  #              defaults to PROFILE_QUEUE_SIZE, else every N_COUNTS entry.
  # iterations:: number of push/pop iterations (Integer or numeric String);
  #              defaults to PROFILE_ITERATIONS, else 1_000_000.
  #
  # Raises ArgumentError when either value cannot be converted to Integer.
  def call(
    queue_size: ENV.fetch("PROFILE_QUEUE_SIZE", :unset),
    iterations: ENV.fetch("PROFILE_ITERATIONS", 1_000_000)
  )
    # ENV values are Strings, so coerce just like queue_size below
    iterations = Integer(iterations)
    DHeap::Benchmarks.puts_version_info("Profiling")
    fill_random_vals
    sizes = queue_size == :unset ? N_COUNTS : [Integer(queue_size)]
    sizes.each do |size|
      profile_all(size, iterations)
    end
  end

  # Prints a banner to +io+, then profiles each implementation in turn.
  def profile_all(queue_size, iterations, io: $stdout)
    io.puts <<~TEXT
      ########################################################################
      # Profile w/ N=#{queue_size} (i=#{iterations})
      # (n.b. RubyProf & tracepoint can change relative performance.
      # A sampling profiler can provide more accurate relative metrics.
      ########################################################################

    TEXT
    DHeap::Benchmarks::IMPLEMENTATIONS.each do |impl|
      profile_one(impl, queue_size, iterations, io: io)
    end
  end

  # TODO: move somewhere else...
  # The re-sorting implementation is skipped for queues above 10_000.
  def skip_profiling?(queue_size, impl)
    impl.klass == DHeap::Benchmarks::Sorting && 10_000 < queue_size
  end

  # Fills a fresh queue of +impl+ to +queue_size+ (not profiled), then
  # profiles +iterations+ of repeated push/pop, reporting to +io+.
  def profile_one(impl, queue_size, iterations, io: $stdout)
    return if skip_profiling?(queue_size, impl)
    io.puts "Filling #{impl.name} ---------------------------"
    queue = impl.klass.new
    push_n(queue, queue_size)
    io.puts "Profiling #{impl.name} ---------------------------"
    profiling(io: io) do
      repeated_push_pop(queue, iterations)
    end
  end

  # Profiles the block with RubyProf and writes a flat report (entries of
  # at least 1%) followed by a blank line to +io+.
  def profiling(io: $stdout, &block)
    result = RubyProf.profile(&block)
    printer = RubyProf::FlatPrinter.new(result)
    printer.print(io, min_percent: 1.0)
    io.puts
  end

end
71
+ end
@@ -0,0 +1,374 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "d_heap/benchmarks"
4
+
5
+ module DHeap::Benchmarks
6
+
7
+ # RSpec matchers for asserting benchmark performance (ips and relative speed)
8
+ module RSpecMatchers # rubocop:disable Metrics/ModuleLength
9
+ extend RSpec::Matchers::DSL
10
+
11
+ # Assert ips (iterations per second):
12
+ #
13
+ # expect { ... }.to perform_at_least(1_000_000).ips
14
+ # .running_at_least(10).times # optional, defaults to 1
15
+ # .running_at_least(10).seconds # optional, defaults to 1s
16
+ # .running_at_most(10_000_000).times # optional, defaults to nil
17
+ # .running_at_most(2).seconds # optional, defaults to 2s
18
+ # .warmup_at_most(1000).times # optional, defaults to 1k
19
+ # .warmup_at_most(0.100).seconds # optional, defaults to 0.1s
20
+ # .iterations_per_round # optional, defaults to 1
21
+ # .and_at_least(1.1).times.faster_than { ... } # can also compare
22
+ #
23
+ # Assert comparison (and optionally runtime or ips):
24
+ #
25
+ # expect { ... }.to perform_at_least(2.5).times_faster_than { ... }
26
+ # .running_at_least(10).times # optional, defaults to 1
27
+ # .running_at_least(10).seconds # optional, defaults to 1s
28
+ # .running_at_most(10_000_000).times # optional, defaults to nil
29
+ # .running_at_most(2).seconds # optional, defaults to 2s
30
+ # .warmup_at_most(1000).times # optional, defaults to 1k
31
+ # .warmup_at_most(0.100).seconds # optional, defaults to 0.1s
32
+ # .iterations_per_round # optional, defaults to 1
33
+ # .and_at_least(100).ips # can also assert ips
34
+ #
35
+ # n.b: Given a known constant number of iterations, run time and ips are both
36
+ # measuring the same underlying metric.
37
+ #
38
+ # rubocop:disable Metrics/BlockLength, Layout/SpaceAroundOperators
39
+ matcher :perform_at_least do |expected|
40
+ supports_block_expectations
41
+
42
# Debugging aid: prints +name+ together with the caller's non-nil local
# variables and this object's non-nil instance variables.
def __debug__(name, caller_binding)
  locals = __debug_lvars__(caller_binding)
  state = __debug_ivars__(caller_binding)
  puts format("%s, locals => %p, ivars => %p", name, locals, state)
end
47
+
48
# Returns a Hash of the local variables visible in +caller_binding+,
# leaving out +type+, +unit+ and anything whose value is nil.
def __debug_lvars__(caller_binding)
  ignored = %i[type unit]
  caller_binding.local_variables.each_with_object({}) do |lvar, found|
    next if ignored.include?(lvar)
    current = caller_binding.local_variable_get(lvar)
    found[lvar] = current unless current.nil?
  end
end
55
+
56
# Returns a Hash of this object's instance variables, leaving out the
# listed bookkeeping ivars and anything whose value is nil.
# +caller_binding+ is accepted but not used.
def __debug_ivars__(caller_binding)
  hidden = %i[@name @actual @expected_as_array @matcher_execution_context
              @chained_method_clauses @block_arg]
  instance_variables.each_with_object({}) do |ivar, shown|
    next if hidden.include?(ivar)
    current = instance_variable_get(ivar)
    shown[ivar] = current unless current.nil?
  end
end
65
+
66
# Chains that accept a bare number. The number is parked in @number_val
# (tagged with the chain name in @number_for) until a following unit chain
# such as +times+, +seconds+ or +ips+ consumes it.
%i[is_at_least running_at_most running_at_least warmup_at_most].each do |type|
  chain type do |number|
    # __debug__ "%s(%p)" % [type, number], binding
    pending_reason, pending_value = ___number_reason_and_value___
    # an earlier number must be given its unit before a new one is accepted
    if pending_reason || pending_value
      raise "Need to handle unit-less number first: %s(%p)" % [pending_reason, pending_value]
    end
    @number_for = type
    @number_val = number
  end
end

alias_method :and_at_least, :is_at_least
84
+
85
# Unit chains: each one applies the pending number (see the number chains)
# to the setting it was given for, then clears the pending number.
%i[times seconds milliseconds].each do |unit|
  chain unit do
    # __debug__ unit, binding
    reason, value = ___number_reason_and_value___
    raise "No number was specified" unless reason && value
    if reason == :running_at_most
      apply_max_run unit
    elsif reason == :running_at_least
      apply_min_run unit
    elsif reason == :warmup_at_most
      apply_warmup unit
    else
      raise "%s is incompatible with %s(%p)" % [unit, reason, value]
    end
    @number_for = @number_val = nil
  end
end
103
+
104
+ # TODO: let IPS set time to run instead of iterations to run
105
# Turns the pending +is_at_least+ number into an iterations-per-second
# assertion (stored in @expect_ips) and clears the pending number.
# Raises when the pending number belongs to another chain, when an ips
# assertion was already made, or when no number is pending.
chain :ips do
  # __debug__ "ips", binding
  reason, value = ___number_reason_and_value___
  raise "'ips' unit is only for assertions" unless reason == :is_at_least
  raise "Already asserting %s ips" % [@expect_ips] if @expect_ips
  raise "Unknown assertion count" unless value
  @expect_ips = Integer(value)
  @number_for = @number_val = nil
end
115
+
116
+ # need to use method because "chain" can't take a block
117
# Turns the pending +is_at_least+ number into a relative-speed assertion
# against the block +other+: the factor goes to @expect_cmp and the block
# to @cmp_proc. Returns self so further chains can follow.
#
# Raises RuntimeError when the pending number is not an assertion or a
# comparison was already set, and ArgumentError when no block is given.
def times_faster_than(&other)
  # __debug__ "times_faster_than"
  reason, factor = ___number_reason_and_value___
  unless reason == :is_at_least
    raise "'times_faster_than' is only for assertions"
  end
  if @expect_cmp
    raise format("Already asserting %sx comparison", @expect_cmp)
  end
  raise ArgumentError, "must provide a proc" if other.nil?
  @expect_cmp = Float(factor)
  @cmp_proc = other
  @number_for = nil
  @number_val = nil
  self
end
128
+
129
# Output verbosity; a :loud volume makes the measurement methods print.
chain(:loudly) { @volume = :loud }
chain(:quietly) { @volume = :quiet }
chain :volume do |volume|
  case volume
  when :loud, :quiet then @volume = volume
  else raise "Invalid volume"
  end
end
135
+
136
# Sets how many iterations each measured round performs. May only be set
# once; a second call raises with the value that is already in place.
chain :iterations_per_round do |iterations|
  if @iterations_per_round
    raise "Already set iterations per round (%p)" % [@iterations_per_round]
  end
  @iterations_per_round = Integer(iterations)
end
142
+
143
# Runs the benchmark for the expected block and passes when every
# requested assertion (comparison and/or ips) holds.
match do |actual|
  require "benchmark"
  raise "Need to expect a proc or block" unless actual.respond_to?(:to_proc)
  raise "Need a performance assertion" unless assertion?
  @actual_proc = actual
  prepare_for_measurement
  # a max iteration count must split into whole rounds
  uneven = @max_iter && (@max_iter % @iterations_per_round).nonzero?
  if uneven
    raise format(
      "Iterations per round (%p) must divide evenly by max iterations (%p)",
      @iterations_per_round, @max_iter
    )
  end
  run_measurements
  cmp_okay? && ips_okay?
end
157
+
158
# Describes only the assertions that were actually requested; unset ones
# are dropped so a single assertion is not followed by a dangling ", and ".
description do
  [
    @expect_cmp && cmp_okay_msg,
    @expect_ips && ips_okay_msg,
  ].compact.join(", and ")
end
164
+
165
# Lists each failed assertion as "expected to ... but ...".
failure_message do
  problems = []
  problems << "expected to #{cmp_okay_msg} but #{cmp_fail_msg}" unless cmp_okay?
  problems << "expected to #{ips_okay_msg} but #{ips_fail_msg}" unless ips_okay?
  problems.join(", and ")
end
171
+
172
private

# Moves the matcher's +expected+ argument into the pending-number ivars,
# tagged as an :is_at_least assertion, unless a number is already pending.
# +expected+ is then set to nil so it is only picked up once.
chain :__convert_expected_to_ivars__ do
  @number_val ||= expected
  @number_for ||= :is_at_least if @number_val
  # __debug__ "__convert_expected_to_ivars__", binding
  expected = nil
end
private :__convert_expected_to_ivars__

# Returns the pending number as [reason, value]; both are nil when no
# number is waiting for a unit.
def ___number_reason_and_value___
  __convert_expected_to_ivars__
  return @number_for, @number_val
end
186
+
187
# Stores the pending number as the minimum run: @min_time (in seconds) for
# :seconds / :milliseconds, or @min_iter for :times.
# Raises RuntimeError for any other unit.
def apply_min_run(unit)
  if unit == :seconds
    @min_time = Float(@number_val)
  elsif unit == :milliseconds
    @min_time = Float(@number_val) / 1000.0
  elsif unit == :times
    @min_iter = Integer(@number_val)
  else
    raise format("Invalid unit %s for %s(%p)", unit, @number_for, @number_val)
  end
end
195
+
196
# Stores the pending number as the maximum run: @max_time (in seconds) for
# :seconds / :milliseconds, or @max_iter for :times.
# Raises RuntimeError for any other unit.
def apply_max_run(unit)
  if unit == :seconds
    @max_time = Float(@number_val)
  elsif unit == :milliseconds
    @max_time = Float(@number_val) / 1000.0
  elsif unit == :times
    @max_iter = Integer(@number_val)
  else
    raise format("Invalid unit %s for %s(%p)", unit, @number_for, @number_val)
  end
end
204
+
205
# Stores the pending number as the warmup limit: @warmup_time (in seconds)
# for :seconds / :milliseconds, or @warmup_iter for :times.
# Raises RuntimeError for any other unit.
def apply_warmup(unit)
  if unit == :seconds
    @warmup_time = Float(@number_val)
  elsif unit == :milliseconds
    @warmup_time = Float(@number_val) / 1000.0
  elsif unit == :times
    @warmup_iter = Integer(@number_val)
  else
    raise format("Invalid unit %s for %s(%p)", unit, @number_for, @number_val)
  end
end
213
+
214
# Fills in a default for every setting the chain did not provide.
# @max_iter has no default and is left untouched. Always returns nil.
def prepare_for_measurement
  @volume ||= ENV.fetch("RSPEC_BENCHMARK_VOLUME", :quiet).to_sym
  defaults = {
    :@max_time => 2,
    :@min_time => 1,
    :@min_iter => 1,
    :@warmup_time => 0.100,
    :@warmup_iter => 1000,
    :@iterations_per_round => 1,
  }
  defaults.each do |ivar, fallback|
    instance_variable_set(ivar, fallback) unless instance_variable_get(ivar)
  end
  nil
end
224
+
225
# Prints the header when loud, warms up, then takes the real measurements.
def run_measurements
  if loud?
    puts header
  end
  # __debug__ "run_measurements", binding
  warmup
  take_measurements
end
231
+
232
# Builds the text printed before a loud run: the warmup limits, the
# benchmark limits (with the number of rounds implied by @max_iter, or nil
# when there is no iteration cap), and the Benchmark column caption.
def header
  max_rounds = @max_iter && @max_iter / @iterations_per_round
  [
    # report the warmup settings here, not the benchmark iteration limits
    "Warmup time %s, or iterations: %s" % [@warmup_time, @warmup_iter],
    "Benchmark time (%s..%s) or iterations (%s..%s), max rounds: %p" % [
      @min_time, @max_time, @min_iter, @max_iter, max_rounds,
    ],
    "%-10s %s" % ["", Benchmark::CAPTION],
  ].join("\n")
end
242
+
243
# Runs one unasserted warmup round of @warmup_iter iterations, capped at
# @warmup_time, for the expected block and (if present) the comparison
# block. Skipped when either warmup limit is not positive.
def warmup
  return unless @warmup_time.positive? && @warmup_iter.positive?
  limits = [@warmup_iter, 0, @warmup_time, 1, @warmup_iter]
  measure("warmup", *limits, &@actual_proc)
  return unless @cmp_proc
  measure("warmup cmp", *limits, &@cmp_proc)
end
249
+
250
# Measures the expected block into @actual_tms. When a comparison block is
# set, measures it into @cmp_tms as well and stores in @actual_cmp how many
# times faster (by real-time ips) the expected block ran.
def take_measurements
  limits = [@iterations_per_round, @min_time, @max_time, @min_iter, @max_iter]
  @actual_tms = measure("actual", *limits, &@actual_proc)
  return unless @cmp_proc
  @cmp_tms = measure("cmp", *limits, &@cmp_proc)
  # how many times faster?
  @actual_cmp = @actual_tms.ips_real / @cmp_tms.ips_real
  puts format("Ran %0.3fx as fast as comparison", @actual_cmp) if loud?
end
259
+
260
# True when output was requested with the :loud volume.
def loud?
  @volume == :loud
end

# True when at least one assertion (comparison or ips) was requested.
def assertion?
  @expect_cmp || @expect_ips ? true : false
end
263
+
264
# True when no comparison was requested, or the measured speed-up
# (@actual_cmp) exceeds the requested factor.
def cmp_okay?; !@expect_cmp || @expect_cmp < @actual_cmp end

# True when no ips assertion was requested, or the measured real-time ips
# exceeds the requested @expect_ips (the value the +ips+ chain stores).
def ips_okay?; !@expect_ips || @expect_ips < @actual_tms.ips_real end
266
+
267
# Repeatedly times the given block (yielding +ipr+, the iterations per
# round) and collects each timing in a TmsMeasurements, which is logged
# and returned.
#
# Rounds stop once the accumulated real time exceeds the measurements'
# max_time, or after max_rounds rounds. When max_rounds is nil (no
# iteration cap) only the time limit ends the loop.
def measure(name, ipr, *args)
  measurements = TmsMeasurements.new(name, ipr, *args)
  round_limit = measurements.max_rounds
  rounds = 0
  until round_limit && round_limit <= rounds
    # GC.start(full_mark: true, immediate_sweep: true)
    # GC.compact
    measurements << Benchmark.measure do
      yield ipr
    end
    rounds += 1
    # p measurements.real
    break if measurements.max_time < measurements.real
  end
  log_measurement(name, measurements)
  measurements
end
281
+
282
+ # rubocop:disable Metrics/AbcSize
283
# Formats +num+ with three decimals, scaled to the largest fitting unit
# suffix: T (10**12), B (10**9), M (10**6) or k (10**3). Numbers below
# 1000 get no suffix.
def units_str(num)
  scale, suffix = [
    [10**12, "T"],
    [10**9, "B"],
    [10**6, "M"],
    [10**3, "k"],
  ].find {|threshold, _| num >= threshold }
  return "%7.3f" % [num.to_f] unless scale
  "%7.3f#{suffix}" % [num.to_f / scale]
end
291
+ # rubocop:enable Metrics/AbcSize
292
+
293
# When loud, prints one summary line for +measurements+: the summed
# timings, the real-time ips, and the number of rounds. No-op otherwise.
def log_measurement(name, measurements)
  return unless loud?
  timings = measurements.tms.to_s.rstrip
  rate = units_str(measurements.ips_real)
  rounds = measurements.size
  puts format("%-10s %s => %s ips (%d rounds)", name, timings, rate, rounds)
end
302
+
303
# Message fragments used by the description and failure message.
def cmp_okay_msg; "run %0.2fx faster" % [@expect_cmp] end
def cmp_fail_msg; "was only %0.2fx as fast" % [@actual_cmp] end
def ips_okay_msg; "run with %s ips" % [units_str(@expect_ips)] end
# reports the measured real-time ips taken from @actual_tms
def ips_fail_msg; "was only %s ips" % [units_str(@actual_tms.ips_real)] end
307
+
308
+ end
309
+ # rubocop:enable Metrics/BlockLength, Layout/SpaceAroundOperators
310
+
311
# NOTE(review): this module defines no `perform` matcher, so the alias now
# targets perform_at_least, the only matcher declared here. Confirm that
# this is the intended target.
alias_matcher :perform_with, :perform_at_least
312
+
313
+ end
314
+
315
+ # Replicates a subset of the functionality in benchmark-ips
316
+ #
317
+ # TODO: merge this with benchmark-ips
318
+ # TODO: implement (or remove) min_time, min_iter
319
# Accumulates Benchmark::Tms entries (one per measured round) together with
# a running sum and iteration count, and derives iterations-per-second.
class TmsMeasurements
  attr_reader :iterations_per_entry
  attr_reader :iterations

  attr_reader :min_time
  attr_reader :max_time

  attr_reader :min_iter
  attr_reader :max_iter

  # name::     label for this set of measurements
  # ipe::      iterations performed by each entry (round)
  # min_time:: stored but not yet enforced (see the TODO on this class)
  # max_time:: time limit in seconds, read by callers
  # min_iter:: stored but not yet enforced (see the TODO on this class)
  # max_iter:: cap on total iterations, or nil for no cap
  #
  # Raises TypeError/ArgumentError when a value cannot be converted.
  def initialize(name, ipe, min_time, max_time, min_iter, max_iter) # rubocop:disable Metrics/ParameterLists
    @name = name
    @iterations_per_entry = Integer(ipe)
    @min_time = Float(min_time)
    @max_time = Float(max_time)
    @min_iter = Integer(min_iter)
    # nil means "no iteration cap"; Integer(nil) would raise
    @max_iter = max_iter.nil? ? nil : Integer(max_iter)
    @entries = []
    @sum = Benchmark::Tms.new
    @iterations = 0
  end

  # Number of entries (rounds) recorded so far.
  def size; @entries.size end

  # Records one round. Returns self.
  # Raises TypeError unless +tms+ is a Benchmark::Tms, and IndexError once
  # max_rounds entries have been recorded (never when there is no cap).
  def <<(tms)
    raise TypeError, "not a #{Benchmark::Tms}" unless tms.is_a?(Benchmark::Tms)
    round_limit = max_rounds
    raise IndexError, "full" if round_limit && round_limit <= size
    @sum += tms
    @iterations += @iterations_per_entry
    @entries << tms
    self
  end

  # Copy of the summed timings.
  def sum; @sum.dup end
  alias tms sum

  # Copy of the recorded entries.
  def entries; @entries.dup end

  def cstime; @sum.cstime end
  def cutime; @sum.cutime end
  def real; @sum.real end
  def stime; @sum.stime end
  def total; @sum.total end
  def utime; @sum.utime end

  # Iterations per second, by real, total and user CPU time respectively.
  def ips_real; @iterations / real end
  def ips_total; @iterations / total end
  def ips_utime; @iterations / utime end

  # How many whole rounds fit in max_iter, or nil when there is no cap.
  def max_rounds
    @max_iter && @max_iter / @iterations_per_entry
  end

end
373
+
374
+ end