d_heap 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +30 -1
- data/CHANGELOG.md +42 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +11 -10
- data/README.md +353 -121
- data/benchmarks/push_n.yml +28 -0
- data/benchmarks/push_n_pop_n.yml +31 -0
- data/benchmarks/push_pop.yml +24 -0
- data/bin/bench_n +7 -0
- data/bin/benchmark-driver +29 -0
- data/bin/benchmarks +10 -0
- data/bin/profile +10 -0
- data/d_heap.gemspec +2 -1
- data/docs/benchmarks-2.txt +52 -0
- data/docs/benchmarks.txt +443 -0
- data/docs/profile.txt +392 -0
- data/ext/d_heap/d_heap.c +428 -150
- data/ext/d_heap/d_heap.h +6 -3
- data/ext/d_heap/extconf.rb +8 -3
- data/lib/benchmark_driver/runner/ips_zero_fail.rb +120 -0
- data/lib/d_heap.rb +5 -3
- data/lib/d_heap/benchmarks.rb +111 -0
- data/lib/d_heap/benchmarks/benchmarker.rb +113 -0
- data/lib/d_heap/benchmarks/implementations.rb +168 -0
- data/lib/d_heap/benchmarks/profiler.rb +71 -0
- data/lib/d_heap/benchmarks/rspec_matchers.rb +374 -0
- data/lib/d_heap/version.rb +1 -1
- metadata +34 -3
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "d_heap/benchmarks"
|
4
|
+
|
5
|
+
require "ruby-prof"
|
6
|
+
|
7
|
+
module DHeap::Benchmarks
|
8
|
+
# Profiles different implementations with different sizes
|
9
|
+
class Profiler
  include Randomness
  include Scenarios

  # Queue sizes profiled when no explicit size is requested.  Each size is a
  # sum of consecutive powers of 4 (see the per-entry comments).
  N_COUNTS = [
    5,      # 1 + 4
    1365,   # 1 + 4 + 16 + 64 + 256 + 1024
    87_381, # 1 + 4 + 16 + 64 + 256 + 1024 + 4096 + 16384 + 65536
  ].freeze

  # Profiles every implementation at each selected queue size.
  #
  # @param queue_size [Integer, String, Symbol] a single queue size to profile;
  #   defaults to ENV["PROFILE_QUEUE_SIZE"], or all of N_COUNTS when unset
  # @param iterations [Integer, String] number of push/pop iterations;
  #   defaults to ENV["PROFILE_ITERATIONS"] or 1_000_000
  def call(
    queue_size: ENV.fetch("PROFILE_QUEUE_SIZE", :unset),
    iterations: ENV.fetch("PROFILE_ITERATIONS", 1_000_000)
  )
    DHeap::Benchmarks.puts_version_info("Profiling")
    fill_random_vals
    sizes = queue_size == :unset ? N_COUNTS : [Integer(queue_size)]
    # ENV values are Strings: coerce iterations just like queue_size.
    iterations = Integer(iterations)
    sizes.each do |size|
      profile_all(size, iterations)
    end
  end

  # Prints a banner for this queue size, then profiles each entry of
  # DHeap::Benchmarks::IMPLEMENTATIONS, writing all output to +io+.
  def profile_all(queue_size, iterations, io: $stdout)
    io.puts <<~TEXT
      ########################################################################
      # Profile w/ N=#{queue_size} (i=#{iterations})
      # (n.b. RubyProf & tracepoint can change relative performance.
      # A sampling profiler can provide more accurate relative metrics.
      ########################################################################

    TEXT
    DHeap::Benchmarks::IMPLEMENTATIONS.each do |impl|
      profile_one(impl, queue_size, iterations, io: io)
    end
  end

  # TODO: move somewhere else...
  #
  # True for the Sorting implementation when the queue holds more than 10_000
  # items (presumably because it is too slow at that size -- confirm).
  def skip_profiling?(queue_size, impl)
    impl.klass == DHeap::Benchmarks::Sorting && 10_000 < queue_size
  end

  # Fills a new queue of +impl+ with +queue_size+ items, then profiles
  # +iterations+ repeated push/pop operations on it.
  def profile_one(impl, queue_size, iterations, io: $stdout)
    return if skip_profiling?(queue_size, impl)
    io.puts "Filling #{impl.name} ---------------------------"
    queue = impl.klass.new
    push_n(queue, queue_size)
    io.puts "Profiling #{impl.name} ---------------------------"
    # Forward io so the report lands in the same stream as the headings.
    profiling(io: io) do
      repeated_push_pop(queue, iterations)
    end
  end

  # Runs the block under RubyProf and prints a flat report (entries of at
  # least 1%) to +io+, followed by a blank line.
  def profiling(io: $stdout, &block)
    result = RubyProf.profile(&block)
    printer = RubyProf::FlatPrinter.new(result)
    printer.print(io, min_percent: 1.0)
    io.puts
  end

end
|
71
|
+
end
|
@@ -0,0 +1,374 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "d_heap/benchmarks"
|
4
|
+
|
5
|
+
module DHeap::Benchmarks
|
6
|
+
|
7
|
+
# RSpec matchers for asserting benchmark performance (ips and relative speed)
|
8
|
+
module RSpecMatchers # rubocop:disable Metrics/ModuleLength
|
9
|
+
extend RSpec::Matchers::DSL
|
10
|
+
|
11
|
+
# Assert ips (iterations per second):
|
12
|
+
#
|
13
|
+
# expect { ... }.to perform_at_least(1_000_000).ips
|
14
|
+
# .running_at_least(10).times # optional, defaults to 1
|
15
|
+
# .running_at_least(10).seconds # optional, defaults to 1s
|
16
|
+
# .running_at_most(10_000_000).times # optional, defaults to nil
|
17
|
+
# .running_at_most(2).seconds # optional, defaults to 2s
|
18
|
+
# .warmup_at_most(1000).times # optional, defaults to 1k
|
19
|
+
# .warmup_at_most(0.100).seconds # optional, defaults to 0.1s
|
20
|
+
# .iterations_per_round # optional, defaults to 1
|
21
|
+
# .and_at_least(1.1).times.faster_than { ... } # can also compare
|
22
|
+
#
|
23
|
+
# Assert comparison (and optionally runtime or ips):
|
24
|
+
#
|
25
|
+
# expect { ... }.to perform_at_least(2.5).times_faster_than { ... }
|
26
|
+
# .running_at_least(10).times # optional, defaults to 1
|
27
|
+
# .running_at_least(10).seconds # optional, defaults to 1s
|
28
|
+
# .running_at_most(10_000_000).times # optional, defaults to nil
|
29
|
+
# .running_at_most(2).seconds # optional, defaults to 2s
|
30
|
+
# .warmup_at_most(1000).times # optional, defaults to 1k
|
31
|
+
# .warmup_at_most(0.100).seconds # optional, defaults to 0.1s
|
32
|
+
# .iterations_per_round # optional, defaults to 1
|
33
|
+
# .and_at_least(100).ips { ... } # can also assert ips
|
34
|
+
#
|
35
|
+
# n.b: Given a known constant number of iterations, run time and ips are both
|
36
|
+
# measuring the same underlying metric.
|
37
|
+
#
|
38
|
+
# rubocop:disable Metrics/BlockLength, Layout/SpaceAroundOperators
|
39
|
+
matcher :perform_at_least do |expected|
|
40
|
+
supports_block_expectations
|
41
|
+
|
42
|
+
def __debug__(name, caller_binding)
|
43
|
+
lvars = __debug_lvars__(caller_binding)
|
44
|
+
ivars = __debug_ivars__(caller_binding)
|
45
|
+
puts "%s, locals => %p, ivars => %p" % [name, lvars, ivars]
|
46
|
+
end
|
47
|
+
|
48
|
+
def __debug_lvars__(caller_binding)
|
49
|
+
caller_binding.local_variables.map {|lvar|
|
50
|
+
next if %i[type unit].include?(lvar)
|
51
|
+
next if (val = caller_binding.local_variable_get(lvar)).nil?
|
52
|
+
[lvar, val]
|
53
|
+
}.compact.to_h
|
54
|
+
end
|
55
|
+
|
56
|
+
def __debug_ivars__(caller_binding)
|
57
|
+
instance_variables.map {|ivar|
|
58
|
+
next if %i[@name @actual @expected_as_array @matcher_execution_context
|
59
|
+
@chained_method_clauses @block_arg]
|
60
|
+
.include?(ivar)
|
61
|
+
next if (val = instance_variable_get(ivar)).nil?
|
62
|
+
[ivar, val]
|
63
|
+
}.compact.to_h
|
64
|
+
end
|
65
|
+
|
66
|
+
%i[
|
67
|
+
is_at_least
|
68
|
+
running_at_most
|
69
|
+
running_at_least
|
70
|
+
warmup_at_most
|
71
|
+
].each do |type|
|
72
|
+
chain type do |number|
|
73
|
+
# __debug__ "%s(%p)" % [type, number], binding
|
74
|
+
reason, value = ___number_reason_and_value___
|
75
|
+
if reason || value
|
76
|
+
raise "Need to handle unit-less number first: %s(%p)" % [reason, value]
|
77
|
+
end
|
78
|
+
@number_for = type
|
79
|
+
@number_val = number
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
alias_method :and_at_least, :is_at_least
|
84
|
+
|
85
|
+
%i[
|
86
|
+
times
|
87
|
+
seconds
|
88
|
+
milliseconds
|
89
|
+
].each do |unit|
|
90
|
+
chain unit do
|
91
|
+
# __debug__ unit, binding
|
92
|
+
reason, value = ___number_reason_and_value___
|
93
|
+
raise "No number was specified" unless reason && value
|
94
|
+
case reason
|
95
|
+
when :running_at_most; apply_max_run unit
|
96
|
+
when :running_at_least; apply_min_run unit
|
97
|
+
when :warmup_at_most; apply_warmup unit
|
98
|
+
else raise "%s is incompatible with %s(%p)" % [unit, reason, value]
|
99
|
+
end
|
100
|
+
@number_for = @number_val = nil
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
# TODO: let IPS set time to run instead of iterations to run
|
105
|
+
# Terminates a pending `perform_at_least(n)` / `and_at_least(n)` number as an
# iterations-per-second assertion.  Raises if the pending number belongs to a
# non-assertion chain, if an ips assertion was already made, or if no number
# is pending.
chain :ips do
  # __debug__ "ips", binding
  reason, value = ___number_reason_and_value___
  raise "'ips' unit is only for assertions" unless reason == :is_at_least
  raise "Already asserting %s ips" % [@expect_ips] if @expect_ips
  raise "Unknown assertion count" unless value
  @expect_ips = Integer(value)
  # the pending number has been consumed
  @number_for = @number_val = nil
end
|
115
|
+
|
116
|
+
# need to use method because "chain" can't take a block
|
117
|
+
def times_faster_than(&other)
|
118
|
+
# __debug__ "times_faster_than"
|
119
|
+
reason, value = ___number_reason_and_value___
|
120
|
+
raise "'times_faster_than' is only for assertions" unless reason == :is_at_least
|
121
|
+
raise "Already asserting %sx comparison" % [@expect_cmp] if @expect_cmp
|
122
|
+
raise ArgumentError, "must provide a proc" unless other
|
123
|
+
@expect_cmp = Float(value)
|
124
|
+
@cmp_proc = other
|
125
|
+
@number_for = @number_val = nil
|
126
|
+
self
|
127
|
+
end
|
128
|
+
|
129
|
+
chain :loudly do @volume = :loud end
|
130
|
+
chain :quietly do @volume = :quiet end
|
131
|
+
chain :volume do |volume|
|
132
|
+
raise "Invalid volume" unless %i[loud quiet].include?(volume)
|
133
|
+
@volume = volume
|
134
|
+
end
|
135
|
+
|
136
|
+
# Sets how many iterations each measured round represents.  May only be set
# once per matcher; the value is coerced with Integer().
chain :iterations_per_round do |iterations|
  if @iterations_per_round
    # String#% (not String#[]) is needed to interpolate the current value
    raise "Already set iterations per round (%p)" % [@iterations_per_round]
  end
  @iterations_per_round = Integer(iterations)
end
|
142
|
+
|
143
|
+
match do |actual|
|
144
|
+
require "benchmark"
|
145
|
+
raise "Need to expect a proc or block" unless actual.respond_to?(:to_proc)
|
146
|
+
raise "Need a performance assertion" unless assertion?
|
147
|
+
@actual_proc = actual
|
148
|
+
prepare_for_measurement
|
149
|
+
if @max_iter && (@max_iter % @iterations_per_round) != 0
|
150
|
+
raise "Iterations per round (%p) must divide evenly by max iterations (%p)" % [
|
151
|
+
@iterations_per_round, @max_iter,
|
152
|
+
]
|
153
|
+
end
|
154
|
+
run_measurements
|
155
|
+
cmp_okay? && ips_okay?
|
156
|
+
end
|
157
|
+
|
158
|
+
# Describes whichever assertions were configured (comparison and/or ips).
# Unset assertions are dropped so the text never starts or ends with a
# dangling ", and ".
description do
  [
    @expect_cmp && cmp_okay_msg,
    @expect_ips && ips_okay_msg,
  ].compact.join(", and ")
end
|
164
|
+
|
165
|
+
failure_message do
|
166
|
+
[
|
167
|
+
cmp_okay? ? nil : "expected to #{cmp_okay_msg} but #{cmp_fail_msg}", # =>
|
168
|
+
ips_okay? ? nil : "expected to #{ips_okay_msg} but #{ips_fail_msg}",
|
169
|
+
].compact.join(", and ")
|
170
|
+
end
|
171
|
+
|
172
|
+
private
|
173
|
+
|
174
|
+
chain :__convert_expected_to_ivars__ do
|
175
|
+
@number_val ||= expected
|
176
|
+
@number_for ||= :is_at_least if @number_val
|
177
|
+
# __debug__ "__convert_expected_to_ivars__", binding
|
178
|
+
expected = nil
|
179
|
+
end
|
180
|
+
private :__convert_expected_to_ivars__
|
181
|
+
|
182
|
+
def ___number_reason_and_value___
|
183
|
+
__convert_expected_to_ivars__
|
184
|
+
[@number_for, @number_val]
|
185
|
+
end
|
186
|
+
|
187
|
+
def apply_min_run(unit)
|
188
|
+
case unit
|
189
|
+
when :seconds; @min_time = Float(@number_val)
|
190
|
+
when :milliseconds; @min_time = Float(@number_val) / 1000.0
|
191
|
+
when :times; @min_iter = Integer(@number_val)
|
192
|
+
else raise "Invalid unit %s for %s(%p)" % [unit, @number_for, @number_val]
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
def apply_max_run(unit)
|
197
|
+
case unit
|
198
|
+
when :seconds; @max_time = Float(@number_val)
|
199
|
+
when :milliseconds; @max_time = Float(@number_val) / 1000.0
|
200
|
+
when :times; @max_iter = Integer(@number_val)
|
201
|
+
else raise "Invalid unit %s for %s(%p)" % [unit, @number_for, @number_val]
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
def apply_warmup(unit)
|
206
|
+
case unit
|
207
|
+
when :seconds; @warmup_time = Float(@number_val)
|
208
|
+
when :milliseconds; @warmup_time = Float(@number_val) / 1000.0
|
209
|
+
when :times; @warmup_iter = Integer(@number_val)
|
210
|
+
else raise "Invalid unit %s for %s(%p)" % [unit, @number_for, @number_val]
|
211
|
+
end
|
212
|
+
end
|
213
|
+
|
214
|
+
def prepare_for_measurement
|
215
|
+
@volume ||= ENV.fetch("RSPEC_BENCHMARK_VOLUME", :quiet).to_sym
|
216
|
+
@max_time ||= 2
|
217
|
+
@min_time ||= 1
|
218
|
+
@min_iter ||= 1
|
219
|
+
@warmup_time ||= 0.100
|
220
|
+
@warmup_iter ||= 1000
|
221
|
+
@iterations_per_round ||= 1
|
222
|
+
nil
|
223
|
+
end
|
224
|
+
|
225
|
+
def run_measurements
|
226
|
+
puts header if loud?
|
227
|
+
# __debug__ "run_measurements", binding
|
228
|
+
warmup
|
229
|
+
take_measurements
|
230
|
+
end
|
231
|
+
|
232
|
+
# Builds the multi-line banner printed before measurements in loud mode:
# the warmup limits, the benchmark time/iteration limits, and the
# Benchmark caption row.
def header
  max_rounds = @max_iter && @max_iter / @iterations_per_round
  [
    # report the warmup settings (not the benchmark iteration limits)
    "Warmup time %s, or iterations: %s" % [@warmup_time, @warmup_iter],
    "Benchmark time (%s..%s) or iterations (%s..%s), max rounds: %p" % [
      @min_time, @max_time, @min_iter, @max_iter, max_rounds,
    ],
    "%-10s %s" % ["", Benchmark::CAPTION],
  ].join("\n")
end
|
242
|
+
|
243
|
+
def warmup
|
244
|
+
return unless 0 < @warmup_time && 0 < @warmup_iter # rubocop:disable Style/NumericPredicate
|
245
|
+
args = [@warmup_iter, 0, @warmup_time, 1, @warmup_iter]
|
246
|
+
measure("warmup", *args, &@actual_proc)
|
247
|
+
measure("warmup cmp", *args, &@cmp_proc) if @cmp_proc
|
248
|
+
end
|
249
|
+
|
250
|
+
def take_measurements
|
251
|
+
args = [@iterations_per_round, @min_time, @max_time, @min_iter, @max_iter]
|
252
|
+
@actual_tms = measure("actual", *args, &@actual_proc)
|
253
|
+
@cmp_tms = measure("cmp", *args, &@cmp_proc) if @cmp_proc
|
254
|
+
return unless @cmp_proc
|
255
|
+
# how many times faster?
|
256
|
+
@actual_cmp = @actual_tms.ips_real / @cmp_tms.ips_real
|
257
|
+
puts "Ran %0.3fx as fast as comparison" % [@actual_cmp] if loud?
|
258
|
+
end
|
259
|
+
|
260
|
+
# True when the matcher was configured to print its measurements.
def loud?
  @volume == :loud
end
|
261
|
+
|
262
|
+
# True when at least one assertion (comparison or ips) has been configured.
def assertion?
  (@expect_cmp || @expect_ips) ? true : false
end
|
263
|
+
|
264
|
+
# Passes when no comparison was requested, or when the measured speed ratio
# exceeds the expected ratio.
def cmp_okay?
  return true unless @expect_cmp
  @expect_cmp < @actual_cmp
end
|
265
|
+
# Passes when no ips assertion was requested, or when the measured real-time
# ips exceeds the expected ips.  Reads @expect_ips (set by the `ips` chain)
# and @actual_tms (set by take_measurements).
def ips_okay?; !@expect_ips || @expect_ips < @actual_tms.ips_real end
|
266
|
+
|
267
|
+
# Repeatedly benchmarks the given block, yielding +ipr+ (iterations per
# round) to it each round, and accumulates the results.
#
# Stops after +max_rounds+ rounds when an iteration cap is set, or as soon as
# the accumulated real time exceeds max_time.  With no iteration cap
# (max_rounds is nil) only the time limit ends the loop.
#
# @return [TmsMeasurements] the accumulated measurements
def measure(name, ipr, *args)
  measurements = TmsMeasurements.new(name, ipr, *args)
  max_rounds = measurements.max_rounds # nil => no iteration cap
  rounds = 0
  until max_rounds && max_rounds <= rounds
    # GC.start(full_mark: true, immediate_sweep: true)
    # GC.compact
    measurements << Benchmark.measure do
      yield ipr
    end
    rounds += 1
    break if measurements.max_time < measurements.real
  end
  log_measurement(name, measurements)
  measurements
end
|
281
|
+
|
282
|
+
# rubocop:disable Metrics/AbcSize
|
283
|
+
def units_str(num)
|
284
|
+
if num >= 10**12; "%7.3fT" % [num.to_f / 10**12]
|
285
|
+
elsif num >= 10** 9; "%7.3fB" % [num.to_f / 10** 9]
|
286
|
+
elsif num >= 10** 6; "%7.3fM" % [num.to_f / 10** 6]
|
287
|
+
elsif num >= 10** 3; "%7.3fk" % [num.to_f / 10** 3]
|
288
|
+
else "%7.3f" % [num.to_f]
|
289
|
+
end
|
290
|
+
end
|
291
|
+
# rubocop:enable Metrics/AbcSize
|
292
|
+
|
293
|
+
def log_measurement(name, measurements)
|
294
|
+
return unless loud?
|
295
|
+
puts "%-10s %s => %s ips (%d rounds)" % [
|
296
|
+
name,
|
297
|
+
measurements.tms.to_s.rstrip,
|
298
|
+
units_str(measurements.ips_real),
|
299
|
+
measurements.size,
|
300
|
+
]
|
301
|
+
end
|
302
|
+
|
303
|
+
# Phrase describing the expected speed ratio.
def cmp_okay_msg
  format("run %0.2fx faster", @expect_cmp)
end
|
304
|
+
# Phrase describing the speed ratio that was actually measured.
def cmp_fail_msg
  format("was only %0.2fx as fast", @actual_cmp)
end
|
305
|
+
def ips_okay_msg; "run with %s ips" % [units_str(@expect_ips)] end
|
306
|
+
# Phrase describing the ips that was actually measured, taken from the
# real-time ips of @actual_tms (set by take_measurements).
def ips_fail_msg; "was only %s ips" % [units_str(@actual_tms.ips_real)] end
|
307
|
+
|
308
|
+
end
|
309
|
+
# rubocop:enable Metrics/BlockLength, Layout/SpaceAroundOperators
|
310
|
+
|
311
|
+
# NOTE(review): no matcher named :perform is defined in this file (only
# :perform_at_least) -- confirm :perform exists elsewhere, or that the
# intended alias target is :perform_at_least.
alias_matcher :perform_with, :perform
|
312
|
+
|
313
|
+
end
|
314
|
+
|
315
|
+
# Replicates a subset of the functionality in benchmark-ips
|
316
|
+
#
|
317
|
+
# TODO: merge this with benchmark-ips
|
318
|
+
# TODO: implement (or remove) min_time, min_iter
|
319
|
+
class TmsMeasurements
  attr_reader :iterations_per_entry
  attr_reader :iterations

  attr_reader :min_time
  attr_reader :max_time

  attr_reader :min_iter
  attr_reader :max_iter

  # @param name [Object] label for this set of measurements
  # @param ipe [Integer] iterations represented by each appended entry
  # @param min_time [Numeric] minimum run time in seconds (stored only)
  # @param max_time [Numeric] maximum run time in seconds
  # @param min_iter [Integer] minimum iterations (stored only)
  # @param max_iter [Integer, nil] maximum iterations; nil means no cap
  def initialize(name, ipe, min_time, max_time, min_iter, max_iter) # rubocop:disable Metrics/ParameterLists
    @name = name
    @iterations_per_entry = Integer(ipe)
    @min_time = Float(min_time)
    @max_time = Float(max_time)
    @min_iter = Integer(min_iter)
    # nil is a valid "unlimited" value (see #max_rounds); Integer(nil) raises.
    @max_iter = max_iter && Integer(max_iter)
    @entries = []
    @sum = Benchmark::Tms.new
    @iterations = 0
  end

  # Number of entries appended so far.
  def size; @entries.size end

  # Appends one Benchmark::Tms entry, adding it to the running sum and
  # counting +iterations_per_entry+ more iterations.
  #
  # @raise [TypeError] if +tms+ is not a Benchmark::Tms
  # @raise [IndexError] if a max_iter cap is set and already reached
  # @return [self]
  def <<(tms)
    raise TypeError, "not a #{Benchmark::Tms}" unless tms.is_a?(Benchmark::Tms)
    raise IndexError, "full" if @max_iter && @max_iter <= size
    @sum += tms
    @iterations += @iterations_per_entry
    @entries << tms
    self
  end

  # Copy of the accumulated Benchmark::Tms.
  def sum; @sum.dup end
  alias tms sum

  # Copy of the list of appended entries.
  def entries; @entries.dup end

  # Accumulated timing components, delegated to the running sum.
  def cstime; @sum.cstime end
  def cutime; @sum.cutime end
  def real;   @sum.real   end
  def stime;  @sum.stime  end
  def total;  @sum.total  end
  def utime;  @sum.utime  end

  # Iterations per second, relative to real, total, or user time.
  def ips_real;  @iterations / real  end
  def ips_total; @iterations / total end
  def ips_utime; @iterations / utime end

  # Maximum number of entries implied by max_iter, or nil when uncapped.
  def max_rounds
    @max_iter && @max_iter / @iterations_per_entry
  end

end
|
373
|
+
|
374
|
+
end
|