rperf 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +149 -0
- data/docs/help.md +291 -0
- data/exe/rperf +207 -0
- data/ext/rperf/extconf.rb +6 -0
- data/ext/rperf/rperf.c +834 -0
- data/lib/rperf/version.rb +3 -0
- data/lib/rperf.rb +650 -0
- metadata +77 -0
data/lib/rperf.rb
ADDED
|
@@ -0,0 +1,650 @@
|
|
|
1
|
+
require "rperf/version"
|
|
2
|
+
require "zlib"
|
|
3
|
+
require "stringio"
|
|
4
|
+
|
|
5
|
+
begin
|
|
6
|
+
# gem install
|
|
7
|
+
require "rperf.so"
|
|
8
|
+
rescue LoadError
|
|
9
|
+
# local development
|
|
10
|
+
require 'rbconfig'
|
|
11
|
+
require_relative "../tmp/#{RbConfig::CONFIG['arch']}/rperf/#{RbConfig::CONFIG['RUBY_PROGRAM_VERSION']}/rperf.so"
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
module Rperf
|
|
15
|
+
|
|
16
|
+
# Session state of the profiler; (re)assigned by start and consumed by stop.
@verbose = false
@output = nil
# Initialised together with the rest of the state so stop never reads an
# undefined instance variable.
@format = nil
@stat = false
@stat_start_mono = nil

# Number of entries listed in the "Top N by flat" section of the stat report.
STAT_TOP_N = 5

# Leaf labels of synthetic frames; samples ending in one of these are left
# out of the stat report's top-functions list.
SYNTHETIC_LABELS = [
  "[GVL blocked]",
  "[GVL wait]",
  "[GC marking]",
  "[GC sweeping]",
].freeze
|
|
22
|
+
|
|
23
|
+
# Starts profiling.
#
# frequency, mode: forwarded to the C extension (signal too, but only when
#                  it is not nil).
# output:  path handed to the writer by stop; nil means no file is written.
# format:  :pprof, :collapsed, or :text. nil = auto-detect from output extension
#            .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
#            .txt       → text report (human/AI readable flat + cumulative table)
#            otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
# verbose: print sampling statistics on stop (also enabled by RPERF_VERBOSE=1).
# stat:    print the stat report on stop.
#
# Without a block, returns nil and leaves the profiler running until stop is
# called. With a block, profiles the block, always calls stop, and returns
# stop's result; an exception raised by the block propagates to the caller
# after stop has run.
def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil)
  @verbose = verbose || ENV["RPERF_VERBOSE"] == "1"
  @output = output
  @format = format
  @stat = stat
  @stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC) if @stat
  c_opts = { frequency: frequency, mode: mode }
  c_opts[:signal] = signal unless signal.nil?
  _c_start(**c_opts)

  return unless block_given?

  result = nil
  begin
    yield
  ensure
    # Deliberately no `return` here: returning from `ensure` would silently
    # discard an exception raised by the profiled block.
    result = stop
  end
  result
end
|
|
46
|
+
|
|
47
|
+
# Stops profiling and returns the collected data, or nil when the C
# extension hands back nothing.
#
# Depending on the options given to start, this also prints the verbose
# statistics and/or the stat report, and writes the output file. The
# output path and format are cleared once the file has been written.
def self.stop
  profile = _c_stop
  if profile
    print_stats(profile) if @verbose
    print_stat(profile) if @stat

    target = @output
    if target
      write_data(target, profile, @format)
      @output = @format = nil
    end

    profile
  end
end
|
|
62
|
+
|
|
63
|
+
# Writes previously collected profiling data to +path+.
#
# format: :pprof, :collapsed, or :text. When nil, the format is chosen
# from the extension of +path+:
#   .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
#   .txt       → text report (human/AI readable flat + cumulative table)
#   anything else (.pb.gz etc) → pprof protobuf (gzip compressed)
def self.save(path, data, format: nil)
  write_data(path, data, format)
end
|
|
71
|
+
|
|
72
|
+
# Encodes +data+ in the resolved format and writes it to +path+.
# Collapsed and text output are written as text; every other format is
# written as gzip-compressed pprof bytes.
def self.write_data(path, data, format)
  kind = detect_format(path, format)
  if kind == :collapsed
    File.write(path, Collapsed.encode(data))
  elsif kind == :text
    File.write(path, Text.encode(data))
  else
    File.binwrite(path, gzip(PProf.encode(data)))
  end
end
|
|
83
|
+
private_class_method :write_data
|
|
84
|
+
|
|
85
|
+
# Resolves the output format. An explicit +format+ wins (converted to a
# Symbol); otherwise the extension of +path+ decides, with pprof as the
# fallback.
def self.detect_format(path, format)
  return format.to_sym if format

  name = path.to_s
  if name.match?(/\.collapsed\z/)
    :collapsed
  elsif name.match?(/\.txt\z/)
    :text
  else
    :pprof
  end
end
|
|
93
|
+
private_class_method :detect_format
|
|
94
|
+
|
|
95
|
+
# Returns +data+ gzip-compressed, as a binary (ASCII-8BIT) String.
def self.gzip(data)
  sink = StringIO.new
  sink.set_encoding("ASCII-8BIT")
  # The block form closes the writer, which flushes the gzip trailer.
  Zlib::GzipWriter.wrap(sink) { |writer| writer.write(data) }
  sink.string
end
|
|
103
|
+
|
|
104
|
+
# Prints the verbose sampling summary to $stderr (mode, frequency, number
# of sampling calls with their total and average cost, number of recorded
# samples) and then the top tables. Missing fields default to 0 / :cpu.
def self.print_stats(data)
  calls = data[:sampling_count] || 0
  spent_ns = data[:sampling_time_ns] || 0
  recorded = data[:samples]&.size || 0
  mode = data[:mode] || :cpu
  frequency = data[:frequency] || 0

  avg_us = calls > 0 ? spent_ns / calls / 1000.0 : 0.0

  $stderr.puts "[rperf] mode=#{mode} frequency=#{frequency}Hz"
  $stderr.puts format("[rperf] sampling: %s calls, %.2fms total, %.1fus/call avg",
                      calls, spent_ns / 1_000_000.0, avg_us)
  $stderr.puts "[rperf] samples recorded: #{recorded}"

  print_top(data)
end
|
|
120
|
+
|
|
121
|
+
TOP_N = 10
|
|
122
|
+
|
|
123
|
+
# Builds the flat and cumulative weight tables from raw samples.
#
# Each sample is [frames, weight]; each frame is [path, label] and the
# first frame is the leaf (deepest frame). Table keys are [label, path].
# Flat credits the weight to the leaf only; cumulative credits it once to
# every distinct frame in the stack, so recursion is not double counted.
#
# Returns { flat: Hash, cum: Hash, total_weight: Integer }
def self.compute_flat_cum(samples_raw)
  flat = Hash.new(0)
  cum = Hash.new(0)
  total_weight = 0

  samples_raw.each do |frames, weight|
    total_weight += weight
    keys = frames.map { |path, label| [label, path] }

    flat[keys.first] += weight unless keys.empty?
    keys.uniq.each { |key| cum[key] += weight }
  end

  { flat: flat, cum: cum, total_weight: total_weight }
end
|
|
149
|
+
private_class_method :compute_flat_cum
|
|
150
|
+
|
|
151
|
+
# Prints the "flat" and "cum" top tables for the collected samples.
# Samples from C are [[[path_str, label_str], ...], weight].
# Prints nothing when there are no samples or no frames at all.
def self.print_top(data)
  samples_raw = data[:samples]
  return unless samples_raw && !samples_raw.empty?

  flat, cum, total = compute_flat_cum(samples_raw).values_at(:flat, :cum, :total_weight)
  return if cum.empty?

  print_top_table("flat", flat, total)
  print_top_table("cum", cum, total)
end
|
|
162
|
+
|
|
163
|
+
# Prints the TOP_N heaviest entries of +table+ to $stderr, one per line,
# with the weight in milliseconds and its share of +total_weight+.
# +table+ maps [label, path] to a weight in nanoseconds.
def self.print_top_table(kind, table, total_weight)
  ranked = table.sort_by { |_, w| -w }.first(TOP_N)
  $stderr.puts "[rperf] top #{ranked.size} by #{kind}:"
  ranked.each do |(label, path), weight|
    share = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
    location = path.empty? ? "" : " (#{path})"
    $stderr.puts format("[rperf] %8.1fms %5.1f%% %s%s", weight / 1_000_000.0, share, label, location)
  end
end
|
|
174
|
+
|
|
175
|
+
# Row formatters for the stat report.
# STAT_PCT_LINE renders value, unit, percentage and label;
# STAT_LINE renders value, unit and label.
STAT_PCT_LINE = lambda do |val, unit, pct, label|
  " %14s %-2s %5.1f%% %s" % [val, unit, pct, label]
end
STAT_LINE = lambda do |val, unit, label|
  " %14s %-2s %s" % [val, unit, label]
end
|
|
182
|
+
private_constant :STAT_PCT_LINE, :STAT_LINE
|
|
183
|
+
|
|
184
|
+
# Prints the stat report to $stderr: user/sys/real time for the command
# named by RPERF_STAT_COMMAND and, when samples exist, the time breakdown,
# runtime and OS counters, top functions and a footer.
# Real time is measured from the monotonic timestamp taken by start.
def self.print_stat(data)
  samples_raw = data[:samples] || []
  elapsed_s = Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono
  real_ns = (elapsed_s * 1_000_000_000).to_i
  cpu = Process.times
  user_ns = (cpu.utime * 1_000_000_000).to_i
  sys_ns = (cpu.stime * 1_000_000_000).to_i

  command = ENV.fetch("RPERF_STAT_COMMAND", "(unknown)")

  $stderr.puts
  $stderr.puts " Performance stats for '#{command}':"
  $stderr.puts
  [[user_ns, "user"], [sys_ns, "sys"], [real_ns, "real"]].each do |ns, what|
    $stderr.puts format(" %14s ms %s", format_ms(ns), what)
  end

  unless samples_raw.empty?
    breakdown, total_weight = compute_stat_breakdown(samples_raw)
    print_stat_breakdown(breakdown, total_weight)
    print_stat_runtime_info
    print_stat_system_info
    print_stat_top(samples_raw, total_weight)
    print_stat_footer(samples_raw, real_ns, data)
  end

  $stderr.puts
end
|
|
211
|
+
|
|
212
|
+
# Groups sample weights by what the leaf frame was doing.
# A sample whose leaf label is one of the synthetic GVL/GC labels goes to
# the matching category; every other sample (including one with no
# frames) counts as :cpu_execution.
#
# Returns [breakdown, total_weight]; breakdown is a Hash defaulting to 0.
def self.compute_stat_breakdown(samples_raw)
  categories = {
    "[GVL blocked]" => :gvl_blocked,
    "[GVL wait]" => :gvl_wait,
    "[GC marking]" => :gc_marking,
    "[GC sweeping]" => :gc_sweeping,
  }
  breakdown = Hash.new(0)
  total_weight = 0

  samples_raw.each do |frames, weight|
    total_weight += weight
    leaf_label = frames.first&.last || ""
    breakdown[categories.fetch(leaf_label, :cpu_execution)] += weight
  end

  [breakdown, total_weight]
end
|
|
231
|
+
private_class_method :compute_stat_breakdown
|
|
232
|
+
|
|
233
|
+
# Prints one percentage row per non-zero category of the time breakdown,
# in a fixed order, preceded by a blank line.
def self.print_stat_breakdown(breakdown, total_weight)
  $stderr.puts

  rows = [
    [:cpu_execution, "CPU execution"],
    [:gvl_blocked, "[Ruby] GVL blocked (I/O, sleep)"],
    [:gvl_wait, "[Ruby] GVL wait (contention)"],
    [:gc_marking, "[Ruby] GC marking"],
    [:gc_sweeping, "[Ruby] GC sweeping"],
  ]
  rows.each do |category, title|
    weight = breakdown[category]
    next if weight == 0

    share = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
    $stderr.puts STAT_PCT_LINE.call(format_ms(weight), "ms", share, title)
  end
end
|
|
249
|
+
private_class_method :print_stat_breakdown
|
|
250
|
+
|
|
251
|
+
# Prints Ruby runtime counters taken from GC.stat (GC time and counts,
# allocated and freed objects) and, when YJIT is enabled and reports it,
# the YJIT code execution ratio.
def self.print_stat_runtime_info
  gc = GC.stat
  counts = [gc[:count], gc[:minor_gc_count], gc[:major_gc_count]].map { |c| format_integer(c) }
  # gc[:time] is scaled by 1_000_000 because format_ms takes nanoseconds.
  $stderr.puts STAT_LINE.call(format_ms(gc[:time] * 1_000_000), "ms",
                              "[Ruby] GC time (%s count: %s minor, %s major)" % counts)

  [
    [:total_allocated_objects, "[Ruby] allocated objects"],
    [:total_freed_objects, "[Ruby] freed objects"],
  ].each do |stat, title|
    $stderr.puts STAT_LINE.call(format_integer(gc[stat]), " ", title)
  end

  return unless defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled?

  ratio = RubyVM::YJIT.runtime_stats[:ratio_in_yjit]
  return unless ratio

  $stderr.puts STAT_LINE.call(format("%.1f%%", ratio * 100), " ", "[Ruby] YJIT code execution ratio")
end
|
|
267
|
+
private_class_method :print_stat_runtime_info
|
|
268
|
+
|
|
269
|
+
# Prints the OS-level counters that get_system_stats could collect:
# peak memory, context switches and disk I/O. Each row is skipped when
# its counter is missing.
def self.print_stat_system_info
  sys_stats = get_system_stats

  peak_kb = sys_stats[:maxrss_kb]
  if peak_kb
    $stderr.puts STAT_LINE.call(format_integer((peak_kb / 1024.0).round), "MB", "[OS] peak memory (maxrss)")
  end

  voluntary = sys_stats[:ctx_voluntary]
  if voluntary
    involuntary = sys_stats[:ctx_involuntary]
    switches = format_integer(voluntary + involuntary)
    detail = format("[OS] context switches (%s voluntary, %s involuntary)",
                    format_integer(voluntary), format_integer(involuntary))
    $stderr.puts STAT_LINE.call(switches, " ", detail)
  end

  read_bytes = sys_stats[:io_read_bytes]
  return unless read_bytes

  write_bytes = sys_stats[:io_write_bytes]
  # Bytes → whole megabytes, rendered with thousands separators.
  megabytes = ->(bytes) { format_integer((bytes / 1024.0 / 1024.0).round) }
  total_mb = megabytes.call(read_bytes + write_bytes)
  detail = format("[OS] disk I/O (%s MB read, %s MB write)",
                  megabytes.call(read_bytes), megabytes.call(write_bytes))
  $stderr.puts STAT_LINE.call(total_mb, "MB", detail)
end
|
|
292
|
+
private_class_method :print_stat_system_info
|
|
293
|
+
|
|
294
|
+
# Prints the STAT_TOP_N heaviest leaf functions. Samples whose leaf is a
# synthetic frame (see SYNTHETIC_LABELS) or that have no frames are
# ignored; percentages are relative to +total_weight+. Prints nothing
# when no sample qualifies.
def self.print_stat_top(samples_raw, total_weight)
  flat = samples_raw.each_with_object(Hash.new(0)) do |(frames, weight), acc|
    leaf = frames.first
    next unless leaf

    path, label = leaf
    next if SYNTHETIC_LABELS.include?(label)

    acc[[label, path]] += weight
  end

  return if flat.empty?

  top = flat.sort_by { |_, w| -w }.first(STAT_TOP_N)
  $stderr.puts
  $stderr.puts " Top #{top.size} by flat:"
  top.each do |(label, path), weight|
    share = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
    location = path.empty? ? "" : " (#{path})"
    $stderr.puts STAT_PCT_LINE.call(format_ms(weight), "ms", share, "#{label}#{location}")
  end
end
|
|
317
|
+
private_class_method :print_stat_top
|
|
318
|
+
|
|
319
|
+
# Prints the closing line of the stat report: number of samples, number
# of distinct stacks, and the profiler's own sampling time as a
# percentage of +real_ns+.
def self.print_stat_footer(samples_raw, real_ns, data)
  distinct_stacks = samples_raw.uniq { |frames, _| frames }.size
  sampling_ns = data[:sampling_time_ns] || 0
  overhead_pct = real_ns > 0 ? sampling_ns * 100.0 / real_ns : 0.0

  $stderr.puts
  $stderr.puts format(" %d samples (%d unique stacks), %.1f%% profiler overhead",
                      samples_raw.size, distinct_stacks, overhead_pct)
end
|
|
326
|
+
private_class_method :print_stat_footer
|
|
327
|
+
|
|
328
|
+
# Renders +n+ with a comma between every group of three digits,
# e.g. 1234567 → "1,234,567".
def self.format_integer(n)
  # Insert a comma after each digit that is followed by a whole number of
  # three-digit groups up to the end of its digit run.
  n.to_s.gsub(/(\d)(?=(?:\d{3})+(?!\d))/, '\1,')
end
|
|
331
|
+
private_class_method :format_integer
|
|
332
|
+
|
|
333
|
+
# Format nanoseconds as ms with 1 decimal place and comma-separated integer part.
# Example: 5_609_200_000 → "5,609.2"
#
# The value is rounded once, to whole tenths of a millisecond, before it
# is split into integer and fractional digits. Rounding the fraction on
# its own would drop the carry (1.96 ms would print as "1.0").
def self.format_ms(ns)
  tenths = (ns / 100_000.0).round
  whole, tenth = tenths.abs.divmod(10)
  int_str = whole.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
  # Sign is emitted explicitly so values between -1 ms and 0 keep it.
  sign = tenths < 0 ? "-" : ""
  "#{sign}#{int_str}.#{tenth}"
end
|
|
342
|
+
private_class_method :format_ms
|
|
343
|
+
|
|
344
|
+
# Collects OS-level counters for the stat report. Returns a Hash; keys
# whose source is unavailable are simply absent.
#
# Reads /proc/self/status (peak RSS, context switches) and /proc/self/io
# (read/write bytes) when readable. Without /proc/self/status it falls
# back to asking `ps` for the RSS of this process, in KB.
def self.get_system_stats
  stats = {}

  # Stores the first capture group of whichever pattern matches each line.
  scan = lambda do |path, fields|
    File.read(path).each_line do |line|
      fields.each do |key, pattern|
        found = pattern.match(line)
        next unless found

        stats[key] = found[1].to_i
        break
      end
    end
  end

  if File.readable?("/proc/self/status")
    # Linux: parse /proc/self/status
    scan.call("/proc/self/status",
              maxrss_kb: /\AVmHWM:\s+(\d+)\s+kB/,
              ctx_voluntary: /\Avoluntary_ctxt_switches:\s+(\d+)/,
              ctx_involuntary: /\Anonvoluntary_ctxt_switches:\s+(\d+)/)
  else
    # macOS/BSD: ps reports RSS in KB
    rss = begin
      `ps -o rss= -p #{$$}`.strip.to_i
    rescue StandardError
      nil
    end
    stats[:maxrss_kb] = rss if rss && rss > 0
  end

  if File.readable?("/proc/self/io")
    # Linux: parse /proc/self/io
    scan.call("/proc/self/io",
              io_read_bytes: /\Aread_bytes:\s+(\d+)/,
              io_write_bytes: /\Awrite_bytes:\s+(\d+)/)
  end

  stats
end
|
|
380
|
+
private_class_method :get_system_stats
|
|
381
|
+
|
|
382
|
+
# ENV-based auto-start for CLI usage.
# When RPERF_ENABLED=1, profiling starts as soon as this file is loaded,
# configured from the other RPERF_* variables, and stops at process exit.
if ENV["RPERF_ENABLED"] == "1"
  _rperf_mode_name = ENV.fetch("RPERF_MODE", "cpu")
  _rperf_mode = case _rperf_mode_name
                when "cpu" then :cpu
                when "wall" then :wall
                else
                  raise ArgumentError, "RPERF_MODE must be 'cpu' or 'wall', got: #{_rperf_mode_name.inspect}"
                end
  _rperf_stat = ENV["RPERF_STAT"] == "1"
  _rperf_signal_env = ENV["RPERF_SIGNAL"]

  _rperf_opts = {
    frequency: ENV.fetch("RPERF_FREQUENCY", 1000).to_i,
    mode: _rperf_mode,
    # In stat mode a file is written only when RPERF_OUTPUT is set;
    # otherwise the output defaults to "rperf.data".
    output: ENV["RPERF_OUTPUT"] || (_rperf_stat ? nil : "rperf.data"),
    verbose: ENV["RPERF_VERBOSE"] == "1",
    format: ENV["RPERF_FORMAT"]&.to_sym,
    stat: _rperf_stat,
  }
  # RPERF_SIGNAL: unset → option not passed; "false" → false; else Integer.
  unless _rperf_signal_env.nil?
    _rperf_opts[:signal] = _rperf_signal_env == "false" ? false : _rperf_signal_env.to_i
  end

  start(**_rperf_opts)
  at_exit { stop }
end
|
|
405
|
+
|
|
406
|
+
# Text report encoder — human/AI readable flat + cumulative top-N table.
module Text
  module_function

  # Renders +data+ as a plain-text report: a two-line header followed by
  # the flat and the cumulative table, each limited to +top_n+ rows.
  # Returns "No samples recorded.\n" when there are no samples.
  def encode(data, top_n: 50)
    samples_raw = data[:samples]
    mode = data[:mode] || :cpu
    frequency = data[:frequency] || 0

    return "No samples recorded.\n" unless samples_raw && !samples_raw.empty?

    tables = Rperf.send(:compute_flat_cum, samples_raw)
    total_weight = tables[:total_weight]

    [
      format("Total: %.1fms (%s)\n", total_weight / 1_000_000.0, mode),
      "Samples: #{samples_raw.size}, Frequency: #{frequency}Hz\n",
      "\n",
      format_table("Flat", tables[:flat], total_weight, top_n),
      "\n",
      format_table("Cumulative", tables[:cum], total_weight, top_n),
    ].join
  end

  # Renders one table: a "<title>:" line, then one row per entry with the
  # weight in milliseconds, its share of +total_weight+, the label and,
  # when the path is not empty, the path in parentheses. Heaviest first.
  def format_table(title, table, total_weight, top_n)
    rows = table.sort_by { |_, w| -w }.first(top_n).map do |(label, path), weight|
      share = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
      location = path.empty? ? "" : " (#{path})"
      format(" %8.1fms %5.1f%% %s%s\n", weight / 1_000_000.0, share, label, location)
    end
    "#{title}:\n" + rows.join
  end
end
|
|
444
|
+
|
|
445
|
+
# Collapsed stacks encoder for FlameGraph / speedscope.
# Output: one line per unique stack, "frame1;frame2;...;leafN weight\n"
module Collapsed
  module_function

  # Encodes +data+ as collapsed stacks. Frames arrive leaf-first and are
  # written root-first; identical stacks are merged by summing weights.
  # Missing samples (nil) are treated like an empty sample list, as the
  # other consumers of the profiling data do, and yield a lone newline.
  def encode(data)
    merged = Hash.new(0)
    (data[:samples] || []).each do |frames, weight|
      stack = frames.reverse.map { |_, label| label }.join(";")
      merged[stack] += weight
    end
    merged.map { |stack, weight| "#{stack} #{weight}" }.join("\n") + "\n"
  end
end
|
|
459
|
+
|
|
460
|
+
# Hand-written protobuf encoder for pprof profile format.
# Only runs once at stop time, so performance is not critical.
#
# Samples from C are: [[[path_str, label_str], ...], weight], optionally
# followed by a thread sequence number.
# This encoder builds its own string table for pprof output.
module PProf
  module_function

  # Encodes +data+ as an uncompressed pprof Profile message and returns
  # the bytes as a binary String.
  #
  # Missing samples are treated as an empty list, and a missing or
  # non-positive frequency yields a period of 0 instead of raising.
  def encode(data)
    samples_raw = data[:samples] || []
    frequency = data[:frequency] || 0
    # Sampling period; guarded so a nil/zero frequency cannot divide by zero.
    interval_ns = frequency > 0 ? 1_000_000_000 / frequency : 0
    mode = data[:mode] || :cpu

    # Build string table: index 0 must be ""
    string_table = [""]
    string_index = { "" => 0 }

    # Returns the string-table index of s, adding s on first use.
    intern = lambda do |s|
      string_index[s] ||= begin
        string_table << s
        string_table.size - 1
      end
    end

    # Convert string frames to index frames and merge identical stacks per thread
    merged = Hash.new(0)
    thread_seq_key = intern.("thread_seq")
    samples_raw.each do |frames, weight, thread_seq|
      key = [frames.map { |path, label| [intern.(path), intern.(label)] }, thread_seq || 0]
      merged[key] += weight
    end
    merged = merged.to_a

    # Build location/function tables
    locations, functions = build_tables(merged.map { |(frames, _), w| [frames, w] })

    # Intern type label and unit
    type_idx = intern.(mode == :wall ? "wall" : "cpu")
    ns_idx = intern.("nanoseconds")

    # Encode Profile message
    buf = "".b

    # field 1: sample_type (repeated ValueType)
    buf << encode_message(1, encode_value_type(type_idx, ns_idx))

    # field 2: sample (repeated Sample) with thread_seq label
    merged.each do |(frames, thread_seq), weight|
      sample_buf = "".b
      sample_buf << encode_packed_uint64(1, frames.map { |f| locations[f] })
      sample_buf << encode_packed_int64(2, [weight])
      if thread_seq && thread_seq > 0
        label_buf = "".b
        label_buf << encode_int64(1, thread_seq_key) # key
        label_buf << encode_int64(3, thread_seq)     # num
        sample_buf << encode_message(3, label_buf)
      end
      buf << encode_message(2, sample_buf)
    end

    # field 4: location (repeated Location), each with a single Line entry
    locations.each do |frame, loc_id|
      line_buf = encode_uint64(1, functions[frame])
      loc_buf = "".b
      loc_buf << encode_uint64(1, loc_id)
      loc_buf << encode_message(4, line_buf)
      buf << encode_message(4, loc_buf)
    end

    # field 5: function (repeated Function)
    functions.each do |frame, func_id|
      func_buf = "".b
      func_buf << encode_uint64(1, func_id)
      func_buf << encode_int64(2, frame[1]) # name (label_idx)
      func_buf << encode_int64(4, frame[0]) # filename (path_idx)
      buf << encode_message(5, func_buf)
    end

    # Intern comment and doc_url strings before encoding string_table
    comment_indices = [
      intern.("rperf #{Rperf::VERSION}"),
      intern.("mode: #{mode}"),
      intern.("frequency: #{frequency}Hz"),
      intern.("ruby: #{RUBY_DESCRIPTION}"),
    ]
    doc_url_idx = intern.("https://ko1.github.io/rperf/help.html")

    # field 6: string_table (repeated string)
    string_table.each { |s| buf << encode_bytes(6, s.encode("UTF-8")) }

    # field 9: time_nanos (int64)
    buf << encode_int64(9, data[:start_time_ns]) if data[:start_time_ns]

    # field 10: duration_nanos (int64)
    buf << encode_int64(10, data[:duration_ns]) if data[:duration_ns]

    # field 11: period_type (ValueType)
    buf << encode_message(11, encode_value_type(type_idx, ns_idx))

    # field 12: period (int64)
    buf << encode_int64(12, interval_ns)

    # field 13: comment (repeated int64 = string_table index)
    comment_indices.each { |idx| buf << encode_int64(13, idx) }

    # field 15: doc_url (int64 = string_table index)
    buf << encode_int64(15, doc_url_idx)

    buf
  end

  # Assigns ids (starting at 1) to every distinct frame found in +merged+,
  # a list of [frames, weight]. A frame gets the same id as location and
  # as function. Returns [locations, functions], both frame → id Hashes.
  def build_tables(merged)
    locations = {}
    functions = {}
    next_id = 1

    merged.each do |frames, _weight|
      frames.each do |frame|
        next if locations.key?(frame)

        locations[frame] = next_id
        functions[frame] = next_id
        next_id += 1
      end
    end

    [locations, functions]
  end

  # --- Protobuf encoding helpers ---

  # Encodes an integer as a base-128 varint (least significant group
  # first). Negative values are encoded as their 64-bit two's complement.
  def encode_varint(value)
    value = value & 0xFFFFFFFF_FFFFFFFF if value < 0
    buf = "".b
    loop do
      byte = value & 0x7F
      value >>= 7
      if value > 0
        buf << (byte | 0x80).chr # continuation bit: more groups follow
      else
        buf << byte.chr
        break
      end
    end
    buf
  end

  # Varint field (wire type 0) holding an unsigned value.
  def encode_uint64(field, value)
    encode_varint((field << 3) | 0) + encode_varint(value)
  end

  # Varint field (wire type 0) holding a signed value.
  def encode_int64(field, value)
    encode_varint((field << 3) | 0) + encode_varint(value < 0 ? value + (1 << 64) : value)
  end

  # Length-delimited field (wire type 2) holding raw bytes.
  def encode_bytes(field, data)
    data = data.b if data.respond_to?(:b)
    encode_varint((field << 3) | 2) + encode_varint(data.bytesize) + data
  end

  # Embedded message field; same wire format as a bytes field.
  def encode_message(field, data)
    encode_bytes(field, data)
  end

  # ValueType message body: type (field 1) and unit (field 2), both
  # string-table indices.
  def encode_value_type(type_idx, unit_idx)
    encode_int64(1, type_idx) + encode_int64(2, unit_idx)
  end

  # Packed repeated field of unsigned varints.
  def encode_packed_uint64(field, values)
    inner = values.map { |v| encode_varint(v) }.join
    encode_bytes(field, inner)
  end

  # Packed repeated field of signed varints.
  def encode_packed_int64(field, values)
    inner = values.map { |v| encode_varint(v < 0 ? v + (1 << 64) : v) }.join
    encode_bytes(field, inner)
  end
end
|
|
650
|
+
end
|