sperf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +125 -0
- data/exe/sperf +457 -0
- data/ext/sperf/extconf.rb +6 -0
- data/ext/sperf/sperf.c +708 -0
- data/lib/sperf.rb +598 -0
- metadata +79 -0
data/lib/sperf.rb
ADDED
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
require "sperf.so"
|
|
2
|
+
require "zlib"
|
|
3
|
+
require "stringio"
|
|
4
|
+
|
|
5
|
+
module Sperf
|
|
6
|
+
VERSION = "0.1.0"
|
|
7
|
+
|
|
8
|
+
@verbose = false
|
|
9
|
+
@output = nil
|
|
10
|
+
@stat = false
|
|
11
|
+
@stat_start_mono = nil
|
|
12
|
+
STAT_TOP_N = 5
|
|
13
|
+
SYNTHETIC_LABELS = %w[[GVL\ blocked] [GVL\ wait] [GC\ marking] [GC\ sweeping]].freeze
|
|
14
|
+
|
|
15
|
+
# Starts profiling.
#
# frequency: sampling frequency in Hz, passed through to the C extension.
# mode:      :cpu or :wall, passed through to the C extension.
# output:    path to write the profile to when profiling stops (nil = do not write).
# verbose:   print sampling statistics to stderr on stop (also enabled by SPERF_VERBOSE=1).
# format:    :pprof, :collapsed, or :text. nil = auto-detect from output extension
#   .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
#   .txt       → text report (human/AI readable flat + cumulative table)
#   otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
# stat:      print a "perf stat"-like summary to stderr on stop.
#
# Without a block, returns nil and profiling continues until `stop` is called.
# With a block, profiles the block, always calls `stop` afterwards, and returns
# what `stop` returned. An exception raised by the block propagates to the caller.
def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false)
  @verbose = verbose || ENV["SPERF_VERBOSE"] == "1"
  @output = output
  @format = format
  @stat = stat
  @stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC) if @stat
  _c_start(frequency: frequency, mode: mode)

  return unless block_given?

  data = nil
  begin
    yield
  ensure
    # Do NOT `return` from here: an explicit return inside `ensure` discards
    # any in-flight exception, which would hide failures of the profiled code.
    data = stop
  end
  data
end
|
|
36
|
+
|
|
37
|
+
# Stops profiling and returns the collected data (nil when the C extension
# reports nothing to collect).
#
# Prints the verbose and/or stat summaries if they were requested in `start`.
# When an output path was configured, writes the profile there via `save`
# and then clears the configured output and format.
def self.stop
  profile = _c_stop
  return unless profile

  print_stats(profile) if @verbose
  print_stat(profile) if @stat

  destination = @output
  if destination
    # `save` performs the same format detection and encoding.
    save(destination, profile, format: @format)
    @output = nil
    @format = nil
  end

  profile
end
|
|
60
|
+
|
|
61
|
+
# Writes previously collected profiling data to +path+.
#
# format: :pprof, :collapsed, or :text. nil = pick from the path extension:
#   .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
#   .txt       → text report (flat + cumulative table)
#   anything else (.pb.gz etc) → gzip-compressed pprof protobuf
def self.save(path, data, format: nil)
  kind = detect_format(path, format)

  # The two textual formats are written in text mode.
  return File.write(path, Collapsed.encode(data)) if kind == :collapsed
  return File.write(path, Text.encode(data)) if kind == :text

  # Everything else is the binary pprof format.
  File.binwrite(path, gzip(PProf.encode(data)))
end
|
|
77
|
+
|
|
78
|
+
# Resolves the output format.
# An explicit +format+ always wins (converted to a Symbol); otherwise the
# format is chosen from the suffix of +path+, defaulting to :pprof.
def self.detect_format(path, format)
  return format.to_sym if format

  name = path.to_s
  if name.end_with?(".collapsed")
    :collapsed
  elsif name.end_with?(".txt")
    :text
  else
    :pprof
  end
end
|
|
86
|
+
private_class_method :detect_format
|
|
87
|
+
|
|
88
|
+
# Returns +data+ gzip-compressed as a binary (ASCII-8BIT) String.
def self.gzip(data)
  buffer = StringIO.new
  buffer.set_encoding("ASCII-8BIT")

  writer = Zlib::GzipWriter.new(buffer)
  writer.write(data)
  # Closing flushes the gzip trailer into the buffer.
  writer.close

  buffer.string
end
|
|
96
|
+
|
|
97
|
+
# Prints the verbose sampling summary to stderr (mode, frequency, number of
# sampling calls, time spent sampling, recorded sample count), followed by
# the top-N tables from `print_top`. Missing fields default to 0 / :cpu.
def self.print_stats(data)
  calls = data[:sampling_count] || 0
  spent_ns = data[:sampling_time_ns] || 0
  recorded = data[:samples]&.size || 0
  mode = data[:mode] || :cpu
  frequency = data[:frequency] || 0

  spent_ms = spent_ns / 1_000_000.0
  per_call_us = calls > 0 ? spent_ns / calls / 1000.0 : 0.0

  $stderr.puts "[sperf] mode=#{mode} frequency=#{frequency}Hz"
  $stderr.puts "[sperf] sampling: #{calls} calls, #{format("%.2f", spent_ms)}ms total, #{format("%.1f", per_call_us)}us/call avg"
  $stderr.puts "[sperf] samples recorded: #{recorded}"

  print_top(data)
end
|
|
113
|
+
|
|
114
|
+
TOP_N = 10
|
|
115
|
+
|
|
116
|
+
# Prints the "flat" and "cum" top tables for the verbose summary.
#
# Each sample from C is [[[path_str, label_str], ...], weight]; the first
# frame is the leaf (deepest frame). Flat weight is credited to the leaf
# only; cumulative weight is credited once per distinct frame in the stack,
# so recursion does not count a frame twice for the same sample.
def self.print_top(data)
  samples = data[:samples]
  return unless samples && !samples.empty?

  flat = Hash.new(0)
  cum = Hash.new(0)
  total_weight = 0

  samples.each do |frames, weight|
    total_weight += weight
    keys = frames.map { |path, label| [label, path] }

    flat[keys.first] += weight unless keys.empty?
    keys.uniq.each { |key| cum[key] += weight }
  end

  return if cum.empty?

  print_top_table("flat", flat, total_weight)
  print_top_table("cum", cum, total_weight)
end
|
|
147
|
+
|
|
148
|
+
# Prints the TOP_N heaviest entries of +table+ to stderr.
# +table+ maps [label, path] to a weight in nanoseconds; +kind+ is only
# used in the heading. Percentages are relative to +total_weight+.
def self.print_top_table(kind, table, total_weight)
  ranked = table.sort_by { |_, w| -w }.first(TOP_N)
  $stderr.puts "[sperf] top #{ranked.size} by #{kind}:"

  ranked.each do |(label, path), weight|
    millis = weight / 1_000_000.0
    share = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
    location = path.empty? ? "" : " (#{path})"
    $stderr.puts format("[sperf] %8.1fms %5.1f%% %s%s", millis, share, label, location)
  end
end
|
|
159
|
+
|
|
160
|
+
# Prints the `stat` summary to stderr: user/sys/real time, then (only when
# samples were recorded) a time breakdown by leaf category, GC statistics,
# optional YJIT ratio, OS resource usage, the STAT_TOP_N heaviest leaf
# frames, and a footer with sample counts and profiler overhead.
#
# Reads @stat_start_mono (set by `start`) to compute the real time.
def self.print_stat(data)
  samples = data[:samples] || []
  real_ns = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono) * 1_000_000_000).to_i
  cpu_times = Process.times
  user_ns = (cpu_times.utime * 1_000_000_000).to_i
  sys_ns = (cpu_times.stime * 1_000_000_000).to_i

  command = ENV["SPERF_STAT_COMMAND"] || "(unknown)"

  $stderr.puts
  $stderr.puts " Performance stats for '#{command}':"
  $stderr.puts

  # user / sys / real
  [
    [" %14s ms user", user_ns],
    [" %14s ms sys", sys_ns],
    [" %14s ms real", real_ns],
  ].each { |row, ns| $stderr.puts format(row, format_ms(ns)) }

  unless samples.empty?
    # Row layouts: value, unit, (percentage,) label.
    pct_row = " %14s %-2s %5.1f%% %s"
    stat_row = " %14s %-2s %s"

    # Classify each sample by its leaf label; anything that is not one of
    # the synthetic labels counts as CPU execution.
    category_of = {
      "[GVL blocked]" => :gvl_blocked,
      "[GVL wait]" => :gvl_wait,
      "[GC marking]" => :gc_marking,
      "[GC sweeping]" => :gc_sweeping,
    }
    breakdown = Hash.new(0)
    total_weight = 0
    samples.each do |frames, weight|
      total_weight += weight
      leaf_label = frames.first&.last || ""
      breakdown[category_of.fetch(leaf_label, :cpu_execution)] += weight
    end

    $stderr.puts

    [
      [:cpu_execution, "CPU execution"],
      [:gvl_blocked, "[Ruby] GVL blocked (I/O, sleep)"],
      [:gvl_wait, "[Ruby] GVL wait (contention)"],
      [:gc_marking, "[Ruby] GC marking"],
      [:gc_sweeping, "[Ruby] GC sweeping"],
    ].each do |category, title|
      spent = breakdown[category]
      next if spent == 0

      share = total_weight > 0 ? spent * 100.0 / total_weight : 0.0
      $stderr.puts format(pct_row, format_ms(spent), "ms", share, title)
    end

    # GC statistics (cumulative since process start)
    gc = GC.stat
    gc_title = "[Ruby] GC time (%s count: %s minor, %s major)" % [
      format_integer(gc[:count]),
      format_integer(gc[:minor_gc_count]),
      format_integer(gc[:major_gc_count]),
    ]
    $stderr.puts format(stat_row, format_ms(gc[:time] * 1_000_000), "ms", gc_title)
    $stderr.puts format(stat_row, format_integer(gc[:total_allocated_objects]), " ", "[Ruby] allocated objects")
    $stderr.puts format(stat_row, format_integer(gc[:total_freed_objects]), " ", "[Ruby] freed objects")

    if defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled?
      ratio = RubyVM::YJIT.runtime_stats[:ratio_in_yjit]
      if ratio
        $stderr.puts format(stat_row, format("%.1f%%", ratio * 100), " ", "[Ruby] YJIT code execution ratio")
      end
    end

    # System resources
    sys_stats = get_system_stats
    maxrss_kb = sys_stats[:maxrss_kb]
    if maxrss_kb
      $stderr.puts format(stat_row, format_integer((maxrss_kb / 1024.0).round), "MB", "[OS] peak memory (maxrss)")
    end

    if sys_stats[:ctx_voluntary]
      voluntary = sys_stats[:ctx_voluntary]
      involuntary = sys_stats[:ctx_involuntary]
      ctx_title = "[OS] context switches (%s voluntary, %s involuntary)" % [
        format_integer(voluntary),
        format_integer(involuntary),
      ]
      $stderr.puts format(stat_row, format_integer(voluntary + involuntary), " ", ctx_title)
    end

    if sys_stats[:io_read_bytes]
      read_bytes = sys_stats[:io_read_bytes]
      written_bytes = sys_stats[:io_write_bytes]
      io_title = "[OS] disk I/O (%s MB read, %s MB write)" % [
        format_integer((read_bytes / 1024.0 / 1024.0).round),
        format_integer((written_bytes / 1024.0 / 1024.0).round),
      ]
      $stderr.puts format(stat_row, format_integer(((read_bytes + written_bytes) / 1024.0 / 1024.0).round), "MB", io_title)
    end

    # Top N by flat weight, skipping the synthetic GVL/GC leaf frames.
    flat = Hash.new(0)
    samples.each do |frames, weight|
      leaf = frames.first
      next unless leaf

      path, label = leaf
      next if SYNTHETIC_LABELS.include?(label)

      flat[[label, path]] += weight
    end

    unless flat.empty?
      top = flat.sort_by { |_, w| -w }.first(STAT_TOP_N)
      $stderr.puts
      $stderr.puts " Top #{top.size} by flat:"
      top.each do |(label, path), weight|
        share = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
        location = path.empty? ? "" : " (#{path})"
        $stderr.puts format(pct_row, format_ms(weight), "ms", share, "#{label}#{location}")
      end
    end

    # Footer
    unique_stacks = samples.uniq { |frames, _| frames }.size
    overhead_pct = real_ns > 0 ? (data[:sampling_time_ns] || 0) * 100.0 / real_ns : 0.0
    $stderr.puts
    $stderr.puts format(" %d samples (%d unique stacks), %.1f%% profiler overhead",
                        samples.size, unique_stacks, overhead_pct)
  end

  $stderr.puts
end
|
|
296
|
+
|
|
297
|
+
# Renders +n+ with a comma between every group of three digits,
# e.g. 1234567 → "1,234,567".
def self.format_integer(n)
  # Put a comma after every digit that is followed by a whole number of
  # three-digit groups up to the end of its digit run.
  n.to_s.gsub(/(\d)(?=(?:\d{3})+(?!\d))/, '\\1,')
end
|
|
300
|
+
private_class_method :format_integer
|
|
301
|
+
|
|
302
|
+
# Format nanoseconds as ms with 1 decimal place and comma-separated integer part.
# Example: 5_609_200_000 → "5,609.2"
#
# The value is rounded once, to tenths of a millisecond, and then split into
# integer and fractional digits. Rounding the fraction on its own would drop
# the carry (1.96 ms must print as "2.0", not "1.0").
def self.format_ms(ns)
  tenths = (ns / 100_000.0).round
  sign = tenths < 0 ? "-" : ""
  whole, fraction = tenths.abs.divmod(10)
  grouped = whole.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
  "#{sign}#{grouped}.#{fraction}"
end
|
|
311
|
+
private_class_method :format_ms
|
|
312
|
+
|
|
313
|
+
# Collects OS-level statistics for the current process.
# Returns a Hash that may contain :maxrss_kb, :ctx_voluntary,
# :ctx_involuntary, :io_read_bytes and :io_write_bytes; a key is simply
# absent when its source is unavailable.
def self.get_system_stats
  stats = {}

  # Scans a "name: value" file line by line; the first pattern that
  # matches a line stores its captured number under the paired key.
  scan = lambda do |file, fields|
    File.read(file).each_line do |line|
      fields.each do |pattern, key|
        digits = line[pattern, 1]
        next unless digits

        stats[key] = digits.to_i
        break
      end
    end
  end

  if File.readable?("/proc/self/status")
    # Linux: parse /proc/self/status
    scan.call("/proc/self/status", [
      [/\AVmHWM:\s+(\d+)\s+kB/, :maxrss_kb],
      [/\Avoluntary_ctxt_switches:\s+(\d+)/, :ctx_voluntary],
      [/\Anonvoluntary_ctxt_switches:\s+(\d+)/, :ctx_involuntary],
    ])
  else
    # macOS/BSD: ps reports RSS in KB. Best effort: any failure means "unknown".
    rss = begin
      `ps -o rss= -p #{$$}`.strip.to_i
    rescue StandardError
      nil
    end
    stats[:maxrss_kb] = rss if rss && rss > 0
  end

  if File.readable?("/proc/self/io")
    # Linux: parse /proc/self/io
    scan.call("/proc/self/io", [
      [/\Aread_bytes:\s+(\d+)/, :io_read_bytes],
      [/\Awrite_bytes:\s+(\d+)/, :io_write_bytes],
    ])
  end

  stats
end
|
|
349
|
+
private_class_method :get_system_stats
|
|
350
|
+
|
|
351
|
+
# ENV-based auto-start for CLI usage.
# When SPERF_ENABLED=1, profiling starts as soon as this file is loaded and
# stops at process exit. Settings come from SPERF_MODE, SPERF_FREQUENCY,
# SPERF_OUTPUT, SPERF_FORMAT, SPERF_VERBOSE and SPERF_STAT.
if ENV["SPERF_ENABLED"] == "1"
  env_mode = ENV["SPERF_MODE"] || "cpu"
  auto_mode =
    case env_mode
    when "wall" then :wall
    when "cpu" then :cpu
    else
      raise ArgumentError, "SPERF_MODE must be 'cpu' or 'wall', got: #{env_mode.inspect}"
    end

  auto_stat = ENV["SPERF_STAT"] == "1"

  # In stat mode a profile file is only written when explicitly requested;
  # otherwise fall back to the default file name.
  auto_output = ENV["SPERF_OUTPUT"]
  auto_output ||= "sperf.data" unless auto_stat

  start(
    frequency: (ENV["SPERF_FREQUENCY"] || 1000).to_i,
    mode: auto_mode,
    output: auto_output,
    verbose: ENV["SPERF_VERBOSE"] == "1",
    format: ENV["SPERF_FORMAT"]&.to_sym,
    stat: auto_stat
  )
  at_exit { stop }
end
|
|
367
|
+
|
|
368
|
+
# Text report encoder: a plain-text summary with the heaviest frames by
# flat (leaf only) and cumulative (anywhere on the stack) weight.
module Text
  module_function

  # Builds the report for +data+, listing at most +top_n+ rows per table.
  # Returns "No samples recorded.\n" when there are no samples.
  def encode(data, top_n: 50)
    samples = data[:samples]
    mode = data[:mode] || :cpu
    frequency = data[:frequency] || 0

    return "No samples recorded.\n" unless samples && !samples.empty?

    flat = Hash.new(0)
    cum = Hash.new(0)
    total_weight = 0

    samples.each do |frames, weight|
      total_weight += weight
      keys = frames.map { |path, label| [label, path] }

      # The first frame is the leaf; it alone receives flat weight.
      flat[keys.first] += weight unless keys.empty?
      # Each distinct frame receives cumulative weight once per sample.
      keys.uniq.each { |key| cum[key] += weight }
    end

    report = String.new
    report << "Total: #{format("%.1f", total_weight / 1_000_000.0)}ms (#{mode})\n"
    report << "Samples: #{samples.size}, Frequency: #{frequency}Hz\n"
    report << "\n"
    report << format_table("Flat", flat, total_weight, top_n)
    report << "\n"
    report << format_table("Cumulative", cum, total_weight, top_n)
    report
  end

  # Renders one titled table: the +top_n+ heaviest [label, path] entries of
  # +table+, each with its time in ms and its share of +total_weight+.
  def format_table(title, table, total_weight, top_n)
    rows = String.new
    rows << "#{title}:\n"

    table.sort_by { |_, w| -w }.first(top_n).each do |(label, path), weight|
      millis = weight / 1_000_000.0
      share = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
      location = path.empty? ? "" : " (#{path})"
      rows << format(" %8.1fms %5.1f%% %s%s\n", millis, share, label, location)
    end

    rows
  end
end
|
|
424
|
+
|
|
425
|
+
# Collapsed stacks encoder for FlameGraph / speedscope.
# Output: one line per unique stack, "frame1;frame2;...;leafN weight\n"
module Collapsed
  module_function

  # Encodes +data+ as collapsed stacks.
  #
  # Samples are [[[path, label], ...], weight] with the leaf frame first, so
  # each stack is reversed to get the root-first order of the collapsed
  # format. Only labels are emitted. Weights of identical stacks are summed.
  #
  # Missing samples (nil) are treated like an empty list, matching how the
  # other encoders and printers in this file handle absent sample data.
  def encode(data)
    merged = Hash.new(0)
    Array(data[:samples]).each do |frames, weight|
      stack = frames.reverse.map { |_, label| label }.join(";")
      merged[stack] += weight
    end
    merged.map { |stack, weight| "#{stack} #{weight}" }.join("\n") + "\n"
  end
end
|
|
439
|
+
|
|
440
|
+
# Hand-written protobuf encoder for pprof profile format.
# Only runs once at stop time, so performance is not critical.
#
# Samples from C are: [[[path_str, label_str], ...], weight]
# This encoder builds its own string table for pprof output.
module PProf
  module_function

  # Encodes +data+ as an uncompressed pprof Profile message (binary String).
  #
  # Reads data[:samples], data[:frequency] and data[:mode]. Missing samples
  # are treated as an empty list, and a missing or non-positive frequency
  # yields a period of 0 instead of raising (ZeroDivisionError / TypeError).
  def encode(data)
    samples_raw = data[:samples] || []
    frequency = data[:frequency].to_i
    interval_ns = frequency > 0 ? 1_000_000_000 / frequency : 0
    mode = data[:mode] || :cpu

    # Build string table: index 0 must be ""
    string_table = [""]
    string_index = { "" => 0 }

    intern = ->(s) {
      string_index[s] ||= begin
        idx = string_table.size
        string_table << s
        idx
      end
    }

    # Convert string frames to index frames and merge identical stacks
    merged = Hash.new(0)
    samples_raw.each do |frames, weight|
      key = frames.map { |path, label| [intern.(path), intern.(label)] }
      merged[key] += weight
    end
    merged = merged.to_a

    # Build location/function tables
    locations, functions = build_tables(merged)

    # Intern type label and unit
    type_label = mode == :wall ? "wall" : "cpu"
    type_idx = intern.(type_label)
    ns_idx = intern.("nanoseconds")

    # Encode Profile message
    buf = "".b

    # field 1: sample_type (repeated ValueType)
    buf << encode_message(1, encode_value_type(type_idx, ns_idx))

    # field 2: sample (repeated Sample)
    merged.each do |frames, weight|
      sample_buf = "".b
      loc_ids = frames.map { |f| locations[f] }
      sample_buf << encode_packed_uint64(1, loc_ids) # location_id
      sample_buf << encode_packed_int64(2, [weight]) # value
      buf << encode_message(2, sample_buf)
    end

    # field 4: location (repeated Location)
    locations.each do |frame, loc_id|
      loc_buf = "".b
      loc_buf << encode_uint64(1, loc_id)
      line_buf = "".b
      line_buf << encode_uint64(1, functions[frame]) # Line.function_id
      loc_buf << encode_message(4, line_buf)
      buf << encode_message(4, loc_buf)
    end

    # field 5: function (repeated Function)
    functions.each do |frame, func_id|
      func_buf = "".b
      func_buf << encode_uint64(1, func_id)
      func_buf << encode_int64(2, frame[1]) # name (label_idx)
      func_buf << encode_int64(4, frame[0]) # filename (path_idx)
      buf << encode_message(5, func_buf)
    end

    # field 6: string_table (repeated string)
    string_table.each do |s|
      buf << encode_bytes(6, s.encode("UTF-8"))
    end

    # field 11: period_type (ValueType)
    buf << encode_message(11, encode_value_type(type_idx, ns_idx))

    # field 12: period (int64)
    buf << encode_int64(12, interval_ns)

    buf
  end

  # Assigns ids (starting at 1) to every distinct frame in +merged+.
  # Returns [locations, functions]; both map a frame to the same id, i.e.
  # there is exactly one function per location.
  def build_tables(merged)
    locations = {}
    functions = {}
    next_id = 1

    merged.each do |frames, _weight|
      frames.each do |frame|
        next if locations.key?(frame)

        locations[frame] = next_id
        functions[frame] = next_id
        next_id += 1
      end
    end

    [locations, functions]
  end

  # --- Protobuf encoding helpers ---

  # Base-128 varint. Negative values are encoded as their 64-bit
  # two's-complement representation.
  def encode_varint(value)
    value = value & 0xFFFFFFFF_FFFFFFFF if value < 0
    buf = "".b
    loop do
      byte = value & 0x7F
      value >>= 7
      if value > 0
        buf << (byte | 0x80).chr # continuation bit: more bytes follow
      else
        buf << byte.chr
        break
      end
    end
    buf
  end

  # Varint field (wire type 0) holding an unsigned value.
  def encode_uint64(field, value)
    encode_varint((field << 3) | 0) + encode_varint(value)
  end

  # Varint field (wire type 0) holding a signed value.
  def encode_int64(field, value)
    encode_varint((field << 3) | 0) + encode_varint(value < 0 ? value + (1 << 64) : value)
  end

  # Length-delimited field (wire type 2) with raw bytes.
  def encode_bytes(field, data)
    data = data.b if data.respond_to?(:b)
    encode_varint((field << 3) | 2) + encode_varint(data.bytesize) + data
  end

  # Embedded message: length-delimited, same wire format as bytes.
  def encode_message(field, data)
    encode_bytes(field, data)
  end

  # ValueType message: type (field 1) and unit (field 2) as string-table indices.
  def encode_value_type(type_idx, unit_idx)
    encode_int64(1, type_idx) + encode_int64(2, unit_idx)
  end

  # Packed repeated uint64 field.
  def encode_packed_uint64(field, values)
    inner = values.map { |v| encode_varint(v) }.join
    encode_bytes(field, inner)
  end

  # Packed repeated int64 field.
  def encode_packed_int64(field, values)
    inner = values.map { |v| encode_varint(v < 0 ? v + (1 << 64) : v) }.join
    encode_bytes(field, inner)
  end
end
|
|
598
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: sperf
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Koichi Sasada
|
|
8
|
+
bindir: exe
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: rake-compiler
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
-
|
|
17
|
+
- ~>
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: "1.2"
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
-
|
|
25
|
+
- ~>
|
|
26
|
+
- !ruby/object:Gem::Version
|
|
27
|
+
version: "1.2"
|
|
28
|
+
- !ruby/object:Gem::Dependency
|
|
29
|
+
name: test-unit
|
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
|
31
|
+
requirements:
|
|
32
|
+
-
|
|
33
|
+
- ~>
|
|
34
|
+
- !ruby/object:Gem::Version
|
|
35
|
+
version: "3.6"
|
|
36
|
+
type: :development
|
|
37
|
+
prerelease: false
|
|
38
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
39
|
+
requirements:
|
|
40
|
+
-
|
|
41
|
+
- ~>
|
|
42
|
+
- !ruby/object:Gem::Version
|
|
43
|
+
version: "3.6"
|
|
44
|
+
description: "A safepoint-based sampling performance profiler that uses thread CPU time deltas as weights to correct safepoint bias. Outputs pprof, collapsed stacks, or text report."
|
|
45
|
+
executables:
|
|
46
|
+
- sperf
|
|
47
|
+
extensions:
|
|
48
|
+
- ext/sperf/extconf.rb
|
|
49
|
+
extra_rdoc_files: []
|
|
50
|
+
files:
|
|
51
|
+
- README.md
|
|
52
|
+
- exe/sperf
|
|
53
|
+
- ext/sperf/extconf.rb
|
|
54
|
+
- ext/sperf/sperf.c
|
|
55
|
+
- lib/sperf.rb
|
|
56
|
+
homepage: "https://github.com/ko1/sperf"
|
|
57
|
+
licenses:
|
|
58
|
+
- MIT
|
|
59
|
+
metadata: {}
|
|
60
|
+
rdoc_options: []
|
|
61
|
+
require_paths:
|
|
62
|
+
- lib
|
|
63
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
-
|
|
66
|
+
- ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: 3.4.0
|
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
70
|
+
requirements:
|
|
71
|
+
-
|
|
72
|
+
- ">="
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: "0"
|
|
75
|
+
requirements: []
|
|
76
|
+
rubygems_version: 4.1.0.dev
|
|
77
|
+
specification_version: 4
|
|
78
|
+
summary: Safepoint-based sampling performance profiler for Ruby
|
|
79
|
+
test_files: []
|