sperf 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/sperf.rb CHANGED
@@ -1,607 +1 @@
1
- require "sperf.so"
2
- require "sperf/version"
3
- require "zlib"
4
- require "stringio"
5
-
6
- module Sperf
7
-
8
- @verbose = false
9
- @output = nil
10
- @stat = false
11
- @stat_start_mono = nil
12
- STAT_TOP_N = 5
13
- SYNTHETIC_LABELS = %w[[GVL\ blocked] [GVL\ wait] [GC\ marking] [GC\ sweeping]].freeze
14
-
15
- # Starts profiling.
16
- # format: :pprof, :collapsed, or :text. nil = auto-detect from output extension
17
- # .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
18
- # .txt → text report (human/AI readable flat + cumulative table)
19
- # otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
20
- def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil)
21
- @verbose = verbose || ENV["SPERF_VERBOSE"] == "1"
22
- @output = output
23
- @format = format
24
- @stat = stat
25
- @stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC) if @stat
26
- c_opts = { frequency: frequency, mode: mode }
27
- c_opts[:signal] = signal unless signal.nil?
28
- _c_start(**c_opts)
29
-
30
- if block_given?
31
- begin
32
- yield
33
- ensure
34
- return stop
35
- end
36
- end
37
- end
38
-
39
- def self.stop
40
- data = _c_stop
41
- return unless data
42
-
43
- print_stats(data) if @verbose
44
- print_stat(data) if @stat
45
-
46
- if @output
47
- fmt = detect_format(@output, @format)
48
- case fmt
49
- when :collapsed
50
- File.write(@output, Collapsed.encode(data))
51
- when :text
52
- File.write(@output, Text.encode(data))
53
- else
54
- File.binwrite(@output, gzip(PProf.encode(data)))
55
- end
56
- @output = nil
57
- @format = nil
58
- end
59
-
60
- data
61
- end
62
-
63
- # Saves profiling data to a file.
64
- # format: :pprof, :collapsed, or :text. nil = auto-detect from path extension
65
- # .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
66
- # .txt → text report (human/AI readable flat + cumulative table)
67
- # otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
68
- def self.save(path, data, format: nil)
69
- fmt = detect_format(path, format)
70
- case fmt
71
- when :collapsed
72
- File.write(path, Collapsed.encode(data))
73
- when :text
74
- File.write(path, Text.encode(data))
75
- else
76
- File.binwrite(path, gzip(PProf.encode(data)))
77
- end
78
- end
79
-
80
- def self.detect_format(path, format)
81
- return format.to_sym if format
82
- case path.to_s
83
- when /\.collapsed\z/ then :collapsed
84
- when /\.txt\z/ then :text
85
- else :pprof
86
- end
87
- end
88
- private_class_method :detect_format
89
-
90
- def self.gzip(data)
91
- io = StringIO.new
92
- io.set_encoding("ASCII-8BIT")
93
- gz = Zlib::GzipWriter.new(io)
94
- gz.write(data)
95
- gz.close
96
- io.string
97
- end
98
-
99
- def self.print_stats(data)
100
- count = data[:sampling_count] || 0
101
- total_ns = data[:sampling_time_ns] || 0
102
- samples = data[:samples]&.size || 0
103
- mode = data[:mode] || :cpu
104
- frequency = data[:frequency] || 0
105
-
106
- total_ms = total_ns / 1_000_000.0
107
- avg_us = count > 0 ? total_ns / count / 1000.0 : 0.0
108
-
109
- $stderr.puts "[sperf] mode=#{mode} frequency=#{frequency}Hz"
110
- $stderr.puts "[sperf] sampling: #{count} calls, #{format("%.2f", total_ms)}ms total, #{format("%.1f", avg_us)}us/call avg"
111
- $stderr.puts "[sperf] samples recorded: #{samples}"
112
-
113
- print_top(data)
114
- end
115
-
116
- TOP_N = 10
117
-
118
- # Samples from C are now [[path_str, label_str], ...], weight]
119
- def self.print_top(data)
120
- samples_raw = data[:samples]
121
- return if !samples_raw || samples_raw.empty?
122
-
123
- flat = Hash.new(0)
124
- cum = Hash.new(0)
125
- total_weight = 0
126
-
127
- samples_raw.each do |frames, weight|
128
- total_weight += weight
129
- seen = {}
130
-
131
- frames.each_with_index do |frame, i|
132
- path, label = frame
133
- key = [label, path]
134
-
135
- flat[key] += weight if i == 0 # leaf = first element (deepest frame)
136
-
137
- unless seen[key]
138
- cum[key] += weight
139
- seen[key] = true
140
- end
141
- end
142
- end
143
-
144
- return if cum.empty?
145
-
146
- print_top_table("flat", flat, total_weight)
147
- print_top_table("cum", cum, total_weight)
148
- end
149
-
150
- def self.print_top_table(kind, table, total_weight)
151
- top = table.sort_by { |_, w| -w }.first(TOP_N)
152
- $stderr.puts "[sperf] top #{top.size} by #{kind}:"
153
- top.each do |key, weight|
154
- label, path = key
155
- ms = weight / 1_000_000.0
156
- pct = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
157
- loc = path.empty? ? "" : " (#{path})"
158
- $stderr.puts format("[sperf] %8.1fms %5.1f%% %s%s", ms, pct, label, loc)
159
- end
160
- end
161
-
162
- def self.print_stat(data)
163
- samples_raw = data[:samples] || []
164
- real_ns = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono) * 1_000_000_000).to_i
165
- times = Process.times
166
- user_ns = (times.utime * 1_000_000_000).to_i
167
- sys_ns = (times.stime * 1_000_000_000).to_i
168
-
169
- command = ENV["SPERF_STAT_COMMAND"] || "(unknown)"
170
-
171
- $stderr.puts
172
- $stderr.puts " Performance stats for '#{command}':"
173
- $stderr.puts
174
-
175
- # user / sys / real
176
- $stderr.puts format(" %14s ms user", format_ms(user_ns))
177
- $stderr.puts format(" %14s ms sys", format_ms(sys_ns))
178
- $stderr.puts format(" %14s ms real", format_ms(real_ns))
179
-
180
- # Time breakdown from samples
181
- if samples_raw.size > 0
182
- breakdown = Hash.new(0)
183
- total_weight = 0
184
-
185
- samples_raw.each do |frames, weight|
186
- total_weight += weight
187
- leaf_label = frames.first&.last || ""
188
- category = case leaf_label
189
- when "[GVL blocked]" then :gvl_blocked
190
- when "[GVL wait]" then :gvl_wait
191
- when "[GC marking]" then :gc_marking
192
- when "[GC sweeping]" then :gc_sweeping
193
- else :cpu_execution
194
- end
195
- breakdown[category] += weight
196
- end
197
-
198
- # Column layout: " %14s %2s %6s label"
199
- # value(14) + unit(2) + pct(6) + gap(2) + label
200
- pct_line = ->(val, unit, pct, label) {
201
- format(" %14s %-2s %5.1f%% %s", val, unit, pct, label)
202
- }
203
- stat_line = ->(val, unit, label) {
204
- format(" %14s %-2s %s", val, unit, label)
205
- }
206
-
207
- $stderr.puts
208
-
209
- [
210
- [:cpu_execution, "CPU execution"],
211
- [:gvl_blocked, "[Ruby] GVL blocked (I/O, sleep)"],
212
- [:gvl_wait, "[Ruby] GVL wait (contention)"],
213
- [:gc_marking, "[Ruby] GC marking"],
214
- [:gc_sweeping, "[Ruby] GC sweeping"],
215
- ].each do |key, label|
216
- w = breakdown[key]
217
- next if w == 0
218
- pct = total_weight > 0 ? w * 100.0 / total_weight : 0.0
219
- $stderr.puts pct_line.call(format_ms(w), "ms", pct, label)
220
- end
221
-
222
- # GC statistics (cumulative since process start)
223
- gc = GC.stat
224
- $stderr.puts stat_line.call(format_ms(gc[:time] * 1_000_000), "ms",
225
- "[Ruby] GC time (%s count: %s minor, %s major)" % [
226
- format_integer(gc[:count]),
227
- format_integer(gc[:minor_gc_count]),
228
- format_integer(gc[:major_gc_count])])
229
- $stderr.puts stat_line.call(format_integer(gc[:total_allocated_objects]), " ", "[Ruby] allocated objects")
230
- $stderr.puts stat_line.call(format_integer(gc[:total_freed_objects]), " ", "[Ruby] freed objects")
231
- if defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled?
232
- yjit = RubyVM::YJIT.runtime_stats
233
- if yjit[:ratio_in_yjit]
234
- $stderr.puts stat_line.call(format("%.1f%%", yjit[:ratio_in_yjit] * 100), " ", "[Ruby] YJIT code execution ratio")
235
- end
236
- end
237
-
238
- # System resources
239
- sys_stats = get_system_stats
240
- maxrss_kb = sys_stats[:maxrss_kb]
241
- if maxrss_kb
242
- $stderr.puts stat_line.call(format_integer((maxrss_kb / 1024.0).round), "MB", "[OS] peak memory (maxrss)")
243
- end
244
- if sys_stats[:ctx_voluntary]
245
- $stderr.puts stat_line.call(
246
- format_integer(sys_stats[:ctx_voluntary] + sys_stats[:ctx_involuntary]), " ",
247
- "[OS] context switches (%s voluntary, %s involuntary)" % [
248
- format_integer(sys_stats[:ctx_voluntary]),
249
- format_integer(sys_stats[:ctx_involuntary])])
250
- end
251
- if sys_stats[:io_read_bytes]
252
- r = sys_stats[:io_read_bytes]
253
- w = sys_stats[:io_write_bytes]
254
- $stderr.puts stat_line.call(
255
- format_integer(((r + w) / 1024.0 / 1024.0).round), "MB",
256
- "[OS] disk I/O (%s MB read, %s MB write)" % [
257
- format_integer((r / 1024.0 / 1024.0).round),
258
- format_integer((w / 1024.0 / 1024.0).round)])
259
- end
260
-
261
- # Top N by flat
262
- flat = Hash.new(0)
263
- samples_raw.each do |frames, weight|
264
- frames.each_with_index do |frame, i|
265
- if i == 0
266
- _, label = frame
267
- next if SYNTHETIC_LABELS.include?(label)
268
- flat[[label, frame[0]]] += weight
269
- end
270
- end
271
- end
272
-
273
- unless flat.empty?
274
- top = flat.sort_by { |_, w| -w }.first(STAT_TOP_N)
275
- $stderr.puts
276
- $stderr.puts " Top #{top.size} by flat:"
277
- top.each do |key, weight|
278
- label, path = key
279
- pct = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
280
- loc = path.empty? ? "" : " (#{path})"
281
- $stderr.puts pct_line.call(format_ms(weight), "ms", pct, "#{label}#{loc}")
282
- end
283
- end
284
-
285
- end
286
-
287
- # Footer
288
- if samples_raw.size > 0
289
- unique_stacks = samples_raw.map { |frames, _| frames }.uniq.size
290
- overhead_pct = real_ns > 0 ? (data[:sampling_time_ns] || 0) * 100.0 / real_ns : 0.0
291
- $stderr.puts
292
- $stderr.puts format(" %d samples (%d unique stacks), %.1f%% profiler overhead",
293
- samples_raw.size, unique_stacks, overhead_pct)
294
- end
295
-
296
- $stderr.puts
297
- end
298
-
299
- def self.format_integer(n)
300
- n.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
301
- end
302
- private_class_method :format_integer
303
-
304
- # Format nanoseconds as ms with 1 decimal place and comma-separated integer part.
305
- # Example: 5_609_200_000 → "5,609.2"
306
- def self.format_ms(ns)
307
- ms = ns / 1_000_000.0
308
- int_part = ms.truncate
309
- frac = format(".%d", ((ms - int_part).abs * 10).round % 10)
310
- int_str = int_part.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
311
- "#{int_str}#{frac}"
312
- end
313
- private_class_method :format_ms
314
-
315
- # Collect system-level stats. Returns a hash; missing keys are omitted.
316
- def self.get_system_stats
317
- stats = {}
318
-
319
- if File.readable?("/proc/self/status")
320
- # Linux: parse /proc/self/status
321
- File.read("/proc/self/status").each_line do |line|
322
- case line
323
- when /\AVmHWM:\s+(\d+)\s+kB/
324
- stats[:maxrss_kb] = $1.to_i
325
- when /\Avoluntary_ctxt_switches:\s+(\d+)/
326
- stats[:ctx_voluntary] = $1.to_i
327
- when /\Anonvoluntary_ctxt_switches:\s+(\d+)/
328
- stats[:ctx_involuntary] = $1.to_i
329
- end
330
- end
331
- else
332
- # macOS/BSD: ps reports RSS in KB
333
- rss = `ps -o rss= -p #{$$}`.strip.to_i rescue nil
334
- stats[:maxrss_kb] = rss if rss && rss > 0
335
- end
336
-
337
- if File.readable?("/proc/self/io")
338
- # Linux: parse /proc/self/io
339
- File.read("/proc/self/io").each_line do |line|
340
- case line
341
- when /\Aread_bytes:\s+(\d+)/
342
- stats[:io_read_bytes] = $1.to_i
343
- when /\Awrite_bytes:\s+(\d+)/
344
- stats[:io_write_bytes] = $1.to_i
345
- end
346
- end
347
- end
348
-
349
- stats
350
- end
351
- private_class_method :get_system_stats
352
-
353
- # ENV-based auto-start for CLI usage
354
- if ENV["SPERF_ENABLED"] == "1"
355
- _sperf_mode_str = ENV["SPERF_MODE"] || "cpu"
356
- unless %w[cpu wall].include?(_sperf_mode_str)
357
- raise ArgumentError, "SPERF_MODE must be 'cpu' or 'wall', got: #{_sperf_mode_str.inspect}"
358
- end
359
- _sperf_mode = _sperf_mode_str == "wall" ? :wall : :cpu
360
- _sperf_format = ENV["SPERF_FORMAT"] ? ENV["SPERF_FORMAT"].to_sym : nil
361
- _sperf_stat = ENV["SPERF_STAT"] == "1"
362
- _sperf_signal = case ENV["SPERF_SIGNAL"]
363
- when nil then nil
364
- when "false" then false
365
- else ENV["SPERF_SIGNAL"].to_i
366
- end
367
- _sperf_start_opts = { frequency: (ENV["SPERF_FREQUENCY"] || 1000).to_i, mode: _sperf_mode,
368
- output: _sperf_stat ? ENV["SPERF_OUTPUT"] : (ENV["SPERF_OUTPUT"] || "sperf.data"),
369
- verbose: ENV["SPERF_VERBOSE"] == "1",
370
- format: _sperf_format,
371
- stat: _sperf_stat }
372
- _sperf_start_opts[:signal] = _sperf_signal unless _sperf_signal.nil?
373
- start(**_sperf_start_opts)
374
- at_exit { stop }
375
- end
376
-
377
- # Text report encoder — human/AI readable flat + cumulative top-N table.
378
- module Text
379
- module_function
380
-
381
- def encode(data, top_n: 50)
382
- samples_raw = data[:samples]
383
- mode = data[:mode] || :cpu
384
- frequency = data[:frequency] || 0
385
-
386
- return "No samples recorded.\n" if !samples_raw || samples_raw.empty?
387
-
388
- flat = Hash.new(0)
389
- cum = Hash.new(0)
390
- total_weight = 0
391
-
392
- samples_raw.each do |frames, weight|
393
- total_weight += weight
394
- seen = {}
395
-
396
- frames.each_with_index do |frame, i|
397
- path, label = frame
398
- key = [label, path]
399
- flat[key] += weight if i == 0
400
-
401
- unless seen[key]
402
- cum[key] += weight
403
- seen[key] = true
404
- end
405
- end
406
- end
407
-
408
- out = String.new
409
- total_ms = total_weight / 1_000_000.0
410
- out << "Total: #{"%.1f" % total_ms}ms (#{mode})\n"
411
- out << "Samples: #{samples_raw.size}, Frequency: #{frequency}Hz\n"
412
- out << "\n"
413
- out << format_table("Flat", flat, total_weight, top_n)
414
- out << "\n"
415
- out << format_table("Cumulative", cum, total_weight, top_n)
416
- out
417
- end
418
-
419
- def format_table(title, table, total_weight, top_n)
420
- sorted = table.sort_by { |_, w| -w }.first(top_n)
421
- out = String.new
422
- out << "#{title}:\n"
423
- sorted.each do |key, weight|
424
- label, path = key
425
- ms = weight / 1_000_000.0
426
- pct = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
427
- loc = path.empty? ? "" : " (#{path})"
428
- out << (" %8.1fms %5.1f%% %s%s\n" % [ms, pct, label, loc])
429
- end
430
- out
431
- end
432
- end
433
-
434
- # Collapsed stacks encoder for FlameGraph / speedscope.
435
- # Output: one line per unique stack, "frame1;frame2;...;leafN weight\n"
436
- module Collapsed
437
- module_function
438
-
439
- def encode(data)
440
- merged = Hash.new(0)
441
- data[:samples].each do |frames, weight|
442
- key = frames.reverse.map { |_, label| label }.join(";")
443
- merged[key] += weight
444
- end
445
- merged.map { |stack, weight| "#{stack} #{weight}" }.join("\n") + "\n"
446
- end
447
- end
448
-
449
- # Hand-written protobuf encoder for pprof profile format.
450
- # Only runs once at stop time, so performance is not critical.
451
- #
452
- # Samples from C are: [[[path_str, label_str], ...], weight]
453
- # This encoder builds its own string table for pprof output.
454
- module PProf
455
- module_function
456
-
457
- def encode(data)
458
- samples_raw = data[:samples]
459
- frequency = data[:frequency]
460
- interval_ns = 1_000_000_000 / frequency
461
- mode = data[:mode] || :cpu
462
-
463
- # Build string table: index 0 must be ""
464
- string_table = [""]
465
- string_index = { "" => 0 }
466
-
467
- intern = ->(s) {
468
- string_index[s] ||= begin
469
- idx = string_table.size
470
- string_table << s
471
- idx
472
- end
473
- }
474
-
475
- # Convert string frames to index frames and merge identical stacks
476
- merged = Hash.new(0)
477
- samples_raw.each do |frames, weight|
478
- key = frames.map { |path, label| [intern.(path), intern.(label)] }
479
- merged[key] += weight
480
- end
481
- merged = merged.to_a
482
-
483
- # Build location/function tables
484
- locations, functions = build_tables(merged)
485
-
486
- # Intern type label and unit
487
- type_label = mode == :wall ? "wall" : "cpu"
488
- type_idx = intern.(type_label)
489
- ns_idx = intern.("nanoseconds")
490
-
491
- # Encode Profile message
492
- buf = "".b
493
-
494
- # field 1: sample_type (repeated ValueType)
495
- buf << encode_message(1, encode_value_type(type_idx, ns_idx))
496
-
497
- # field 2: sample (repeated Sample)
498
- merged.each do |frames, weight|
499
- sample_buf = "".b
500
- loc_ids = frames.map { |f| locations[f] }
501
- sample_buf << encode_packed_uint64(1, loc_ids)
502
- sample_buf << encode_packed_int64(2, [weight])
503
- buf << encode_message(2, sample_buf)
504
- end
505
-
506
- # field 4: location (repeated Location)
507
- locations.each do |frame, loc_id|
508
- loc_buf = "".b
509
- loc_buf << encode_uint64(1, loc_id)
510
- line_buf = "".b
511
- func_id = functions[frame]
512
- line_buf << encode_uint64(1, func_id)
513
- loc_buf << encode_message(4, line_buf)
514
- buf << encode_message(4, loc_buf)
515
- end
516
-
517
- # field 5: function (repeated Function)
518
- functions.each do |frame, func_id|
519
- func_buf = "".b
520
- func_buf << encode_uint64(1, func_id)
521
- func_buf << encode_int64(2, frame[1]) # name (label_idx)
522
- func_buf << encode_int64(4, frame[0]) # filename (path_idx)
523
- buf << encode_message(5, func_buf)
524
- end
525
-
526
- # field 6: string_table (repeated string)
527
- string_table.each do |s|
528
- buf << encode_bytes(6, s.encode("UTF-8"))
529
- end
530
-
531
- # field 11: period_type (ValueType)
532
- buf << encode_message(11, encode_value_type(type_idx, ns_idx))
533
-
534
- # field 12: period (int64)
535
- buf << encode_int64(12, interval_ns)
536
-
537
- buf
538
- end
539
-
540
- def build_tables(merged)
541
- locations = {}
542
- functions = {}
543
- next_id = 1
544
-
545
- merged.each do |frames, _weight|
546
- frames.each do |frame|
547
- unless locations.key?(frame)
548
- locations[frame] = next_id
549
- functions[frame] = next_id
550
- next_id += 1
551
- end
552
- end
553
- end
554
-
555
- [locations, functions]
556
- end
557
-
558
- # --- Protobuf encoding helpers ---
559
-
560
- def encode_varint(value)
561
- value = value & 0xFFFFFFFF_FFFFFFFF if value < 0
562
- buf = "".b
563
- loop do
564
- byte = value & 0x7F
565
- value >>= 7
566
- if value > 0
567
- buf << (byte | 0x80).chr
568
- else
569
- buf << byte.chr
570
- break
571
- end
572
- end
573
- buf
574
- end
575
-
576
- def encode_uint64(field, value)
577
- encode_varint((field << 3) | 0) + encode_varint(value)
578
- end
579
-
580
- def encode_int64(field, value)
581
- encode_varint((field << 3) | 0) + encode_varint(value < 0 ? value + (1 << 64) : value)
582
- end
583
-
584
- def encode_bytes(field, data)
585
- data = data.b if data.respond_to?(:b)
586
- encode_varint((field << 3) | 2) + encode_varint(data.bytesize) + data
587
- end
588
-
589
- def encode_message(field, data)
590
- encode_bytes(field, data)
591
- end
592
-
593
- def encode_value_type(type_idx, unit_idx)
594
- encode_int64(1, type_idx) + encode_int64(2, unit_idx)
595
- end
596
-
597
- def encode_packed_uint64(field, values)
598
- inner = values.map { |v| encode_varint(v) }.join
599
- encode_bytes(field, inner)
600
- end
601
-
602
- def encode_packed_int64(field, values)
603
- inner = values.map { |v| encode_varint(v < 0 ? v + (1 << 64) : v) }.join
604
- encode_bytes(field, inner)
605
- end
606
- end
607
- end
1
+ raise "sperf was renamed to rperf"