rperf 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -6
- data/docs/help.md +179 -10
- data/exe/rperf +247 -53
- data/ext/rperf/rperf.c +96 -43
- data/lib/rperf/meta.rb +343 -0
- data/lib/rperf/rack.rb +7 -2
- data/lib/rperf/table.rb +156 -0
- data/lib/rperf/version.rb +1 -1
- data/lib/rperf/viewer/viewer.html +1148 -0
- data/lib/rperf/viewer.rb +101 -653
- data/lib/rperf.rb +208 -69
- metadata +4 -1
data/lib/rperf.rb
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
require_relative "rperf/version"
|
|
2
|
+
require_relative "rperf/meta"
|
|
3
|
+
require_relative "rperf/table"
|
|
2
4
|
require "zlib"
|
|
3
5
|
require "stringio"
|
|
4
6
|
|
|
@@ -67,13 +69,15 @@ module Rperf
|
|
|
67
69
|
@stat = stat
|
|
68
70
|
@stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
69
71
|
@stat_start_times = Process.times
|
|
72
|
+
@gc_stat_start = GC.stat
|
|
73
|
+
@gc_stat_snapshot_base = @gc_stat_start
|
|
70
74
|
@label_set_table = nil
|
|
71
75
|
@label_set_index = nil
|
|
72
76
|
_c_start(frequency, c_mode, aggregate, c_signal, defer)
|
|
73
77
|
|
|
74
78
|
# Set up child process tracking
|
|
75
79
|
if inherit && !ENV["RPERF_SESSION_DIR"]
|
|
76
|
-
_setup_inherit(mode, frequency, signal, aggregate,
|
|
80
|
+
_setup_inherit(mode, frequency, signal, aggregate, inherit, defer)
|
|
77
81
|
end
|
|
78
82
|
|
|
79
83
|
if block_given?
|
|
@@ -86,6 +90,10 @@ module Rperf
|
|
|
86
90
|
end
|
|
87
91
|
end
|
|
88
92
|
|
|
93
|
+
# Defensive fallback for process times when start didn't record them
|
|
94
|
+
ZERO_TIMES = Struct.new(:utime, :stime).new(0.0, 0.0).freeze
|
|
95
|
+
private_constant :ZERO_TIMES
|
|
96
|
+
|
|
89
97
|
# VM state integer → label value mapping.
|
|
90
98
|
# These values appear as "%GVL" / "%GC" label keys in label_sets.
|
|
91
99
|
VM_STATE_LABELS = {
|
|
@@ -111,10 +119,28 @@ module Rperf
|
|
|
111
119
|
|
|
112
120
|
# Record process times for multi-process aggregation
|
|
113
121
|
times = Process.times
|
|
114
|
-
start_times = @stat_start_times ||
|
|
122
|
+
start_times = @stat_start_times || ZERO_TIMES
|
|
115
123
|
data[:user_ns] = ((times.utime - start_times.utime) * 1_000_000_000).to_i
|
|
116
124
|
data[:sys_ns] = ((times.stime - start_times.stime) * 1_000_000_000).to_i
|
|
117
125
|
|
|
126
|
+
# GC / memory statistics for the summary (deltas since start; GC.stat is
|
|
127
|
+
# cumulative over the process lifetime). maxrss is a process-lifetime
|
|
128
|
+
# peak — no delta is possible.
|
|
129
|
+
if @gc_stat_start
|
|
130
|
+
gc = GC.stat
|
|
131
|
+
data[:gc_stats] = {
|
|
132
|
+
count: gc[:count] - @gc_stat_start[:count],
|
|
133
|
+
minor_count: gc[:minor_gc_count] - @gc_stat_start[:minor_gc_count],
|
|
134
|
+
major_count: gc[:major_gc_count] - @gc_stat_start[:major_gc_count],
|
|
135
|
+
time_ms: (gc[:time] || 0) - (@gc_stat_start[:time] || 0),
|
|
136
|
+
allocated_objects: gc[:total_allocated_objects] - @gc_stat_start[:total_allocated_objects],
|
|
137
|
+
freed_objects: gc[:total_freed_objects] - @gc_stat_start[:total_freed_objects],
|
|
138
|
+
}
|
|
139
|
+
@gc_stat_start = nil
|
|
140
|
+
end
|
|
141
|
+
sys_stats = get_system_stats
|
|
142
|
+
data[:maxrss_mb] = (sys_stats[:maxrss_kb] / 1024.0).round if sys_stats[:maxrss_kb]
|
|
143
|
+
|
|
118
144
|
# When aggregate: false, C extension returns :raw_samples but not
|
|
119
145
|
# :aggregated_samples. Build aggregated view so encoders always work.
|
|
120
146
|
if data[:raw_samples] && !data[:aggregated_samples]
|
|
@@ -138,11 +164,11 @@ module Rperf
|
|
|
138
164
|
# Root's @output/@format/@stat are preserved for the merged result.
|
|
139
165
|
print_stats(data) if @verbose
|
|
140
166
|
begin
|
|
141
|
-
|
|
167
|
+
write_data(File.join(session_dir, "profile-#{Process.pid}.json.gz"), data, :json, internal: true)
|
|
142
168
|
rescue SystemCallError
|
|
143
169
|
# Session dir may have been removed (e.g., test scenario) — continue to aggregation
|
|
144
170
|
end
|
|
145
|
-
merged = _aggregate_and_report
|
|
171
|
+
merged = _aggregate_and_report(data)
|
|
146
172
|
if merged.nil? && data
|
|
147
173
|
# Aggregation failed — fall back to root's own data
|
|
148
174
|
$stderr.puts "rperf: warning: multi-process aggregation failed; writing root process data only"
|
|
@@ -160,7 +186,7 @@ module Rperf
|
|
|
160
186
|
if @_session_dir_output
|
|
161
187
|
# Child process writing to session dir — tolerate missing dir
|
|
162
188
|
begin
|
|
163
|
-
write_data(@output, data, @format)
|
|
189
|
+
write_data(@output, data, @format, internal: true)
|
|
164
190
|
rescue SystemCallError
|
|
165
191
|
# Parent may have already cleaned up the session dir (e.g., parent
|
|
166
192
|
# exited first and rm_rf'd it), or disk is full. Silently skip —
|
|
@@ -216,6 +242,23 @@ module Rperf
|
|
|
216
242
|
def self.snapshot(clear: false)
|
|
217
243
|
data = _c_snapshot(clear)
|
|
218
244
|
return unless data
|
|
245
|
+
# GC/memory stats for the snapshot's summary. The baseline advances on
|
|
246
|
+
# clear: true so interval snapshots report per-interval deltas.
|
|
247
|
+
if @gc_stat_snapshot_base
|
|
248
|
+
gc = GC.stat
|
|
249
|
+
base = @gc_stat_snapshot_base
|
|
250
|
+
data[:gc_stats] = {
|
|
251
|
+
count: gc[:count] - base[:count],
|
|
252
|
+
minor_count: gc[:minor_gc_count] - base[:minor_gc_count],
|
|
253
|
+
major_count: gc[:major_gc_count] - base[:major_gc_count],
|
|
254
|
+
time_ms: (gc[:time] || 0) - (base[:time] || 0),
|
|
255
|
+
allocated_objects: gc[:total_allocated_objects] - base[:total_allocated_objects],
|
|
256
|
+
freed_objects: gc[:total_freed_objects] - base[:total_freed_objects],
|
|
257
|
+
}
|
|
258
|
+
@gc_stat_snapshot_base = gc if clear
|
|
259
|
+
end
|
|
260
|
+
sys_stats = get_system_stats
|
|
261
|
+
data[:maxrss_mb] = (sys_stats[:maxrss_kb] / 1024.0).round if sys_stats[:maxrss_kb]
|
|
219
262
|
merge_vm_state_labels!(data)
|
|
220
263
|
data
|
|
221
264
|
end
|
|
@@ -247,7 +290,9 @@ module Rperf
|
|
|
247
290
|
|
|
248
291
|
cur_id = _c_get_label
|
|
249
292
|
cur_labels = @label_set_table[cur_id] || {}
|
|
250
|
-
|
|
293
|
+
# Interned label sets must be deeply immutable, but freezing the caller's
|
|
294
|
+
# own objects is an observable side effect — dup mutable Strings instead
|
|
295
|
+
kw = kw.transform_values { |v| v.is_a?(String) && !v.frozen? ? v.dup.freeze : v.freeze }
|
|
251
296
|
new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
|
|
252
297
|
new_id = _intern_label_set(new_labels)
|
|
253
298
|
_c_set_label(new_id)
|
|
@@ -306,6 +351,11 @@ module Rperf
|
|
|
306
351
|
end
|
|
307
352
|
end
|
|
308
353
|
|
|
354
|
+
# Returns true while a profiling session is active (between start and stop).
|
|
355
|
+
def self.running?
|
|
356
|
+
_c_running?
|
|
357
|
+
end
|
|
358
|
+
|
|
309
359
|
# Returns the current thread's labels as a Hash.
|
|
310
360
|
# Returns an empty Hash if no labels are set or profiling is not running.
|
|
311
361
|
def self.labels
|
|
@@ -367,28 +417,69 @@ module Rperf
|
|
|
367
417
|
write_data(path, data, format)
|
|
368
418
|
end
|
|
369
419
|
|
|
370
|
-
|
|
420
|
+
# internal: true skips meta/summary generation — used for per-process
|
|
421
|
+
# intermediate files in the multi-process session dir (meta is attached
|
|
422
|
+
# once, on the root's final output).
|
|
423
|
+
def self.write_data(path, data, format, internal: false)
|
|
371
424
|
fmt = detect_format(path, format)
|
|
372
425
|
case fmt
|
|
373
426
|
when :collapsed
|
|
374
|
-
|
|
427
|
+
atomic_write(path, Collapsed.encode(data))
|
|
375
428
|
when :text
|
|
376
|
-
|
|
429
|
+
atomic_write(path, Text.encode(data))
|
|
377
430
|
when :json
|
|
378
431
|
require "json"
|
|
379
|
-
json_data = data
|
|
432
|
+
json_data = data
|
|
433
|
+
unless internal
|
|
434
|
+
# meta/summary must be the FIRST keys so Meta.read can extract them
|
|
435
|
+
# from the head of the (gzipped) file without loading the body —
|
|
436
|
+
# reorder even when re-saving data that already carries them.
|
|
437
|
+
meta = data[:meta] || Meta.build_meta(data)
|
|
438
|
+
summary = data[:summary] || Meta.build_summary(data)
|
|
439
|
+
json_data = { meta: meta, summary: summary }.merge(data.except(:meta, :summary))
|
|
440
|
+
end
|
|
441
|
+
json_data = json_data.merge(rperf_version: VERSION, pid: Process.pid, ppid: Process.ppid)
|
|
380
442
|
json_str = JSON.generate(json_data)
|
|
381
443
|
if path.to_s.end_with?(".gz")
|
|
382
|
-
|
|
444
|
+
atomic_write(path, gzip(json_str), binary: true)
|
|
383
445
|
else
|
|
384
|
-
|
|
446
|
+
atomic_write(path, json_str)
|
|
385
447
|
end
|
|
386
448
|
else
|
|
387
|
-
|
|
449
|
+
atomic_write(path, gzip(PProf.encode(data)), binary: true)
|
|
388
450
|
end
|
|
389
451
|
end
|
|
390
452
|
private_class_method :write_data
|
|
391
453
|
|
|
454
|
+
# Write via tmp file + rename so a crash mid-write never leaves a truncated
|
|
455
|
+
# file at the final path (the multi-process aggregator globs the session dir
|
|
456
|
+
# and would otherwise load — and then discard — a partial child profile).
|
|
457
|
+
def self.atomic_write(path, content, binary: false)
|
|
458
|
+
# rename cannot replace special files (/dev/null → EBUSY) and would
|
|
459
|
+
# replace a symlink instead of writing through it — write those directly
|
|
460
|
+
st = begin
|
|
461
|
+
File.lstat(path)
|
|
462
|
+
rescue SystemCallError
|
|
463
|
+
nil
|
|
464
|
+
end
|
|
465
|
+
if st && !st.file?
|
|
466
|
+
binary ? File.binwrite(path, content) : File.write(path, content)
|
|
467
|
+
return
|
|
468
|
+
end
|
|
469
|
+
|
|
470
|
+
tmp = "#{path}.tmp-#{Process.pid}"
|
|
471
|
+
binary ? File.binwrite(tmp, content) : File.write(tmp, content)
|
|
472
|
+
File.rename(tmp, path)
|
|
473
|
+
rescue Exception
|
|
474
|
+
begin
|
|
475
|
+
File.unlink(tmp)
|
|
476
|
+
rescue SystemCallError
|
|
477
|
+
# tmp was never created or already renamed
|
|
478
|
+
end
|
|
479
|
+
raise
|
|
480
|
+
end
|
|
481
|
+
private_class_method :atomic_write
|
|
482
|
+
|
|
392
483
|
# Load a profile saved by rperf record (.json.gz or .json).
|
|
393
484
|
# Returns the data hash (same format as Rperf.stop / Rperf.snapshot).
|
|
394
485
|
# Warns to stderr if the file was saved by a different rperf version.
|
|
@@ -402,6 +493,9 @@ module Rperf
|
|
|
402
493
|
end
|
|
403
494
|
require "json"
|
|
404
495
|
data = JSON.parse(raw, symbolize_names: true)
|
|
496
|
+
# symbolize_names only converts keys — :mode round-trips as a String
|
|
497
|
+
# ("wall"), which encoders compare against :wall/:cpu symbols
|
|
498
|
+
data[:mode] = data[:mode].to_sym if data[:mode].is_a?(String)
|
|
405
499
|
saved_version = data.delete(:rperf_version)
|
|
406
500
|
if saved_version && saved_version != VERSION
|
|
407
501
|
$stderr.puts "rperf: warning: file was saved by rperf #{saved_version} (current: #{VERSION})"
|
|
@@ -411,6 +505,14 @@ module Rperf
|
|
|
411
505
|
data
|
|
412
506
|
end
|
|
413
507
|
|
|
508
|
+
# Read only the meta/summary head of a profile saved by rperf record
|
|
509
|
+
# (.json.gz or .json) without loading the sample body.
|
|
510
|
+
# Returns { meta: Hash|nil, summary: Hash|nil }, or nil for files saved
|
|
511
|
+
# by older rperf versions (no leading meta) or unreadable files.
|
|
512
|
+
def self.read_meta(path)
|
|
513
|
+
Meta.read(path)
|
|
514
|
+
end
|
|
515
|
+
|
|
414
516
|
def self.detect_format(path, format)
|
|
415
517
|
return format.to_sym if format
|
|
416
518
|
case path.to_s
|
|
@@ -514,7 +616,7 @@ module Rperf
|
|
|
514
616
|
samples_raw = data[:aggregated_samples] || []
|
|
515
617
|
real_ns = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono) * 1_000_000_000).to_i
|
|
516
618
|
times = Process.times
|
|
517
|
-
start_times = @stat_start_times ||
|
|
619
|
+
start_times = @stat_start_times || ZERO_TIMES
|
|
518
620
|
user_ns = ((times.utime - start_times.utime) * 1_000_000_000).to_i
|
|
519
621
|
sys_ns = ((times.stime - start_times.stime) * 1_000_000_000).to_i
|
|
520
622
|
|
|
@@ -720,9 +822,11 @@ module Rperf
|
|
|
720
822
|
end
|
|
721
823
|
|
|
722
824
|
if File.readable?("/proc/self/stat")
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
825
|
+
# comm (field 2) is parenthesized and may contain spaces — split only
|
|
826
|
+
# the part after the closing paren (fields from state, field 3, onward)
|
|
827
|
+
fields = File.read("/proc/self/stat").rpartition(")").last.split
|
|
828
|
+
stats[:page_faults_minor] = fields[7].to_i # minflt (field 10)
|
|
829
|
+
stats[:page_faults_major] = fields[9].to_i # majflt (field 12)
|
|
726
830
|
end
|
|
727
831
|
|
|
728
832
|
if File.readable?("/proc/self/io")
|
|
@@ -747,14 +851,20 @@ module Rperf
|
|
|
747
851
|
# Called only when NOT already inside a CLI-managed session (no RPERF_SESSION_DIR).
|
|
748
852
|
# Creates the session directory eagerly — if creation fails, inherit is silently
|
|
749
853
|
# disabled and profiling continues in single-process mode.
|
|
750
|
-
def self._setup_inherit(mode, frequency, signal, aggregate,
|
|
751
|
-
session_dir = _create_session_dir
|
|
854
|
+
def self._setup_inherit(mode, frequency, signal, aggregate, inherit, defer)
|
|
855
|
+
session_dir = _create_session_dir(clean_stale: true)
|
|
752
856
|
return unless session_dir
|
|
753
857
|
|
|
754
858
|
ENV["RPERF_ROOT_PROCESS"] = Process.pid.to_s
|
|
755
859
|
ENV["RPERF_SESSION_DIR"] = session_dir
|
|
756
860
|
ENV["RPERF_DEFER"] = "1" if defer
|
|
757
861
|
|
|
862
|
+
# Remember the start options for forked children (_restart_in_child).
|
|
863
|
+
# Fork preserves module state, so this works for inherit: :fork too,
|
|
864
|
+
# where the RPERF_* env vars below are NOT exported.
|
|
865
|
+
@_child_start_opts = { mode: mode, frequency: frequency, signal: signal,
|
|
866
|
+
aggregate: aggregate, defer: defer }
|
|
867
|
+
|
|
758
868
|
_install_fork_hook
|
|
759
869
|
|
|
760
870
|
if inherit == true
|
|
@@ -882,20 +992,27 @@ module Rperf
|
|
|
882
992
|
@label_set_table = nil
|
|
883
993
|
@label_set_index = nil
|
|
884
994
|
|
|
885
|
-
|
|
886
|
-
|
|
995
|
+
require "securerandom"
|
|
996
|
+
# Random suffix: PIDs can be recycled within a long-lived session, and a
|
|
997
|
+
# plain profile-<pid> name would silently overwrite an earlier child's data
|
|
998
|
+
child_output = File.join(session_dir, "profile-#{Process.pid}-#{SecureRandom.hex(4)}.json.gz")
|
|
999
|
+
|
|
1000
|
+
# Start options: prefer the values remembered by _setup_inherit (API
|
|
1001
|
+
# inherit: :fork / true — fork preserves module state); fall back to the
|
|
1002
|
+
# RPERF_* env vars (CLI-managed sessions always export them).
|
|
1003
|
+
saved = @_child_start_opts
|
|
887
1004
|
opts = {
|
|
888
|
-
frequency: (ENV["RPERF_FREQUENCY"] || 1000).to_i,
|
|
889
|
-
mode: ENV["RPERF_MODE"] == "cpu" ? :cpu : :wall,
|
|
890
|
-
aggregate: ENV["RPERF_AGGREGATE"] != "0",
|
|
1005
|
+
frequency: saved ? saved[:frequency] : (ENV["RPERF_FREQUENCY"] || 1000).to_i,
|
|
1006
|
+
mode: saved ? saved[:mode] : (ENV["RPERF_MODE"] == "cpu" ? :cpu : :wall),
|
|
1007
|
+
aggregate: saved ? saved[:aggregate] : ENV["RPERF_AGGREGATE"] != "0",
|
|
891
1008
|
output: child_output,
|
|
892
1009
|
format: :json,
|
|
893
1010
|
stat: false,
|
|
894
1011
|
verbose: false,
|
|
895
1012
|
}
|
|
896
|
-
sig = _parse_signal_env
|
|
1013
|
+
sig = saved ? saved[:signal] : _parse_signal_env
|
|
897
1014
|
opts[:signal] = sig unless sig.nil?
|
|
898
|
-
opts[:defer] = true if ENV["RPERF_DEFER"] == "1"
|
|
1015
|
+
opts[:defer] = true if saved ? saved[:defer] : ENV["RPERF_DEFER"] == "1"
|
|
899
1016
|
|
|
900
1017
|
start(**opts, inherit: false)
|
|
901
1018
|
@_session_dir_output = true
|
|
@@ -905,7 +1022,9 @@ module Rperf
|
|
|
905
1022
|
at_exit { Rperf.stop }
|
|
906
1023
|
end
|
|
907
1024
|
|
|
908
|
-
|
|
1025
|
+
# root_data: the root process's own profile data — GC/OS stats in the
|
|
1026
|
+
# merged summary come from the root only (same policy as `rperf stat`).
|
|
1027
|
+
def self._aggregate_and_report(root_data = nil)
|
|
909
1028
|
session_dir = ENV["RPERF_SESSION_DIR"]
|
|
910
1029
|
return unless session_dir && File.directory?(session_dir)
|
|
911
1030
|
|
|
@@ -941,11 +1060,20 @@ module Rperf
|
|
|
941
1060
|
process_count += 1
|
|
942
1061
|
end
|
|
943
1062
|
|
|
944
|
-
|
|
1063
|
+
if process_count == 0
|
|
1064
|
+
# Nothing loadable — remove the session dir here, or stop's empty-dir
|
|
1065
|
+
# rmdir would fail on the leftover corrupt files and leak the dir
|
|
1066
|
+
_cleanup_session_dir(session_dir)
|
|
1067
|
+
return
|
|
1068
|
+
end
|
|
945
1069
|
|
|
1070
|
+
# mode/frequency: the root's own profile is authoritative; the env vars
|
|
1071
|
+
# are only set by the CLI or inherit: true (and default to the root's
|
|
1072
|
+
# actual settings via _setup_inherit for the API case)
|
|
1073
|
+
saved = @_child_start_opts
|
|
946
1074
|
merged_data = {
|
|
947
|
-
mode: (ENV["RPERF_MODE"] || "wall").to_sym,
|
|
948
|
-
frequency: (ENV["RPERF_FREQUENCY"] || 1000).to_i,
|
|
1075
|
+
mode: (root_data && root_data[:mode]) || (saved ? saved[:mode] : (ENV["RPERF_MODE"] || "wall").to_sym),
|
|
1076
|
+
frequency: (root_data && root_data[:frequency]) || (saved ? saved[:frequency] : (ENV["RPERF_FREQUENCY"] || 1000).to_i),
|
|
949
1077
|
aggregated_samples: merged_samples,
|
|
950
1078
|
label_sets: merged_label_sets,
|
|
951
1079
|
trigger_count: total_trigger_count,
|
|
@@ -958,6 +1086,11 @@ module Rperf
|
|
|
958
1086
|
process_count: process_count,
|
|
959
1087
|
}
|
|
960
1088
|
|
|
1089
|
+
if root_data
|
|
1090
|
+
merged_data[:gc_stats] = root_data[:gc_stats] if root_data[:gc_stats]
|
|
1091
|
+
merged_data[:maxrss_mb] = root_data[:maxrss_mb] if root_data[:maxrss_mb]
|
|
1092
|
+
end
|
|
1093
|
+
|
|
961
1094
|
print_stat(merged_data) if @stat
|
|
962
1095
|
if @output
|
|
963
1096
|
write_data(@output, merged_data, @format)
|
|
@@ -968,8 +1101,7 @@ module Rperf
|
|
|
968
1101
|
merged_data
|
|
969
1102
|
rescue => e
|
|
970
1103
|
$stderr.puts "rperf: warning: failed to aggregate multi-process data: #{e.message}"
|
|
971
|
-
#
|
|
972
|
-
_fallback_aggregate_output(session_dir)
|
|
1104
|
+
# stop() falls back to writing the root's own data when this returns nil
|
|
973
1105
|
_cleanup_session_dir(session_dir)
|
|
974
1106
|
nil
|
|
975
1107
|
end
|
|
@@ -983,22 +1115,8 @@ module Rperf
|
|
|
983
1115
|
end
|
|
984
1116
|
private_class_method :_cleanup_session_dir
|
|
985
1117
|
|
|
986
|
-
# Best-effort fallback: if aggregation failed, try to copy the first
|
|
987
|
-
# available child profile to @output so the user gets something.
|
|
988
|
-
def self._fallback_aggregate_output(session_dir)
|
|
989
|
-
return unless @output
|
|
990
|
-
return unless session_dir && File.directory?(session_dir)
|
|
991
|
-
files = Dir.glob(File.join(session_dir, "profile-*.json.gz"))
|
|
992
|
-
return if files.empty?
|
|
993
|
-
require "fileutils"
|
|
994
|
-
FileUtils.cp(files.first, @output)
|
|
995
|
-
rescue StandardError
|
|
996
|
-
# nothing more we can do
|
|
997
|
-
end
|
|
998
|
-
private_class_method :_fallback_aggregate_output
|
|
999
|
-
|
|
1000
1118
|
def self._merge_into(merged_samples, merged_label_sets, data, merged_label_sets_index = nil)
|
|
1001
|
-
# Build a reverse index
|
|
1119
|
+
# Build a reverse index when the caller doesn't maintain one across calls
|
|
1002
1120
|
unless merged_label_sets_index
|
|
1003
1121
|
merged_label_sets_index = {}
|
|
1004
1122
|
merged_label_sets.each_with_index { |ls, i| merged_label_sets_index[ls] = i }
|
|
@@ -1058,7 +1176,9 @@ module Rperf
|
|
|
1058
1176
|
# normal mode which would duplicate output with the root process.
|
|
1059
1177
|
_rperf_session_dir = ENV["RPERF_SESSION_DIR"]
|
|
1060
1178
|
if File.directory?(_rperf_session_dir)
|
|
1061
|
-
|
|
1179
|
+
require "securerandom"
|
|
1180
|
+
# Random suffix: PID reuse must not overwrite an earlier child's profile
|
|
1181
|
+
_rperf_start_opts[:output] = File.join(_rperf_session_dir, "profile-#{Process.pid}-#{SecureRandom.hex(4)}.json.gz")
|
|
1062
1182
|
_rperf_start_opts[:format] = :json
|
|
1063
1183
|
_rperf_start_opts[:stat] = false
|
|
1064
1184
|
_rperf_start_opts[:verbose] = false
|
|
@@ -1086,6 +1206,21 @@ module Rperf
|
|
|
1086
1206
|
_rperf_start_opts[:stat] = _rperf_stat
|
|
1087
1207
|
_rperf_start_opts[:inherit] = false # no RPERF_SESSION_DIR means --no-inherit
|
|
1088
1208
|
start(**_rperf_start_opts)
|
|
1209
|
+
# --no-inherit: scrub the env the CLI injected for THIS process, so
|
|
1210
|
+
# Ruby descendants spawned by the app don't auto-start their own
|
|
1211
|
+
# sessions (and clobber RPERF_OUTPUT with their own profiles)
|
|
1212
|
+
%w[RPERF_ENABLED RPERF_OUTPUT RPERF_STAT RPERF_FORMAT RPERF_VERBOSE
|
|
1213
|
+
RPERF_FREQUENCY RPERF_MODE RPERF_SIGNAL RPERF_AGGREGATE
|
|
1214
|
+
RPERF_DEFER].each { |k| ENV.delete(k) }
|
|
1215
|
+
if ENV["RUBYOPT"]
|
|
1216
|
+
rubyopt = ENV["RUBYOPT"].split(" ").reject { |o| o == "-rrperf" }.join(" ")
|
|
1217
|
+
rubyopt.empty? ? ENV.delete("RUBYOPT") : ENV["RUBYOPT"] = rubyopt
|
|
1218
|
+
end
|
|
1219
|
+
if ENV["RUBYLIB"]
|
|
1220
|
+
_rperf_lib_dir = File.expand_path("..", __FILE__)
|
|
1221
|
+
rubylib = ENV["RUBYLIB"].split(File::PATH_SEPARATOR).reject { |p| p == _rperf_lib_dir }.join(File::PATH_SEPARATOR)
|
|
1222
|
+
rubylib.empty? ? ENV.delete("RUBYLIB") : ENV["RUBYLIB"] = rubylib
|
|
1223
|
+
end
|
|
1089
1224
|
at_exit { stop }
|
|
1090
1225
|
end
|
|
1091
1226
|
end
|
|
@@ -1141,7 +1276,10 @@ module Rperf
|
|
|
1141
1276
|
return "" if !samples || samples.empty?
|
|
1142
1277
|
merged = Hash.new(0)
|
|
1143
1278
|
samples.each do |frames, weight|
|
|
1144
|
-
|
|
1279
|
+
# ";" is the frame separator and has no escape in the collapsed
|
|
1280
|
+
# format — replace it so a pathological method name cannot corrupt
|
|
1281
|
+
# stack splitting downstream (FlameGraph/speedscope)
|
|
1282
|
+
key = frames.reverse.map { |_, label| label.include?(";") ? label.tr(";", ",") : label }.join(";")
|
|
1145
1283
|
merged[key] += weight
|
|
1146
1284
|
end
|
|
1147
1285
|
merged.map { |stack, weight| "#{stack} #{weight}" }.join("\n") + "\n"
|
|
@@ -1157,10 +1295,10 @@ module Rperf
|
|
|
1157
1295
|
module_function
|
|
1158
1296
|
|
|
1159
1297
|
def encode(data)
|
|
1160
|
-
samples_raw = data[:aggregated_samples]
|
|
1298
|
+
samples_raw = data[:aggregated_samples] || []
|
|
1161
1299
|
frequency = data[:frequency]
|
|
1162
|
-
interval_ns = 1_000_000_000 / frequency
|
|
1163
|
-
mode = data[:mode] || :cpu
|
|
1300
|
+
interval_ns = (frequency && frequency > 0) ? 1_000_000_000 / frequency : 0
|
|
1301
|
+
mode = (data[:mode] || :cpu).to_sym
|
|
1164
1302
|
|
|
1165
1303
|
# Build string table: index 0 must be ""
|
|
1166
1304
|
string_table = [""]
|
|
@@ -1176,7 +1314,7 @@ module Rperf
|
|
|
1176
1314
|
|
|
1177
1315
|
# Convert string frames to index frames and merge identical stacks per thread/label
|
|
1178
1316
|
merged = Hash.new(0)
|
|
1179
|
-
thread_seq_key =
|
|
1317
|
+
thread_seq_key = nil # interned lazily — only when a sample carries thread_seq
|
|
1180
1318
|
label_sets = data[:label_sets] # Array of Hash (may be nil)
|
|
1181
1319
|
samples_raw.each do |frames, weight, thread_seq, label_set_id|
|
|
1182
1320
|
key = [frames.map { |path, label| [intern.(path), intern.(label)] }, thread_seq || 0, label_set_id || 0]
|
|
@@ -1196,8 +1334,8 @@ module Rperf
|
|
|
1196
1334
|
end
|
|
1197
1335
|
end
|
|
1198
1336
|
|
|
1199
|
-
# Build
|
|
1200
|
-
|
|
1337
|
+
# Build the frame → id table (locations and functions are 1:1)
|
|
1338
|
+
frame_ids = build_tables(merged)
|
|
1201
1339
|
|
|
1202
1340
|
# Intern type label and unit
|
|
1203
1341
|
type_label = mode == :wall ? "wall" : "cpu"
|
|
@@ -1213,11 +1351,12 @@ module Rperf
|
|
|
1213
1351
|
# field 2: sample (repeated Sample) with thread_seq + user labels
|
|
1214
1352
|
merged.each do |(frames, thread_seq, label_set_id), weight|
|
|
1215
1353
|
sample_buf = "".b
|
|
1216
|
-
loc_ids = frames.map { |f|
|
|
1354
|
+
loc_ids = frames.map { |f| frame_ids[f] }
|
|
1217
1355
|
sample_buf << encode_packed_uint64(1, loc_ids)
|
|
1218
1356
|
sample_buf << encode_packed_int64(2, [weight])
|
|
1219
1357
|
if thread_seq && thread_seq > 0
|
|
1220
1358
|
label_buf = "".b
|
|
1359
|
+
thread_seq_key ||= intern.("thread_seq")
|
|
1221
1360
|
label_buf << encode_int64(1, thread_seq_key) # key
|
|
1222
1361
|
label_buf << encode_int64(3, thread_seq) # num
|
|
1223
1362
|
sample_buf << encode_message(3, label_buf)
|
|
@@ -1236,19 +1375,18 @@ module Rperf
|
|
|
1236
1375
|
buf << encode_message(2, sample_buf)
|
|
1237
1376
|
end
|
|
1238
1377
|
|
|
1239
|
-
# field 4: location (repeated Location)
|
|
1240
|
-
|
|
1378
|
+
# field 4: location (repeated Location) — Line points at the same id
|
|
1379
|
+
frame_ids.each do |_frame, id|
|
|
1241
1380
|
loc_buf = "".b
|
|
1242
|
-
loc_buf << encode_uint64(1,
|
|
1381
|
+
loc_buf << encode_uint64(1, id)
|
|
1243
1382
|
line_buf = "".b
|
|
1244
|
-
|
|
1245
|
-
line_buf << encode_uint64(1, func_id)
|
|
1383
|
+
line_buf << encode_uint64(1, id)
|
|
1246
1384
|
loc_buf << encode_message(4, line_buf)
|
|
1247
1385
|
buf << encode_message(4, loc_buf)
|
|
1248
1386
|
end
|
|
1249
1387
|
|
|
1250
1388
|
# field 5: function (repeated Function)
|
|
1251
|
-
|
|
1389
|
+
frame_ids.each do |frame, func_id|
|
|
1252
1390
|
func_buf = "".b
|
|
1253
1391
|
func_buf << encode_uint64(1, func_id)
|
|
1254
1392
|
func_buf << encode_int64(2, frame[1]) # name (label_idx)
|
|
@@ -1295,22 +1433,23 @@ module Rperf
|
|
|
1295
1433
|
buf
|
|
1296
1434
|
end
|
|
1297
1435
|
|
|
1436
|
+
# Assign sequential ids to unique frames. rperf emits exactly one
|
|
1437
|
+
# Location and one Function per frame, sharing the same id, so a single
|
|
1438
|
+
# table serves both.
|
|
1298
1439
|
def build_tables(merged)
|
|
1299
|
-
|
|
1300
|
-
functions = {}
|
|
1440
|
+
frame_ids = {}
|
|
1301
1441
|
next_id = 1
|
|
1302
1442
|
|
|
1303
|
-
merged.each do |frames, _weight|
|
|
1443
|
+
merged.each do |(frames, _thread_seq, _label_set_id), _weight|
|
|
1304
1444
|
frames.each do |frame|
|
|
1305
|
-
unless
|
|
1306
|
-
|
|
1307
|
-
functions[frame] = next_id
|
|
1445
|
+
unless frame_ids.key?(frame)
|
|
1446
|
+
frame_ids[frame] = next_id
|
|
1308
1447
|
next_id += 1
|
|
1309
1448
|
end
|
|
1310
1449
|
end
|
|
1311
1450
|
end
|
|
1312
1451
|
|
|
1313
|
-
|
|
1452
|
+
frame_ids
|
|
1314
1453
|
end
|
|
1315
1454
|
|
|
1316
1455
|
# --- Protobuf encoding helpers ---
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rperf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.9.0
|
|
4
|
+
version: 0.10.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Koichi Sasada
|
|
@@ -55,10 +55,13 @@ files:
|
|
|
55
55
|
- ext/rperf/rperf.c
|
|
56
56
|
- lib/rperf.rb
|
|
57
57
|
- lib/rperf/active_job.rb
|
|
58
|
+
- lib/rperf/meta.rb
|
|
58
59
|
- lib/rperf/rack.rb
|
|
59
60
|
- lib/rperf/sidekiq.rb
|
|
61
|
+
- lib/rperf/table.rb
|
|
60
62
|
- lib/rperf/version.rb
|
|
61
63
|
- lib/rperf/viewer.rb
|
|
64
|
+
- lib/rperf/viewer/viewer.html
|
|
62
65
|
homepage: https://github.com/ko1/rperf
|
|
63
66
|
licenses:
|
|
64
67
|
- MIT
|