rperf 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +21 -0
- data/README.md +26 -15
- data/docs/help.md +284 -18
- data/exe/rperf +278 -55
- data/ext/rperf/rperf.c +220 -81
- data/lib/rperf/active_job.rb +1 -0
- data/lib/rperf/meta.rb +343 -0
- data/lib/rperf/rack.rb +7 -2
- data/lib/rperf/table.rb +156 -0
- data/lib/rperf/version.rb +1 -1
- data/lib/rperf/viewer/viewer.html +1148 -0
- data/lib/rperf/viewer.rb +158 -661
- data/lib/rperf.rb +682 -89
- metadata +8 -4
data/lib/rperf.rb
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
require_relative "rperf/version"
|
|
2
|
+
require_relative "rperf/meta"
|
|
3
|
+
require_relative "rperf/table"
|
|
2
4
|
require "zlib"
|
|
3
5
|
require "stringio"
|
|
4
6
|
|
|
@@ -13,10 +15,21 @@ end
|
|
|
13
15
|
|
|
14
16
|
module Rperf
|
|
15
17
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
@
|
|
19
|
-
@
|
|
18
|
+
# --- Module-level state (single global profiler) ---
|
|
19
|
+
# Profiling session
|
|
20
|
+
@verbose = false # verbose stats output on stop
|
|
21
|
+
@output = nil # output file path (nil = no file)
|
|
22
|
+
@format = nil # output format (:json, :pprof, :collapsed, :text, nil = auto)
|
|
23
|
+
@stat = false # print user/sys/real summary to stderr
|
|
24
|
+
@stat_start_mono = nil # Process::CLOCK_MONOTONIC at start (for real time)
|
|
25
|
+
@stat_start_times = nil # Process.times at start (for user/sys time)
|
|
26
|
+
@label_set_table = nil # Array: label_set_id → frozen Hash
|
|
27
|
+
@label_set_index = nil # Hash: frozen label Hash → label_set_id
|
|
28
|
+
# Multi-process (fork/spawn) support
|
|
29
|
+
@_session_dir_output = false # true when @output points to session dir (child process)
|
|
30
|
+
@_session_dir_created = false # true after first fork activates session dir
|
|
31
|
+
@_fork_hook_installed = false # true after Process._fork hook is prepended
|
|
32
|
+
@_saved_env = nil # saved ENV values for restore on stop (inherit: true)
|
|
20
33
|
|
|
21
34
|
# Starts profiling.
|
|
22
35
|
# format: :json, :pprof, :collapsed, or :text. nil = auto-detect from output extension
|
|
@@ -24,10 +37,19 @@ module Rperf
|
|
|
24
37
|
# .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
|
|
25
38
|
# .txt → text report (human/AI readable flat + cumulative table)
|
|
26
39
|
# .pb.gz → pprof protobuf (gzip compressed)
|
|
27
|
-
|
|
40
|
+
# inherit: controls child process profiling.
|
|
41
|
+
# :fork — (default) automatically profile forked child processes via Process._fork hook.
|
|
42
|
+
# Session dir is created eagerly at start time. Spawned processes are NOT tracked.
|
|
43
|
+
# true — profile both forked and spawned Ruby child processes. Sets RUBYOPT=-rrperf
|
|
44
|
+
# and RPERF_* env vars so spawned Ruby processes auto-start profiling.
|
|
45
|
+
# Use with caution: affects ALL spawned Ruby processes, including independent
|
|
46
|
+
# programs that may use rperf themselves.
|
|
47
|
+
# false — do not track child processes (single-process mode).
|
|
48
|
+
def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true, defer: false, inherit: :fork)
|
|
28
49
|
raise ArgumentError, "frequency must be a positive integer (got #{frequency.inspect})" unless frequency.is_a?(Integer) && frequency > 0
|
|
29
50
|
raise ArgumentError, "frequency must be <= 10000 (10KHz), got #{frequency}" if frequency > 10_000
|
|
30
51
|
raise ArgumentError, "mode must be :cpu or :wall, got #{mode.inspect}" unless %i[cpu wall].include?(mode)
|
|
52
|
+
raise ArgumentError, "inherit must be :fork, true, or false, got #{inherit.inspect}" unless [true, false, :fork].include?(inherit)
|
|
31
53
|
c_mode = mode == :cpu ? 0 : 1
|
|
32
54
|
unless signal.nil? || signal == false || signal.is_a?(Integer)
|
|
33
55
|
raise ArgumentError, "signal must be nil, false, or an Integer, got #{signal.inspect}"
|
|
@@ -45,14 +67,19 @@ module Rperf
|
|
|
45
67
|
@output = output
|
|
46
68
|
@format = format
|
|
47
69
|
@stat = stat
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
70
|
+
@stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
71
|
+
@stat_start_times = Process.times
|
|
72
|
+
@gc_stat_start = GC.stat
|
|
73
|
+
@gc_stat_snapshot_base = @gc_stat_start
|
|
52
74
|
@label_set_table = nil
|
|
53
75
|
@label_set_index = nil
|
|
54
76
|
_c_start(frequency, c_mode, aggregate, c_signal, defer)
|
|
55
77
|
|
|
78
|
+
# Set up child process tracking
|
|
79
|
+
if inherit && !ENV["RPERF_SESSION_DIR"]
|
|
80
|
+
_setup_inherit(mode, frequency, signal, aggregate, inherit, defer)
|
|
81
|
+
end
|
|
82
|
+
|
|
56
83
|
if block_given?
|
|
57
84
|
begin
|
|
58
85
|
yield
|
|
@@ -63,8 +90,12 @@ module Rperf
|
|
|
63
90
|
end
|
|
64
91
|
end
|
|
65
92
|
|
|
93
|
+
# Defensive fallback for process times when start didn't record them
|
|
94
|
+
ZERO_TIMES = Struct.new(:utime, :stime).new(0.0, 0.0).freeze
|
|
95
|
+
private_constant :ZERO_TIMES
|
|
96
|
+
|
|
66
97
|
# VM state integer → label value mapping.
|
|
67
|
-
# These values appear
|
|
98
|
+
# These values appear as "%GVL" / "%GC" label keys in label_sets.
|
|
68
99
|
VM_STATE_LABELS = {
|
|
69
100
|
1 => ["%GVL", "blocked"],
|
|
70
101
|
2 => ["%GVL", "wait"],
|
|
@@ -73,9 +104,43 @@ module Rperf
|
|
|
73
104
|
}.freeze
|
|
74
105
|
|
|
75
106
|
def self.stop
|
|
107
|
+
# Check if we need to aggregate child process data.
|
|
108
|
+
# @_session_dir_created: fork happened and session dir is active.
|
|
109
|
+
# Otherwise: check for actual child profile files (spawn-only case).
|
|
110
|
+
session_dir = ENV["RPERF_SESSION_DIR"]
|
|
111
|
+
is_root = session_dir && Process.pid.to_s == ENV["RPERF_ROOT_PROCESS"]
|
|
112
|
+
has_child_profiles = is_root && !@_session_dir_created &&
|
|
113
|
+
File.directory?(session_dir.to_s) &&
|
|
114
|
+
!Dir.glob(File.join(session_dir.to_s, "profile-*.json.gz")).empty?
|
|
115
|
+
needs_aggregation = is_root && (@_session_dir_created || has_child_profiles)
|
|
116
|
+
|
|
76
117
|
data = _c_stop
|
|
77
118
|
return unless data
|
|
78
119
|
|
|
120
|
+
# Record process times for multi-process aggregation
|
|
121
|
+
times = Process.times
|
|
122
|
+
start_times = @stat_start_times || ZERO_TIMES
|
|
123
|
+
data[:user_ns] = ((times.utime - start_times.utime) * 1_000_000_000).to_i
|
|
124
|
+
data[:sys_ns] = ((times.stime - start_times.stime) * 1_000_000_000).to_i
|
|
125
|
+
|
|
126
|
+
# GC / memory statistics for the summary (deltas since start; GC.stat is
|
|
127
|
+
# cumulative over the process lifetime). maxrss is a process-lifetime
|
|
128
|
+
# peak — no delta is possible.
|
|
129
|
+
if @gc_stat_start
|
|
130
|
+
gc = GC.stat
|
|
131
|
+
data[:gc_stats] = {
|
|
132
|
+
count: gc[:count] - @gc_stat_start[:count],
|
|
133
|
+
minor_count: gc[:minor_gc_count] - @gc_stat_start[:minor_gc_count],
|
|
134
|
+
major_count: gc[:major_gc_count] - @gc_stat_start[:major_gc_count],
|
|
135
|
+
time_ms: (gc[:time] || 0) - (@gc_stat_start[:time] || 0),
|
|
136
|
+
allocated_objects: gc[:total_allocated_objects] - @gc_stat_start[:total_allocated_objects],
|
|
137
|
+
freed_objects: gc[:total_freed_objects] - @gc_stat_start[:total_freed_objects],
|
|
138
|
+
}
|
|
139
|
+
@gc_stat_start = nil
|
|
140
|
+
end
|
|
141
|
+
sys_stats = get_system_stats
|
|
142
|
+
data[:maxrss_mb] = (sys_stats[:maxrss_kb] / 1024.0).round if sys_stats[:maxrss_kb]
|
|
143
|
+
|
|
79
144
|
# When aggregate: false, C extension returns :raw_samples but not
|
|
80
145
|
# :aggregated_samples. Build aggregated view so encoders always work.
|
|
81
146
|
if data[:raw_samples] && !data[:aggregated_samples]
|
|
@@ -93,18 +158,79 @@ module Rperf
|
|
|
93
158
|
|
|
94
159
|
merge_vm_state_labels!(data)
|
|
95
160
|
|
|
161
|
+
if needs_aggregation
|
|
162
|
+
# Root process with children: write root's own profile to session dir
|
|
163
|
+
# (fixed json.gz format), then aggregate all profiles.
|
|
164
|
+
# Root's @output/@format/@stat are preserved for the merged result.
|
|
165
|
+
print_stats(data) if @verbose
|
|
166
|
+
begin
|
|
167
|
+
write_data(File.join(session_dir, "profile-#{Process.pid}.json.gz"), data, :json, internal: true)
|
|
168
|
+
rescue SystemCallError
|
|
169
|
+
# Session dir may have been removed (e.g., test scenario) — continue to aggregation
|
|
170
|
+
end
|
|
171
|
+
merged = _aggregate_and_report(data)
|
|
172
|
+
if merged.nil? && data
|
|
173
|
+
# Aggregation failed — fall back to root's own data
|
|
174
|
+
$stderr.puts "rperf: warning: multi-process aggregation failed; writing root process data only"
|
|
175
|
+
write_data(@output, data, @format) if @output
|
|
176
|
+
print_stat(data) if @stat
|
|
177
|
+
end
|
|
178
|
+
_cleanup_session_state
|
|
179
|
+
return merged || data
|
|
180
|
+
end
|
|
181
|
+
|
|
96
182
|
print_stats(data) if @verbose
|
|
97
183
|
print_stat(data) if @stat
|
|
98
184
|
|
|
99
185
|
if @output
|
|
100
|
-
|
|
186
|
+
if @_session_dir_output
|
|
187
|
+
# Child process writing to session dir — tolerate missing dir
|
|
188
|
+
begin
|
|
189
|
+
write_data(@output, data, @format, internal: true)
|
|
190
|
+
rescue SystemCallError
|
|
191
|
+
# Parent may have already cleaned up the session dir (e.g., parent
|
|
192
|
+
# exited first and rm_rf'd it), or disk is full. Silently skip —
|
|
193
|
+
# crashing in at_exit is worse than losing one child's profile.
|
|
194
|
+
end
|
|
195
|
+
else
|
|
196
|
+
write_data(@output, data, @format)
|
|
197
|
+
end
|
|
101
198
|
@output = nil
|
|
102
199
|
@format = nil
|
|
103
200
|
end
|
|
104
201
|
|
|
202
|
+
_cleanup_session_state
|
|
105
203
|
data
|
|
106
204
|
end
|
|
107
205
|
|
|
206
|
+
def self._cleanup_session_state
|
|
207
|
+
session_dir = ENV.delete("RPERF_SESSION_DIR")
|
|
208
|
+
ENV.delete("RPERF_ROOT_PROCESS")
|
|
209
|
+
ENV.delete("RPERF_DEFER")
|
|
210
|
+
@_session_dir_created = false
|
|
211
|
+
@_session_dir_output = false
|
|
212
|
+
# Restore ENV variables saved by _setup_inherit (inherit: true)
|
|
213
|
+
if @_saved_env
|
|
214
|
+
@_saved_env.each do |key, original|
|
|
215
|
+
if original.nil?
|
|
216
|
+
ENV.delete(key)
|
|
217
|
+
else
|
|
218
|
+
ENV[key] = original
|
|
219
|
+
end
|
|
220
|
+
end
|
|
221
|
+
@_saved_env = nil
|
|
222
|
+
end
|
|
223
|
+
# Remove eagerly-created session dir if it's empty (no children ran)
|
|
224
|
+
if session_dir && File.directory?(session_dir)
|
|
225
|
+
begin
|
|
226
|
+
Dir.rmdir(session_dir) # only succeeds if empty
|
|
227
|
+
rescue SystemCallError
|
|
228
|
+
# not empty or already removed — fine
|
|
229
|
+
end
|
|
230
|
+
end
|
|
231
|
+
end
|
|
232
|
+
private_class_method :_cleanup_session_state
|
|
233
|
+
|
|
108
234
|
# Returns a snapshot of the current profiling data without stopping.
|
|
109
235
|
# Only works in aggregate mode (the default). Returns nil if not profiling.
|
|
110
236
|
# The returned data has the same format as stop's return value and can be
|
|
@@ -116,6 +242,23 @@ module Rperf
|
|
|
116
242
|
def self.snapshot(clear: false)
|
|
117
243
|
data = _c_snapshot(clear)
|
|
118
244
|
return unless data
|
|
245
|
+
# GC/memory stats for the snapshot's summary. The baseline advances on
|
|
246
|
+
# clear: true so interval snapshots report per-interval deltas.
|
|
247
|
+
if @gc_stat_snapshot_base
|
|
248
|
+
gc = GC.stat
|
|
249
|
+
base = @gc_stat_snapshot_base
|
|
250
|
+
data[:gc_stats] = {
|
|
251
|
+
count: gc[:count] - base[:count],
|
|
252
|
+
minor_count: gc[:minor_gc_count] - base[:minor_gc_count],
|
|
253
|
+
major_count: gc[:major_gc_count] - base[:major_gc_count],
|
|
254
|
+
time_ms: (gc[:time] || 0) - (base[:time] || 0),
|
|
255
|
+
allocated_objects: gc[:total_allocated_objects] - base[:total_allocated_objects],
|
|
256
|
+
freed_objects: gc[:total_freed_objects] - base[:total_freed_objects],
|
|
257
|
+
}
|
|
258
|
+
@gc_stat_snapshot_base = gc if clear
|
|
259
|
+
end
|
|
260
|
+
sys_stats = get_system_stats
|
|
261
|
+
data[:maxrss_mb] = (sys_stats[:maxrss_kb] / 1024.0).round if sys_stats[:maxrss_kb]
|
|
119
262
|
merge_vm_state_labels!(data)
|
|
120
263
|
data
|
|
121
264
|
end
|
|
@@ -124,24 +267,40 @@ module Rperf
|
|
|
124
267
|
# Label sets are stored as an Array of Hashes, indexed by label_set_id.
|
|
125
268
|
# Index 0 is reserved (no labels).
|
|
126
269
|
|
|
127
|
-
@label_set_table = nil # Array of frozen Hash
|
|
128
|
-
@label_set_index = nil # Hash → id (for dedup)
|
|
129
|
-
|
|
130
270
|
def self._init_label_sets
|
|
131
271
|
@label_set_table = [{}] # id 0 = no labels
|
|
132
272
|
@label_set_index = { {} => 0 }
|
|
133
273
|
end
|
|
134
274
|
|
|
135
275
|
def self._intern_label_set(hash)
|
|
136
|
-
|
|
137
|
-
@label_set_index[
|
|
276
|
+
hash.freeze
|
|
277
|
+
@label_set_index[hash] ||= begin
|
|
138
278
|
id = @label_set_table.size
|
|
139
|
-
@label_set_table <<
|
|
279
|
+
@label_set_table << hash
|
|
140
280
|
_c_set_label_sets(@label_set_table)
|
|
141
281
|
id
|
|
142
282
|
end
|
|
143
283
|
end
|
|
144
284
|
|
|
285
|
+
# Merges the given keyword labels into the current thread's label set,
|
|
286
|
+
# sets the result on the current thread, and returns [previous_id, new_id].
|
|
287
|
+
# Callers use previous_id to restore labels after a block.
|
|
288
|
+
def self._merge_and_set_label(kw)
|
|
289
|
+
_init_label_sets unless @label_set_table
|
|
290
|
+
|
|
291
|
+
cur_id = _c_get_label
|
|
292
|
+
cur_labels = @label_set_table[cur_id] || {}
|
|
293
|
+
# Interned label sets must be deeply immutable, but freezing the caller's
|
|
294
|
+
# own objects is an observable side effect — dup mutable Strings instead
|
|
295
|
+
kw = kw.transform_values { |v| v.is_a?(String) && !v.frozen? ? v.dup.freeze : v.freeze }
|
|
296
|
+
new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
|
|
297
|
+
new_id = _intern_label_set(new_labels)
|
|
298
|
+
_c_set_label(new_id)
|
|
299
|
+
|
|
300
|
+
[cur_id, new_id]
|
|
301
|
+
end
|
|
302
|
+
private_class_method :_merge_and_set_label
|
|
303
|
+
|
|
145
304
|
# Sets labels on the current thread for profiling annotation.
|
|
146
305
|
# With a block: restores previous labels when the block exits.
|
|
147
306
|
# Without a block: sets labels persistently on the current thread.
|
|
@@ -155,14 +314,7 @@ module Rperf
|
|
|
155
314
|
return yield if block && !_c_running?
|
|
156
315
|
return unless _c_running?
|
|
157
316
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
cur_id = _c_get_label
|
|
161
|
-
cur_labels = @label_set_table[cur_id] || {}
|
|
162
|
-
|
|
163
|
-
new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
|
|
164
|
-
new_id = _intern_label_set(new_labels)
|
|
165
|
-
_c_set_label(new_id)
|
|
317
|
+
cur_id, _new_id = _merge_and_set_label(kw)
|
|
166
318
|
|
|
167
319
|
if block
|
|
168
320
|
begin
|
|
@@ -187,13 +339,7 @@ module Rperf
|
|
|
187
339
|
raise ArgumentError, "Rperf.profile requires a block" unless block
|
|
188
340
|
raise RuntimeError, "Rperf is not started" unless _c_running?
|
|
189
341
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
cur_id = _c_get_label
|
|
193
|
-
cur_labels = @label_set_table[cur_id] || {}
|
|
194
|
-
new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
|
|
195
|
-
new_id = _intern_label_set(new_labels)
|
|
196
|
-
_c_set_label(new_id)
|
|
342
|
+
cur_id, _new_id = _merge_and_set_label(kw)
|
|
197
343
|
|
|
198
344
|
_c_profile_inc
|
|
199
345
|
|
|
@@ -205,6 +351,11 @@ module Rperf
|
|
|
205
351
|
end
|
|
206
352
|
end
|
|
207
353
|
|
|
354
|
+
# Returns true while a profiling session is active (between start and stop).
|
|
355
|
+
def self.running?
|
|
356
|
+
_c_running?
|
|
357
|
+
end
|
|
358
|
+
|
|
208
359
|
# Returns the current thread's labels as a Hash.
|
|
209
360
|
# Returns an empty Hash if no labels are set or profiling is not running.
|
|
210
361
|
def self.labels
|
|
@@ -238,7 +389,7 @@ module Rperf
|
|
|
238
389
|
unless new_id
|
|
239
390
|
base = label_sets[label_set_id] || {}
|
|
240
391
|
key, value = VM_STATE_LABELS[vm_state]
|
|
241
|
-
new_ls = base.merge(key => value).freeze
|
|
392
|
+
new_ls = base.merge(key.to_sym => value).freeze
|
|
242
393
|
new_id = label_sets.size
|
|
243
394
|
label_sets << new_ls
|
|
244
395
|
mapping[cache_key] = new_id
|
|
@@ -257,7 +408,8 @@ module Rperf
|
|
|
257
408
|
|
|
258
409
|
# Saves profiling data to a file.
|
|
259
410
|
# format: :json, :pprof, :collapsed, or :text. nil = auto-detect from path extension
|
|
260
|
-
# .json.gz → json (rperf native, default)
|
|
411
|
+
# .json.gz → json (rperf native, gzip compressed, default)
|
|
412
|
+
# .json → json (plain text, readable by jq etc.)
|
|
261
413
|
# .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
|
|
262
414
|
# .txt → text report (human/AI readable flat + cumulative table)
|
|
263
415
|
# .pb.gz → pprof protobuf (gzip compressed)
|
|
@@ -265,30 +417,85 @@ module Rperf
|
|
|
265
417
|
write_data(path, data, format)
|
|
266
418
|
end
|
|
267
419
|
|
|
268
|
-
|
|
420
|
+
# internal: true skips meta/summary generation — used for per-process
|
|
421
|
+
# intermediate files in the multi-process session dir (meta is attached
|
|
422
|
+
# once, on the root's final output).
|
|
423
|
+
def self.write_data(path, data, format, internal: false)
|
|
269
424
|
fmt = detect_format(path, format)
|
|
270
425
|
case fmt
|
|
271
426
|
when :collapsed
|
|
272
|
-
|
|
427
|
+
atomic_write(path, Collapsed.encode(data))
|
|
273
428
|
when :text
|
|
274
|
-
|
|
429
|
+
atomic_write(path, Text.encode(data))
|
|
275
430
|
when :json
|
|
276
431
|
require "json"
|
|
277
|
-
|
|
432
|
+
json_data = data
|
|
433
|
+
unless internal
|
|
434
|
+
# meta/summary must be the FIRST keys so Meta.read can extract them
|
|
435
|
+
# from the head of the (gzipped) file without loading the body —
|
|
436
|
+
# reorder even when re-saving data that already carries them.
|
|
437
|
+
meta = data[:meta] || Meta.build_meta(data)
|
|
438
|
+
summary = data[:summary] || Meta.build_summary(data)
|
|
439
|
+
json_data = { meta: meta, summary: summary }.merge(data.except(:meta, :summary))
|
|
440
|
+
end
|
|
441
|
+
json_data = json_data.merge(rperf_version: VERSION, pid: Process.pid, ppid: Process.ppid)
|
|
442
|
+
json_str = JSON.generate(json_data)
|
|
443
|
+
if path.to_s.end_with?(".gz")
|
|
444
|
+
atomic_write(path, gzip(json_str), binary: true)
|
|
445
|
+
else
|
|
446
|
+
atomic_write(path, json_str)
|
|
447
|
+
end
|
|
278
448
|
else
|
|
279
|
-
|
|
449
|
+
atomic_write(path, gzip(PProf.encode(data)), binary: true)
|
|
280
450
|
end
|
|
281
451
|
end
|
|
282
452
|
private_class_method :write_data
|
|
283
453
|
|
|
284
|
-
#
|
|
454
|
+
# Write via tmp file + rename so a crash mid-write never leaves a truncated
|
|
455
|
+
# file at the final path (the multi-process aggregator globs the session dir
|
|
456
|
+
# and would otherwise load — and then discard — a partial child profile).
|
|
457
|
+
def self.atomic_write(path, content, binary: false)
|
|
458
|
+
# rename cannot replace special files (/dev/null → EBUSY) and would
|
|
459
|
+
# replace a symlink instead of writing through it — write those directly
|
|
460
|
+
st = begin
|
|
461
|
+
File.lstat(path)
|
|
462
|
+
rescue SystemCallError
|
|
463
|
+
nil
|
|
464
|
+
end
|
|
465
|
+
if st && !st.file?
|
|
466
|
+
binary ? File.binwrite(path, content) : File.write(path, content)
|
|
467
|
+
return
|
|
468
|
+
end
|
|
469
|
+
|
|
470
|
+
tmp = "#{path}.tmp-#{Process.pid}"
|
|
471
|
+
binary ? File.binwrite(tmp, content) : File.write(tmp, content)
|
|
472
|
+
File.rename(tmp, path)
|
|
473
|
+
rescue Exception
|
|
474
|
+
begin
|
|
475
|
+
File.unlink(tmp)
|
|
476
|
+
rescue SystemCallError
|
|
477
|
+
# tmp was never created or already renamed
|
|
478
|
+
end
|
|
479
|
+
raise
|
|
480
|
+
end
|
|
481
|
+
private_class_method :atomic_write
|
|
482
|
+
|
|
483
|
+
# Load a profile saved by rperf record (.json.gz or .json).
|
|
285
484
|
# Returns the data hash (same format as Rperf.stop / Rperf.snapshot).
|
|
286
485
|
# Warns to stderr if the file was saved by a different rperf version.
|
|
287
486
|
def self.load(path)
|
|
288
|
-
|
|
289
|
-
|
|
487
|
+
raw_bytes = File.binread(path)
|
|
488
|
+
# Auto-detect gzip by magic bytes (1f 8b)
|
|
489
|
+
raw = if raw_bytes.byteslice(0, 2) == "\x1f\x8b".b
|
|
490
|
+
Zlib::GzipReader.new(StringIO.new(raw_bytes)).read
|
|
491
|
+
else
|
|
492
|
+
raw_bytes
|
|
493
|
+
end
|
|
290
494
|
require "json"
|
|
291
495
|
data = JSON.parse(raw, symbolize_names: true)
|
|
496
|
+
# symbolize_names only converts keys — :mode round-trips as a String
|
|
497
|
+
# ("wall"), which encoders compare against :wall/:cpu symbols
|
|
498
|
+
data[:mode] = data[:mode].to_sym if data[:mode].is_a?(String)
|
|
292
499
|
saved_version = data.delete(:rperf_version)
|
|
293
500
|
if saved_version && saved_version != VERSION
|
|
294
501
|
$stderr.puts "rperf: warning: file was saved by rperf #{saved_version} (current: #{VERSION})"
|
|
@@ -298,6 +505,14 @@ module Rperf
|
|
|
298
505
|
data
|
|
299
506
|
end
|
|
300
507
|
|
|
508
|
+
# Read only the meta/summary head of a profile saved by rperf record
|
|
509
|
+
# (.json.gz or .json) without loading the sample body.
|
|
510
|
+
# Returns { meta: Hash|nil, summary: Hash|nil }, or nil for files saved
|
|
511
|
+
# by older rperf versions (no leading meta) or unreadable files.
|
|
512
|
+
def self.read_meta(path)
|
|
513
|
+
Meta.read(path)
|
|
514
|
+
end
|
|
515
|
+
|
|
301
516
|
def self.detect_format(path, format)
|
|
302
517
|
return format.to_sym if format
|
|
303
518
|
case path.to_s
|
|
@@ -401,10 +616,17 @@ module Rperf
|
|
|
401
616
|
samples_raw = data[:aggregated_samples] || []
|
|
402
617
|
real_ns = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono) * 1_000_000_000).to_i
|
|
403
618
|
times = Process.times
|
|
404
|
-
start_times = @stat_start_times ||
|
|
619
|
+
start_times = @stat_start_times || ZERO_TIMES
|
|
405
620
|
user_ns = ((times.utime - start_times.utime) * 1_000_000_000).to_i
|
|
406
621
|
sys_ns = ((times.stime - start_times.stime) * 1_000_000_000).to_i
|
|
407
622
|
|
|
623
|
+
# In multi-process mode, use aggregated user/sys from all processes
|
|
624
|
+
process_count = data[:process_count] || 0
|
|
625
|
+
if process_count > 1 && data[:user_ns]
|
|
626
|
+
user_ns = data[:user_ns]
|
|
627
|
+
sys_ns = data[:sys_ns] || 0
|
|
628
|
+
end
|
|
629
|
+
|
|
408
630
|
command = ENV["RPERF_STAT_COMMAND"] || "(unknown)"
|
|
409
631
|
|
|
410
632
|
$stderr.puts
|
|
@@ -416,9 +638,9 @@ module Rperf
|
|
|
416
638
|
|
|
417
639
|
if samples_raw.size > 0
|
|
418
640
|
breakdown, total_weight = compute_stat_breakdown(samples_raw, data[:label_sets])
|
|
419
|
-
print_stat_breakdown(breakdown, total_weight)
|
|
641
|
+
print_stat_breakdown(breakdown, total_weight, data)
|
|
420
642
|
print_stat_runtime_info(data)
|
|
421
|
-
print_stat_system_info
|
|
643
|
+
print_stat_system_info(data)
|
|
422
644
|
print_stat_report(data) if ENV["RPERF_STAT_REPORT"] == "1"
|
|
423
645
|
print_stat_footer(samples_raw, real_ns, data)
|
|
424
646
|
end
|
|
@@ -436,8 +658,8 @@ module Rperf
|
|
|
436
658
|
if label_sets && label_set_id && label_set_id > 0
|
|
437
659
|
ls = label_sets[label_set_id]
|
|
438
660
|
if ls
|
|
439
|
-
gvl = ls["%GVL"]
|
|
440
|
-
gc = ls["%GC"]
|
|
661
|
+
gvl = ls[:"%GVL"]
|
|
662
|
+
gc = ls[:"%GC"]
|
|
441
663
|
if gvl == "blocked" then category = :gvl_blocked
|
|
442
664
|
elsif gvl == "wait" then category = :gvl_wait
|
|
443
665
|
elsif gc == "mark" then category = :gc_marking
|
|
@@ -452,8 +674,12 @@ module Rperf
|
|
|
452
674
|
end
|
|
453
675
|
private_class_method :compute_stat_breakdown
|
|
454
676
|
|
|
455
|
-
def self.print_stat_breakdown(breakdown, total_weight)
|
|
677
|
+
def self.print_stat_breakdown(breakdown, total_weight, data)
|
|
456
678
|
$stderr.puts
|
|
679
|
+
process_count = data[:process_count] || 0
|
|
680
|
+
if process_count > 1
|
|
681
|
+
$stderr.puts STAT_LINE.call(format_integer(process_count), " ", "[Rperf] Ruby processes profiled")
|
|
682
|
+
end
|
|
457
683
|
|
|
458
684
|
[
|
|
459
685
|
[:cpu_execution, "[Rperf] CPU execution"],
|
|
@@ -490,7 +716,7 @@ module Rperf
|
|
|
490
716
|
end
|
|
491
717
|
private_class_method :print_stat_runtime_info
|
|
492
718
|
|
|
493
|
-
def self.print_stat_system_info
|
|
719
|
+
def self.print_stat_system_info(data = nil)
|
|
494
720
|
sys_stats = get_system_stats
|
|
495
721
|
maxrss_kb = sys_stats[:maxrss_kb]
|
|
496
722
|
if maxrss_kb
|
|
@@ -520,6 +746,10 @@ module Rperf
|
|
|
520
746
|
format_integer((r / 1024.0 / 1024.0).round),
|
|
521
747
|
format_integer((w / 1024.0 / 1024.0).round)])
|
|
522
748
|
end
|
|
749
|
+
process_count = data[:process_count] if data
|
|
750
|
+
if process_count && process_count > 1
|
|
751
|
+
$stderr.puts STAT_LINE.call("", " ", "(GC/OS stats are from root process only; user/sys/[Rperf] lines are aggregated)")
|
|
752
|
+
end
|
|
523
753
|
end
|
|
524
754
|
private_class_method :print_stat_system_info
|
|
525
755
|
|
|
@@ -532,7 +762,12 @@ module Rperf
|
|
|
532
762
|
|
|
533
763
|
def self.print_stat_footer(samples_raw, real_ns, data)
|
|
534
764
|
triggers = data[:trigger_count] || 0
|
|
535
|
-
|
|
765
|
+
sampling_time_ns = data[:sampling_time_ns] || 0
|
|
766
|
+
# In multi-process mode, use sum of all processes' durations as denominator.
|
|
767
|
+
# Single-process: fall back to root's real_ns.
|
|
768
|
+
total_real_ns = data[:total_duration_ns] || real_ns
|
|
769
|
+
total_real_ns = real_ns if total_real_ns == 0
|
|
770
|
+
overhead_pct = total_real_ns > 0 ? sampling_time_ns * 100.0 / total_real_ns : 0.0
|
|
536
771
|
$stderr.puts
|
|
537
772
|
samples = data[:sampling_count] || samples_raw.size
|
|
538
773
|
$stderr.puts format(" %d samples / %d triggers, %.1f%% profiler overhead",
|
|
@@ -541,6 +776,10 @@ module Rperf
|
|
|
541
776
|
if dropped > 0
|
|
542
777
|
$stderr.puts format(" WARNING: %d samples dropped due to memory allocation failure", dropped)
|
|
543
778
|
end
|
|
779
|
+
dropped_agg = data[:dropped_aggregation] || 0
|
|
780
|
+
if dropped_agg > 0
|
|
781
|
+
$stderr.puts format(" WARNING: %d samples dropped during aggregation (frame/stack table full)", dropped_agg)
|
|
782
|
+
end
|
|
544
783
|
end
|
|
545
784
|
private_class_method :print_stat_footer
|
|
546
785
|
|
|
@@ -553,10 +792,10 @@ module Rperf
|
|
|
553
792
|
# Example: 5_609_200_000 → "5,609.2"
|
|
554
793
|
def self.format_ms(ns)
|
|
555
794
|
ms = ns / 1_000_000.0
|
|
556
|
-
|
|
557
|
-
frac =
|
|
558
|
-
int_str =
|
|
559
|
-
"#{int_str}
|
|
795
|
+
formatted = format("%.1f", ms)
|
|
796
|
+
int_str, frac = formatted.split(".")
|
|
797
|
+
int_str = int_str.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
|
|
798
|
+
"#{int_str}.#{frac}"
|
|
560
799
|
end
|
|
561
800
|
private_class_method :format_ms
|
|
562
801
|
|
|
@@ -583,9 +822,11 @@ module Rperf
|
|
|
583
822
|
end
|
|
584
823
|
|
|
585
824
|
if File.readable?("/proc/self/stat")
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
825
|
+
# comm (field 2) is parenthesized and may contain spaces — split only
|
|
826
|
+
# the part after the closing paren (fields from state, field 3, onward)
|
|
827
|
+
fields = File.read("/proc/self/stat").rpartition(")").last.split
|
|
828
|
+
stats[:page_faults_minor] = fields[7].to_i # minflt (field 10)
|
|
829
|
+
stats[:page_faults_major] = fields[9].to_i # majflt (field 12)
|
|
589
830
|
end
|
|
590
831
|
|
|
591
832
|
if File.readable?("/proc/self/io")
|
|
@@ -604,6 +845,306 @@ module Rperf
|
|
|
604
845
|
end
|
|
605
846
|
private_class_method :get_system_stats
|
|
606
847
|
|
|
848
|
+
# --- Multi-process (fork) support ---
|
|
849
|
+
|
|
850
|
+
# Set up child process tracking from Rperf.start(inherit: ...).
|
|
851
|
+
# Called only when NOT already inside a CLI-managed session (no RPERF_SESSION_DIR).
|
|
852
|
+
# Creates the session directory eagerly — if creation fails, inherit is silently
|
|
853
|
+
# disabled and profiling continues in single-process mode.
|
|
854
|
+
def self._setup_inherit(mode, frequency, signal, aggregate, inherit, defer)
|
|
855
|
+
session_dir = _create_session_dir(clean_stale: true)
|
|
856
|
+
return unless session_dir
|
|
857
|
+
|
|
858
|
+
ENV["RPERF_ROOT_PROCESS"] = Process.pid.to_s
|
|
859
|
+
ENV["RPERF_SESSION_DIR"] = session_dir
|
|
860
|
+
ENV["RPERF_DEFER"] = "1" if defer
|
|
861
|
+
|
|
862
|
+
# Remember the start options for forked children (_restart_in_child).
|
|
863
|
+
# Fork preserves module state, so this works for inherit: :fork too,
|
|
864
|
+
# where the RPERF_* env vars below are NOT exported.
|
|
865
|
+
@_child_start_opts = { mode: mode, frequency: frequency, signal: signal,
|
|
866
|
+
aggregate: aggregate, defer: defer }
|
|
867
|
+
|
|
868
|
+
_install_fork_hook
|
|
869
|
+
|
|
870
|
+
if inherit == true
|
|
871
|
+
# inherit: true — also track spawned Ruby children via RUBYOPT.
|
|
872
|
+
# Save original values so _cleanup_session_state can restore them.
|
|
873
|
+
env_keys = %w[RPERF_ENABLED RPERF_FREQUENCY RPERF_MODE RPERF_SIGNAL RPERF_AGGREGATE RUBYLIB RUBYOPT]
|
|
874
|
+
@_saved_env = env_keys.to_h { |k| [k, ENV[k]] }
|
|
875
|
+
|
|
876
|
+
ENV["RPERF_ENABLED"] = "1"
|
|
877
|
+
ENV["RPERF_FREQUENCY"] = frequency.to_s
|
|
878
|
+
ENV["RPERF_MODE"] = mode.to_s
|
|
879
|
+
ENV["RPERF_SIGNAL"] = signal.nil? ? nil : signal.to_s
|
|
880
|
+
ENV["RPERF_AGGREGATE"] = aggregate ? nil : "0"
|
|
881
|
+
lib_dir = File.expand_path("..", __FILE__)
|
|
882
|
+
ENV["RUBYLIB"] = [lib_dir, ENV["RUBYLIB"]].compact.join(File::PATH_SEPARATOR)
|
|
883
|
+
ENV["RUBYOPT"] = "-rrperf #{ENV['RUBYOPT']}".strip
|
|
884
|
+
end
|
|
885
|
+
end
|
|
886
|
+
private_class_method :_setup_inherit
|
|
887
|
+
|
|
888
|
+
# Create session directory eagerly. Returns the session dir path on success,
|
|
889
|
+
# nil on failure (caller should fall back to single-process mode).
|
|
890
|
+
# Try each candidate base in order. If user_dir looks usable but
|
|
891
|
+
# session_dir creation fails (quota, ACL, sandbox, etc.), fall through
|
|
892
|
+
# to the next base instead of giving up.
|
|
893
|
+
# When clean_stale: true, removes session dirs from dead processes.
|
|
894
|
+
def self._create_session_dir(clean_stale: false)
|
|
895
|
+
require "securerandom"
|
|
896
|
+
require "tmpdir"
|
|
897
|
+
|
|
898
|
+
bases = [ENV["RPERF_TMPDIR"], ENV["XDG_RUNTIME_DIR"], Dir.tmpdir].compact
|
|
899
|
+
bases.each do |base|
|
|
900
|
+
user_dir = File.join(base, "rperf-#{Process.uid}")
|
|
901
|
+
|
|
902
|
+
if File.directory?(user_dir)
|
|
903
|
+
st = File.stat(user_dir) rescue next
|
|
904
|
+
next unless st.owned? && (st.mode & 0777) == 0700
|
|
905
|
+
elsif File.writable?(base)
|
|
906
|
+
begin
|
|
907
|
+
Dir.mkdir(user_dir, 0700)
|
|
908
|
+
rescue Errno::EEXIST
|
|
909
|
+
st = File.stat(user_dir) rescue next
|
|
910
|
+
next unless st.owned? && (st.mode & 0777) == 0700
|
|
911
|
+
rescue SystemCallError
|
|
912
|
+
next
|
|
913
|
+
end
|
|
914
|
+
else
|
|
915
|
+
next
|
|
916
|
+
end
|
|
917
|
+
|
|
918
|
+
if clean_stale
|
|
919
|
+
require "fileutils"
|
|
920
|
+
Dir.glob(File.join(user_dir, "rperf-*")).each do |dir|
|
|
921
|
+
m = File.basename(dir).match(/\Arperf-(\d+)-/)
|
|
922
|
+
next unless m
|
|
923
|
+
pid = m[1].to_i
|
|
924
|
+
begin
|
|
925
|
+
Process.kill(0, pid)
|
|
926
|
+
rescue Errno::ESRCH
|
|
927
|
+
FileUtils.rm_rf(dir)
|
|
928
|
+
rescue Errno::EPERM
|
|
929
|
+
# not ours
|
|
930
|
+
end
|
|
931
|
+
end
|
|
932
|
+
end
|
|
933
|
+
|
|
934
|
+
session_dir = File.join(user_dir, "rperf-#{Process.pid}-#{SecureRandom.hex(4)}")
|
|
935
|
+
begin
|
|
936
|
+
Dir.mkdir(session_dir, 0700)
|
|
937
|
+
return session_dir
|
|
938
|
+
rescue SystemCallError
|
|
939
|
+
next
|
|
940
|
+
end
|
|
941
|
+
end
|
|
942
|
+
nil
|
|
943
|
+
end
|
|
944
|
+
private_class_method :_create_session_dir
|
|
945
|
+
|
|
946
|
+
# Interprets the RPERF_SIGNAL environment variable.
# Returns nil when it is unset, false for the literal "false", the Integer
# value for an all-digit string, and nil for anything else.
def self._parse_signal_env
  raw = ENV["RPERF_SIGNAL"]
  return nil if raw.nil?
  return false if raw == "false"

  raw.to_i if raw.match?(/\A\d+\z/)
end
|
|
953
|
+
private_class_method :_parse_signal_env
|
|
954
|
+
|
|
955
|
+
# Installs (at most once) an override of Process._fork by prepending an
# anonymous module onto Process's singleton class.
#
# Before forking: if this process is the one named by RPERF_ROOT_PROCESS and
# the session dir has not been marked active yet, Rperf._on_first_fork runs.
# After forking: the child (where _fork returns 0) calls
# Rperf._restart_in_child. The pid from the original _fork is returned as-is.
def self._install_fork_hook
  return if @_fork_hook_installed
  @_fork_hook_installed = true

  fork_hook = Module.new do
    def _fork
      session_active = Rperf.instance_variable_get(:@_session_dir_created)
      is_root = Process.pid.to_s == ENV["RPERF_ROOT_PROCESS"]
      Rperf._on_first_fork if !session_active && is_root

      forked_pid = super
      Rperf._restart_in_child if forked_pid == 0
      forked_pid
    end
  end

  ::Process.singleton_class.prepend(fork_hook)
end
|
|
973
|
+
private_class_method :_install_fork_hook
|
|
974
|
+
|
|
975
|
+
# Marks the multi-process session as active the first time it is called,
# provided RPERF_SESSION_DIR names an existing directory. Later calls are
# no-ops.
#
# Root's @output/@format/@stat are kept as-is (user's original settings).
# stop() writes root's profile to session dir with fixed json.gz format,
# then uses the original settings for the merged output.
def self._on_first_fork
  unless @_session_dir_created
    dir = ENV["RPERF_SESSION_DIR"]
    @_session_dir_created = true if dir && File.directory?(dir)
  end
end
|
|
985
|
+
|
|
986
|
+
# Starts a fresh profiling session in a forked child, writing to a
# per-process json.gz file inside RPERF_SESSION_DIR. Does nothing when the
# session dir is missing or the profiler is already running.
def self._restart_in_child
  dir = ENV["RPERF_SESSION_DIR"]
  return if dir.nil? || !File.directory?(dir)
  return if _c_running? # should not happen, but guard against it

  # C state is already cleaned up by pthread_atfork child handler.
  @label_set_table = nil
  @label_set_index = nil

  require "securerandom"
  # Random suffix: PIDs can be recycled within a long-lived session, and a
  # plain profile-<pid> name would silently overwrite an earlier child's data
  profile_name = "profile-#{Process.pid}-#{SecureRandom.hex(4)}.json.gz"
  child_output = File.join(dir, profile_name)

  # Start options: prefer the values remembered by _setup_inherit (API
  # inherit: :fork / true — fork preserves module state); fall back to the
  # RPERF_* env vars (CLI-managed sessions always export them).
  saved = @_child_start_opts
  if saved
    frequency = saved[:frequency]
    mode      = saved[:mode]
    aggregate = saved[:aggregate]
    sig       = saved[:signal]
    defer     = saved[:defer]
  else
    frequency = (ENV["RPERF_FREQUENCY"] || 1000).to_i
    mode      = ENV["RPERF_MODE"] == "cpu" ? :cpu : :wall
    aggregate = ENV["RPERF_AGGREGATE"] != "0"
    sig       = _parse_signal_env
    defer     = ENV["RPERF_DEFER"] == "1"
  end

  opts = {
    frequency: frequency,
    mode: mode,
    aggregate: aggregate,
    output: child_output,
    format: :json,
    stat: false,
    verbose: false,
  }
  opts[:signal] = sig unless sig.nil?
  opts[:defer] = true if defer

  start(**opts, inherit: false)
  @_session_dir_output = true
  label("%pid": Process.pid.to_s)

  # Register at_exit so child's profile is written even without explicit stop
  at_exit { Rperf.stop }
end
|
|
1024
|
+
|
|
1025
|
+
# root_data: the root process's own profile data — GC/OS stats in the
|
|
1026
|
+
# merged summary come from the root only (same policy as `rperf stat`).
|
|
1027
|
+
# Merges every loadable profile-*.json.gz in RPERF_SESSION_DIR into one
# data hash, prints the stat summary when @stat is set, writes it to @output
# when set, and removes the session dir.
#
# Returns the merged hash; nil when there is no session dir, when no profile
# could be loaded, or when aggregation raised (a warning goes to stderr).
def self._aggregate_and_report(root_data = nil)
  session_dir = ENV["RPERF_SESSION_DIR"]
  return unless session_dir && File.directory?(session_dir)

  merged_samples = []
  merged_label_sets = [{}]
  merged_label_sets_index = { {} => 0 }
  total_trigger_count = 0
  total_sampling_count = 0
  total_sampling_time_ns = 0
  max_duration_ns = 0
  total_duration_ns = 0
  total_user_ns = 0
  total_sys_ns = 0
  process_count = 0

  # Glob relative to session_dir (base:) so glob metacharacters in the
  # directory path are not treated as pattern syntax; otherwise a session
  # dir under e.g. "/tmp/run[1]" would match no profiles and the children's
  # data would be discarded by the cleanup below.
  Dir.glob("profile-*.json.gz", base: session_dir).each do |name|
    file = File.join(session_dir, name)
    begin
      data = load(file)
    rescue StandardError => e
      $stderr.puts "rperf: warning: failed to load #{file}: #{e.message}"
      next
    end
    next unless data
    _merge_into(merged_samples, merged_label_sets, data, merged_label_sets_index)
    total_trigger_count += (data[:trigger_count] || 0)
    total_sampling_count += (data[:sampling_count] || 0)
    total_sampling_time_ns += (data[:sampling_time_ns] || 0)
    d = data[:duration_ns] || 0
    max_duration_ns = d if d > max_duration_ns
    total_duration_ns += d
    total_user_ns += (data[:user_ns] || 0)
    total_sys_ns += (data[:sys_ns] || 0)
    process_count += 1
  end

  if process_count == 0
    # Nothing loadable — remove the session dir here, or stop's empty-dir
    # rmdir would fail on the leftover corrupt files and leak the dir
    _cleanup_session_dir(session_dir)
    return
  end

  # mode/frequency: the root's own profile is authoritative; the env vars
  # are only set by the CLI or inherit: true (and default to the root's
  # actual settings via _setup_inherit for the API case)
  saved = @_child_start_opts
  merged_data = {
    mode: (root_data && root_data[:mode]) || (saved ? saved[:mode] : (ENV["RPERF_MODE"] || "wall").to_sym),
    frequency: (root_data && root_data[:frequency]) || (saved ? saved[:frequency] : (ENV["RPERF_FREQUENCY"] || 1000).to_i),
    aggregated_samples: merged_samples,
    label_sets: merged_label_sets,
    trigger_count: total_trigger_count,
    sampling_count: total_sampling_count,
    sampling_time_ns: total_sampling_time_ns,
    duration_ns: max_duration_ns,        # longest single process
    total_duration_ns: total_duration_ns, # sum over all processes
    user_ns: total_user_ns,
    sys_ns: total_sys_ns,
    process_count: process_count,
  }

  # GC/OS stats are taken from the root process only.
  if root_data
    merged_data[:gc_stats] = root_data[:gc_stats] if root_data[:gc_stats]
    merged_data[:maxrss_mb] = root_data[:maxrss_mb] if root_data[:maxrss_mb]
  end

  print_stat(merged_data) if @stat
  write_data(@output, merged_data, @format) if @output

  _cleanup_session_dir(session_dir)

  merged_data
rescue => e
  $stderr.puts "rperf: warning: failed to aggregate multi-process data: #{e.message}"
  # stop() falls back to writing the root's own data when this returns nil
  _cleanup_session_dir(session_dir)
  nil
end
|
|
1108
|
+
# Not private — called from at_exit block which runs in top-level context
|
|
1109
|
+
|
|
1110
|
+
# Recursively removes the given session directory. Any StandardError raised
# while doing so is reported on stderr as a warning instead of propagating.
def self._cleanup_session_dir(session_dir)
  begin
    require "fileutils"
    FileUtils.rm_rf(session_dir)
  rescue => err
    $stderr.puts "rperf: warning: failed to clean up session dir: #{err.message}"
  end
end
|
|
1116
|
+
private_class_method :_cleanup_session_dir
|
|
1117
|
+
|
|
1118
|
+
# Appends one process's samples to merged_samples, remapping each sample's
# label_set_id from the child's label-set table to merged_label_sets.
#
# merged_samples          - Array receiving [frames, weight, thread_seq, label_set_id]
# merged_label_sets       - Array of label sets; grows as unseen sets appear
# data                    - Hash read via :label_sets and :aggregated_samples
# merged_label_sets_index - optional Hash (label set => index) kept across
#                           calls; rebuilt from merged_label_sets when nil
def self._merge_into(merged_samples, merged_label_sets, data, merged_label_sets_index = nil)
  # Build a reverse index when the caller doesn't maintain one across calls
  merged_label_sets_index ||= merged_label_sets.each_with_index.to_h

  id_map = {}
  (data[:label_sets] || [{}]).each_with_index do |label_set, child_id|
    # Normalize keys to symbols for consistent comparison
    key = label_set.is_a?(Hash) ? label_set.transform_keys(&:to_sym) : label_set
    id_map[child_id] = merged_label_sets_index[key] || begin
      merged_label_sets << key
      merged_label_sets_index[key] = merged_label_sets.size - 1
    end
  end

  # Samples with a missing or unknown label_set_id fall back to id 0.
  (data[:aggregated_samples] || []).each do |frames, weight, thread_seq, label_set_id|
    merged_samples << [frames, weight, thread_seq, id_map[label_set_id || 0] || 0]
  end
end
|
|
1146
|
+
private_class_method :_merge_into
|
|
1147
|
+
|
|
607
1148
|
# ENV-based auto-start for CLI usage
|
|
608
1149
|
if ENV["RPERF_ENABLED"] == "1"
|
|
609
1150
|
_rperf_mode_str = ENV["RPERF_MODE"] || "cpu"
|
|
@@ -618,22 +1159,70 @@ module Rperf
|
|
|
618
1159
|
ENV["RPERF_FORMAT"].to_sym
|
|
619
1160
|
end
|
|
620
1161
|
_rperf_stat = ENV["RPERF_STAT"] == "1"
|
|
621
|
-
_rperf_signal =
|
|
622
|
-
when nil then nil
|
|
623
|
-
when "false" then false
|
|
624
|
-
when /\A\d+\z/ then ENV["RPERF_SIGNAL"].to_i
|
|
625
|
-
else raise ArgumentError, "RPERF_SIGNAL must be a signal number or 'false', got: #{ENV["RPERF_SIGNAL"].inspect}"
|
|
626
|
-
end
|
|
1162
|
+
_rperf_signal = _parse_signal_env
|
|
627
1163
|
_rperf_aggregate = ENV["RPERF_AGGREGATE"] != "0"
|
|
1164
|
+
_rperf_original_output = _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.json.gz")
|
|
1165
|
+
|
|
628
1166
|
_rperf_start_opts = { frequency: (ENV["RPERF_FREQUENCY"] || 1000).to_i, mode: _rperf_mode,
|
|
629
|
-
output: _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.json.gz"),
|
|
630
1167
|
verbose: ENV["RPERF_VERBOSE"] == "1",
|
|
631
|
-
format: _rperf_format,
|
|
632
|
-
stat: _rperf_stat,
|
|
633
1168
|
aggregate: _rperf_aggregate }
|
|
634
1169
|
_rperf_start_opts[:signal] = _rperf_signal unless _rperf_signal.nil?
|
|
635
|
-
|
|
636
|
-
|
|
1170
|
+
_rperf_start_opts[:defer] = true if ENV["RPERF_DEFER"] == "1"
|
|
1171
|
+
|
|
1172
|
+
if ENV["RPERF_SESSION_DIR"] && Process.pid.to_s != ENV["RPERF_ROOT_PROCESS"]
|
|
1173
|
+
# spawn / fork+exec child: write to session dir, no aggregation.
|
|
1174
|
+
# Session dir is created eagerly by the root process (CLI or API).
|
|
1175
|
+
# If it doesn't exist, skip profiling entirely — don't fall back to
|
|
1176
|
+
# normal mode which would duplicate output with the root process.
|
|
1177
|
+
_rperf_session_dir = ENV["RPERF_SESSION_DIR"]
|
|
1178
|
+
if File.directory?(_rperf_session_dir)
|
|
1179
|
+
require "securerandom"
|
|
1180
|
+
# Random suffix: PID reuse must not overwrite an earlier child's profile
|
|
1181
|
+
_rperf_start_opts[:output] = File.join(_rperf_session_dir, "profile-#{Process.pid}-#{SecureRandom.hex(4)}.json.gz")
|
|
1182
|
+
_rperf_start_opts[:format] = :json
|
|
1183
|
+
_rperf_start_opts[:stat] = false
|
|
1184
|
+
_rperf_start_opts[:verbose] = false
|
|
1185
|
+
|
|
1186
|
+
_install_fork_hook
|
|
1187
|
+
start(**_rperf_start_opts, inherit: false)
|
|
1188
|
+
@_session_dir_output = true
|
|
1189
|
+
label("%pid": Process.pid.to_s)
|
|
1190
|
+
at_exit { stop }
|
|
1191
|
+
end
|
|
1192
|
+
elsif ENV["RPERF_SESSION_DIR"]
|
|
1193
|
+
# Root process: start with normal output settings.
|
|
1194
|
+
# If no fork/spawn happens, behaves exactly like single-process mode.
|
|
1195
|
+
_rperf_start_opts[:output] = _rperf_original_output
|
|
1196
|
+
_rperf_start_opts[:format] = _rperf_format
|
|
1197
|
+
_rperf_start_opts[:stat] = _rperf_stat
|
|
1198
|
+
|
|
1199
|
+
_install_fork_hook
|
|
1200
|
+
start(**_rperf_start_opts, inherit: false)
|
|
1201
|
+
|
|
1202
|
+
at_exit { Rperf.stop }
|
|
1203
|
+
else
|
|
1204
|
+
_rperf_start_opts[:output] = _rperf_original_output
|
|
1205
|
+
_rperf_start_opts[:format] = _rperf_format
|
|
1206
|
+
_rperf_start_opts[:stat] = _rperf_stat
|
|
1207
|
+
_rperf_start_opts[:inherit] = false # no RPERF_SESSION_DIR means --no-inherit
|
|
1208
|
+
start(**_rperf_start_opts)
|
|
1209
|
+
# --no-inherit: scrub the env the CLI injected for THIS process, so
|
|
1210
|
+
# Ruby descendants spawned by the app don't auto-start their own
|
|
1211
|
+
# sessions (and clobber RPERF_OUTPUT with their own profiles)
|
|
1212
|
+
%w[RPERF_ENABLED RPERF_OUTPUT RPERF_STAT RPERF_FORMAT RPERF_VERBOSE
|
|
1213
|
+
RPERF_FREQUENCY RPERF_MODE RPERF_SIGNAL RPERF_AGGREGATE
|
|
1214
|
+
RPERF_DEFER].each { |k| ENV.delete(k) }
|
|
1215
|
+
if ENV["RUBYOPT"]
|
|
1216
|
+
rubyopt = ENV["RUBYOPT"].split(" ").reject { |o| o == "-rrperf" }.join(" ")
|
|
1217
|
+
rubyopt.empty? ? ENV.delete("RUBYOPT") : ENV["RUBYOPT"] = rubyopt
|
|
1218
|
+
end
|
|
1219
|
+
if ENV["RUBYLIB"]
|
|
1220
|
+
_rperf_lib_dir = File.expand_path("..", __FILE__)
|
|
1221
|
+
rubylib = ENV["RUBYLIB"].split(File::PATH_SEPARATOR).reject { |p| p == _rperf_lib_dir }.join(File::PATH_SEPARATOR)
|
|
1222
|
+
rubylib.empty? ? ENV.delete("RUBYLIB") : ENV["RUBYLIB"] = rubylib
|
|
1223
|
+
end
|
|
1224
|
+
at_exit { stop }
|
|
1225
|
+
end
|
|
637
1226
|
end
|
|
638
1227
|
|
|
639
1228
|
# Text report encoder — human/AI readable flat + cumulative top-N table.
|
|
@@ -687,7 +1276,10 @@ module Rperf
|
|
|
687
1276
|
return "" if !samples || samples.empty?
|
|
688
1277
|
merged = Hash.new(0)
|
|
689
1278
|
samples.each do |frames, weight|
|
|
690
|
-
|
|
1279
|
+
# ";" is the frame separator and has no escape in the collapsed
|
|
1280
|
+
# format — replace it so a pathological method name cannot corrupt
|
|
1281
|
+
# stack splitting downstream (FlameGraph/speedscope)
|
|
1282
|
+
key = frames.reverse.map { |_, label| label.include?(";") ? label.tr(";", ",") : label }.join(";")
|
|
691
1283
|
merged[key] += weight
|
|
692
1284
|
end
|
|
693
1285
|
merged.map { |stack, weight| "#{stack} #{weight}" }.join("\n") + "\n"
|
|
@@ -703,10 +1295,10 @@ module Rperf
|
|
|
703
1295
|
module_function
|
|
704
1296
|
|
|
705
1297
|
def encode(data)
|
|
706
|
-
samples_raw = data[:aggregated_samples]
|
|
1298
|
+
samples_raw = data[:aggregated_samples] || []
|
|
707
1299
|
frequency = data[:frequency]
|
|
708
|
-
interval_ns = 1_000_000_000 / frequency
|
|
709
|
-
mode = data[:mode] || :cpu
|
|
1300
|
+
interval_ns = (frequency && frequency > 0) ? 1_000_000_000 / frequency : 0
|
|
1301
|
+
mode = (data[:mode] || :cpu).to_sym
|
|
710
1302
|
|
|
711
1303
|
# Build string table: index 0 must be ""
|
|
712
1304
|
string_table = [""]
|
|
@@ -722,7 +1314,7 @@ module Rperf
|
|
|
722
1314
|
|
|
723
1315
|
# Convert string frames to index frames and merge identical stacks per thread/label
|
|
724
1316
|
merged = Hash.new(0)
|
|
725
|
-
thread_seq_key =
|
|
1317
|
+
thread_seq_key = nil # interned lazily — only when a sample carries thread_seq
|
|
726
1318
|
label_sets = data[:label_sets] # Array of Hash (may be nil)
|
|
727
1319
|
samples_raw.each do |frames, weight, thread_seq, label_set_id|
|
|
728
1320
|
key = [frames.map { |path, label| [intern.(path), intern.(label)] }, thread_seq || 0, label_set_id || 0]
|
|
@@ -742,8 +1334,8 @@ module Rperf
|
|
|
742
1334
|
end
|
|
743
1335
|
end
|
|
744
1336
|
|
|
745
|
-
# Build
|
|
746
|
-
|
|
1337
|
+
# Build the frame → id table (locations and functions are 1:1)
|
|
1338
|
+
frame_ids = build_tables(merged)
|
|
747
1339
|
|
|
748
1340
|
# Intern type label and unit
|
|
749
1341
|
type_label = mode == :wall ? "wall" : "cpu"
|
|
@@ -759,11 +1351,12 @@ module Rperf
|
|
|
759
1351
|
# field 2: sample (repeated Sample) with thread_seq + user labels
|
|
760
1352
|
merged.each do |(frames, thread_seq, label_set_id), weight|
|
|
761
1353
|
sample_buf = "".b
|
|
762
|
-
loc_ids = frames.map { |f|
|
|
1354
|
+
loc_ids = frames.map { |f| frame_ids[f] }
|
|
763
1355
|
sample_buf << encode_packed_uint64(1, loc_ids)
|
|
764
1356
|
sample_buf << encode_packed_int64(2, [weight])
|
|
765
1357
|
if thread_seq && thread_seq > 0
|
|
766
1358
|
label_buf = "".b
|
|
1359
|
+
thread_seq_key ||= intern.("thread_seq")
|
|
767
1360
|
label_buf << encode_int64(1, thread_seq_key) # key
|
|
768
1361
|
label_buf << encode_int64(3, thread_seq) # num
|
|
769
1362
|
sample_buf << encode_message(3, label_buf)
|
|
@@ -782,19 +1375,18 @@ module Rperf
|
|
|
782
1375
|
buf << encode_message(2, sample_buf)
|
|
783
1376
|
end
|
|
784
1377
|
|
|
785
|
-
# field 4: location (repeated Location)
|
|
786
|
-
|
|
1378
|
+
# field 4: location (repeated Location) — Line points at the same id
|
|
1379
|
+
frame_ids.each do |_frame, id|
|
|
787
1380
|
loc_buf = "".b
|
|
788
|
-
loc_buf << encode_uint64(1,
|
|
1381
|
+
loc_buf << encode_uint64(1, id)
|
|
789
1382
|
line_buf = "".b
|
|
790
|
-
|
|
791
|
-
line_buf << encode_uint64(1, func_id)
|
|
1383
|
+
line_buf << encode_uint64(1, id)
|
|
792
1384
|
loc_buf << encode_message(4, line_buf)
|
|
793
1385
|
buf << encode_message(4, loc_buf)
|
|
794
1386
|
end
|
|
795
1387
|
|
|
796
1388
|
# field 5: function (repeated Function)
|
|
797
|
-
|
|
1389
|
+
frame_ids.each do |frame, func_id|
|
|
798
1390
|
func_buf = "".b
|
|
799
1391
|
func_buf << encode_uint64(1, func_id)
|
|
800
1392
|
func_buf << encode_int64(2, frame[1]) # name (label_idx)
|
|
@@ -841,22 +1433,23 @@ module Rperf
|
|
|
841
1433
|
buf
|
|
842
1434
|
end
|
|
843
1435
|
|
|
1436
|
+
# Assign sequential ids to unique frames. rperf emits exactly one
|
|
1437
|
+
# Location and one Function per frame, sharing the same id, so a single
|
|
1438
|
+
# table serves both.
|
|
844
1439
|
def build_tables(merged)
  # Maps each distinct frame to a sequential id starting at 1, in order of
  # first appearance. Only the frames element of each merged key is used;
  # the thread/label parts of the key and the weight are ignored.
  merged.each_with_object({}) do |((frames, _thread_seq, _label_set_id), _weight), frame_ids|
    frames.each do |frame|
      # Ids are dense, so the next free id is always size + 1.
      frame_ids[frame] ||= frame_ids.size + 1
    end
  end
end
|
|
861
1454
|
|
|
862
1455
|
# --- Protobuf encoding helpers ---
|