rperf 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +21 -0
- data/README.md +75 -49
- data/docs/help.md +255 -36
- data/docs/logo.svg +25 -0
- data/exe/rperf +154 -30
- data/ext/rperf/rperf.c +235 -121
- data/lib/rperf/active_job.rb +1 -0
- data/lib/rperf/rack.rb +25 -3
- data/lib/rperf/version.rb +1 -1
- data/lib/rperf/viewer.rb +847 -0
- data/lib/rperf.rb +663 -92
- metadata +7 -4
data/lib/rperf.rb
CHANGED
|
@@ -13,21 +13,45 @@ end
|
|
|
13
13
|
|
|
14
14
|
module Rperf
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
@
|
|
19
|
-
@
|
|
16
|
+
# --- Module-level state (single global profiler) ---
|
|
17
|
+
# Profiling session
|
|
18
|
+
@verbose = false # verbose stats output on stop
|
|
19
|
+
@output = nil # output file path (nil = no file)
|
|
20
|
+
@format = nil # output format (:json, :pprof, :collapsed, :text, nil = auto)
|
|
21
|
+
@stat = false # print user/sys/real summary to stderr
|
|
22
|
+
@stat_start_mono = nil # Process::CLOCK_MONOTONIC at start (for real time)
|
|
23
|
+
@stat_start_times = nil # Process.times at start (for user/sys time)
|
|
24
|
+
@label_set_table = nil # Array: label_set_id → frozen Hash
|
|
25
|
+
@label_set_index = nil # Hash: frozen label Hash → label_set_id
|
|
26
|
+
# Multi-process (fork/spawn) support
|
|
27
|
+
@_session_dir_output = false # true when @output points to session dir (child process)
|
|
28
|
+
@_session_dir_created = false # true after first fork activates session dir
|
|
29
|
+
@_fork_hook_installed = false # true after Process._fork hook is prepended
|
|
30
|
+
@_saved_env = nil # saved ENV values for restore on stop (inherit: true)
|
|
20
31
|
|
|
21
32
|
# Starts profiling.
|
|
22
|
-
# format: :pprof, :collapsed, or :text. nil = auto-detect from output extension
|
|
33
|
+
# format: :json, :pprof, :collapsed, or :text. nil = auto-detect from output extension
|
|
34
|
+
# .json.gz → json (rperf native, default)
|
|
23
35
|
# .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
|
|
24
36
|
# .txt → text report (human/AI readable flat + cumulative table)
|
|
25
|
-
#
|
|
26
|
-
|
|
37
|
+
# .pb.gz → pprof protobuf (gzip compressed)
|
|
38
|
+
# inherit: controls child process profiling.
|
|
39
|
+
# :fork — (default) automatically profile forked child processes via Process._fork hook.
|
|
40
|
+
# Session dir is created eagerly at start time. Spawned processes are NOT tracked.
|
|
41
|
+
# true — profile both forked and spawned Ruby child processes. Sets RUBYOPT=-rrperf
|
|
42
|
+
# and RPERF_* env vars so spawned Ruby processes auto-start profiling.
|
|
43
|
+
# Use with caution: affects ALL spawned Ruby processes, including independent
|
|
44
|
+
# programs that may use rperf themselves.
|
|
45
|
+
# false — do not track child processes (single-process mode).
|
|
46
|
+
def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true, defer: false, inherit: :fork)
|
|
27
47
|
raise ArgumentError, "frequency must be a positive integer (got #{frequency.inspect})" unless frequency.is_a?(Integer) && frequency > 0
|
|
28
48
|
raise ArgumentError, "frequency must be <= 10000 (10KHz), got #{frequency}" if frequency > 10_000
|
|
29
49
|
raise ArgumentError, "mode must be :cpu or :wall, got #{mode.inspect}" unless %i[cpu wall].include?(mode)
|
|
50
|
+
raise ArgumentError, "inherit must be :fork, true, or false, got #{inherit.inspect}" unless [true, false, :fork].include?(inherit)
|
|
30
51
|
c_mode = mode == :cpu ? 0 : 1
|
|
52
|
+
unless signal.nil? || signal == false || signal.is_a?(Integer)
|
|
53
|
+
raise ArgumentError, "signal must be nil, false, or an Integer, got #{signal.inspect}"
|
|
54
|
+
end
|
|
31
55
|
c_signal = signal.nil? ? -1 : (signal ? signal.to_i : 0)
|
|
32
56
|
if c_signal > 0
|
|
33
57
|
raise ArgumentError, "signal mode is only supported on Linux" unless RUBY_PLATFORM =~ /linux/
|
|
@@ -41,51 +65,146 @@ module Rperf
|
|
|
41
65
|
@output = output
|
|
42
66
|
@format = format
|
|
43
67
|
@stat = stat
|
|
44
|
-
@stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
68
|
+
@stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
69
|
+
@stat_start_times = Process.times
|
|
45
70
|
@label_set_table = nil
|
|
46
71
|
@label_set_index = nil
|
|
47
72
|
_c_start(frequency, c_mode, aggregate, c_signal, defer)
|
|
48
73
|
|
|
74
|
+
# Set up child process tracking
|
|
75
|
+
if inherit && !ENV["RPERF_SESSION_DIR"]
|
|
76
|
+
_setup_inherit(mode, frequency, signal, aggregate, output, format, stat, inherit, defer)
|
|
77
|
+
end
|
|
78
|
+
|
|
49
79
|
if block_given?
|
|
50
80
|
begin
|
|
51
81
|
yield
|
|
52
82
|
ensure
|
|
53
|
-
|
|
83
|
+
result = stop
|
|
54
84
|
end
|
|
85
|
+
result
|
|
55
86
|
end
|
|
56
87
|
end
|
|
57
88
|
|
|
89
|
+
# VM state integer → [label key, label value] mapping.
|
|
90
|
+
# These values appear as "%GVL" / "%GC" label keys in label_sets.
|
|
91
|
+
VM_STATE_LABELS = {
|
|
92
|
+
1 => ["%GVL", "blocked"],
|
|
93
|
+
2 => ["%GVL", "wait"],
|
|
94
|
+
3 => ["%GC", "mark"],
|
|
95
|
+
4 => ["%GC", "sweep"],
|
|
96
|
+
}.freeze
|
|
97
|
+
|
|
58
98
|
def self.stop
|
|
99
|
+
# Check if we need to aggregate child process data.
|
|
100
|
+
# @_session_dir_created: fork happened and session dir is active.
|
|
101
|
+
# Otherwise: check for actual child profile files (spawn-only case).
|
|
102
|
+
session_dir = ENV["RPERF_SESSION_DIR"]
|
|
103
|
+
is_root = session_dir && Process.pid.to_s == ENV["RPERF_ROOT_PROCESS"]
|
|
104
|
+
has_child_profiles = is_root && !@_session_dir_created &&
|
|
105
|
+
File.directory?(session_dir.to_s) &&
|
|
106
|
+
!Dir.glob(File.join(session_dir.to_s, "profile-*.json.gz")).empty?
|
|
107
|
+
needs_aggregation = is_root && (@_session_dir_created || has_child_profiles)
|
|
108
|
+
|
|
59
109
|
data = _c_stop
|
|
60
110
|
return unless data
|
|
61
111
|
|
|
112
|
+
# Record process times for multi-process aggregation
|
|
113
|
+
times = Process.times
|
|
114
|
+
start_times = @stat_start_times || Struct.new(:utime, :stime).new(0.0, 0.0)
|
|
115
|
+
data[:user_ns] = ((times.utime - start_times.utime) * 1_000_000_000).to_i
|
|
116
|
+
data[:sys_ns] = ((times.stime - start_times.stime) * 1_000_000_000).to_i
|
|
117
|
+
|
|
62
118
|
# When aggregate: false, C extension returns :raw_samples but not
|
|
63
119
|
# :aggregated_samples. Build aggregated view so encoders always work.
|
|
64
120
|
if data[:raw_samples] && !data[:aggregated_samples]
|
|
65
121
|
merged = {}
|
|
66
|
-
data[:raw_samples].each do |frames, weight, thread_seq, label_set_id|
|
|
67
|
-
key = [frames, thread_seq || 0, label_set_id || 0]
|
|
122
|
+
data[:raw_samples].each do |frames, weight, thread_seq, label_set_id, vm_state|
|
|
123
|
+
key = [frames, thread_seq || 0, label_set_id || 0, vm_state || 0]
|
|
68
124
|
if merged.key?(key)
|
|
69
125
|
merged[key] += weight
|
|
70
126
|
else
|
|
71
127
|
merged[key] = weight
|
|
72
128
|
end
|
|
73
129
|
end
|
|
74
|
-
data[:aggregated_samples] = merged.map { |(frames, ts, lsi), w| [frames, w, ts, lsi] }
|
|
130
|
+
data[:aggregated_samples] = merged.map { |(frames, ts, lsi, vs), w| [frames, w, ts, lsi, vs] }
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
merge_vm_state_labels!(data)
|
|
134
|
+
|
|
135
|
+
if needs_aggregation
|
|
136
|
+
# Root process with children: write root's own profile to session dir
|
|
137
|
+
# (fixed json.gz format), then aggregate all profiles.
|
|
138
|
+
# Root's @output/@format/@stat are preserved for the merged result.
|
|
139
|
+
print_stats(data) if @verbose
|
|
140
|
+
begin
|
|
141
|
+
save(File.join(session_dir, "profile-#{Process.pid}.json.gz"), data, format: :json)
|
|
142
|
+
rescue SystemCallError
|
|
143
|
+
# Session dir may have been removed (e.g., test scenario) — continue to aggregation
|
|
144
|
+
end
|
|
145
|
+
merged = _aggregate_and_report
|
|
146
|
+
if merged.nil? && data
|
|
147
|
+
# Aggregation failed — fall back to root's own data
|
|
148
|
+
$stderr.puts "rperf: warning: multi-process aggregation failed; writing root process data only"
|
|
149
|
+
write_data(@output, data, @format) if @output
|
|
150
|
+
print_stat(data) if @stat
|
|
151
|
+
end
|
|
152
|
+
_cleanup_session_state
|
|
153
|
+
return merged || data
|
|
75
154
|
end
|
|
76
155
|
|
|
77
156
|
print_stats(data) if @verbose
|
|
78
157
|
print_stat(data) if @stat
|
|
79
158
|
|
|
80
159
|
if @output
|
|
81
|
-
|
|
160
|
+
if @_session_dir_output
|
|
161
|
+
# Child process writing to session dir — tolerate missing dir
|
|
162
|
+
begin
|
|
163
|
+
write_data(@output, data, @format)
|
|
164
|
+
rescue SystemCallError
|
|
165
|
+
# Parent may have already cleaned up the session dir (e.g., parent
|
|
166
|
+
# exited first and rm_rf'd it), or disk is full. Silently skip —
|
|
167
|
+
# crashing in at_exit is worse than losing one child's profile.
|
|
168
|
+
end
|
|
169
|
+
else
|
|
170
|
+
write_data(@output, data, @format)
|
|
171
|
+
end
|
|
82
172
|
@output = nil
|
|
83
173
|
@format = nil
|
|
84
174
|
end
|
|
85
175
|
|
|
176
|
+
_cleanup_session_state
|
|
86
177
|
data
|
|
87
178
|
end
|
|
88
179
|
|
|
180
|
+
def self._cleanup_session_state
|
|
181
|
+
session_dir = ENV.delete("RPERF_SESSION_DIR")
|
|
182
|
+
ENV.delete("RPERF_ROOT_PROCESS")
|
|
183
|
+
ENV.delete("RPERF_DEFER")
|
|
184
|
+
@_session_dir_created = false
|
|
185
|
+
@_session_dir_output = false
|
|
186
|
+
# Restore ENV variables saved by _setup_inherit (inherit: true)
|
|
187
|
+
if @_saved_env
|
|
188
|
+
@_saved_env.each do |key, original|
|
|
189
|
+
if original.nil?
|
|
190
|
+
ENV.delete(key)
|
|
191
|
+
else
|
|
192
|
+
ENV[key] = original
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
@_saved_env = nil
|
|
196
|
+
end
|
|
197
|
+
# Remove eagerly-created session dir if it's empty (no children ran)
|
|
198
|
+
if session_dir && File.directory?(session_dir)
|
|
199
|
+
begin
|
|
200
|
+
Dir.rmdir(session_dir) # only succeeds if empty
|
|
201
|
+
rescue SystemCallError
|
|
202
|
+
# not empty or already removed — fine
|
|
203
|
+
end
|
|
204
|
+
end
|
|
205
|
+
end
|
|
206
|
+
private_class_method :_cleanup_session_state
|
|
207
|
+
|
|
89
208
|
# Returns a snapshot of the current profiling data without stopping.
|
|
90
209
|
# Only works in aggregate mode (the default). Returns nil if not profiling.
|
|
91
210
|
# The returned data has the same format as stop's return value and can be
|
|
@@ -95,31 +214,48 @@ module Rperf
|
|
|
95
214
|
# This allows interval-based profiling where each snapshot covers only
|
|
96
215
|
# the period since the last clear.
|
|
97
216
|
def self.snapshot(clear: false)
|
|
98
|
-
_c_snapshot(clear)
|
|
217
|
+
data = _c_snapshot(clear)
|
|
218
|
+
return unless data
|
|
219
|
+
merge_vm_state_labels!(data)
|
|
220
|
+
data
|
|
99
221
|
end
|
|
100
222
|
|
|
101
223
|
# Label set management for per-context profiling.
|
|
102
224
|
# Label sets are stored as an Array of Hashes, indexed by label_set_id.
|
|
103
225
|
# Index 0 is reserved (no labels).
|
|
104
226
|
|
|
105
|
-
@label_set_table = nil # Array of frozen Hash
|
|
106
|
-
@label_set_index = nil # Hash → id (for dedup)
|
|
107
|
-
|
|
108
227
|
def self._init_label_sets
|
|
109
228
|
@label_set_table = [{}] # id 0 = no labels
|
|
110
229
|
@label_set_index = { {} => 0 }
|
|
111
230
|
end
|
|
112
231
|
|
|
113
232
|
def self._intern_label_set(hash)
|
|
114
|
-
|
|
115
|
-
@label_set_index[
|
|
233
|
+
hash.freeze
|
|
234
|
+
@label_set_index[hash] ||= begin
|
|
116
235
|
id = @label_set_table.size
|
|
117
|
-
@label_set_table <<
|
|
236
|
+
@label_set_table << hash
|
|
118
237
|
_c_set_label_sets(@label_set_table)
|
|
119
238
|
id
|
|
120
239
|
end
|
|
121
240
|
end
|
|
122
241
|
|
|
242
|
+
# Merges the given keyword labels into the current thread's label set,
|
|
243
|
+
# sets the result on the current thread, and returns [previous_id, new_id].
|
|
244
|
+
# Callers use previous_id to restore labels after a block.
|
|
245
|
+
def self._merge_and_set_label(kw)
|
|
246
|
+
_init_label_sets unless @label_set_table
|
|
247
|
+
|
|
248
|
+
cur_id = _c_get_label
|
|
249
|
+
cur_labels = @label_set_table[cur_id] || {}
|
|
250
|
+
kw.each_value { |v| v.freeze }
|
|
251
|
+
new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
|
|
252
|
+
new_id = _intern_label_set(new_labels)
|
|
253
|
+
_c_set_label(new_id)
|
|
254
|
+
|
|
255
|
+
[cur_id, new_id]
|
|
256
|
+
end
|
|
257
|
+
private_class_method :_merge_and_set_label
|
|
258
|
+
|
|
123
259
|
# Sets labels on the current thread for profiling annotation.
|
|
124
260
|
# With a block: restores previous labels when the block exits.
|
|
125
261
|
# Without a block: sets labels persistently on the current thread.
|
|
@@ -130,14 +266,10 @@ module Rperf
|
|
|
130
266
|
#
|
|
131
267
|
# Values of nil remove that key. Existing labels are merged.
|
|
132
268
|
def self.label(**kw, &block)
|
|
133
|
-
|
|
269
|
+
return yield if block && !_c_running?
|
|
270
|
+
return unless _c_running?
|
|
134
271
|
|
|
135
|
-
cur_id =
|
|
136
|
-
cur_labels = @label_set_table[cur_id] || {}
|
|
137
|
-
|
|
138
|
-
new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
|
|
139
|
-
new_id = _intern_label_set(new_labels)
|
|
140
|
-
_c_set_label(new_id)
|
|
272
|
+
cur_id, _new_id = _merge_and_set_label(kw)
|
|
141
273
|
|
|
142
274
|
if block
|
|
143
275
|
begin
|
|
@@ -162,13 +294,7 @@ module Rperf
|
|
|
162
294
|
raise ArgumentError, "Rperf.profile requires a block" unless block
|
|
163
295
|
raise RuntimeError, "Rperf is not started" unless _c_running?
|
|
164
296
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
cur_id = _c_get_label
|
|
168
|
-
cur_labels = @label_set_table[cur_id] || {}
|
|
169
|
-
new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
|
|
170
|
-
new_id = _intern_label_set(new_labels)
|
|
171
|
-
_c_set_label(new_id)
|
|
297
|
+
cur_id, _new_id = _merge_and_set_label(kw)
|
|
172
298
|
|
|
173
299
|
_c_profile_inc
|
|
174
300
|
|
|
@@ -189,11 +315,54 @@ module Rperf
|
|
|
189
315
|
end
|
|
190
316
|
|
|
191
317
|
|
|
318
|
+
# Merge vm_state from C samples into label_sets as a "%GVL" / "%GC" label key (see VM_STATE_LABELS).
|
|
319
|
+
# Mutates data in place: updates label_set_id on each sample, strips vm_state,
|
|
320
|
+
# and extends label_sets with new entries as needed.
|
|
321
|
+
def self.merge_vm_state_labels!(data)
|
|
322
|
+
samples_key = data[:aggregated_samples] ? :aggregated_samples : :raw_samples
|
|
323
|
+
samples = data[samples_key]
|
|
324
|
+
return unless samples
|
|
325
|
+
|
|
326
|
+
orig_label_sets = data[:label_sets]
|
|
327
|
+
label_sets = (orig_label_sets || [{}]).dup
|
|
328
|
+
mapping = {} # [original_label_set_id, vm_state] => new_label_set_id
|
|
329
|
+
modified = false
|
|
330
|
+
|
|
331
|
+
samples.each do |sample|
|
|
332
|
+
vm_state = sample[4] || 0
|
|
333
|
+
next if vm_state == 0
|
|
334
|
+
next unless VM_STATE_LABELS.key?(vm_state)
|
|
335
|
+
|
|
336
|
+
label_set_id = sample[3] || 0
|
|
337
|
+
cache_key = [label_set_id, vm_state]
|
|
338
|
+
new_id = mapping[cache_key]
|
|
339
|
+
unless new_id
|
|
340
|
+
base = label_sets[label_set_id] || {}
|
|
341
|
+
key, value = VM_STATE_LABELS[vm_state]
|
|
342
|
+
new_ls = base.merge(key.to_sym => value).freeze
|
|
343
|
+
new_id = label_sets.size
|
|
344
|
+
label_sets << new_ls
|
|
345
|
+
mapping[cache_key] = new_id
|
|
346
|
+
end
|
|
347
|
+
sample[3] = new_id
|
|
348
|
+
modified = true
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
# Strip vm_state (5th element) from all samples
|
|
352
|
+
samples.each { |s| s.pop if s.size > 4 }
|
|
353
|
+
|
|
354
|
+
# Only set label_sets if they were already present or we added vm_state labels
|
|
355
|
+
data[:label_sets] = label_sets if orig_label_sets || modified
|
|
356
|
+
end
|
|
357
|
+
private_class_method :merge_vm_state_labels!
|
|
358
|
+
|
|
192
359
|
# Saves profiling data to a file.
|
|
193
|
-
# format: :pprof, :collapsed, or :text. nil = auto-detect from path extension
|
|
360
|
+
# format: :json, :pprof, :collapsed, or :text. nil = auto-detect from path extension
|
|
361
|
+
# .json.gz → json (rperf native, gzip compressed, default)
|
|
362
|
+
# .json → json (plain text, readable by jq etc.)
|
|
194
363
|
# .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
|
|
195
364
|
# .txt → text report (human/AI readable flat + cumulative table)
|
|
196
|
-
#
|
|
365
|
+
# .pb.gz → pprof protobuf (gzip compressed)
|
|
197
366
|
def self.save(path, data, format: nil)
|
|
198
367
|
write_data(path, data, format)
|
|
199
368
|
end
|
|
@@ -205,17 +374,49 @@ module Rperf
|
|
|
205
374
|
File.write(path, Collapsed.encode(data))
|
|
206
375
|
when :text
|
|
207
376
|
File.write(path, Text.encode(data))
|
|
377
|
+
when :json
|
|
378
|
+
require "json"
|
|
379
|
+
json_data = data.merge(rperf_version: VERSION, pid: Process.pid, ppid: Process.ppid)
|
|
380
|
+
json_str = JSON.generate(json_data)
|
|
381
|
+
if path.to_s.end_with?(".gz")
|
|
382
|
+
File.binwrite(path, gzip(json_str))
|
|
383
|
+
else
|
|
384
|
+
File.write(path, json_str)
|
|
385
|
+
end
|
|
208
386
|
else
|
|
209
387
|
File.binwrite(path, gzip(PProf.encode(data)))
|
|
210
388
|
end
|
|
211
389
|
end
|
|
212
390
|
private_class_method :write_data
|
|
213
391
|
|
|
392
|
+
# Load a profile saved by rperf record (.json.gz or .json).
|
|
393
|
+
# Returns the data hash (same format as Rperf.stop / Rperf.snapshot).
|
|
394
|
+
# Warns to stderr if the file was saved by a different rperf version.
|
|
395
|
+
def self.load(path)
|
|
396
|
+
raw_bytes = File.binread(path)
|
|
397
|
+
# Auto-detect gzip by magic bytes (1f 8b)
|
|
398
|
+
raw = if raw_bytes.byteslice(0, 2) == "\x1f\x8b".b
|
|
399
|
+
Zlib::GzipReader.new(StringIO.new(raw_bytes)).read
|
|
400
|
+
else
|
|
401
|
+
raw_bytes
|
|
402
|
+
end
|
|
403
|
+
require "json"
|
|
404
|
+
data = JSON.parse(raw, symbolize_names: true)
|
|
405
|
+
saved_version = data.delete(:rperf_version)
|
|
406
|
+
if saved_version && saved_version != VERSION
|
|
407
|
+
$stderr.puts "rperf: warning: file was saved by rperf #{saved_version} (current: #{VERSION})"
|
|
408
|
+
elsif saved_version.nil?
|
|
409
|
+
$stderr.puts "rperf: warning: file has no version info (may be from an older rperf)"
|
|
410
|
+
end
|
|
411
|
+
data
|
|
412
|
+
end
|
|
413
|
+
|
|
214
414
|
def self.detect_format(path, format)
|
|
215
415
|
return format.to_sym if format
|
|
216
416
|
case path.to_s
|
|
217
|
-
when /\.collapsed\z/
|
|
218
|
-
when /\.txt\z/
|
|
417
|
+
when /\.collapsed\z/ then :collapsed
|
|
418
|
+
when /\.txt\z/ then :text
|
|
419
|
+
when /\.json(\.gz)?\z/ then :json
|
|
219
420
|
else :pprof
|
|
220
421
|
end
|
|
221
422
|
end
|
|
@@ -233,16 +434,15 @@ module Rperf
|
|
|
233
434
|
def self.print_stats(data)
|
|
234
435
|
count = data[:sampling_count] || 0
|
|
235
436
|
total_ns = data[:sampling_time_ns] || 0
|
|
236
|
-
sample_count = data[:sampling_count] || 0
|
|
237
437
|
mode = data[:mode] || :cpu
|
|
238
438
|
frequency = data[:frequency] || 0
|
|
239
439
|
|
|
240
440
|
total_ms = total_ns / 1_000_000.0
|
|
241
441
|
avg_us = count > 0 ? total_ns / count / 1000.0 : 0.0
|
|
242
442
|
|
|
243
|
-
$stderr.puts "[
|
|
244
|
-
$stderr.puts "[
|
|
245
|
-
$stderr.puts "[
|
|
443
|
+
$stderr.puts "[Rperf] mode=#{mode} frequency=#{frequency}Hz"
|
|
444
|
+
$stderr.puts "[Rperf] sampling: #{count} calls, #{format("%.2f", total_ms)}ms total, #{format("%.1f", avg_us)}us/call avg"
|
|
445
|
+
$stderr.puts "[Rperf] samples recorded: #{count}"
|
|
246
446
|
|
|
247
447
|
print_top(data)
|
|
248
448
|
end
|
|
@@ -291,13 +491,13 @@ module Rperf
|
|
|
291
491
|
|
|
292
492
|
def self.print_top_table(kind, table, total_weight)
|
|
293
493
|
top = table.sort_by { |_, w| -w }.first(TOP_N)
|
|
294
|
-
$stderr.puts "[
|
|
494
|
+
$stderr.puts "[Rperf] top #{top.size} by #{kind}:"
|
|
295
495
|
top.each do |key, weight|
|
|
296
496
|
label, path = key
|
|
297
497
|
ms = weight / 1_000_000.0
|
|
298
498
|
pct = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
|
|
299
499
|
loc = path.empty? ? "" : " (#{path})"
|
|
300
|
-
$stderr.puts format("[
|
|
500
|
+
$stderr.puts format("[Rperf] %8.1fms %5.1f%% %s%s", ms, pct, label, loc)
|
|
301
501
|
end
|
|
302
502
|
end
|
|
303
503
|
|
|
@@ -314,8 +514,16 @@ module Rperf
|
|
|
314
514
|
samples_raw = data[:aggregated_samples] || []
|
|
315
515
|
real_ns = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono) * 1_000_000_000).to_i
|
|
316
516
|
times = Process.times
|
|
317
|
-
|
|
318
|
-
|
|
517
|
+
start_times = @stat_start_times || Struct.new(:utime, :stime).new(0.0, 0.0)
|
|
518
|
+
user_ns = ((times.utime - start_times.utime) * 1_000_000_000).to_i
|
|
519
|
+
sys_ns = ((times.stime - start_times.stime) * 1_000_000_000).to_i
|
|
520
|
+
|
|
521
|
+
# In multi-process mode, use aggregated user/sys from all processes
|
|
522
|
+
process_count = data[:process_count] || 0
|
|
523
|
+
if process_count > 1 && data[:user_ns]
|
|
524
|
+
user_ns = data[:user_ns]
|
|
525
|
+
sys_ns = data[:sys_ns] || 0
|
|
526
|
+
end
|
|
319
527
|
|
|
320
528
|
command = ENV["RPERF_STAT_COMMAND"] || "(unknown)"
|
|
321
529
|
|
|
@@ -327,10 +535,10 @@ module Rperf
|
|
|
327
535
|
$stderr.puts format(" %14s ms real", format_ms(real_ns))
|
|
328
536
|
|
|
329
537
|
if samples_raw.size > 0
|
|
330
|
-
breakdown, total_weight = compute_stat_breakdown(samples_raw)
|
|
331
|
-
print_stat_breakdown(breakdown, total_weight)
|
|
538
|
+
breakdown, total_weight = compute_stat_breakdown(samples_raw, data[:label_sets])
|
|
539
|
+
print_stat_breakdown(breakdown, total_weight, data)
|
|
332
540
|
print_stat_runtime_info(data)
|
|
333
|
-
print_stat_system_info
|
|
541
|
+
print_stat_system_info(data)
|
|
334
542
|
print_stat_report(data) if ENV["RPERF_STAT_REPORT"] == "1"
|
|
335
543
|
print_stat_footer(samples_raw, real_ns, data)
|
|
336
544
|
end
|
|
@@ -338,20 +546,25 @@ module Rperf
|
|
|
338
546
|
$stderr.puts
|
|
339
547
|
end
|
|
340
548
|
|
|
341
|
-
def self.compute_stat_breakdown(samples_raw)
|
|
549
|
+
def self.compute_stat_breakdown(samples_raw, label_sets)
|
|
342
550
|
breakdown = Hash.new(0)
|
|
343
551
|
total_weight = 0
|
|
344
552
|
|
|
345
|
-
samples_raw.each do |frames, weight|
|
|
553
|
+
samples_raw.each do |frames, weight, _thread_seq, label_set_id|
|
|
346
554
|
total_weight += weight
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
555
|
+
category = :cpu_execution
|
|
556
|
+
if label_sets && label_set_id && label_set_id > 0
|
|
557
|
+
ls = label_sets[label_set_id]
|
|
558
|
+
if ls
|
|
559
|
+
gvl = ls[:"%GVL"]
|
|
560
|
+
gc = ls[:"%GC"]
|
|
561
|
+
if gvl == "blocked" then category = :gvl_blocked
|
|
562
|
+
elsif gvl == "wait" then category = :gvl_wait
|
|
563
|
+
elsif gc == "mark" then category = :gc_marking
|
|
564
|
+
elsif gc == "sweep" then category = :gc_sweeping
|
|
565
|
+
end
|
|
566
|
+
end
|
|
567
|
+
end
|
|
355
568
|
breakdown[category] += weight
|
|
356
569
|
end
|
|
357
570
|
|
|
@@ -359,15 +572,19 @@ module Rperf
|
|
|
359
572
|
end
|
|
360
573
|
private_class_method :compute_stat_breakdown
|
|
361
574
|
|
|
362
|
-
def self.print_stat_breakdown(breakdown, total_weight)
|
|
575
|
+
def self.print_stat_breakdown(breakdown, total_weight, data)
|
|
363
576
|
$stderr.puts
|
|
577
|
+
process_count = data[:process_count] || 0
|
|
578
|
+
if process_count > 1
|
|
579
|
+
$stderr.puts STAT_LINE.call(format_integer(process_count), " ", "[Rperf] Ruby processes profiled")
|
|
580
|
+
end
|
|
364
581
|
|
|
365
582
|
[
|
|
366
|
-
[:cpu_execution, "CPU execution"],
|
|
367
|
-
[:gvl_blocked, "[
|
|
368
|
-
[:gvl_wait, "[
|
|
369
|
-
[:gc_marking, "[
|
|
370
|
-
[:gc_sweeping, "[
|
|
583
|
+
[:cpu_execution, "[Rperf] CPU execution"],
|
|
584
|
+
[:gvl_blocked, "[Rperf] GVL blocked (I/O, sleep)"],
|
|
585
|
+
[:gvl_wait, "[Rperf] GVL wait (contention)"],
|
|
586
|
+
[:gc_marking, "[Rperf] GC marking"],
|
|
587
|
+
[:gc_sweeping, "[Rperf] GC sweeping"],
|
|
371
588
|
].each do |key, label|
|
|
372
589
|
w = breakdown[key]
|
|
373
590
|
next if w == 0
|
|
@@ -378,35 +595,43 @@ module Rperf
|
|
|
378
595
|
private_class_method :print_stat_breakdown
|
|
379
596
|
|
|
380
597
|
def self.print_stat_runtime_info(data)
|
|
381
|
-
thread_count = data[:detected_thread_count] || 0
|
|
382
|
-
$stderr.puts STAT_LINE.call(format_integer(thread_count), " ", "[Ruby] detected threads") if thread_count > 0
|
|
383
598
|
gc = GC.stat
|
|
384
599
|
$stderr.puts STAT_LINE.call(format_ms(gc[:time] * 1_000_000), "ms",
|
|
385
|
-
"[Ruby] GC time (%s count: %s minor, %s major)" % [
|
|
600
|
+
"[Ruby ] GC time (%s count: %s minor, %s major)" % [
|
|
386
601
|
format_integer(gc[:count]),
|
|
387
602
|
format_integer(gc[:minor_gc_count]),
|
|
388
603
|
format_integer(gc[:major_gc_count])])
|
|
389
|
-
$stderr.puts STAT_LINE.call(format_integer(gc[:total_allocated_objects]), " ", "[Ruby] allocated objects")
|
|
390
|
-
$stderr.puts STAT_LINE.call(format_integer(gc[:total_freed_objects]), " ", "[Ruby] freed objects")
|
|
604
|
+
$stderr.puts STAT_LINE.call(format_integer(gc[:total_allocated_objects]), " ", "[Ruby ] allocated objects")
|
|
605
|
+
$stderr.puts STAT_LINE.call(format_integer(gc[:total_freed_objects]), " ", "[Ruby ] freed objects")
|
|
606
|
+
thread_count = data[:detected_thread_count] || 0
|
|
607
|
+
$stderr.puts STAT_LINE.call(format_integer(thread_count), " ", "[Ruby ] detected threads") if thread_count > 0
|
|
391
608
|
if defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled?
|
|
392
609
|
yjit = RubyVM::YJIT.runtime_stats
|
|
393
610
|
if yjit[:ratio_in_yjit]
|
|
394
|
-
$stderr.puts STAT_LINE.call(format("%.1f%%", yjit[:ratio_in_yjit] * 100), " ", "[Ruby] YJIT code execution ratio")
|
|
611
|
+
$stderr.puts STAT_LINE.call(format("%.1f%%", yjit[:ratio_in_yjit] * 100), " ", "[Ruby ] YJIT code execution ratio")
|
|
395
612
|
end
|
|
396
613
|
end
|
|
397
614
|
end
|
|
398
615
|
private_class_method :print_stat_runtime_info
|
|
399
616
|
|
|
400
|
-
def self.print_stat_system_info
|
|
617
|
+
def self.print_stat_system_info(data = nil)
|
|
401
618
|
sys_stats = get_system_stats
|
|
402
619
|
maxrss_kb = sys_stats[:maxrss_kb]
|
|
403
620
|
if maxrss_kb
|
|
404
|
-
$stderr.puts STAT_LINE.call(format_integer((maxrss_kb / 1024.0).round), "MB", "[OS] peak memory (maxrss)")
|
|
621
|
+
$stderr.puts STAT_LINE.call(format_integer((maxrss_kb / 1024.0).round), "MB", "[OS ] peak memory (maxrss)")
|
|
622
|
+
end
|
|
623
|
+
if sys_stats[:page_faults_minor]
|
|
624
|
+
minor = sys_stats[:page_faults_minor]
|
|
625
|
+
major = sys_stats[:page_faults_major]
|
|
626
|
+
$stderr.puts STAT_LINE.call(
|
|
627
|
+
format_integer(minor + major), " ",
|
|
628
|
+
"[OS ] page faults (%s minor, %s major)" % [
|
|
629
|
+
format_integer(minor), format_integer(major)])
|
|
405
630
|
end
|
|
406
631
|
if sys_stats[:ctx_voluntary]
|
|
407
632
|
$stderr.puts STAT_LINE.call(
|
|
408
633
|
format_integer(sys_stats[:ctx_voluntary] + sys_stats[:ctx_involuntary]), " ",
|
|
409
|
-
"[OS] context switches (%s voluntary, %s involuntary)" % [
|
|
634
|
+
"[OS ] context switches (%s voluntary, %s involuntary)" % [
|
|
410
635
|
format_integer(sys_stats[:ctx_voluntary]),
|
|
411
636
|
format_integer(sys_stats[:ctx_involuntary])])
|
|
412
637
|
end
|
|
@@ -415,10 +640,14 @@ module Rperf
|
|
|
415
640
|
w = sys_stats[:io_write_bytes]
|
|
416
641
|
$stderr.puts STAT_LINE.call(
|
|
417
642
|
format_integer(((r + w) / 1024.0 / 1024.0).round), "MB",
|
|
418
|
-
"[OS] disk I/O (%s MB read, %s MB write)" % [
|
|
643
|
+
"[OS ] disk I/O (%s MB read, %s MB write)" % [
|
|
419
644
|
format_integer((r / 1024.0 / 1024.0).round),
|
|
420
645
|
format_integer((w / 1024.0 / 1024.0).round)])
|
|
421
646
|
end
|
|
647
|
+
process_count = data[:process_count] if data
|
|
648
|
+
if process_count && process_count > 1
|
|
649
|
+
$stderr.puts STAT_LINE.call("", " ", "(GC/OS stats are from root process only; user/sys/[Rperf] lines are aggregated)")
|
|
650
|
+
end
|
|
422
651
|
end
|
|
423
652
|
private_class_method :print_stat_system_info
|
|
424
653
|
|
|
@@ -431,11 +660,24 @@ module Rperf
|
|
|
431
660
|
|
|
432
661
|
def self.print_stat_footer(samples_raw, real_ns, data)
|
|
433
662
|
triggers = data[:trigger_count] || 0
|
|
434
|
-
|
|
663
|
+
sampling_time_ns = data[:sampling_time_ns] || 0
|
|
664
|
+
# In multi-process mode, use sum of all processes' durations as denominator.
|
|
665
|
+
# Single-process: fall back to root's real_ns.
|
|
666
|
+
total_real_ns = data[:total_duration_ns] || real_ns
|
|
667
|
+
total_real_ns = real_ns if total_real_ns == 0
|
|
668
|
+
overhead_pct = total_real_ns > 0 ? sampling_time_ns * 100.0 / total_real_ns : 0.0
|
|
435
669
|
$stderr.puts
|
|
436
670
|
samples = data[:sampling_count] || samples_raw.size
|
|
437
671
|
$stderr.puts format(" %d samples / %d triggers, %.1f%% profiler overhead",
|
|
438
672
|
samples, triggers, overhead_pct)
|
|
673
|
+
dropped = data[:dropped_samples] || 0
|
|
674
|
+
if dropped > 0
|
|
675
|
+
$stderr.puts format(" WARNING: %d samples dropped due to memory allocation failure", dropped)
|
|
676
|
+
end
|
|
677
|
+
dropped_agg = data[:dropped_aggregation] || 0
|
|
678
|
+
if dropped_agg > 0
|
|
679
|
+
$stderr.puts format(" WARNING: %d samples dropped during aggregation (frame/stack table full)", dropped_agg)
|
|
680
|
+
end
|
|
439
681
|
end
|
|
440
682
|
private_class_method :print_stat_footer
|
|
441
683
|
|
|
@@ -448,10 +690,10 @@ module Rperf
|
|
|
448
690
|
# Example: 5_609_200_000 → "5,609.2"
|
|
449
691
|
def self.format_ms(ns)
|
|
450
692
|
ms = ns / 1_000_000.0
|
|
451
|
-
|
|
452
|
-
frac =
|
|
453
|
-
int_str =
|
|
454
|
-
"#{int_str}
|
|
693
|
+
formatted = format("%.1f", ms)
|
|
694
|
+
int_str, frac = formatted.split(".")
|
|
695
|
+
int_str = int_str.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
|
|
696
|
+
"#{int_str}.#{frac}"
|
|
455
697
|
end
|
|
456
698
|
private_class_method :format_ms
|
|
457
699
|
|
|
@@ -477,6 +719,12 @@ module Rperf
|
|
|
477
719
|
stats[:maxrss_kb] = rss if rss && rss > 0
|
|
478
720
|
end
|
|
479
721
|
|
|
722
|
+
if File.readable?("/proc/self/stat")
|
|
723
|
+
fields = File.read("/proc/self/stat").split
|
|
724
|
+
stats[:page_faults_minor] = fields[9].to_i
|
|
725
|
+
stats[:page_faults_major] = fields[11].to_i
|
|
726
|
+
end
|
|
727
|
+
|
|
480
728
|
if File.readable?("/proc/self/io")
|
|
481
729
|
# Linux: parse /proc/self/io
|
|
482
730
|
File.read("/proc/self/io").each_line do |line|
|
|
@@ -493,6 +741,292 @@ module Rperf
|
|
|
493
741
|
end
|
|
494
742
|
private_class_method :get_system_stats
|
|
495
743
|
|
|
744
|
+
# --- Multi-process (fork/spawn) support ---
|
|
745
|
+
|
|
746
|
+
# Set up child process tracking from Rperf.start(inherit: ...).
|
|
747
|
+
# Called only when NOT already inside a CLI-managed session (no RPERF_SESSION_DIR).
|
|
748
|
+
# Creates the session directory eagerly — if creation fails, inherit is silently
|
|
749
|
+
# disabled and profiling continues in single-process mode.
|
|
750
|
+
# Prepares this process to act as the root of a multi-process profiling
# session started through the API (Rperf.start(inherit: ...)).
#
# A session directory is created up front. When that fails the method
# returns without touching ENV, leaving the profiler in single-process mode.
# Otherwise the session is advertised through RPERF_ROOT_PROCESS /
# RPERF_SESSION_DIR (and RPERF_DEFER when +defer+ is truthy) and the fork
# hook is installed.
#
# Only when +inherit+ is exactly +true+ are the RPERF_* settings, RUBYLIB and
# RUBYOPT exported as well, so that spawned Ruby processes load rperf
# themselves. The previous values of those variables are remembered in
# @_saved_env first.
#
# +output+, +format+ and +stat+ are accepted but not used here.
#
# NOTE(review): for inherit values other than +true+ the frequency/mode/
# signal/aggregate variables are not exported, yet _restart_in_child reads
# them from ENV — confirm that forked children get the intended settings.
def self._setup_inherit(mode, frequency, signal, aggregate, output, format, stat, inherit, defer)
  dir = _create_session_dir
  return unless dir

  ENV["RPERF_SESSION_DIR"] = dir
  ENV["RPERF_ROOT_PROCESS"] = Process.pid.to_s
  ENV["RPERF_DEFER"] = "1" if defer

  _install_fork_hook
  return unless inherit == true

  # Snapshot everything we are about to overwrite so it can be restored later.
  tracked = %w[RPERF_ENABLED RPERF_FREQUENCY RPERF_MODE RPERF_SIGNAL RPERF_AGGREGATE RUBYLIB RUBYOPT]
  @_saved_env = tracked.each_with_object({}) { |key, saved| saved[key] = ENV[key] }

  # Assigning nil removes the variable from the environment.
  {
    "RPERF_ENABLED" => "1",
    "RPERF_FREQUENCY" => frequency.to_s,
    "RPERF_MODE" => mode.to_s,
    "RPERF_SIGNAL" => signal&.to_s,
    "RPERF_AGGREGATE" => ("0" unless aggregate),
  }.each { |key, value| ENV[key] = value }

  # Make `-rrperf` resolvable in children and force it to be loaded.
  rperf_lib = File.expand_path("..", __FILE__)
  ENV["RUBYLIB"] = [rperf_lib, ENV["RUBYLIB"]].compact.join(File::PATH_SEPARATOR)
  ENV["RUBYOPT"] = "-rrperf #{ENV['RUBYOPT']}".strip
end
|
|
776
|
+
private_class_method :_setup_inherit
|
|
777
|
+
|
|
778
|
+
# Create session directory eagerly. Returns the session dir path on success,
|
|
779
|
+
# nil on failure (caller should fall back to single-process mode).
|
|
780
|
+
# Try each candidate base in order. If user_dir looks usable but
|
|
781
|
+
# session_dir creation fails (quota, ACL, sandbox, etc.), fall through
|
|
782
|
+
# to the next base instead of giving up.
|
|
783
|
+
# When clean_stale: true, removes session dirs from dead processes.
|
|
784
|
+
# Creates a fresh, private session directory and returns its path, or nil
# when no candidate location is usable (the caller then falls back to
# single-process mode).
#
# Candidate bases are tried in order: RPERF_TMPDIR, XDG_RUNTIME_DIR,
# Dir.tmpdir. Inside a base, a per-user directory "rperf-<uid>" is used; it
# is trusted only when it is a real directory (not a symlink), owned by the
# current user, with mode exactly 0700. The check uses lstat so that a
# symlink planted in a shared temp directory is never followed. Any failure
# in one base (untrusted directory, mkdir error, quota, ...) moves on to the
# next base instead of giving up.
#
# clean_stale: when true, session directories named "rperf-<pid>-*" whose
# pid no longer exists are removed before the new one is created.
def self._create_session_dir(clean_stale: false)
  require "securerandom"
  require "tmpdir"

  trusted = lambda do |path|
    st = File.lstat(path)
    st.directory? && st.owned? && (st.mode & 0777) == 0700
  rescue SystemCallError
    false
  end

  [ENV["RPERF_TMPDIR"], ENV["XDG_RUNTIME_DIR"], Dir.tmpdir].compact.each do |base|
    user_dir = File.join(base, "rperf-#{Process.uid}")

    if File.directory?(user_dir)
      next unless trusted.call(user_dir)
    elsif File.writable?(base)
      begin
        Dir.mkdir(user_dir, 0700)
      rescue Errno::EEXIST
        # Lost a race (or something non-directory is in the way): re-validate.
        next unless trusted.call(user_dir)
      rescue SystemCallError
        next
      end
    else
      next
    end

    if clean_stale
      require "fileutils"
      # `base:` keeps glob metacharacters in the directory path from being
      # interpreted as part of the pattern.
      Dir.glob("rperf-*", base: user_dir).each do |name|
        owner_pid = name[/\Arperf-(\d+)-/, 1]
        next unless owner_pid

        begin
          Process.kill(0, owner_pid.to_i) # signal 0: existence probe only
        rescue Errno::ESRCH
          FileUtils.rm_rf(File.join(user_dir, name))
        rescue Errno::EPERM
          # pid exists but belongs to someone else — leave the directory alone
        end
      end
    end

    session_dir = File.join(user_dir, "rperf-#{Process.pid}-#{SecureRandom.hex(4)}")
    begin
      Dir.mkdir(session_dir, 0700)
      return session_dir
    rescue SystemCallError
      next
    end
  end

  nil
end
|
|
834
|
+
private_class_method :_create_session_dir
|
|
835
|
+
|
|
836
|
+
# Decodes the RPERF_SIGNAL environment variable.
#
# Returns nil when the variable is unset, false for the literal "false",
# the integer value for an all-digit string, and nil for anything else.
def self._parse_signal_env
  raw = ENV["RPERF_SIGNAL"]
  return nil if raw.nil?
  return false if raw == "false"

  raw.to_i if raw.match?(/\A\d+\z/)
end
|
|
843
|
+
private_class_method :_parse_signal_env
|
|
844
|
+
|
|
845
|
+
# Prepends a `_fork` override onto Process' singleton class, once per
# process (guarded by @_fork_hook_installed).
#
# The override:
#   * before forking, calls Rperf._on_first_fork when the session directory
#     has not been activated yet and the current pid matches
#     RPERF_ROOT_PROCESS;
#   * after forking, calls Rperf._restart_in_child on the child side
#     (where the original `_fork` returned 0);
#   * returns whatever the original `_fork` returned.
def self._install_fork_hook
  unless @_fork_hook_installed
    @_fork_hook_installed = true

    hook = Module.new do
      def _fork
        first_fork_pending = !Rperf.instance_variable_get(:@_session_dir_created)
        Rperf._on_first_fork if first_fork_pending && Process.pid.to_s == ENV["RPERF_ROOT_PROCESS"]

        super.tap { |child_pid| Rperf._restart_in_child if child_pid == 0 }
      end
    end

    ::Process.singleton_class.prepend(hook)
  end
end
|
|
863
|
+
private_class_method :_install_fork_hook
|
|
864
|
+
|
|
865
|
+
# Marks the session directory as active the first time the root process
# forks. Does nothing when it was already marked, or when RPERF_SESSION_DIR
# is unset or does not name an existing directory.
#
# The root's own @output/@format/@stat are deliberately left untouched here.
def self._on_first_fork
  return if @_session_dir_created

  dir = ENV["RPERF_SESSION_DIR"]
  @_session_dir_created = true if dir && File.directory?(dir)
end
|
|
875
|
+
|
|
876
|
+
# Restarts profiling on the child side of a fork.
#
# Does nothing unless RPERF_SESSION_DIR names an existing directory, and
# bails out if the C profiler still reports itself as running. Otherwise the
# label tables are reset, and profiling is started again with settings read
# from RPERF_FREQUENCY / RPERF_MODE / RPERF_AGGREGATE / RPERF_SIGNAL /
# RPERF_DEFER, writing JSON to "profile-<pid>.json.gz" inside the session
# directory. The child tags its samples with a "%pid" label and registers an
# at_exit handler that stops the profiler.
#
# NOTE(review): a missing or unrecognized RPERF_MODE means :wall here, while
# the ENV auto-start path defaults to "cpu" — confirm this is intended.
def self._restart_in_child
  dir = ENV["RPERF_SESSION_DIR"]
  return unless dir && File.directory?(dir)
  return if _c_running? # not expected after fork; defensive

  # The C-side state was already reset by the pthread_atfork child handler;
  # drop the Ruby-side label tables to match.
  @label_set_table = @label_set_index = nil

  child_mode = if ENV["RPERF_MODE"] == "cpu"
    :cpu
  else
    :wall
  end

  child_opts = {
    frequency: ENV.fetch("RPERF_FREQUENCY", 1000).to_i,
    mode: child_mode,
    aggregate: ENV["RPERF_AGGREGATE"] != "0",
    output: File.join(dir, "profile-#{Process.pid}.json.gz"),
    format: :json,
    stat: false,
    verbose: false,
  }
  signal = _parse_signal_env
  child_opts[:signal] = signal unless signal.nil?
  child_opts[:defer] = true if ENV["RPERF_DEFER"] == "1"
  child_opts[:inherit] = false

  start(**child_opts)
  @_session_dir_output = true
  label(:"%pid" => Process.pid.to_s)

  # Ensure the child's profile is flushed even if nobody calls stop explicitly.
  at_exit { Rperf.stop }
end
|
|
907
|
+
|
|
908
|
+
# Merges every "profile-*.json.gz" in the session directory into one data
# set, reports it, and removes the session directory.
#
# Returns the merged data Hash, or nil when RPERF_SESSION_DIR is unset / not
# a directory, when no profile could be loaded, or when aggregation raised.
#
# Profiles that fail to load are skipped with a warning. Counters and
# user/sys times are summed across processes; :duration_ns is the longest
# single-process duration while :total_duration_ns is the sum. The merged
# result is passed to print_stat when @stat is set and written via
# write_data when @output is set.
#
# If anything raises, a warning is printed, the fallback output is attempted
# and the session directory is still cleaned up.
def self._aggregate_and_report
  session_dir = ENV["RPERF_SESSION_DIR"]
  return unless session_dir && File.directory?(session_dir)

  samples = []
  label_sets = [{}]
  label_index = { {} => 0 }
  totals = Hash.new(0)
  longest_ns = 0
  loaded = 0

  Dir.glob(File.join(session_dir, "profile-*.json.gz")).each do |path|
    begin
      profile = load(path)
    rescue StandardError => e
      $stderr.puts "rperf: warning: failed to load #{path}: #{e.message}"
      next
    end
    next unless profile

    _merge_into(samples, label_sets, profile, label_index)

    %i[trigger_count sampling_count sampling_time_ns].each do |key|
      totals[key] += (profile[key] || 0)
    end

    elapsed = profile[:duration_ns] || 0
    longest_ns = elapsed if elapsed > longest_ns
    totals[:duration_ns] += elapsed

    %i[user_ns sys_ns].each do |key|
      totals[key] += (profile[key] || 0)
    end

    loaded += 1
  end

  return if loaded == 0

  merged = {
    mode: (ENV["RPERF_MODE"] || "wall").to_sym,
    frequency: (ENV["RPERF_FREQUENCY"] || 1000).to_i,
    aggregated_samples: samples,
    label_sets: label_sets,
    trigger_count: totals[:trigger_count],
    sampling_count: totals[:sampling_count],
    sampling_time_ns: totals[:sampling_time_ns],
    duration_ns: longest_ns,
    total_duration_ns: totals[:duration_ns],
    user_ns: totals[:user_ns],
    sys_ns: totals[:sys_ns],
    process_count: loaded,
  }

  print_stat(merged) if @stat
  write_data(@output, merged, @format) if @output

  _cleanup_session_dir(session_dir)

  merged
rescue => e
  $stderr.puts "rperf: warning: failed to aggregate multi-process data: #{e.message}"
  # Last resort: hand the user one of the individual profiles unchanged.
  _fallback_aggregate_output(session_dir)
  _cleanup_session_dir(session_dir)
  nil
end
|
|
976
|
+
# Not private — called from at_exit block which runs in top-level context
|
|
977
|
+
|
|
978
|
+
# Recursively removes +session_dir+. Never raises: any error is reported as
# a warning on stderr instead.
def self._cleanup_session_dir(session_dir)
  begin
    require "fileutils"
    FileUtils.rm_r(session_dir, force: true)
  rescue => e
    $stderr.puts "rperf: warning: failed to clean up session dir: #{e.message}"
  end
end
|
|
984
|
+
private_class_method :_cleanup_session_dir
|
|
985
|
+
|
|
986
|
+
# Best-effort fallback: if aggregation failed, try to copy the first
|
|
987
|
+
# available child profile to @output so the user gets something.
|
|
988
|
+
# Best-effort fallback used when aggregation failed: copies the first
# "profile-*.json.gz" found in +session_dir+ to @output, byte for byte.
#
# Does nothing when @output is unset, when +session_dir+ is nil or not a
# directory, or when no profile file exists. Errors are swallowed on purpose
# because there is nothing further to try at this point.
def self._fallback_aggregate_output(session_dir)
  return unless @output && session_dir && File.directory?(session_dir)

  candidate = Dir.glob(File.join(session_dir, "profile-*.json.gz")).first
  return unless candidate

  require "fileutils"
  FileUtils.cp(candidate, @output)
rescue StandardError
  # nothing more we can do
end
|
|
998
|
+
private_class_method :_fallback_aggregate_output
|
|
999
|
+
|
|
1000
|
+
# Appends one process' profile to the merged result, in place.
#
# merged_samples          - Array receiving [frames, weight, thread_seq,
#                           label_set_id] entries.
# merged_label_sets       - Array of label Hashes; new distinct label sets
#                           from +data+ are appended.
# data                    - Hash with optional :label_sets and
#                           :aggregated_samples.
# merged_label_sets_index - optional Hash mapping label set => index in
#                           +merged_label_sets+; built from
#                           +merged_label_sets+ when not supplied. Updated
#                           in place as label sets are added.
#
# Label-set keys are converted to symbols before comparison so that sets
# differing only in key type are treated as the same set. Each sample's
# label_set_id is rewritten to the merged index; ids that are nil or unknown
# map to 0.
def self._merge_into(merged_samples, merged_label_sets, data, merged_label_sets_index = nil)
  merged_label_sets_index ||= merged_label_sets.each_with_index.to_h

  # child label_set_id => merged label_set_id
  remap = {}
  (data[:label_sets] || [{}]).each_with_index do |labels, child_id|
    key = labels.is_a?(Hash) ? labels.transform_keys(&:to_sym) : labels
    remap[child_id] = merged_label_sets_index[key] || begin
      merged_label_sets << key
      merged_label_sets_index[key] = merged_label_sets.size - 1
    end
  end

  (data[:aggregated_samples] || []).each do |sample|
    frames, weight, thread_seq, child_lsi = sample
    merged_samples << [frames, weight, thread_seq, remap[child_lsi || 0] || 0]
  end
end
|
|
1028
|
+
private_class_method :_merge_into
|
|
1029
|
+
|
|
496
1030
|
# ENV-based auto-start for CLI usage
|
|
497
1031
|
if ENV["RPERF_ENABLED"] == "1"
|
|
498
1032
|
_rperf_mode_str = ENV["RPERF_MODE"] || "cpu"
|
|
@@ -500,23 +1034,60 @@ module Rperf
|
|
|
500
1034
|
raise ArgumentError, "RPERF_MODE must be 'cpu' or 'wall', got: #{_rperf_mode_str.inspect}"
|
|
501
1035
|
end
|
|
502
1036
|
_rperf_mode = _rperf_mode_str == "wall" ? :wall : :cpu
|
|
503
|
-
_rperf_format =
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
else ENV["RPERF_SIGNAL"].to_i
|
|
1037
|
+
_rperf_format = if ENV["RPERF_FORMAT"]
|
|
1038
|
+
unless %w[pprof collapsed text json].include?(ENV["RPERF_FORMAT"])
|
|
1039
|
+
raise ArgumentError, "RPERF_FORMAT must be one of pprof, collapsed, text, json, got: #{ENV["RPERF_FORMAT"].inspect}"
|
|
1040
|
+
end
|
|
1041
|
+
ENV["RPERF_FORMAT"].to_sym
|
|
509
1042
|
end
|
|
1043
|
+
_rperf_stat = ENV["RPERF_STAT"] == "1"
|
|
1044
|
+
_rperf_signal = _parse_signal_env
|
|
510
1045
|
_rperf_aggregate = ENV["RPERF_AGGREGATE"] != "0"
|
|
1046
|
+
_rperf_original_output = _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.json.gz")
|
|
1047
|
+
|
|
511
1048
|
_rperf_start_opts = { frequency: (ENV["RPERF_FREQUENCY"] || 1000).to_i, mode: _rperf_mode,
|
|
512
|
-
output: _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.data"),
|
|
513
1049
|
verbose: ENV["RPERF_VERBOSE"] == "1",
|
|
514
|
-
format: _rperf_format,
|
|
515
|
-
stat: _rperf_stat,
|
|
516
1050
|
aggregate: _rperf_aggregate }
|
|
517
1051
|
_rperf_start_opts[:signal] = _rperf_signal unless _rperf_signal.nil?
|
|
518
|
-
|
|
519
|
-
|
|
1052
|
+
_rperf_start_opts[:defer] = true if ENV["RPERF_DEFER"] == "1"
|
|
1053
|
+
|
|
1054
|
+
if ENV["RPERF_SESSION_DIR"] && Process.pid.to_s != ENV["RPERF_ROOT_PROCESS"]
|
|
1055
|
+
# spawn / fork+exec child: write to session dir, no aggregation.
|
|
1056
|
+
# Session dir is created eagerly by the root process (CLI or API).
|
|
1057
|
+
# If it doesn't exist, skip profiling entirely — don't fall back to
|
|
1058
|
+
# normal mode which would duplicate output with the root process.
|
|
1059
|
+
_rperf_session_dir = ENV["RPERF_SESSION_DIR"]
|
|
1060
|
+
if File.directory?(_rperf_session_dir)
|
|
1061
|
+
_rperf_start_opts[:output] = File.join(_rperf_session_dir, "profile-#{Process.pid}.json.gz")
|
|
1062
|
+
_rperf_start_opts[:format] = :json
|
|
1063
|
+
_rperf_start_opts[:stat] = false
|
|
1064
|
+
_rperf_start_opts[:verbose] = false
|
|
1065
|
+
|
|
1066
|
+
_install_fork_hook
|
|
1067
|
+
start(**_rperf_start_opts, inherit: false)
|
|
1068
|
+
@_session_dir_output = true
|
|
1069
|
+
label("%pid": Process.pid.to_s)
|
|
1070
|
+
at_exit { stop }
|
|
1071
|
+
end
|
|
1072
|
+
elsif ENV["RPERF_SESSION_DIR"]
|
|
1073
|
+
# Root process: start with normal output settings.
|
|
1074
|
+
# If no fork/spawn happens, behaves exactly like single-process mode.
|
|
1075
|
+
_rperf_start_opts[:output] = _rperf_original_output
|
|
1076
|
+
_rperf_start_opts[:format] = _rperf_format
|
|
1077
|
+
_rperf_start_opts[:stat] = _rperf_stat
|
|
1078
|
+
|
|
1079
|
+
_install_fork_hook
|
|
1080
|
+
start(**_rperf_start_opts, inherit: false)
|
|
1081
|
+
|
|
1082
|
+
at_exit { Rperf.stop }
|
|
1083
|
+
else
|
|
1084
|
+
_rperf_start_opts[:output] = _rperf_original_output
|
|
1085
|
+
_rperf_start_opts[:format] = _rperf_format
|
|
1086
|
+
_rperf_start_opts[:stat] = _rperf_stat
|
|
1087
|
+
_rperf_start_opts[:inherit] = false # no RPERF_SESSION_DIR means --no-inherit
|
|
1088
|
+
start(**_rperf_start_opts)
|
|
1089
|
+
at_exit { stop }
|
|
1090
|
+
end
|
|
520
1091
|
end
|
|
521
1092
|
|
|
522
1093
|
# Text report encoder — human/AI readable flat + cumulative top-N table.
|
|
@@ -692,7 +1263,7 @@ module Rperf
|
|
|
692
1263
|
intern.("frequency: #{frequency}Hz"),
|
|
693
1264
|
intern.("ruby: #{RUBY_DESCRIPTION}"),
|
|
694
1265
|
]
|
|
695
|
-
doc_url_idx = intern.("https://ko1.github.io/rperf/help.html")
|
|
1266
|
+
doc_url_idx = intern.("https://ko1.github.io/rperf/docs/help.html")
|
|
696
1267
|
|
|
697
1268
|
# field 6: string_table (repeated string)
|
|
698
1269
|
string_table.each do |s|
|