rperf 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rperf.rb CHANGED
@@ -13,21 +13,45 @@ end
13
13
 
14
14
  module Rperf
15
15
 
16
- @verbose = false
17
- @output = nil
18
- @stat = false
19
- @stat_start_mono = nil
16
+ # --- Module-level state (single global profiler) ---
17
+ # Profiling session
18
+ @verbose = false # verbose stats output on stop
19
+ @output = nil # output file path (nil = no file)
20
+ @format = nil # output format (:json, :pprof, :collapsed, :text, nil = auto)
21
+ @stat = false # print user/sys/real summary to stderr
22
+ @stat_start_mono = nil # Process::CLOCK_MONOTONIC at start (for real time)
23
+ @stat_start_times = nil # Process.times at start (for user/sys time)
24
+ @label_set_table = nil # Array: label_set_id → frozen Hash
25
+ @label_set_index = nil # Hash: frozen label Hash → label_set_id
26
+ # Multi-process (fork/spawn) support
27
+ @_session_dir_output = false # true when @output points to session dir (child process)
28
+ @_session_dir_created = false # true after first fork activates session dir
29
+ @_fork_hook_installed = false # true after Process._fork hook is prepended
30
+ @_saved_env = nil # saved ENV values for restore on stop (inherit: true)
20
31
 
21
32
  # Starts profiling.
22
- # format: :pprof, :collapsed, or :text. nil = auto-detect from output extension
33
+ # format: :json, :pprof, :collapsed, or :text. nil = auto-detect from output extension
34
+ # .json.gz → json (rperf native, default)
23
35
  # .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
24
36
  # .txt → text report (human/AI readable flat + cumulative table)
25
- # otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
26
- def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true, defer: false)
37
+ # .pb.gz → pprof protobuf (gzip compressed)
38
+ # inherit: controls child process profiling.
39
+ # :fork — (default) automatically profile forked child processes via Process._fork hook.
40
+ # Session dir is created eagerly at start time. Spawned processes are NOT tracked.
41
+ # true — profile both forked and spawned Ruby child processes. Sets RUBYOPT=-rrperf
42
+ # and RPERF_* env vars so spawned Ruby processes auto-start profiling.
43
+ # Use with caution: affects ALL spawned Ruby processes, including independent
44
+ # programs that may use rperf themselves.
45
+ # false — do not track child processes (single-process mode).
46
+ def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true, defer: false, inherit: :fork)
27
47
  raise ArgumentError, "frequency must be a positive integer (got #{frequency.inspect})" unless frequency.is_a?(Integer) && frequency > 0
28
48
  raise ArgumentError, "frequency must be <= 10000 (10KHz), got #{frequency}" if frequency > 10_000
29
49
  raise ArgumentError, "mode must be :cpu or :wall, got #{mode.inspect}" unless %i[cpu wall].include?(mode)
50
+ raise ArgumentError, "inherit must be :fork, true, or false, got #{inherit.inspect}" unless [true, false, :fork].include?(inherit)
30
51
  c_mode = mode == :cpu ? 0 : 1
52
+ unless signal.nil? || signal == false || signal.is_a?(Integer)
53
+ raise ArgumentError, "signal must be nil, false, or an Integer, got #{signal.inspect}"
54
+ end
31
55
  c_signal = signal.nil? ? -1 : (signal ? signal.to_i : 0)
32
56
  if c_signal > 0
33
57
  raise ArgumentError, "signal mode is only supported on Linux" unless RUBY_PLATFORM =~ /linux/
@@ -41,51 +65,146 @@ module Rperf
41
65
  @output = output
42
66
  @format = format
43
67
  @stat = stat
44
- @stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC) if @stat
68
+ @stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC)
69
+ @stat_start_times = Process.times
45
70
  @label_set_table = nil
46
71
  @label_set_index = nil
47
72
  _c_start(frequency, c_mode, aggregate, c_signal, defer)
48
73
 
74
+ # Set up child process tracking
75
+ if inherit && !ENV["RPERF_SESSION_DIR"]
76
+ _setup_inherit(mode, frequency, signal, aggregate, output, format, stat, inherit, defer)
77
+ end
78
+
49
79
  if block_given?
50
80
  begin
51
81
  yield
52
82
  ensure
53
- return stop
83
+ result = stop
54
84
  end
85
+ result
55
86
  end
56
87
  end
57
88
 
89
+ # VM state integer → label value mapping.
90
+ # These values appear as "%GVL" / "%GC" label keys in label_sets.
91
+ VM_STATE_LABELS = {
92
+ 1 => ["%GVL", "blocked"],
93
+ 2 => ["%GVL", "wait"],
94
+ 3 => ["%GC", "mark"],
95
+ 4 => ["%GC", "sweep"],
96
+ }.freeze
97
+
58
98
  def self.stop
99
+ # Check if we need to aggregate child process data.
100
+ # @_session_dir_created: fork happened and session dir is active.
101
+ # Otherwise: check for actual child profile files (spawn-only case).
102
+ session_dir = ENV["RPERF_SESSION_DIR"]
103
+ is_root = session_dir && Process.pid.to_s == ENV["RPERF_ROOT_PROCESS"]
104
+ has_child_profiles = is_root && !@_session_dir_created &&
105
+ File.directory?(session_dir.to_s) &&
106
+ !Dir.glob(File.join(session_dir.to_s, "profile-*.json.gz")).empty?
107
+ needs_aggregation = is_root && (@_session_dir_created || has_child_profiles)
108
+
59
109
  data = _c_stop
60
110
  return unless data
61
111
 
112
+ # Record process times for multi-process aggregation
113
+ times = Process.times
114
+ start_times = @stat_start_times || Struct.new(:utime, :stime).new(0.0, 0.0)
115
+ data[:user_ns] = ((times.utime - start_times.utime) * 1_000_000_000).to_i
116
+ data[:sys_ns] = ((times.stime - start_times.stime) * 1_000_000_000).to_i
117
+
62
118
  # When aggregate: false, C extension returns :raw_samples but not
63
119
  # :aggregated_samples. Build aggregated view so encoders always work.
64
120
  if data[:raw_samples] && !data[:aggregated_samples]
65
121
  merged = {}
66
- data[:raw_samples].each do |frames, weight, thread_seq, label_set_id|
67
- key = [frames, thread_seq || 0, label_set_id || 0]
122
+ data[:raw_samples].each do |frames, weight, thread_seq, label_set_id, vm_state|
123
+ key = [frames, thread_seq || 0, label_set_id || 0, vm_state || 0]
68
124
  if merged.key?(key)
69
125
  merged[key] += weight
70
126
  else
71
127
  merged[key] = weight
72
128
  end
73
129
  end
74
- data[:aggregated_samples] = merged.map { |(frames, ts, lsi), w| [frames, w, ts, lsi] }
130
+ data[:aggregated_samples] = merged.map { |(frames, ts, lsi, vs), w| [frames, w, ts, lsi, vs] }
131
+ end
132
+
133
+ merge_vm_state_labels!(data)
134
+
135
+ if needs_aggregation
136
+ # Root process with children: write root's own profile to session dir
137
+ # (fixed json.gz format), then aggregate all profiles.
138
+ # Root's @output/@format/@stat are preserved for the merged result.
139
+ print_stats(data) if @verbose
140
+ begin
141
+ save(File.join(session_dir, "profile-#{Process.pid}.json.gz"), data, format: :json)
142
+ rescue SystemCallError
143
+ # Session dir may have been removed (e.g., test scenario) — continue to aggregation
144
+ end
145
+ merged = _aggregate_and_report
146
+ if merged.nil? && data
147
+ # Aggregation failed — fall back to root's own data
148
+ $stderr.puts "rperf: warning: multi-process aggregation failed; writing root process data only"
149
+ write_data(@output, data, @format) if @output
150
+ print_stat(data) if @stat
151
+ end
152
+ _cleanup_session_state
153
+ return merged || data
75
154
  end
76
155
 
77
156
  print_stats(data) if @verbose
78
157
  print_stat(data) if @stat
79
158
 
80
159
  if @output
81
- write_data(@output, data, @format)
160
+ if @_session_dir_output
161
+ # Child process writing to session dir — tolerate missing dir
162
+ begin
163
+ write_data(@output, data, @format)
164
+ rescue SystemCallError
165
+ # Parent may have already cleaned up the session dir (e.g., parent
166
+ # exited first and rm_rf'd it), or disk is full. Silently skip —
167
+ # crashing in at_exit is worse than losing one child's profile.
168
+ end
169
+ else
170
+ write_data(@output, data, @format)
171
+ end
82
172
  @output = nil
83
173
  @format = nil
84
174
  end
85
175
 
176
+ _cleanup_session_state
86
177
  data
87
178
  end
88
179
 
180
+ def self._cleanup_session_state
181
+ session_dir = ENV.delete("RPERF_SESSION_DIR")
182
+ ENV.delete("RPERF_ROOT_PROCESS")
183
+ ENV.delete("RPERF_DEFER")
184
+ @_session_dir_created = false
185
+ @_session_dir_output = false
186
+ # Restore ENV variables saved by _setup_inherit (inherit: true)
187
+ if @_saved_env
188
+ @_saved_env.each do |key, original|
189
+ if original.nil?
190
+ ENV.delete(key)
191
+ else
192
+ ENV[key] = original
193
+ end
194
+ end
195
+ @_saved_env = nil
196
+ end
197
+ # Remove eagerly-created session dir if it's empty (no children ran)
198
+ if session_dir && File.directory?(session_dir)
199
+ begin
200
+ Dir.rmdir(session_dir) # only succeeds if empty
201
+ rescue SystemCallError
202
+ # not empty or already removed — fine
203
+ end
204
+ end
205
+ end
206
+ private_class_method :_cleanup_session_state
207
+
89
208
  # Returns a snapshot of the current profiling data without stopping.
90
209
  # Only works in aggregate mode (the default). Returns nil if not profiling.
91
210
  # The returned data has the same format as stop's return value and can be
@@ -95,31 +214,48 @@ module Rperf
95
214
  # This allows interval-based profiling where each snapshot covers only
96
215
  # the period since the last clear.
97
216
  def self.snapshot(clear: false)
98
- _c_snapshot(clear)
217
+ data = _c_snapshot(clear)
218
+ return unless data
219
+ merge_vm_state_labels!(data)
220
+ data
99
221
  end
100
222
 
101
223
  # Label set management for per-context profiling.
102
224
  # Label sets are stored as an Array of Hashes, indexed by label_set_id.
103
225
  # Index 0 is reserved (no labels).
104
226
 
105
- @label_set_table = nil # Array of frozen Hash
106
- @label_set_index = nil # Hash → id (for dedup)
107
-
108
227
  def self._init_label_sets
109
228
  @label_set_table = [{}] # id 0 = no labels
110
229
  @label_set_index = { {} => 0 }
111
230
  end
112
231
 
113
232
  def self._intern_label_set(hash)
114
- frozen = hash.frozen? ? hash : hash.freeze
115
- @label_set_index[frozen] ||= begin
233
+ hash.freeze
234
+ @label_set_index[hash] ||= begin
116
235
  id = @label_set_table.size
117
- @label_set_table << frozen
236
+ @label_set_table << hash
118
237
  _c_set_label_sets(@label_set_table)
119
238
  id
120
239
  end
121
240
  end
122
241
 
242
+ # Merges the given keyword labels into the current thread's label set,
243
+ # sets the result on the current thread, and returns [previous_id, new_id].
244
+ # Callers use previous_id to restore labels after a block.
245
+ def self._merge_and_set_label(kw)
246
+ _init_label_sets unless @label_set_table
247
+
248
+ cur_id = _c_get_label
249
+ cur_labels = @label_set_table[cur_id] || {}
250
+ kw.each_value { |v| v.freeze }
251
+ new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
252
+ new_id = _intern_label_set(new_labels)
253
+ _c_set_label(new_id)
254
+
255
+ [cur_id, new_id]
256
+ end
257
+ private_class_method :_merge_and_set_label
258
+
123
259
  # Sets labels on the current thread for profiling annotation.
124
260
  # With a block: restores previous labels when the block exits.
125
261
  # Without a block: sets labels persistently on the current thread.
@@ -130,14 +266,10 @@ module Rperf
130
266
  #
131
267
  # Values of nil remove that key. Existing labels are merged.
132
268
  def self.label(**kw, &block)
133
- _init_label_sets unless @label_set_table
269
+ return yield if block && !_c_running?
270
+ return unless _c_running?
134
271
 
135
- cur_id = _c_get_label
136
- cur_labels = @label_set_table[cur_id] || {}
137
-
138
- new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
139
- new_id = _intern_label_set(new_labels)
140
- _c_set_label(new_id)
272
+ cur_id, _new_id = _merge_and_set_label(kw)
141
273
 
142
274
  if block
143
275
  begin
@@ -162,13 +294,7 @@ module Rperf
162
294
  raise ArgumentError, "Rperf.profile requires a block" unless block
163
295
  raise RuntimeError, "Rperf is not started" unless _c_running?
164
296
 
165
- _init_label_sets unless @label_set_table
166
-
167
- cur_id = _c_get_label
168
- cur_labels = @label_set_table[cur_id] || {}
169
- new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
170
- new_id = _intern_label_set(new_labels)
171
- _c_set_label(new_id)
297
+ cur_id, _new_id = _merge_and_set_label(kw)
172
298
 
173
299
  _c_profile_inc
174
300
 
@@ -189,11 +315,54 @@ module Rperf
189
315
  end
190
316
 
191
317
 
318
+ # Merge vm_state from C samples into label_sets as "%GVL" / "%GC" label keys (see VM_STATE_LABELS).
319
+ # Mutates data in place: updates label_set_id on each sample, strips vm_state,
320
+ # and extends label_sets with new entries as needed.
321
+ def self.merge_vm_state_labels!(data)
322
+ samples_key = data[:aggregated_samples] ? :aggregated_samples : :raw_samples
323
+ samples = data[samples_key]
324
+ return unless samples
325
+
326
+ orig_label_sets = data[:label_sets]
327
+ label_sets = (orig_label_sets || [{}]).dup
328
+ mapping = {} # [original_label_set_id, vm_state] => new_label_set_id
329
+ modified = false
330
+
331
+ samples.each do |sample|
332
+ vm_state = sample[4] || 0
333
+ next if vm_state == 0
334
+ next unless VM_STATE_LABELS.key?(vm_state)
335
+
336
+ label_set_id = sample[3] || 0
337
+ cache_key = [label_set_id, vm_state]
338
+ new_id = mapping[cache_key]
339
+ unless new_id
340
+ base = label_sets[label_set_id] || {}
341
+ key, value = VM_STATE_LABELS[vm_state]
342
+ new_ls = base.merge(key.to_sym => value).freeze
343
+ new_id = label_sets.size
344
+ label_sets << new_ls
345
+ mapping[cache_key] = new_id
346
+ end
347
+ sample[3] = new_id
348
+ modified = true
349
+ end
350
+
351
+ # Strip vm_state (5th element) from all samples
352
+ samples.each { |s| s.pop if s.size > 4 }
353
+
354
+ # Only set label_sets if they were already present or we added vm_state labels
355
+ data[:label_sets] = label_sets if orig_label_sets || modified
356
+ end
357
+ private_class_method :merge_vm_state_labels!
358
+
192
359
  # Saves profiling data to a file.
193
- # format: :pprof, :collapsed, or :text. nil = auto-detect from path extension
360
+ # format: :json, :pprof, :collapsed, or :text. nil = auto-detect from path extension
361
+ # .json.gz → json (rperf native, gzip compressed, default)
362
+ # .json → json (plain text, readable by jq etc.)
194
363
  # .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
195
364
  # .txt → text report (human/AI readable flat + cumulative table)
196
- # otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
365
+ # .pb.gz → pprof protobuf (gzip compressed)
197
366
  def self.save(path, data, format: nil)
198
367
  write_data(path, data, format)
199
368
  end
@@ -205,17 +374,49 @@ module Rperf
205
374
  File.write(path, Collapsed.encode(data))
206
375
  when :text
207
376
  File.write(path, Text.encode(data))
377
+ when :json
378
+ require "json"
379
+ json_data = data.merge(rperf_version: VERSION, pid: Process.pid, ppid: Process.ppid)
380
+ json_str = JSON.generate(json_data)
381
+ if path.to_s.end_with?(".gz")
382
+ File.binwrite(path, gzip(json_str))
383
+ else
384
+ File.write(path, json_str)
385
+ end
208
386
  else
209
387
  File.binwrite(path, gzip(PProf.encode(data)))
210
388
  end
211
389
  end
212
390
  private_class_method :write_data
213
391
 
392
+ # Load a profile saved by rperf record (.json.gz or .json).
393
+ # Returns the data hash (same format as Rperf.stop / Rperf.snapshot).
394
+ # Warns to stderr if the file was saved by a different rperf version or has no version info.
395
+ def self.load(path)
396
+ raw_bytes = File.binread(path)
397
+ # Auto-detect gzip by magic bytes (1f 8b)
398
+ raw = if raw_bytes.byteslice(0, 2) == "\x1f\x8b".b
399
+ Zlib::GzipReader.new(StringIO.new(raw_bytes)).read
400
+ else
401
+ raw_bytes
402
+ end
403
+ require "json"
404
+ data = JSON.parse(raw, symbolize_names: true)
405
+ saved_version = data.delete(:rperf_version)
406
+ if saved_version && saved_version != VERSION
407
+ $stderr.puts "rperf: warning: file was saved by rperf #{saved_version} (current: #{VERSION})"
408
+ elsif saved_version.nil?
409
+ $stderr.puts "rperf: warning: file has no version info (may be from an older rperf)"
410
+ end
411
+ data
412
+ end
413
+
214
414
  def self.detect_format(path, format)
215
415
  return format.to_sym if format
216
416
  case path.to_s
217
- when /\.collapsed\z/ then :collapsed
218
- when /\.txt\z/ then :text
417
+ when /\.collapsed\z/ then :collapsed
418
+ when /\.txt\z/ then :text
419
+ when /\.json(\.gz)?\z/ then :json
219
420
  else :pprof
220
421
  end
221
422
  end
@@ -233,16 +434,15 @@ module Rperf
233
434
  def self.print_stats(data)
234
435
  count = data[:sampling_count] || 0
235
436
  total_ns = data[:sampling_time_ns] || 0
236
- sample_count = data[:sampling_count] || 0
237
437
  mode = data[:mode] || :cpu
238
438
  frequency = data[:frequency] || 0
239
439
 
240
440
  total_ms = total_ns / 1_000_000.0
241
441
  avg_us = count > 0 ? total_ns / count / 1000.0 : 0.0
242
442
 
243
- $stderr.puts "[rperf] mode=#{mode} frequency=#{frequency}Hz"
244
- $stderr.puts "[rperf] sampling: #{count} calls, #{format("%.2f", total_ms)}ms total, #{format("%.1f", avg_us)}us/call avg"
245
- $stderr.puts "[rperf] samples recorded: #{sample_count}"
443
+ $stderr.puts "[Rperf] mode=#{mode} frequency=#{frequency}Hz"
444
+ $stderr.puts "[Rperf] sampling: #{count} calls, #{format("%.2f", total_ms)}ms total, #{format("%.1f", avg_us)}us/call avg"
445
+ $stderr.puts "[Rperf] samples recorded: #{count}"
246
446
 
247
447
  print_top(data)
248
448
  end
@@ -291,13 +491,13 @@ module Rperf
291
491
 
292
492
  def self.print_top_table(kind, table, total_weight)
293
493
  top = table.sort_by { |_, w| -w }.first(TOP_N)
294
- $stderr.puts "[rperf] top #{top.size} by #{kind}:"
494
+ $stderr.puts "[Rperf] top #{top.size} by #{kind}:"
295
495
  top.each do |key, weight|
296
496
  label, path = key
297
497
  ms = weight / 1_000_000.0
298
498
  pct = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
299
499
  loc = path.empty? ? "" : " (#{path})"
300
- $stderr.puts format("[rperf] %8.1fms %5.1f%% %s%s", ms, pct, label, loc)
500
+ $stderr.puts format("[Rperf] %8.1fms %5.1f%% %s%s", ms, pct, label, loc)
301
501
  end
302
502
  end
303
503
 
@@ -314,8 +514,16 @@ module Rperf
314
514
  samples_raw = data[:aggregated_samples] || []
315
515
  real_ns = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono) * 1_000_000_000).to_i
316
516
  times = Process.times
317
- user_ns = (times.utime * 1_000_000_000).to_i
318
- sys_ns = (times.stime * 1_000_000_000).to_i
517
+ start_times = @stat_start_times || Struct.new(:utime, :stime).new(0.0, 0.0)
518
+ user_ns = ((times.utime - start_times.utime) * 1_000_000_000).to_i
519
+ sys_ns = ((times.stime - start_times.stime) * 1_000_000_000).to_i
520
+
521
+ # In multi-process mode, use aggregated user/sys from all processes
522
+ process_count = data[:process_count] || 0
523
+ if process_count > 1 && data[:user_ns]
524
+ user_ns = data[:user_ns]
525
+ sys_ns = data[:sys_ns] || 0
526
+ end
319
527
 
320
528
  command = ENV["RPERF_STAT_COMMAND"] || "(unknown)"
321
529
 
@@ -327,10 +535,10 @@ module Rperf
327
535
  $stderr.puts format(" %14s ms real", format_ms(real_ns))
328
536
 
329
537
  if samples_raw.size > 0
330
- breakdown, total_weight = compute_stat_breakdown(samples_raw)
331
- print_stat_breakdown(breakdown, total_weight)
538
+ breakdown, total_weight = compute_stat_breakdown(samples_raw, data[:label_sets])
539
+ print_stat_breakdown(breakdown, total_weight, data)
332
540
  print_stat_runtime_info(data)
333
- print_stat_system_info
541
+ print_stat_system_info(data)
334
542
  print_stat_report(data) if ENV["RPERF_STAT_REPORT"] == "1"
335
543
  print_stat_footer(samples_raw, real_ns, data)
336
544
  end
@@ -338,20 +546,25 @@ module Rperf
338
546
  $stderr.puts
339
547
  end
340
548
 
341
- def self.compute_stat_breakdown(samples_raw)
549
+ def self.compute_stat_breakdown(samples_raw, label_sets)
342
550
  breakdown = Hash.new(0)
343
551
  total_weight = 0
344
552
 
345
- samples_raw.each do |frames, weight|
553
+ samples_raw.each do |frames, weight, _thread_seq, label_set_id|
346
554
  total_weight += weight
347
- leaf_label = frames.first&.last || ""
348
- category = case leaf_label
349
- when "[GVL blocked]" then :gvl_blocked
350
- when "[GVL wait]" then :gvl_wait
351
- when "[GC marking]" then :gc_marking
352
- when "[GC sweeping]" then :gc_sweeping
353
- else :cpu_execution
354
- end
555
+ category = :cpu_execution
556
+ if label_sets && label_set_id && label_set_id > 0
557
+ ls = label_sets[label_set_id]
558
+ if ls
559
+ gvl = ls[:"%GVL"]
560
+ gc = ls[:"%GC"]
561
+ if gvl == "blocked" then category = :gvl_blocked
562
+ elsif gvl == "wait" then category = :gvl_wait
563
+ elsif gc == "mark" then category = :gc_marking
564
+ elsif gc == "sweep" then category = :gc_sweeping
565
+ end
566
+ end
567
+ end
355
568
  breakdown[category] += weight
356
569
  end
357
570
 
@@ -359,15 +572,19 @@ module Rperf
359
572
  end
360
573
  private_class_method :compute_stat_breakdown
361
574
 
362
- def self.print_stat_breakdown(breakdown, total_weight)
575
+ def self.print_stat_breakdown(breakdown, total_weight, data)
363
576
  $stderr.puts
577
+ process_count = data[:process_count] || 0
578
+ if process_count > 1
579
+ $stderr.puts STAT_LINE.call(format_integer(process_count), " ", "[Rperf] Ruby processes profiled")
580
+ end
364
581
 
365
582
  [
366
- [:cpu_execution, "CPU execution"],
367
- [:gvl_blocked, "[Ruby] GVL blocked (I/O, sleep)"],
368
- [:gvl_wait, "[Ruby] GVL wait (contention)"],
369
- [:gc_marking, "[Ruby] GC marking"],
370
- [:gc_sweeping, "[Ruby] GC sweeping"],
583
+ [:cpu_execution, "[Rperf] CPU execution"],
584
+ [:gvl_blocked, "[Rperf] GVL blocked (I/O, sleep)"],
585
+ [:gvl_wait, "[Rperf] GVL wait (contention)"],
586
+ [:gc_marking, "[Rperf] GC marking"],
587
+ [:gc_sweeping, "[Rperf] GC sweeping"],
371
588
  ].each do |key, label|
372
589
  w = breakdown[key]
373
590
  next if w == 0
@@ -378,35 +595,43 @@ module Rperf
378
595
  private_class_method :print_stat_breakdown
379
596
 
380
597
  def self.print_stat_runtime_info(data)
381
- thread_count = data[:detected_thread_count] || 0
382
- $stderr.puts STAT_LINE.call(format_integer(thread_count), " ", "[Ruby] detected threads") if thread_count > 0
383
598
  gc = GC.stat
384
599
  $stderr.puts STAT_LINE.call(format_ms(gc[:time] * 1_000_000), "ms",
385
- "[Ruby] GC time (%s count: %s minor, %s major)" % [
600
+ "[Ruby ] GC time (%s count: %s minor, %s major)" % [
386
601
  format_integer(gc[:count]),
387
602
  format_integer(gc[:minor_gc_count]),
388
603
  format_integer(gc[:major_gc_count])])
389
- $stderr.puts STAT_LINE.call(format_integer(gc[:total_allocated_objects]), " ", "[Ruby] allocated objects")
390
- $stderr.puts STAT_LINE.call(format_integer(gc[:total_freed_objects]), " ", "[Ruby] freed objects")
604
+ $stderr.puts STAT_LINE.call(format_integer(gc[:total_allocated_objects]), " ", "[Ruby ] allocated objects")
605
+ $stderr.puts STAT_LINE.call(format_integer(gc[:total_freed_objects]), " ", "[Ruby ] freed objects")
606
+ thread_count = data[:detected_thread_count] || 0
607
+ $stderr.puts STAT_LINE.call(format_integer(thread_count), " ", "[Ruby ] detected threads") if thread_count > 0
391
608
  if defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled?
392
609
  yjit = RubyVM::YJIT.runtime_stats
393
610
  if yjit[:ratio_in_yjit]
394
- $stderr.puts STAT_LINE.call(format("%.1f%%", yjit[:ratio_in_yjit] * 100), " ", "[Ruby] YJIT code execution ratio")
611
+ $stderr.puts STAT_LINE.call(format("%.1f%%", yjit[:ratio_in_yjit] * 100), " ", "[Ruby ] YJIT code execution ratio")
395
612
  end
396
613
  end
397
614
  end
398
615
  private_class_method :print_stat_runtime_info
399
616
 
400
- def self.print_stat_system_info
617
+ def self.print_stat_system_info(data = nil)
401
618
  sys_stats = get_system_stats
402
619
  maxrss_kb = sys_stats[:maxrss_kb]
403
620
  if maxrss_kb
404
- $stderr.puts STAT_LINE.call(format_integer((maxrss_kb / 1024.0).round), "MB", "[OS] peak memory (maxrss)")
621
+ $stderr.puts STAT_LINE.call(format_integer((maxrss_kb / 1024.0).round), "MB", "[OS ] peak memory (maxrss)")
622
+ end
623
+ if sys_stats[:page_faults_minor]
624
+ minor = sys_stats[:page_faults_minor]
625
+ major = sys_stats[:page_faults_major]
626
+ $stderr.puts STAT_LINE.call(
627
+ format_integer(minor + major), " ",
628
+ "[OS ] page faults (%s minor, %s major)" % [
629
+ format_integer(minor), format_integer(major)])
405
630
  end
406
631
  if sys_stats[:ctx_voluntary]
407
632
  $stderr.puts STAT_LINE.call(
408
633
  format_integer(sys_stats[:ctx_voluntary] + sys_stats[:ctx_involuntary]), " ",
409
- "[OS] context switches (%s voluntary, %s involuntary)" % [
634
+ "[OS ] context switches (%s voluntary, %s involuntary)" % [
410
635
  format_integer(sys_stats[:ctx_voluntary]),
411
636
  format_integer(sys_stats[:ctx_involuntary])])
412
637
  end
@@ -415,10 +640,14 @@ module Rperf
415
640
  w = sys_stats[:io_write_bytes]
416
641
  $stderr.puts STAT_LINE.call(
417
642
  format_integer(((r + w) / 1024.0 / 1024.0).round), "MB",
418
- "[OS] disk I/O (%s MB read, %s MB write)" % [
643
+ "[OS ] disk I/O (%s MB read, %s MB write)" % [
419
644
  format_integer((r / 1024.0 / 1024.0).round),
420
645
  format_integer((w / 1024.0 / 1024.0).round)])
421
646
  end
647
+ process_count = data[:process_count] if data
648
+ if process_count && process_count > 1
649
+ $stderr.puts STAT_LINE.call("", " ", "(GC/OS stats are from root process only; user/sys/[Rperf] lines are aggregated)")
650
+ end
422
651
  end
423
652
  private_class_method :print_stat_system_info
424
653
 
@@ -431,11 +660,24 @@ module Rperf
431
660
 
432
661
  def self.print_stat_footer(samples_raw, real_ns, data)
433
662
  triggers = data[:trigger_count] || 0
434
- overhead_pct = real_ns > 0 ? (data[:sampling_time_ns] || 0) * 100.0 / real_ns : 0.0
663
+ sampling_time_ns = data[:sampling_time_ns] || 0
664
+ # In multi-process mode, use sum of all processes' durations as denominator.
665
+ # Single-process: fall back to root's real_ns.
666
+ total_real_ns = data[:total_duration_ns] || real_ns
667
+ total_real_ns = real_ns if total_real_ns == 0
668
+ overhead_pct = total_real_ns > 0 ? sampling_time_ns * 100.0 / total_real_ns : 0.0
435
669
  $stderr.puts
436
670
  samples = data[:sampling_count] || samples_raw.size
437
671
  $stderr.puts format(" %d samples / %d triggers, %.1f%% profiler overhead",
438
672
  samples, triggers, overhead_pct)
673
+ dropped = data[:dropped_samples] || 0
674
+ if dropped > 0
675
+ $stderr.puts format(" WARNING: %d samples dropped due to memory allocation failure", dropped)
676
+ end
677
+ dropped_agg = data[:dropped_aggregation] || 0
678
+ if dropped_agg > 0
679
+ $stderr.puts format(" WARNING: %d samples dropped during aggregation (frame/stack table full)", dropped_agg)
680
+ end
439
681
  end
440
682
  private_class_method :print_stat_footer
441
683
 
@@ -448,10 +690,10 @@ module Rperf
448
690
  # Example: 5_609_200_000 → "5,609.2"
449
691
  def self.format_ms(ns)
450
692
  ms = ns / 1_000_000.0
451
- int_part = ms.truncate
452
- frac = format(".%d", ((ms - int_part).abs * 10).round % 10)
453
- int_str = int_part.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
454
- "#{int_str}#{frac}"
693
+ formatted = format("%.1f", ms)
694
+ int_str, frac = formatted.split(".")
695
+ int_str = int_str.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
696
+ "#{int_str}.#{frac}"
455
697
  end
456
698
  private_class_method :format_ms
457
699
 
@@ -477,6 +719,12 @@ module Rperf
477
719
  stats[:maxrss_kb] = rss if rss && rss > 0
478
720
  end
479
721
 
722
+ if File.readable?("/proc/self/stat")
723
+ fields = File.read("/proc/self/stat").split
724
+ stats[:page_faults_minor] = fields[9].to_i
725
+ stats[:page_faults_major] = fields[11].to_i
726
+ end
727
+
480
728
  if File.readable?("/proc/self/io")
481
729
  # Linux: parse /proc/self/io
482
730
  File.read("/proc/self/io").each_line do |line|
@@ -493,6 +741,292 @@ module Rperf
493
741
  end
494
742
  private_class_method :get_system_stats
495
743
 
744
+ # --- Multi-process (fork) support ---
745
+
746
+ # Set up child process tracking from Rperf.start(inherit: ...).
747
+ # Called only when NOT already inside a CLI-managed session (no RPERF_SESSION_DIR).
748
+ # Creates the session directory eagerly — if creation fails, inherit is silently
749
+ # disabled and profiling continues in single-process mode.
750
# Prepares child-process tracking for a profiling session.
#
# A session directory is created up front; when that fails the method
# returns without touching ENV, so profiling stays single-process.
# Otherwise the root pid and session dir are exported through ENV and the
# fork hook is installed. With inherit == true the RPERF_* / RUBYLIB /
# RUBYOPT variables are additionally rewritten (after saving the previous
# values in @_saved_env) so that spawned Ruby processes load rperf too.
#
# output, format and stat are accepted but not used by this method.
def self._setup_inherit(mode, frequency, signal, aggregate, output, format, stat, inherit, defer)
  dir = _create_session_dir
  return unless dir

  ENV["RPERF_ROOT_PROCESS"] = Process.pid.to_s
  ENV["RPERF_SESSION_DIR"] = dir
  ENV["RPERF_DEFER"] = "1" if defer

  _install_fork_hook

  # Anything other than `true` tracks forked children only.
  return unless inherit == true

  # Remember the current values so they can be restored later.
  tracked = %w[RPERF_ENABLED RPERF_FREQUENCY RPERF_MODE RPERF_SIGNAL RPERF_AGGREGATE RUBYLIB RUBYOPT]
  @_saved_env = tracked.to_h { |key| [key, ENV[key]] }

  ENV["RPERF_ENABLED"] = "1"
  ENV["RPERF_FREQUENCY"] = frequency.to_s
  ENV["RPERF_MODE"] = mode.to_s
  # Assigning nil removes the variable; `false` is exported as "false".
  ENV["RPERF_SIGNAL"] = signal&.to_s
  ENV["RPERF_AGGREGATE"] = aggregate ? nil : "0"

  search_path = [File.expand_path("..", __FILE__), ENV["RUBYLIB"]].compact
  ENV["RUBYLIB"] = search_path.join(File::PATH_SEPARATOR)
  ENV["RUBYOPT"] = "-rrperf #{ENV['RUBYOPT']}".strip
end
776
+ private_class_method :_setup_inherit
777
+
778
+ # Create session directory eagerly. Returns the session dir path on success,
779
+ # nil on failure (caller should fall back to single-process mode).
780
+ # Try each candidate base in order. If user_dir looks usable but
781
+ # session_dir creation fails (quota, ACL, sandbox, etc.), fall through
782
+ # to the next base instead of giving up.
783
+ # When clean_stale: true, removes session dirs from dead processes.
784
# Creates a fresh per-session directory and returns its path, or nil when
# no candidate base directory works.
#
# Candidate bases are tried in order: RPERF_TMPDIR, XDG_RUNTIME_DIR, then
# Dir.tmpdir. Under each base a per-user directory "rperf-<uid>" is used;
# it must be owned by the current user with mode 0700, otherwise the base
# is skipped. Any failure moves on to the next base.
#
# clean_stale - when true, session directories whose embedded pid no
#               longer exists are removed before the new one is created.
def self._create_session_dir(clean_stale: false)
  require "securerandom"
  require "tmpdir"

  # True only for a directory we own with permissions of exactly 0700.
  trusted = lambda do |path|
    info = begin
      File.stat(path)
    rescue StandardError
      nil
    end
    !info.nil? && info.owned? && (info.mode & 0777) == 0700
  end

  candidates = [ENV["RPERF_TMPDIR"], ENV["XDG_RUNTIME_DIR"], Dir.tmpdir].compact
  candidates.each do |base|
    user_dir = File.join(base, "rperf-#{Process.uid}")

    usable =
      if File.directory?(user_dir)
        trusted.call(user_dir)
      elsif File.writable?(base)
        begin
          Dir.mkdir(user_dir, 0700)
          true
        rescue Errno::EEXIST
          # Lost a race with another process: validate what is there now.
          trusted.call(user_dir)
        rescue SystemCallError
          false
        end
      else
        false
      end
    next unless usable

    if clean_stale
      require "fileutils"
      Dir.glob(File.join(user_dir, "rperf-*")).each do |entry|
        owner_pid = File.basename(entry)[/\Arperf-(\d+)-/, 1]
        next unless owner_pid

        begin
          # Signal 0 only probes for existence of the process.
          Process.kill(0, owner_pid.to_i)
        rescue Errno::ESRCH
          FileUtils.rm_rf(entry)
        rescue Errno::EPERM
          # The pid exists but belongs to someone else; leave it alone.
        end
      end
    end

    created = File.join(user_dir, "rperf-#{Process.pid}-#{SecureRandom.hex(4)}")
    begin
      Dir.mkdir(created, 0700)
    rescue SystemCallError
      next
    end
    return created
  end

  nil
end
834
+ private_class_method :_create_session_dir
835
+
836
# Decodes the RPERF_SIGNAL environment variable.
#
# Returns false for the literal "false", an Integer for an all-digit
# value, and nil when the variable is unset or holds anything else.
def self._parse_signal_env
  raw = ENV["RPERF_SIGNAL"]
  return false if raw == "false"
  return raw.to_i if raw && raw.match?(/\A\d+\z/)

  nil
end
843
+ private_class_method :_parse_signal_env
844
+
845
# Prepends a wrapper around Process._fork, at most once per process.
#
# The wrapper calls Rperf._on_first_fork before forking when the session
# dir has not been marked as created and the current pid matches
# RPERF_ROOT_PROCESS, and calls Rperf._restart_in_child in the child
# (where the underlying _fork returns 0). The pid is returned unchanged.
def self._install_fork_hook
  return if @_fork_hook_installed

  @_fork_hook_installed = true

  fork_wrapper = Module.new do
    def _fork
      pending = !Rperf.instance_variable_get(:@_session_dir_created)
      Rperf._on_first_fork if pending && Process.pid.to_s == ENV["RPERF_ROOT_PROCESS"]

      child_pid = super
      Rperf._restart_in_child if child_pid == 0
      child_pid
    end
  end

  ::Process.singleton_class.prepend(fork_wrapper)
end
863
+ private_class_method :_install_fork_hook
864
+
865
# Marks the session directory as active the first time the root forks.
#
# Does nothing when the flag is already set or when RPERF_SESSION_DIR is
# unset / not an existing directory. @output, @format and @stat are left
# untouched here.
def self._on_first_fork
  return if @_session_dir_created

  dir = ENV["RPERF_SESSION_DIR"]
  return if dir.nil? || !File.directory?(dir)

  @_session_dir_created = true
end
875
+
876
# Restarts profiling inside a freshly forked child.
#
# Skipped when RPERF_SESSION_DIR is unset / not a directory, or when
# _c_running? reports true. Otherwise the label tables are reset and
# profiling is started with settings read from ENV, writing JSON to
# "profile-<pid>.json.gz" inside the session directory. The child is
# labelled with its pid and an at_exit handler calls Rperf.stop.
def self._restart_in_child
  dir = ENV["RPERF_SESSION_DIR"]
  return unless dir && File.directory?(dir)
  return if _c_running? # not expected after fork; guard anyway

  # Label tables from the parent are discarded for the new session.
  @label_set_table = nil
  @label_set_index = nil

  # NOTE(review): mode falls back to :wall whenever RPERF_MODE is not
  # exactly "cpu" (including unset).
  sampling_mode = ENV["RPERF_MODE"] == "cpu" ? :cpu : :wall

  start_opts = {
    frequency: (ENV["RPERF_FREQUENCY"] || 1000).to_i,
    mode: sampling_mode,
    aggregate: ENV["RPERF_AGGREGATE"] != "0",
    output: File.join(dir, "profile-#{Process.pid}.json.gz"),
    format: :json,
    stat: false,
    verbose: false,
  }

  signal_setting = _parse_signal_env
  start_opts[:signal] = signal_setting unless signal_setting.nil?
  start_opts[:defer] = true if ENV["RPERF_DEFER"] == "1"

  start(**start_opts, inherit: false)
  @_session_dir_output = true
  label("%pid": Process.pid.to_s)

  # Ensure the child's profile is flushed even without an explicit stop.
  at_exit { Rperf.stop }
end
907
+
908
# Merges every "profile-*.json.gz" found in the session directory into a
# single data Hash, reports it, and removes the session directory.
#
# Files that fail to load produce a warning on stderr and are skipped.
# Counters are summed across processes; :duration_ns is the longest
# single duration while :total_duration_ns is the sum of all durations.
# The merged data is printed via print_stat when @stat is set and written
# via write_data when @output is set.
#
# Returns the merged Hash, or nil when the session dir is missing, no
# profile could be loaded, or an error occurred (in which case a
# best-effort fallback output and cleanup are attempted).
def self._aggregate_and_report
  session_dir = ENV["RPERF_SESSION_DIR"]
  return unless session_dir && File.directory?(session_dir)

  samples = []
  label_sets = [{}]
  label_index = { {} => 0 }
  sums = Hash.new(0)
  longest_ns = 0
  loaded = 0
  summed_keys = %i[trigger_count sampling_count sampling_time_ns user_ns sys_ns]

  Dir.glob(File.join(session_dir, "profile-*.json.gz")).each do |path|
    begin
      profile = load(path)
    rescue StandardError => e
      $stderr.puts "rperf: warning: failed to load #{path}: #{e.message}"
      next
    end
    next unless profile

    _merge_into(samples, label_sets, profile, label_index)
    summed_keys.each { |key| sums[key] += (profile[key] || 0) }

    duration = profile[:duration_ns] || 0
    longest_ns = duration if duration > longest_ns
    sums[:total_duration_ns] += duration
    loaded += 1
  end

  return if loaded == 0

  merged_data = {
    mode: (ENV["RPERF_MODE"] || "wall").to_sym,
    frequency: (ENV["RPERF_FREQUENCY"] || 1000).to_i,
    aggregated_samples: samples,
    label_sets: label_sets,
    trigger_count: sums[:trigger_count],
    sampling_count: sums[:sampling_count],
    sampling_time_ns: sums[:sampling_time_ns],
    duration_ns: longest_ns,
    total_duration_ns: sums[:total_duration_ns],
    user_ns: sums[:user_ns],
    sys_ns: sums[:sys_ns],
    process_count: loaded,
  }

  print_stat(merged_data) if @stat
  write_data(@output, merged_data, @format) if @output

  _cleanup_session_dir(session_dir)

  merged_data
rescue => e
  $stderr.puts "rperf: warning: failed to aggregate multi-process data: #{e.message}"
  # Fallback: hand the user one of the individual profiles unchanged.
  _fallback_aggregate_output(session_dir)
  _cleanup_session_dir(session_dir)
  nil
end
976
+ # Not private — called from at_exit block which runs in top-level context
977
+
978
# Recursively removes the given session directory.
#
# Failures are reported as a warning on stderr instead of being raised.
def self._cleanup_session_dir(session_dir)
  begin
    require "fileutils"
    FileUtils.rm_rf(session_dir)
  rescue StandardError => error
    $stderr.puts "rperf: warning: failed to clean up session dir: #{error.message}"
  end
end
984
+ private_class_method :_cleanup_session_dir
985
+
986
+ # Best-effort fallback: if aggregation failed, try to copy the first
987
+ # available child profile to @output so the user gets something.
988
# Last-resort output when aggregation failed: copies the first
# "profile-*.json.gz" in the session directory to @output.
#
# Does nothing when @output is unset, the session directory is missing,
# or no profile file exists. Any StandardError is swallowed on purpose.
def self._fallback_aggregate_output(session_dir)
  return unless @output && session_dir && File.directory?(session_dir)

  candidate = Dir.glob(File.join(session_dir, "profile-*.json.gz")).first
  return unless candidate

  require "fileutils"
  FileUtils.cp(candidate, @output)
rescue StandardError
  # nothing more we can do
end
998
+ private_class_method :_fallback_aggregate_output
999
+
1000
# Appends one process's samples to the merged collections, remapping its
# label-set ids into the merged label-set table.
#
# merged_samples          - Array receiving [frames, weight, thread_seq,
#                           label_set_id] entries (mutated).
# merged_label_sets       - Array of label Hashes (mutated; deduplicated).
# data                    - Hash with optional :label_sets and
#                           :aggregated_samples.
# merged_label_sets_index - optional Hash mapping label set → index into
#                           merged_label_sets; built from the array when
#                           omitted, mutated when given.
#
# Label Hash keys are converted to symbols before comparison. Samples
# whose label-set id is nil or unknown are assigned id 0.
def self._merge_into(merged_samples, merged_label_sets, data, merged_label_sets_index = nil)
  # Reverse lookup table so deduplication is a hash access, not a scan.
  merged_label_sets_index ||= merged_label_sets.each_with_index.to_h

  remap = {}
  (data[:label_sets] || [{}]).each_with_index do |label_set, child_id|
    key = label_set.is_a?(Hash) ? label_set.transform_keys(&:to_sym) : label_set
    remap[child_id] = (merged_label_sets_index[key] ||= begin
      merged_label_sets << key
      merged_label_sets.size - 1
    end)
  end

  (data[:aggregated_samples] || []).each do |frames, weight, thread_seq, label_set_id|
    merged_samples << [frames, weight, thread_seq, remap[label_set_id || 0] || 0]
  end
end
1028
+ private_class_method :_merge_into
1029
+
496
1030
  # ENV-based auto-start for CLI usage
497
1031
  if ENV["RPERF_ENABLED"] == "1"
498
1032
  _rperf_mode_str = ENV["RPERF_MODE"] || "cpu"
@@ -500,23 +1034,60 @@ module Rperf
500
1034
  raise ArgumentError, "RPERF_MODE must be 'cpu' or 'wall', got: #{_rperf_mode_str.inspect}"
501
1035
  end
502
1036
  _rperf_mode = _rperf_mode_str == "wall" ? :wall : :cpu
503
- _rperf_format = ENV["RPERF_FORMAT"] ? ENV["RPERF_FORMAT"].to_sym : nil
504
- _rperf_stat = ENV["RPERF_STAT"] == "1"
505
- _rperf_signal = case ENV["RPERF_SIGNAL"]
506
- when nil then nil
507
- when "false" then false
508
- else ENV["RPERF_SIGNAL"].to_i
1037
+ _rperf_format = if ENV["RPERF_FORMAT"]
1038
+ unless %w[pprof collapsed text json].include?(ENV["RPERF_FORMAT"])
1039
+ raise ArgumentError, "RPERF_FORMAT must be one of pprof, collapsed, text, json, got: #{ENV["RPERF_FORMAT"].inspect}"
1040
+ end
1041
+ ENV["RPERF_FORMAT"].to_sym
509
1042
  end
1043
+ _rperf_stat = ENV["RPERF_STAT"] == "1"
1044
+ _rperf_signal = _parse_signal_env
510
1045
  _rperf_aggregate = ENV["RPERF_AGGREGATE"] != "0"
1046
+ _rperf_original_output = _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.json.gz")
1047
+
511
1048
  _rperf_start_opts = { frequency: (ENV["RPERF_FREQUENCY"] || 1000).to_i, mode: _rperf_mode,
512
- output: _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.data"),
513
1049
  verbose: ENV["RPERF_VERBOSE"] == "1",
514
- format: _rperf_format,
515
- stat: _rperf_stat,
516
1050
  aggregate: _rperf_aggregate }
517
1051
  _rperf_start_opts[:signal] = _rperf_signal unless _rperf_signal.nil?
518
- start(**_rperf_start_opts)
519
- at_exit { stop }
1052
+ _rperf_start_opts[:defer] = true if ENV["RPERF_DEFER"] == "1"
1053
+
1054
+ if ENV["RPERF_SESSION_DIR"] && Process.pid.to_s != ENV["RPERF_ROOT_PROCESS"]
1055
+ # spawn / fork+exec child: write to session dir, no aggregation.
1056
+ # Session dir is created eagerly by the root process (CLI or API).
1057
+ # If it doesn't exist, skip profiling entirely — don't fall back to
1058
+ # normal mode which would duplicate output with the root process.
1059
+ _rperf_session_dir = ENV["RPERF_SESSION_DIR"]
1060
+ if File.directory?(_rperf_session_dir)
1061
+ _rperf_start_opts[:output] = File.join(_rperf_session_dir, "profile-#{Process.pid}.json.gz")
1062
+ _rperf_start_opts[:format] = :json
1063
+ _rperf_start_opts[:stat] = false
1064
+ _rperf_start_opts[:verbose] = false
1065
+
1066
+ _install_fork_hook
1067
+ start(**_rperf_start_opts, inherit: false)
1068
+ @_session_dir_output = true
1069
+ label("%pid": Process.pid.to_s)
1070
+ at_exit { stop }
1071
+ end
1072
+ elsif ENV["RPERF_SESSION_DIR"]
1073
+ # Root process: start with normal output settings.
1074
+ # If no fork/spawn happens, behaves exactly like single-process mode.
1075
+ _rperf_start_opts[:output] = _rperf_original_output
1076
+ _rperf_start_opts[:format] = _rperf_format
1077
+ _rperf_start_opts[:stat] = _rperf_stat
1078
+
1079
+ _install_fork_hook
1080
+ start(**_rperf_start_opts, inherit: false)
1081
+
1082
+ at_exit { Rperf.stop }
1083
+ else
1084
+ _rperf_start_opts[:output] = _rperf_original_output
1085
+ _rperf_start_opts[:format] = _rperf_format
1086
+ _rperf_start_opts[:stat] = _rperf_stat
1087
+ _rperf_start_opts[:inherit] = false # no RPERF_SESSION_DIR means --no-inherit
1088
+ start(**_rperf_start_opts)
1089
+ at_exit { stop }
1090
+ end
520
1091
  end
521
1092
 
522
1093
  # Text report encoder — human/AI readable flat + cumulative top-N table.
@@ -692,7 +1263,7 @@ module Rperf
692
1263
  intern.("frequency: #{frequency}Hz"),
693
1264
  intern.("ruby: #{RUBY_DESCRIPTION}"),
694
1265
  ]
695
- doc_url_idx = intern.("https://ko1.github.io/rperf/help.html")
1266
+ doc_url_idx = intern.("https://ko1.github.io/rperf/docs/help.html")
696
1267
 
697
1268
  # field 6: string_table (repeated string)
698
1269
  string_table.each do |s|