rperf 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rperf.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  require_relative "rperf/version"
2
+ require_relative "rperf/meta"
3
+ require_relative "rperf/table"
2
4
  require "zlib"
3
5
  require "stringio"
4
6
 
@@ -13,10 +15,21 @@ end
13
15
 
14
16
  module Rperf
15
17
 
16
- @verbose = false
17
- @output = nil
18
- @stat = false
19
- @stat_start_mono = nil
18
+ # --- Module-level state (single global profiler) ---
19
+ # Profiling session
20
+ @verbose = false # verbose stats output on stop
21
+ @output = nil # output file path (nil = no file)
22
+ @format = nil # output format (:json, :pprof, :collapsed, :text, nil = auto)
23
+ @stat = false # print user/sys/real summary to stderr
24
+ @stat_start_mono = nil # Process::CLOCK_MONOTONIC at start (for real time)
25
+ @stat_start_times = nil # Process.times at start (for user/sys time)
26
+ @label_set_table = nil # Array: label_set_id → frozen Hash
27
+ @label_set_index = nil # Hash: frozen label Hash → label_set_id
28
+ # Multi-process (fork/spawn) support
29
+ @_session_dir_output = false # true when @output points to session dir (child process)
30
+ @_session_dir_created = false # true after first fork activates session dir
31
+ @_fork_hook_installed = false # true after Process._fork hook is prepended
32
+ @_saved_env = nil # saved ENV values for restore on stop (inherit: true)
20
33
 
21
34
  # Starts profiling.
22
35
  # format: :json, :pprof, :collapsed, or :text. nil = auto-detect from output extension
@@ -24,10 +37,19 @@ module Rperf
24
37
  # .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
25
38
  # .txt → text report (human/AI readable flat + cumulative table)
26
39
  # .pb.gz → pprof protobuf (gzip compressed)
27
- def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true, defer: false)
40
+ # inherit: controls child process profiling.
41
+ # :fork — (default) automatically profile forked child processes via Process._fork hook.
42
+ # Session dir is created eagerly at start time. Spawned processes are NOT tracked.
43
+ # true — profile both forked and spawned Ruby child processes. Sets RUBYOPT=-rrperf
44
+ # and RPERF_* env vars so spawned Ruby processes auto-start profiling.
45
+ # Use with caution: affects ALL spawned Ruby processes, including independent
46
+ # programs that may use rperf themselves.
47
+ # false — do not track child processes (single-process mode).
48
+ def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true, defer: false, inherit: :fork)
28
49
  raise ArgumentError, "frequency must be a positive integer (got #{frequency.inspect})" unless frequency.is_a?(Integer) && frequency > 0
29
50
  raise ArgumentError, "frequency must be <= 10000 (10KHz), got #{frequency}" if frequency > 10_000
30
51
  raise ArgumentError, "mode must be :cpu or :wall, got #{mode.inspect}" unless %i[cpu wall].include?(mode)
52
+ raise ArgumentError, "inherit must be :fork, true, or false, got #{inherit.inspect}" unless [true, false, :fork].include?(inherit)
31
53
  c_mode = mode == :cpu ? 0 : 1
32
54
  unless signal.nil? || signal == false || signal.is_a?(Integer)
33
55
  raise ArgumentError, "signal must be nil, false, or an Integer, got #{signal.inspect}"
@@ -45,14 +67,19 @@ module Rperf
45
67
  @output = output
46
68
  @format = format
47
69
  @stat = stat
48
- if @stat
49
- @stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC)
50
- @stat_start_times = Process.times
51
- end
70
+ @stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC)
71
+ @stat_start_times = Process.times
72
+ @gc_stat_start = GC.stat
73
+ @gc_stat_snapshot_base = @gc_stat_start
52
74
  @label_set_table = nil
53
75
  @label_set_index = nil
54
76
  _c_start(frequency, c_mode, aggregate, c_signal, defer)
55
77
 
78
+ # Set up child process tracking
79
+ if inherit && !ENV["RPERF_SESSION_DIR"]
80
+ _setup_inherit(mode, frequency, signal, aggregate, inherit, defer)
81
+ end
82
+
56
83
  if block_given?
57
84
  begin
58
85
  yield
@@ -63,8 +90,12 @@ module Rperf
63
90
  end
64
91
  end
65
92
 
93
+ # Defensive fallback for process times when start didn't record them
94
+ ZERO_TIMES = Struct.new(:utime, :stime).new(0.0, 0.0).freeze
95
+ private_constant :ZERO_TIMES
96
+
66
97
  # VM state integer → label value mapping.
67
- # These values appear in the "Ruby" label key.
98
+ # These values appear as "%GVL" / "%GC" label keys in label_sets.
68
99
  VM_STATE_LABELS = {
69
100
  1 => ["%GVL", "blocked"],
70
101
  2 => ["%GVL", "wait"],
@@ -73,9 +104,43 @@ module Rperf
73
104
  }.freeze
74
105
 
75
106
  def self.stop
107
+ # Check if we need to aggregate child process data.
108
+ # @_session_dir_created: fork happened and session dir is active.
109
+ # Otherwise: check for actual child profile files (spawn-only case).
110
+ session_dir = ENV["RPERF_SESSION_DIR"]
111
+ is_root = session_dir && Process.pid.to_s == ENV["RPERF_ROOT_PROCESS"]
112
+ has_child_profiles = is_root && !@_session_dir_created &&
113
+ File.directory?(session_dir.to_s) &&
114
+ !Dir.glob(File.join(session_dir.to_s, "profile-*.json.gz")).empty?
115
+ needs_aggregation = is_root && (@_session_dir_created || has_child_profiles)
116
+
76
117
  data = _c_stop
77
118
  return unless data
78
119
 
120
+ # Record process times for multi-process aggregation
121
+ times = Process.times
122
+ start_times = @stat_start_times || ZERO_TIMES
123
+ data[:user_ns] = ((times.utime - start_times.utime) * 1_000_000_000).to_i
124
+ data[:sys_ns] = ((times.stime - start_times.stime) * 1_000_000_000).to_i
125
+
126
+ # GC / memory statistics for the summary (deltas since start; GC.stat is
127
+ # cumulative over the process lifetime). maxrss is a process-lifetime
128
+ # peak — no delta is possible.
129
+ if @gc_stat_start
130
+ gc = GC.stat
131
+ data[:gc_stats] = {
132
+ count: gc[:count] - @gc_stat_start[:count],
133
+ minor_count: gc[:minor_gc_count] - @gc_stat_start[:minor_gc_count],
134
+ major_count: gc[:major_gc_count] - @gc_stat_start[:major_gc_count],
135
+ time_ms: (gc[:time] || 0) - (@gc_stat_start[:time] || 0),
136
+ allocated_objects: gc[:total_allocated_objects] - @gc_stat_start[:total_allocated_objects],
137
+ freed_objects: gc[:total_freed_objects] - @gc_stat_start[:total_freed_objects],
138
+ }
139
+ @gc_stat_start = nil
140
+ end
141
+ sys_stats = get_system_stats
142
+ data[:maxrss_mb] = (sys_stats[:maxrss_kb] / 1024.0).round if sys_stats[:maxrss_kb]
143
+
79
144
  # When aggregate: false, C extension returns :raw_samples but not
80
145
  # :aggregated_samples. Build aggregated view so encoders always work.
81
146
  if data[:raw_samples] && !data[:aggregated_samples]
@@ -93,18 +158,79 @@ module Rperf
93
158
 
94
159
  merge_vm_state_labels!(data)
95
160
 
161
+ if needs_aggregation
162
+ # Root process with children: write root's own profile to session dir
163
+ # (fixed json.gz format), then aggregate all profiles.
164
+ # Root's @output/@format/@stat are preserved for the merged result.
165
+ print_stats(data) if @verbose
166
+ begin
167
+ write_data(File.join(session_dir, "profile-#{Process.pid}.json.gz"), data, :json, internal: true)
168
+ rescue SystemCallError
169
+ # Session dir may have been removed (e.g., test scenario) — continue to aggregation
170
+ end
171
+ merged = _aggregate_and_report(data)
172
+ if merged.nil? && data
173
+ # Aggregation failed — fall back to root's own data
174
+ $stderr.puts "rperf: warning: multi-process aggregation failed; writing root process data only"
175
+ write_data(@output, data, @format) if @output
176
+ print_stat(data) if @stat
177
+ end
178
+ _cleanup_session_state
179
+ return merged || data
180
+ end
181
+
96
182
  print_stats(data) if @verbose
97
183
  print_stat(data) if @stat
98
184
 
99
185
  if @output
100
- write_data(@output, data, @format)
186
+ if @_session_dir_output
187
+ # Child process writing to session dir — tolerate missing dir
188
+ begin
189
+ write_data(@output, data, @format, internal: true)
190
+ rescue SystemCallError
191
+ # Parent may have already cleaned up the session dir (e.g., parent
192
+ # exited first and rm_rf'd it), or disk is full. Silently skip —
193
+ # crashing in at_exit is worse than losing one child's profile.
194
+ end
195
+ else
196
+ write_data(@output, data, @format)
197
+ end
101
198
  @output = nil
102
199
  @format = nil
103
200
  end
104
201
 
202
+ _cleanup_session_state
105
203
  data
106
204
  end
107
205
 
206
+ def self._cleanup_session_state
207
+ session_dir = ENV.delete("RPERF_SESSION_DIR")
208
+ ENV.delete("RPERF_ROOT_PROCESS")
209
+ ENV.delete("RPERF_DEFER")
210
+ @_session_dir_created = false
211
+ @_session_dir_output = false
212
+ # Restore ENV variables saved by _setup_inherit (inherit: true)
213
+ if @_saved_env
214
+ @_saved_env.each do |key, original|
215
+ if original.nil?
216
+ ENV.delete(key)
217
+ else
218
+ ENV[key] = original
219
+ end
220
+ end
221
+ @_saved_env = nil
222
+ end
223
+ # Remove eagerly-created session dir if it's empty (no children ran)
224
+ if session_dir && File.directory?(session_dir)
225
+ begin
226
+ Dir.rmdir(session_dir) # only succeeds if empty
227
+ rescue SystemCallError
228
+ # not empty or already removed — fine
229
+ end
230
+ end
231
+ end
232
+ private_class_method :_cleanup_session_state
233
+
108
234
  # Returns a snapshot of the current profiling data without stopping.
109
235
  # Only works in aggregate mode (the default). Returns nil if not profiling.
110
236
  # The returned data has the same format as stop's return value and can be
@@ -116,6 +242,23 @@ module Rperf
116
242
  def self.snapshot(clear: false)
117
243
  data = _c_snapshot(clear)
118
244
  return unless data
245
+ # GC/memory stats for the snapshot's summary. The baseline advances on
246
+ # clear: true so interval snapshots report per-interval deltas.
247
+ if @gc_stat_snapshot_base
248
+ gc = GC.stat
249
+ base = @gc_stat_snapshot_base
250
+ data[:gc_stats] = {
251
+ count: gc[:count] - base[:count],
252
+ minor_count: gc[:minor_gc_count] - base[:minor_gc_count],
253
+ major_count: gc[:major_gc_count] - base[:major_gc_count],
254
+ time_ms: (gc[:time] || 0) - (base[:time] || 0),
255
+ allocated_objects: gc[:total_allocated_objects] - base[:total_allocated_objects],
256
+ freed_objects: gc[:total_freed_objects] - base[:total_freed_objects],
257
+ }
258
+ @gc_stat_snapshot_base = gc if clear
259
+ end
260
+ sys_stats = get_system_stats
261
+ data[:maxrss_mb] = (sys_stats[:maxrss_kb] / 1024.0).round if sys_stats[:maxrss_kb]
119
262
  merge_vm_state_labels!(data)
120
263
  data
121
264
  end
@@ -124,24 +267,40 @@ module Rperf
124
267
  # Label sets are stored as an Array of Hashes, indexed by label_set_id.
125
268
  # Index 0 is reserved (no labels).
126
269
 
127
- @label_set_table = nil # Array of frozen Hash
128
- @label_set_index = nil # Hash → id (for dedup)
129
-
130
270
  def self._init_label_sets
131
271
  @label_set_table = [{}] # id 0 = no labels
132
272
  @label_set_index = { {} => 0 }
133
273
  end
134
274
 
135
275
  def self._intern_label_set(hash)
136
- frozen = hash.frozen? ? hash : hash.freeze
137
- @label_set_index[frozen] ||= begin
276
+ hash.freeze
277
+ @label_set_index[hash] ||= begin
138
278
  id = @label_set_table.size
139
- @label_set_table << frozen
279
+ @label_set_table << hash
140
280
  _c_set_label_sets(@label_set_table)
141
281
  id
142
282
  end
143
283
  end
144
284
 
285
+ # Merges the given keyword labels into the current thread's label set,
286
+ # sets the result on the current thread, and returns [previous_id, new_id].
287
+ # Callers use previous_id to restore labels after a block.
288
+ def self._merge_and_set_label(kw)
289
+ _init_label_sets unless @label_set_table
290
+
291
+ cur_id = _c_get_label
292
+ cur_labels = @label_set_table[cur_id] || {}
293
+ # Interned label sets must be deeply immutable, but freezing the caller's
294
+ # own objects is an observable side effect — dup mutable Strings instead
295
+ kw = kw.transform_values { |v| v.is_a?(String) && !v.frozen? ? v.dup.freeze : v.freeze }
296
+ new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
297
+ new_id = _intern_label_set(new_labels)
298
+ _c_set_label(new_id)
299
+
300
+ [cur_id, new_id]
301
+ end
302
+ private_class_method :_merge_and_set_label
303
+
145
304
  # Sets labels on the current thread for profiling annotation.
146
305
  # With a block: restores previous labels when the block exits.
147
306
  # Without a block: sets labels persistently on the current thread.
@@ -155,14 +314,7 @@ module Rperf
155
314
  return yield if block && !_c_running?
156
315
  return unless _c_running?
157
316
 
158
- _init_label_sets unless @label_set_table
159
-
160
- cur_id = _c_get_label
161
- cur_labels = @label_set_table[cur_id] || {}
162
-
163
- new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
164
- new_id = _intern_label_set(new_labels)
165
- _c_set_label(new_id)
317
+ cur_id, _new_id = _merge_and_set_label(kw)
166
318
 
167
319
  if block
168
320
  begin
@@ -187,13 +339,7 @@ module Rperf
187
339
  raise ArgumentError, "Rperf.profile requires a block" unless block
188
340
  raise RuntimeError, "Rperf is not started" unless _c_running?
189
341
 
190
- _init_label_sets unless @label_set_table
191
-
192
- cur_id = _c_get_label
193
- cur_labels = @label_set_table[cur_id] || {}
194
- new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
195
- new_id = _intern_label_set(new_labels)
196
- _c_set_label(new_id)
342
+ cur_id, _new_id = _merge_and_set_label(kw)
197
343
 
198
344
  _c_profile_inc
199
345
 
@@ -205,6 +351,11 @@ module Rperf
205
351
  end
206
352
  end
207
353
 
354
+ # Returns true while a profiling session is active (between start and stop).
355
+ def self.running?
356
+ _c_running?
357
+ end
358
+
208
359
  # Returns the current thread's labels as a Hash.
209
360
  # Returns an empty Hash if no labels are set or profiling is not running.
210
361
  def self.labels
@@ -238,7 +389,7 @@ module Rperf
238
389
  unless new_id
239
390
  base = label_sets[label_set_id] || {}
240
391
  key, value = VM_STATE_LABELS[vm_state]
241
- new_ls = base.merge(key => value).freeze
392
+ new_ls = base.merge(key.to_sym => value).freeze
242
393
  new_id = label_sets.size
243
394
  label_sets << new_ls
244
395
  mapping[cache_key] = new_id
@@ -257,7 +408,8 @@ module Rperf
257
408
 
258
409
  # Saves profiling data to a file.
259
410
  # format: :json, :pprof, :collapsed, or :text. nil = auto-detect from path extension
260
- # .json.gz → json (rperf native, default)
411
+ # .json.gz → json (rperf native, gzip compressed, default)
412
+ # .json → json (plain text, readable by jq etc.)
261
413
  # .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
262
414
  # .txt → text report (human/AI readable flat + cumulative table)
263
415
  # .pb.gz → pprof protobuf (gzip compressed)
@@ -265,30 +417,85 @@ module Rperf
265
417
  write_data(path, data, format)
266
418
  end
267
419
 
268
- def self.write_data(path, data, format)
420
+ # internal: true skips meta/summary generation — used for per-process
421
+ # intermediate files in the multi-process session dir (meta is attached
422
+ # once, on the root's final output).
423
+ def self.write_data(path, data, format, internal: false)
269
424
  fmt = detect_format(path, format)
270
425
  case fmt
271
426
  when :collapsed
272
- File.write(path, Collapsed.encode(data))
427
+ atomic_write(path, Collapsed.encode(data))
273
428
  when :text
274
- File.write(path, Text.encode(data))
429
+ atomic_write(path, Text.encode(data))
275
430
  when :json
276
431
  require "json"
277
- File.binwrite(path, gzip(JSON.generate(data.merge(rperf_version: VERSION))))
432
+ json_data = data
433
+ unless internal
434
+ # meta/summary must be the FIRST keys so Meta.read can extract them
435
+ # from the head of the (gzipped) file without loading the body —
436
+ # reorder even when re-saving data that already carries them.
437
+ meta = data[:meta] || Meta.build_meta(data)
438
+ summary = data[:summary] || Meta.build_summary(data)
439
+ json_data = { meta: meta, summary: summary }.merge(data.except(:meta, :summary))
440
+ end
441
+ json_data = json_data.merge(rperf_version: VERSION, pid: Process.pid, ppid: Process.ppid)
442
+ json_str = JSON.generate(json_data)
443
+ if path.to_s.end_with?(".gz")
444
+ atomic_write(path, gzip(json_str), binary: true)
445
+ else
446
+ atomic_write(path, json_str)
447
+ end
278
448
  else
279
- File.binwrite(path, gzip(PProf.encode(data)))
449
+ atomic_write(path, gzip(PProf.encode(data)), binary: true)
280
450
  end
281
451
  end
282
452
  private_class_method :write_data
283
453
 
284
- # Load a profile saved by rperf record (.json.gz).
454
+ # Write via tmp file + rename so a crash mid-write never leaves a truncated
455
+ # file at the final path (the multi-process aggregator globs the session dir
456
+ # and would otherwise load — and then discard — a partial child profile).
457
+ def self.atomic_write(path, content, binary: false)
458
+ # rename cannot replace special files (/dev/null → EBUSY) and would
459
+ # replace a symlink instead of writing through it — write those directly
460
+ st = begin
461
+ File.lstat(path)
462
+ rescue SystemCallError
463
+ nil
464
+ end
465
+ if st && !st.file?
466
+ binary ? File.binwrite(path, content) : File.write(path, content)
467
+ return
468
+ end
469
+
470
+ tmp = "#{path}.tmp-#{Process.pid}"
471
+ binary ? File.binwrite(tmp, content) : File.write(tmp, content)
472
+ File.rename(tmp, path)
473
+ rescue Exception
474
+ begin
475
+ File.unlink(tmp)
476
+ rescue SystemCallError
477
+ # tmp was never created or already renamed
478
+ end
479
+ raise
480
+ end
481
+ private_class_method :atomic_write
482
+
483
+ # Load a profile saved by rperf record (.json.gz or .json).
285
484
  # Returns the data hash (same format as Rperf.stop / Rperf.snapshot).
286
485
  # Warns to stderr if the file was saved by a different rperf version.
287
486
  def self.load(path)
288
- compressed = File.binread(path)
289
- raw = Zlib::GzipReader.new(StringIO.new(compressed)).read
487
+ raw_bytes = File.binread(path)
488
+ # Auto-detect gzip by magic bytes (1f 8b)
489
+ raw = if raw_bytes.byteslice(0, 2) == "\x1f\x8b".b
490
+ Zlib::GzipReader.new(StringIO.new(raw_bytes)).read
491
+ else
492
+ raw_bytes
493
+ end
290
494
  require "json"
291
495
  data = JSON.parse(raw, symbolize_names: true)
496
+ # symbolize_names only converts keys — :mode round-trips as a String
497
+ # ("wall"), which encoders compare against :wall/:cpu symbols
498
+ data[:mode] = data[:mode].to_sym if data[:mode].is_a?(String)
292
499
  saved_version = data.delete(:rperf_version)
293
500
  if saved_version && saved_version != VERSION
294
501
  $stderr.puts "rperf: warning: file was saved by rperf #{saved_version} (current: #{VERSION})"
@@ -298,6 +505,14 @@ module Rperf
298
505
  data
299
506
  end
300
507
 
508
+ # Read only the meta/summary head of a profile saved by rperf record
509
+ # (.json.gz or .json) without loading the sample body.
510
+ # Returns { meta: Hash|nil, summary: Hash|nil }, or nil for files saved
511
+ # by older rperf versions (no leading meta) or unreadable files.
512
+ def self.read_meta(path)
513
+ Meta.read(path)
514
+ end
515
+
301
516
  def self.detect_format(path, format)
302
517
  return format.to_sym if format
303
518
  case path.to_s
@@ -401,10 +616,17 @@ module Rperf
401
616
  samples_raw = data[:aggregated_samples] || []
402
617
  real_ns = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono) * 1_000_000_000).to_i
403
618
  times = Process.times
404
- start_times = @stat_start_times || Struct.new(:utime, :stime).new(0.0, 0.0)
619
+ start_times = @stat_start_times || ZERO_TIMES
405
620
  user_ns = ((times.utime - start_times.utime) * 1_000_000_000).to_i
406
621
  sys_ns = ((times.stime - start_times.stime) * 1_000_000_000).to_i
407
622
 
623
+ # In multi-process mode, use aggregated user/sys from all processes
624
+ process_count = data[:process_count] || 0
625
+ if process_count > 1 && data[:user_ns]
626
+ user_ns = data[:user_ns]
627
+ sys_ns = data[:sys_ns] || 0
628
+ end
629
+
408
630
  command = ENV["RPERF_STAT_COMMAND"] || "(unknown)"
409
631
 
410
632
  $stderr.puts
@@ -416,9 +638,9 @@ module Rperf
416
638
 
417
639
  if samples_raw.size > 0
418
640
  breakdown, total_weight = compute_stat_breakdown(samples_raw, data[:label_sets])
419
- print_stat_breakdown(breakdown, total_weight)
641
+ print_stat_breakdown(breakdown, total_weight, data)
420
642
  print_stat_runtime_info(data)
421
- print_stat_system_info
643
+ print_stat_system_info(data)
422
644
  print_stat_report(data) if ENV["RPERF_STAT_REPORT"] == "1"
423
645
  print_stat_footer(samples_raw, real_ns, data)
424
646
  end
@@ -436,8 +658,8 @@ module Rperf
436
658
  if label_sets && label_set_id && label_set_id > 0
437
659
  ls = label_sets[label_set_id]
438
660
  if ls
439
- gvl = ls["%GVL"]
440
- gc = ls["%GC"]
661
+ gvl = ls[:"%GVL"]
662
+ gc = ls[:"%GC"]
441
663
  if gvl == "blocked" then category = :gvl_blocked
442
664
  elsif gvl == "wait" then category = :gvl_wait
443
665
  elsif gc == "mark" then category = :gc_marking
@@ -452,8 +674,12 @@ module Rperf
452
674
  end
453
675
  private_class_method :compute_stat_breakdown
454
676
 
455
- def self.print_stat_breakdown(breakdown, total_weight)
677
+ def self.print_stat_breakdown(breakdown, total_weight, data)
456
678
  $stderr.puts
679
+ process_count = data[:process_count] || 0
680
+ if process_count > 1
681
+ $stderr.puts STAT_LINE.call(format_integer(process_count), " ", "[Rperf] Ruby processes profiled")
682
+ end
457
683
 
458
684
  [
459
685
  [:cpu_execution, "[Rperf] CPU execution"],
@@ -490,7 +716,7 @@ module Rperf
490
716
  end
491
717
  private_class_method :print_stat_runtime_info
492
718
 
493
- def self.print_stat_system_info
719
+ def self.print_stat_system_info(data = nil)
494
720
  sys_stats = get_system_stats
495
721
  maxrss_kb = sys_stats[:maxrss_kb]
496
722
  if maxrss_kb
@@ -520,6 +746,10 @@ module Rperf
520
746
  format_integer((r / 1024.0 / 1024.0).round),
521
747
  format_integer((w / 1024.0 / 1024.0).round)])
522
748
  end
749
+ process_count = data[:process_count] if data
750
+ if process_count && process_count > 1
751
+ $stderr.puts STAT_LINE.call("", " ", "(GC/OS stats are from root process only; user/sys/[Rperf] lines are aggregated)")
752
+ end
523
753
  end
524
754
  private_class_method :print_stat_system_info
525
755
 
@@ -532,7 +762,12 @@ module Rperf
532
762
 
533
763
  def self.print_stat_footer(samples_raw, real_ns, data)
534
764
  triggers = data[:trigger_count] || 0
535
- overhead_pct = real_ns > 0 ? (data[:sampling_time_ns] || 0) * 100.0 / real_ns : 0.0
765
+ sampling_time_ns = data[:sampling_time_ns] || 0
766
+ # In multi-process mode, use sum of all processes' durations as denominator.
767
+ # Single-process: fall back to root's real_ns.
768
+ total_real_ns = data[:total_duration_ns] || real_ns
769
+ total_real_ns = real_ns if total_real_ns == 0
770
+ overhead_pct = total_real_ns > 0 ? sampling_time_ns * 100.0 / total_real_ns : 0.0
536
771
  $stderr.puts
537
772
  samples = data[:sampling_count] || samples_raw.size
538
773
  $stderr.puts format(" %d samples / %d triggers, %.1f%% profiler overhead",
@@ -541,6 +776,10 @@ module Rperf
541
776
  if dropped > 0
542
777
  $stderr.puts format(" WARNING: %d samples dropped due to memory allocation failure", dropped)
543
778
  end
779
+ dropped_agg = data[:dropped_aggregation] || 0
780
+ if dropped_agg > 0
781
+ $stderr.puts format(" WARNING: %d samples dropped during aggregation (frame/stack table full)", dropped_agg)
782
+ end
544
783
  end
545
784
  private_class_method :print_stat_footer
546
785
 
@@ -553,10 +792,10 @@ module Rperf
553
792
  # Example: 5_609_200_000 → "5,609.2"
554
793
  def self.format_ms(ns)
555
794
  ms = ns / 1_000_000.0
556
- int_part = ms.truncate
557
- frac = format(".%d", ((ms - int_part).abs * 10).round % 10)
558
- int_str = int_part.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
559
- "#{int_str}#{frac}"
795
+ formatted = format("%.1f", ms)
796
+ int_str, frac = formatted.split(".")
797
+ int_str = int_str.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
798
+ "#{int_str}.#{frac}"
560
799
  end
561
800
  private_class_method :format_ms
562
801
 
@@ -583,9 +822,11 @@ module Rperf
583
822
  end
584
823
 
585
824
  if File.readable?("/proc/self/stat")
586
- fields = File.read("/proc/self/stat").split
587
- stats[:page_faults_minor] = fields[9].to_i
588
- stats[:page_faults_major] = fields[11].to_i
825
+ # comm (field 2) is parenthesized and may contain spaces — split only
826
+ # the part after the closing paren (fields from state, field 3, onward)
827
+ fields = File.read("/proc/self/stat").rpartition(")").last.split
828
+ stats[:page_faults_minor] = fields[7].to_i # minflt (field 10)
829
+ stats[:page_faults_major] = fields[9].to_i # majflt (field 12)
589
830
  end
590
831
 
591
832
  if File.readable?("/proc/self/io")
@@ -604,6 +845,306 @@ module Rperf
604
845
  end
605
846
  private_class_method :get_system_stats
606
847
 
848
+ # --- Multi-process (fork) support ---
849
+
850
+ # Set up child process tracking from Rperf.start(inherit: ...).
851
+ # Called only when NOT already inside a CLI-managed session (no RPERF_SESSION_DIR).
852
+ # Creates the session directory eagerly — if creation fails, inherit is silently
853
+ # disabled and profiling continues in single-process mode.
854
+ def self._setup_inherit(mode, frequency, signal, aggregate, inherit, defer)
855
+ session_dir = _create_session_dir(clean_stale: true)
856
+ return unless session_dir
857
+
858
+ ENV["RPERF_ROOT_PROCESS"] = Process.pid.to_s
859
+ ENV["RPERF_SESSION_DIR"] = session_dir
860
+ ENV["RPERF_DEFER"] = "1" if defer
861
+
862
+ # Remember the start options for forked children (_restart_in_child).
863
+ # Fork preserves module state, so this works for inherit: :fork too,
864
+ # where the RPERF_* env vars below are NOT exported.
865
+ @_child_start_opts = { mode: mode, frequency: frequency, signal: signal,
866
+ aggregate: aggregate, defer: defer }
867
+
868
+ _install_fork_hook
869
+
870
+ if inherit == true
871
+ # inherit: true — also track spawned Ruby children via RUBYOPT.
872
+ # Save original values so _cleanup_session_state can restore them.
873
+ env_keys = %w[RPERF_ENABLED RPERF_FREQUENCY RPERF_MODE RPERF_SIGNAL RPERF_AGGREGATE RUBYLIB RUBYOPT]
874
+ @_saved_env = env_keys.to_h { |k| [k, ENV[k]] }
875
+
876
+ ENV["RPERF_ENABLED"] = "1"
877
+ ENV["RPERF_FREQUENCY"] = frequency.to_s
878
+ ENV["RPERF_MODE"] = mode.to_s
879
+ ENV["RPERF_SIGNAL"] = signal.nil? ? nil : signal.to_s
880
+ ENV["RPERF_AGGREGATE"] = aggregate ? nil : "0"
881
+ lib_dir = File.expand_path("..", __FILE__)
882
+ ENV["RUBYLIB"] = [lib_dir, ENV["RUBYLIB"]].compact.join(File::PATH_SEPARATOR)
883
+ ENV["RUBYOPT"] = "-rrperf #{ENV['RUBYOPT']}".strip
884
+ end
885
+ end
886
+ private_class_method :_setup_inherit
887
+
888
+ # Create session directory eagerly. Returns the session dir path on success,
889
+ # nil on failure (caller should fall back to single-process mode).
890
+ # Try each candidate base in order. If user_dir looks usable but
891
+ # session_dir creation fails (quota, ACL, sandbox, etc.), fall through
892
+ # to the next base instead of giving up.
893
+ # When clean_stale: true, removes session dirs from dead processes.
894
+ def self._create_session_dir(clean_stale: false)
895
+ require "securerandom"
896
+ require "tmpdir"
897
+
898
+ bases = [ENV["RPERF_TMPDIR"], ENV["XDG_RUNTIME_DIR"], Dir.tmpdir].compact
899
+ bases.each do |base|
900
+ user_dir = File.join(base, "rperf-#{Process.uid}")
901
+
902
+ if File.directory?(user_dir)
903
+ st = File.stat(user_dir) rescue next
904
+ next unless st.owned? && (st.mode & 0777) == 0700
905
+ elsif File.writable?(base)
906
+ begin
907
+ Dir.mkdir(user_dir, 0700)
908
+ rescue Errno::EEXIST
909
+ st = File.stat(user_dir) rescue next
910
+ next unless st.owned? && (st.mode & 0777) == 0700
911
+ rescue SystemCallError
912
+ next
913
+ end
914
+ else
915
+ next
916
+ end
917
+
918
+ if clean_stale
919
+ require "fileutils"
920
+ Dir.glob(File.join(user_dir, "rperf-*")).each do |dir|
921
+ m = File.basename(dir).match(/\Arperf-(\d+)-/)
922
+ next unless m
923
+ pid = m[1].to_i
924
+ begin
925
+ Process.kill(0, pid)
926
+ rescue Errno::ESRCH
927
+ FileUtils.rm_rf(dir)
928
+ rescue Errno::EPERM
929
+ # not ours
930
+ end
931
+ end
932
+ end
933
+
934
+ session_dir = File.join(user_dir, "rperf-#{Process.pid}-#{SecureRandom.hex(4)}")
935
+ begin
936
+ Dir.mkdir(session_dir, 0700)
937
+ return session_dir
938
+ rescue SystemCallError
939
+ next
940
+ end
941
+ end
942
+ nil
943
+ end
944
+ private_class_method :_create_session_dir
945
+
946
+ def self._parse_signal_env
947
+ case ENV["RPERF_SIGNAL"]
948
+ when nil then nil
949
+ when "false" then false
950
+ when /\A\d+\z/ then ENV["RPERF_SIGNAL"].to_i
951
+ end
952
+ end
953
+ private_class_method :_parse_signal_env
954
+
955
+ def self._install_fork_hook
956
+ return if @_fork_hook_installed
957
+ @_fork_hook_installed = true
958
+
959
+ ::Process.singleton_class.prepend(Module.new {
960
+ def _fork
961
+ if !Rperf.instance_variable_get(:@_session_dir_created) &&
962
+ Process.pid.to_s == ENV["RPERF_ROOT_PROCESS"]
963
+ Rperf._on_first_fork
964
+ end
965
+ pid = super
966
+ if pid == 0
967
+ Rperf._restart_in_child
968
+ end
969
+ pid
970
+ end
971
+ })
972
+ end
973
+ private_class_method :_install_fork_hook
974
+
975
+ def self._on_first_fork
976
+ return if @_session_dir_created
977
+ session_dir = ENV["RPERF_SESSION_DIR"]
978
+ return unless session_dir && File.directory?(session_dir)
979
+
980
+ @_session_dir_created = true
981
+ # Root's @output/@format/@stat are kept as-is (user's original settings).
982
+ # stop() writes root's profile to session dir with fixed json.gz format,
983
+ # then uses the original settings for the merged output.
984
+ end
985
+
986
# Re-arms profiling inside a freshly forked child so that it records its own
# profile into the shared session directory (RPERF_SESSION_DIR).
#
# Does nothing when the session directory is missing or when the profiler
# already reports itself as running.
def self._restart_in_child
  session_dir = ENV["RPERF_SESSION_DIR"]
  return if session_dir.nil? || !File.directory?(session_dir)
  return if _c_running? # should not happen, but guard against it

  # C state is already cleaned up by the pthread_atfork child handler, so
  # only the Ruby-side label bookkeeping has to be reset here.
  @label_set_table = @label_set_index = nil

  require "securerandom"
  pid = Process.pid
  # Random suffix: PIDs can be recycled within a long-lived session, and a
  # plain profile-<pid> name would silently overwrite an earlier child's data
  child_output = File.join(session_dir, "profile-#{pid}-#{SecureRandom.hex(4)}.json.gz")

  # Start options: the values remembered by _setup_inherit win (fork
  # preserves module state); otherwise fall back to the RPERF_* env vars
  # that CLI-managed sessions export.
  saved = @_child_start_opts
  if saved
    frequency = saved[:frequency]
    mode      = saved[:mode]
    aggregate = saved[:aggregate]
  else
    frequency = (ENV["RPERF_FREQUENCY"] || 1000).to_i
    mode      = ENV["RPERF_MODE"] == "cpu" ? :cpu : :wall
    aggregate = ENV["RPERF_AGGREGATE"] != "0"
  end

  # Children always write a plain JSON profile and stay quiet; reporting is
  # the root process's job.
  opts = {
    frequency: frequency,
    mode: mode,
    aggregate: aggregate,
    output: child_output,
    format: :json,
    stat: false,
    verbose: false,
  }

  sig = saved ? saved[:signal] : _parse_signal_env
  opts[:signal] = sig unless sig.nil?

  defer = saved ? saved[:defer] : ENV["RPERF_DEFER"] == "1"
  opts[:defer] = true if defer

  start(**opts, inherit: false)
  @_session_dir_output = true
  label("%pid": pid.to_s)

  # Register at_exit so the child's profile is written even without an
  # explicit stop
  at_exit { Rperf.stop }
end
1024
+
1025
# Merges every per-process profile found in the session directory
# (RPERF_SESSION_DIR) into one data set, reports it using the root's own
# settings (@stat, @output, @format) and removes the session directory.
#
# root_data: the root process's own profile data — GC/OS stats in the
# merged summary come from the root only (same policy as `rperf stat`).
#
# Returns the merged data Hash. Returns nil when the session directory is
# unset or missing, when no profile could be loaded, or when aggregation
# raised (a warning is printed and the session directory is removed).
def self._aggregate_and_report(root_data = nil)
  session_dir = ENV["RPERF_SESSION_DIR"]
  return unless session_dir && File.directory?(session_dir)

  merged_samples = []
  merged_label_sets = [{}]
  merged_label_sets_index = { {} => 0 }
  total_trigger_count = 0
  total_sampling_count = 0
  total_sampling_time_ns = 0
  max_duration_ns = 0
  total_duration_ns = 0
  total_user_ns = 0
  total_sys_ns = 0
  process_count = 0

  # Glob relative to the session dir (base:) so that glob metacharacters in
  # the directory path itself ("[", "{", "*", "?") are taken literally
  # instead of silently matching nothing.
  Dir.glob("profile-*.json.gz", base: session_dir).each do |name|
    file = File.join(session_dir, name)
    begin
      data = load(file)
    rescue StandardError => e
      # One unreadable child profile must not discard the others
      $stderr.puts "rperf: warning: failed to load #{file}: #{e.message}"
      next
    end
    next unless data
    _merge_into(merged_samples, merged_label_sets, data, merged_label_sets_index)
    total_trigger_count += (data[:trigger_count] || 0)
    total_sampling_count += (data[:sampling_count] || 0)
    total_sampling_time_ns += (data[:sampling_time_ns] || 0)
    d = data[:duration_ns] || 0
    max_duration_ns = d if d > max_duration_ns
    total_duration_ns += d
    total_user_ns += (data[:user_ns] || 0)
    total_sys_ns += (data[:sys_ns] || 0)
    process_count += 1
  end

  if process_count == 0
    # Nothing loadable — remove the session dir here, or stop's empty-dir
    # rmdir would fail on the leftover corrupt files and leak the dir
    _cleanup_session_dir(session_dir)
    return
  end

  # mode/frequency: the root's own profile is authoritative; the env vars
  # are only set by the CLI or inherit: true (and default to the root's
  # actual settings via _setup_inherit for the API case)
  saved = @_child_start_opts
  merged_data = {
    mode: (root_data && root_data[:mode]) || (saved ? saved[:mode] : (ENV["RPERF_MODE"] || "wall").to_sym),
    frequency: (root_data && root_data[:frequency]) || (saved ? saved[:frequency] : (ENV["RPERF_FREQUENCY"] || 1000).to_i),
    aggregated_samples: merged_samples,
    label_sets: merged_label_sets,
    trigger_count: total_trigger_count,
    sampling_count: total_sampling_count,
    sampling_time_ns: total_sampling_time_ns,
    # duration_ns is the longest single process; total_duration_ns is the sum
    duration_ns: max_duration_ns,
    total_duration_ns: total_duration_ns,
    user_ns: total_user_ns,
    sys_ns: total_sys_ns,
    process_count: process_count,
  }

  if root_data
    merged_data[:gc_stats] = root_data[:gc_stats] if root_data[:gc_stats]
    merged_data[:maxrss_mb] = root_data[:maxrss_mb] if root_data[:maxrss_mb]
  end

  print_stat(merged_data) if @stat
  write_data(@output, merged_data, @format) if @output

  _cleanup_session_dir(session_dir)

  merged_data
rescue => e
  $stderr.puts "rperf: warning: failed to aggregate multi-process data: #{e.message}"
  # stop() falls back to writing the root's own data when this returns nil
  _cleanup_session_dir(session_dir)
  nil
end
1108
+ # Not private — called from at_exit block which runs in top-level context
1109
+
1110
# Recursively deletes the multi-process session directory.
#
# Never raises for removal problems: a failure is reported as a warning on
# stderr so that cleanup cannot break profiler shutdown. A directory that
# is already gone is treated as success.
def self._cleanup_session_dir(session_dir)
  require "fileutils"
  # rm_r instead of rm_rf: force mode swallows every StandardError raised
  # while removing, which made the warning below unreachable and let a
  # leaked session directory go unnoticed.
  FileUtils.rm_r(session_dir)
rescue Errno::ENOENT
  # Already removed (earlier cleanup or a concurrent remover) — that is the
  # desired end state, so stay silent like the force-mode removal did.
  nil
rescue => e
  $stderr.puts "rperf: warning: failed to clean up session dir: #{e.message}"
end
1116
+ private_class_method :_cleanup_session_dir
1117
+
1118
# Appends one process's samples to the merged sample list, remapping its
# label-set ids onto the merged label-set table.
#
# merged_samples          - Array, mutated: receives
#                           [frames, weight, thread_seq, label_set_id] rows.
# merged_label_sets       - Array, mutated: label sets not seen before are
#                           appended (with symbol keys).
# data                    - Hash with :label_sets and :aggregated_samples;
#                           either may be missing.
# merged_label_sets_index - optional Hash (label set → index) that the
#                           caller keeps across calls; mutated when given.
def self._merge_into(merged_samples, merged_label_sets, data, merged_label_sets_index = nil)
  # Keys are normalized to symbols so string- and symbol-keyed label sets
  # compare equal
  normalize = ->(ls) { ls.is_a?(Hash) ? ls.transform_keys(&:to_sym) : ls }

  # Build a reverse index when the caller doesn't maintain one across calls.
  # It is keyed by the normalized form, like the lookups below; otherwise a
  # string-keyed set already in the table would never match and would be
  # appended again. The first occurrence of a set wins.
  unless merged_label_sets_index
    merged_label_sets_index = {}
    merged_label_sets.each_with_index do |ls, i|
      merged_label_sets_index[normalize.(ls)] ||= i
    end
  end

  child_label_sets = data[:label_sets] || [{}]
  id_map = {}
  child_label_sets.each_with_index do |ls, child_id|
    normalized = normalize.(ls)
    existing = merged_label_sets_index[normalized]
    if existing
      id_map[child_id] = existing
    else
      new_idx = merged_label_sets.size
      id_map[child_id] = new_idx
      merged_label_sets << normalized
      merged_label_sets_index[normalized] = new_idx
    end
  end

  (data[:aggregated_samples] || []).each do |frames, weight, thread_seq, label_set_id|
    # Missing or unknown ids fall back to label set 0
    new_lsi = id_map[label_set_id || 0] || 0
    merged_samples << [frames, weight, thread_seq, new_lsi]
  end
end
1146
+ private_class_method :_merge_into
1147
+
607
1148
  # ENV-based auto-start for CLI usage
608
1149
  if ENV["RPERF_ENABLED"] == "1"
609
1150
  _rperf_mode_str = ENV["RPERF_MODE"] || "cpu"
@@ -618,22 +1159,70 @@ module Rperf
618
1159
  ENV["RPERF_FORMAT"].to_sym
619
1160
  end
620
1161
  _rperf_stat = ENV["RPERF_STAT"] == "1"
621
- _rperf_signal = case ENV["RPERF_SIGNAL"]
622
- when nil then nil
623
- when "false" then false
624
- when /\A\d+\z/ then ENV["RPERF_SIGNAL"].to_i
625
- else raise ArgumentError, "RPERF_SIGNAL must be a signal number or 'false', got: #{ENV["RPERF_SIGNAL"].inspect}"
626
- end
1162
+ _rperf_signal = _parse_signal_env
627
1163
  _rperf_aggregate = ENV["RPERF_AGGREGATE"] != "0"
1164
+ _rperf_original_output = _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.json.gz")
1165
+
628
1166
  _rperf_start_opts = { frequency: (ENV["RPERF_FREQUENCY"] || 1000).to_i, mode: _rperf_mode,
629
- output: _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.json.gz"),
630
1167
  verbose: ENV["RPERF_VERBOSE"] == "1",
631
- format: _rperf_format,
632
- stat: _rperf_stat,
633
1168
  aggregate: _rperf_aggregate }
634
1169
  _rperf_start_opts[:signal] = _rperf_signal unless _rperf_signal.nil?
635
- start(**_rperf_start_opts)
636
- at_exit { stop }
1170
+ _rperf_start_opts[:defer] = true if ENV["RPERF_DEFER"] == "1"
1171
+
1172
+ if ENV["RPERF_SESSION_DIR"] && Process.pid.to_s != ENV["RPERF_ROOT_PROCESS"]
1173
+ # spawn / fork+exec child: write to session dir, no aggregation.
1174
+ # Session dir is created eagerly by the root process (CLI or API).
1175
+ # If it doesn't exist, skip profiling entirely — don't fall back to
1176
+ # normal mode which would duplicate output with the root process.
1177
+ _rperf_session_dir = ENV["RPERF_SESSION_DIR"]
1178
+ if File.directory?(_rperf_session_dir)
1179
+ require "securerandom"
1180
+ # Random suffix: PID reuse must not overwrite an earlier child's profile
1181
+ _rperf_start_opts[:output] = File.join(_rperf_session_dir, "profile-#{Process.pid}-#{SecureRandom.hex(4)}.json.gz")
1182
+ _rperf_start_opts[:format] = :json
1183
+ _rperf_start_opts[:stat] = false
1184
+ _rperf_start_opts[:verbose] = false
1185
+
1186
+ _install_fork_hook
1187
+ start(**_rperf_start_opts, inherit: false)
1188
+ @_session_dir_output = true
1189
+ label("%pid": Process.pid.to_s)
1190
+ at_exit { stop }
1191
+ end
1192
+ elsif ENV["RPERF_SESSION_DIR"]
1193
+ # Root process: start with normal output settings.
1194
+ # If no fork/spawn happens, behaves exactly like single-process mode.
1195
+ _rperf_start_opts[:output] = _rperf_original_output
1196
+ _rperf_start_opts[:format] = _rperf_format
1197
+ _rperf_start_opts[:stat] = _rperf_stat
1198
+
1199
+ _install_fork_hook
1200
+ start(**_rperf_start_opts, inherit: false)
1201
+
1202
+ at_exit { Rperf.stop }
1203
+ else
1204
+ _rperf_start_opts[:output] = _rperf_original_output
1205
+ _rperf_start_opts[:format] = _rperf_format
1206
+ _rperf_start_opts[:stat] = _rperf_stat
1207
+ _rperf_start_opts[:inherit] = false # no RPERF_SESSION_DIR means --no-inherit
1208
+ start(**_rperf_start_opts)
1209
+ # --no-inherit: scrub the env the CLI injected for THIS process, so
1210
+ # Ruby descendants spawned by the app don't auto-start their own
1211
+ # sessions (and clobber RPERF_OUTPUT with their own profiles)
1212
+ %w[RPERF_ENABLED RPERF_OUTPUT RPERF_STAT RPERF_FORMAT RPERF_VERBOSE
1213
+ RPERF_FREQUENCY RPERF_MODE RPERF_SIGNAL RPERF_AGGREGATE
1214
+ RPERF_DEFER].each { |k| ENV.delete(k) }
1215
+ if ENV["RUBYOPT"]
1216
+ rubyopt = ENV["RUBYOPT"].split(" ").reject { |o| o == "-rrperf" }.join(" ")
1217
+ rubyopt.empty? ? ENV.delete("RUBYOPT") : ENV["RUBYOPT"] = rubyopt
1218
+ end
1219
+ if ENV["RUBYLIB"]
1220
+ _rperf_lib_dir = File.expand_path("..", __FILE__)
1221
+ rubylib = ENV["RUBYLIB"].split(File::PATH_SEPARATOR).reject { |p| p == _rperf_lib_dir }.join(File::PATH_SEPARATOR)
1222
+ rubylib.empty? ? ENV.delete("RUBYLIB") : ENV["RUBYLIB"] = rubylib
1223
+ end
1224
+ at_exit { stop }
1225
+ end
637
1226
  end
638
1227
 
639
1228
  # Text report encoder — human/AI readable flat + cumulative top-N table.
@@ -687,7 +1276,10 @@ module Rperf
687
1276
  return "" if !samples || samples.empty?
688
1277
  merged = Hash.new(0)
689
1278
  samples.each do |frames, weight|
690
- key = frames.reverse.map { |_, label| label }.join(";")
1279
+ # ";" is the frame separator and has no escape in the collapsed
1280
+ # format — replace it so a pathological method name cannot corrupt
1281
+ # stack splitting downstream (FlameGraph/speedscope)
1282
+ key = frames.reverse.map { |_, label| label.include?(";") ? label.tr(";", ",") : label }.join(";")
691
1283
  merged[key] += weight
692
1284
  end
693
1285
  merged.map { |stack, weight| "#{stack} #{weight}" }.join("\n") + "\n"
@@ -703,10 +1295,10 @@ module Rperf
703
1295
  module_function
704
1296
 
705
1297
  def encode(data)
706
- samples_raw = data[:aggregated_samples]
1298
+ samples_raw = data[:aggregated_samples] || []
707
1299
  frequency = data[:frequency]
708
- interval_ns = 1_000_000_000 / frequency
709
- mode = data[:mode] || :cpu
1300
+ interval_ns = (frequency && frequency > 0) ? 1_000_000_000 / frequency : 0
1301
+ mode = (data[:mode] || :cpu).to_sym
710
1302
 
711
1303
  # Build string table: index 0 must be ""
712
1304
  string_table = [""]
@@ -722,7 +1314,7 @@ module Rperf
722
1314
 
723
1315
  # Convert string frames to index frames and merge identical stacks per thread/label
724
1316
  merged = Hash.new(0)
725
- thread_seq_key = intern.("thread_seq")
1317
+ thread_seq_key = nil # interned lazily — only when a sample carries thread_seq
726
1318
  label_sets = data[:label_sets] # Array of Hash (may be nil)
727
1319
  samples_raw.each do |frames, weight, thread_seq, label_set_id|
728
1320
  key = [frames.map { |path, label| [intern.(path), intern.(label)] }, thread_seq || 0, label_set_id || 0]
@@ -742,8 +1334,8 @@ module Rperf
742
1334
  end
743
1335
  end
744
1336
 
745
- # Build location/function tables
746
- locations, functions = build_tables(merged.map { |(frames, _, _), w| [frames, w] })
1337
+ # Build the frame → id table (locations and functions are 1:1)
1338
+ frame_ids = build_tables(merged)
747
1339
 
748
1340
  # Intern type label and unit
749
1341
  type_label = mode == :wall ? "wall" : "cpu"
@@ -759,11 +1351,12 @@ module Rperf
759
1351
  # field 2: sample (repeated Sample) with thread_seq + user labels
760
1352
  merged.each do |(frames, thread_seq, label_set_id), weight|
761
1353
  sample_buf = "".b
762
- loc_ids = frames.map { |f| locations[f] }
1354
+ loc_ids = frames.map { |f| frame_ids[f] }
763
1355
  sample_buf << encode_packed_uint64(1, loc_ids)
764
1356
  sample_buf << encode_packed_int64(2, [weight])
765
1357
  if thread_seq && thread_seq > 0
766
1358
  label_buf = "".b
1359
+ thread_seq_key ||= intern.("thread_seq")
767
1360
  label_buf << encode_int64(1, thread_seq_key) # key
768
1361
  label_buf << encode_int64(3, thread_seq) # num
769
1362
  sample_buf << encode_message(3, label_buf)
@@ -782,19 +1375,18 @@ module Rperf
782
1375
  buf << encode_message(2, sample_buf)
783
1376
  end
784
1377
 
785
- # field 4: location (repeated Location)
786
- locations.each do |frame, loc_id|
1378
+ # field 4: location (repeated Location) — Line points at the same id
1379
+ frame_ids.each do |_frame, id|
787
1380
  loc_buf = "".b
788
- loc_buf << encode_uint64(1, loc_id)
1381
+ loc_buf << encode_uint64(1, id)
789
1382
  line_buf = "".b
790
- func_id = functions[frame]
791
- line_buf << encode_uint64(1, func_id)
1383
+ line_buf << encode_uint64(1, id)
792
1384
  loc_buf << encode_message(4, line_buf)
793
1385
  buf << encode_message(4, loc_buf)
794
1386
  end
795
1387
 
796
1388
  # field 5: function (repeated Function)
797
- functions.each do |frame, func_id|
1389
+ frame_ids.each do |frame, func_id|
798
1390
  func_buf = "".b
799
1391
  func_buf << encode_uint64(1, func_id)
800
1392
  func_buf << encode_int64(2, frame[1]) # name (label_idx)
@@ -841,22 +1433,23 @@ module Rperf
841
1433
  buf
842
1434
  end
843
1435
 
1436
# Numbers every distinct frame 1, 2, 3, ... in order of first appearance.
# Each frame gets exactly one Location and one Function that share the same
# id, so one frame → id table is enough for both.
#
# merged: Hash keyed by [frames, thread_seq, label_set_id] → weight.
# Returns a Hash mapping frame → id.
def build_tables(merged)
  merged.each_with_object({}) do |((frames, _thread_seq, _label_set_id), _weight), frame_ids|
    # The next free id is always "entries so far + 1"; ids are never nil or
    # false, so ||= only assigns on first sight of a frame.
    frames.each { |frame| frame_ids[frame] ||= frame_ids.size + 1 }
  end
end
861
1454
 
862
1455
  # --- Protobuf encoding helpers ---