rperf 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ require "rperf"
2
+
3
# ActiveJob concern that tags profiling samples with the job's class name.
#
# Including this module registers an +around_perform+ callback that runs the
# job body inside +Rperf.label(job: <job class name>)+.
module Rperf::ActiveJobMiddleware
  extend ActiveSupport::Concern

  included do
    around_perform do |active_job, perform|
      # The label is only in effect while the wrapped perform runs.
      Rperf.label(job: active_job.class.name) { perform.call }
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require "rperf"
2
+
3
# Rack middleware that labels profiling samples with the request endpoint.
#
# The label value is "<REQUEST_METHOD> <PATH_INFO>" taken from the Rack env.
# NOTE(review): PATH_INFO is used verbatim, so each distinct path produces a
# distinct label value — confirm this is acceptable for routes that embed ids.
class Rperf::Middleware
  # app       - the downstream Rack application (must respond to #call).
  # label_key - label name under which the endpoint string is recorded.
  def initialize(app, label_key: :endpoint)
    @app = app
    @label_key = label_key
  end

  # Runs the downstream app inside Rperf.label and returns its response.
  def call(env)
    http_method = env["REQUEST_METHOD"]
    path = env["PATH_INFO"]
    Rperf.label(@label_key => "#{http_method} #{path}") { @app.call(env) }
  end
end
@@ -0,0 +1,9 @@
1
+ require "rperf"
2
+
3
# Sidekiq server middleware that labels profiling samples with the job class.
#
# The label value is read from the job payload's "class" entry.
class Rperf::SidekiqMiddleware
  # _worker and _queue are accepted for the middleware signature but unused.
  # job is the job payload; only job["class"] is read.
  # Yields to the rest of the middleware chain inside Rperf.label.
  def call(_worker, job, _queue)
    job_class = job["class"]
    Rperf.label(job: job_class) { yield }
  end
end
data/lib/rperf/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rperf
2
- VERSION = "0.4.0"
2
+ VERSION = "0.6.0"
3
3
  end
data/lib/rperf.rb CHANGED
@@ -1,4 +1,4 @@
1
- require "rperf/version"
1
+ require_relative "rperf/version"
2
2
  require "zlib"
3
3
  require "stringio"
4
4
 
@@ -24,14 +24,27 @@ module Rperf
24
24
  # .txt → text report (human/AI readable flat + cumulative table)
25
25
  # otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
26
26
  def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true)
27
+ raise ArgumentError, "frequency must be a positive integer (got #{frequency.inspect})" unless frequency.is_a?(Integer) && frequency > 0
28
+ raise ArgumentError, "frequency must be <= 10000 (10KHz), got #{frequency}" if frequency > 10_000
29
+ raise ArgumentError, "mode must be :cpu or :wall, got #{mode.inspect}" unless %i[cpu wall].include?(mode)
30
+ c_mode = mode == :cpu ? 0 : 1
31
+ c_signal = signal.nil? ? -1 : (signal ? signal.to_i : 0)
32
+ if c_signal > 0
33
+ raise ArgumentError, "signal mode is only supported on Linux" unless RUBY_PLATFORM =~ /linux/
34
+ uncatchable = [Signal.list["KILL"], Signal.list["STOP"]].compact
35
+ if uncatchable.include?(c_signal)
36
+ name = Signal.signame(c_signal) rescue c_signal.to_s
37
+ raise ArgumentError, "signal #{c_signal} (#{name}) cannot be caught; use a different signal"
38
+ end
39
+ end
27
40
  @verbose = verbose || ENV["RPERF_VERBOSE"] == "1"
28
41
  @output = output
29
42
  @format = format
30
43
  @stat = stat
31
44
  @stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC) if @stat
32
- c_opts = { frequency: frequency, mode: mode, aggregate: aggregate }
33
- c_opts[:signal] = signal unless signal.nil?
34
- _c_start(**c_opts)
45
+ @label_set_table = nil
46
+ @label_set_index = nil
47
+ _c_start(frequency, c_mode, aggregate, c_signal)
35
48
 
36
49
  if block_given?
37
50
  begin
@@ -46,6 +59,21 @@ module Rperf
46
59
  data = _c_stop
47
60
  return unless data
48
61
 
62
+ # When aggregate: false, C extension returns :raw_samples but not
63
+ # :aggregated_samples. Build aggregated view so encoders always work.
64
+ if data[:raw_samples] && !data[:aggregated_samples]
65
+ merged = {}
66
+ data[:raw_samples].each do |frames, weight, thread_seq, label_set_id|
67
+ key = [frames, thread_seq || 0, label_set_id || 0]
68
+ if merged.key?(key)
69
+ merged[key] += weight
70
+ else
71
+ merged[key] = weight
72
+ end
73
+ end
74
+ data[:aggregated_samples] = merged.map { |(frames, ts, lsi), w| [frames, w, ts, lsi] }
75
+ end
76
+
49
77
  print_stats(data) if @verbose
50
78
  print_stat(data) if @stat
51
79
 
@@ -58,6 +86,77 @@ module Rperf
58
86
  data
59
87
  end
60
88
 
89
# Takes a snapshot of the profiling data collected so far; profiling keeps
# running afterwards.
#
# Only available in aggregate mode (the default). Returns nil when no
# profiling session is active. The result has the same shape as the value
# returned by stop, so it can be handed to save(), PProf.encode(),
# Collapsed.encode(), or Text.encode().
#
# +clear:+ when true, the aggregated data is reset once the snapshot has been
# taken, so successive snapshots each cover only the interval since the
# previous clear.
def self.snapshot(clear: false)
  _c_snapshot(clear)
end
100
+
101
# Per-context profiling labels.
# Every distinct label set (a Hash) is stored once in an Array; its position
# in that Array is its label_set_id. Position 0 is reserved for "no labels".
# Both containers stay nil until the first label is set.

@label_set_table = nil # Array of frozen Hash, addressed by label_set_id
@label_set_index = nil # Hash → id, used to de-duplicate label sets
107
+
108
# (Re)creates the label-set bookkeeping containing only the reserved entry:
# id 0 maps to the empty Hash, i.e. "no labels".
#
# The empty Hash is frozen like every other table entry. Table entries are
# handed out to callers as-is, so a mutable entry could be modified from
# outside and silently change what id 0 means.
#
# Returns the freshly built index Hash.
def self._init_label_sets
  no_labels = {}.freeze
  @label_set_table = [no_labels] # id 0 = no labels
  @label_set_index = { no_labels => 0 }
end
112
+
113
# Returns the id of the given label set, registering it first if it has not
# been seen before.
#
# hash - the label set; it is frozen in place before being stored.
#
# A new set is appended to @label_set_table, the updated table is handed to
# _c_set_label_sets, and only then is the id recorded in @label_set_index.
def self._intern_label_set(hash)
  hash.freeze # no-op when already frozen

  known_id = @label_set_index[hash]
  return known_id if known_id

  new_id = @label_set_table.size
  @label_set_table.push(hash)
  _c_set_label_sets(@label_set_table)
  @label_set_index[hash] = new_id
end
122
+
123
# Annotates the current thread with profiling labels.
#
# Labels are key-value pairs that end up in the pprof sample labels.
# The given pairs are merged into the labels already in effect; a pair whose
# value is nil removes that key.
#
#   Rperf.label(request: "abc") { handle_request }  # scoped to the block
#   Rperf.label(request: "abc")                     # stays set afterwards
#
# With a block, the previous labels are restored when the block exits (also
# when it raises) and the block's value is returned. Without a block, the new
# labels stay in effect on the current thread and nil is returned.
def self.label(**kw, &block)
  _init_label_sets unless @label_set_table

  previous_id = _c_get_label
  inherited = @label_set_table[previous_id] || {}

  # Merge first, then drop nil-valued keys so nil acts as "remove this key".
  merged = inherited.merge(kw).compact
  _c_set_label(_intern_label_set(merged))
  return nil unless block

  begin
    yield
  ensure
    _c_set_label(previous_id)
  end
end
150
+
151
# Looks up the labels currently attached to the calling thread.
#
# Returns a Hash. The result is empty when the label table has not been
# initialised, or when the thread's current label id has no table entry.
def self.labels
  if @label_set_table
    @label_set_table[_c_get_label] || {}
  else
    {}
  end
end
158
+
159
+
61
160
  # Saves profiling data to a file.
62
161
  # format: :pprof, :collapsed, or :text. nil = auto-detect from path extension
63
162
  # .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
@@ -148,7 +247,7 @@ module Rperf
148
247
 
149
248
  # Samples from C are now [[[path_str, label_str], ...], weight]
150
249
  def self.print_top(data)
151
- samples_raw = data[:samples]
250
+ samples_raw = data[:aggregated_samples]
152
251
  return if !samples_raw || samples_raw.empty?
153
252
 
154
253
  result = compute_flat_cum(samples_raw)
@@ -180,7 +279,7 @@ module Rperf
180
279
  private_constant :STAT_PCT_LINE, :STAT_LINE
181
280
 
182
281
  def self.print_stat(data)
183
- samples_raw = data[:samples] || []
282
+ samples_raw = data[:aggregated_samples] || []
184
283
  real_ns = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono) * 1_000_000_000).to_i
185
284
  times = Process.times
186
285
  user_ns = (times.utime * 1_000_000_000).to_i
@@ -198,7 +297,7 @@ module Rperf
198
297
  if samples_raw.size > 0
199
298
  breakdown, total_weight = compute_stat_breakdown(samples_raw)
200
299
  print_stat_breakdown(breakdown, total_weight)
201
- print_stat_runtime_info
300
+ print_stat_runtime_info(data)
202
301
  print_stat_system_info
203
302
  print_stat_report(data) if ENV["RPERF_STAT_REPORT"] == "1"
204
303
  print_stat_footer(samples_raw, real_ns, data)
@@ -246,7 +345,9 @@ module Rperf
246
345
  end
247
346
  private_class_method :print_stat_breakdown
248
347
 
249
- def self.print_stat_runtime_info
348
+ def self.print_stat_runtime_info(data)
349
+ thread_count = data[:detected_thread_count] || 0
350
+ $stderr.puts STAT_LINE.call(format_integer(thread_count), " ", "[Ruby] detected threads") if thread_count > 0
250
351
  gc = GC.stat
251
352
  $stderr.puts STAT_LINE.call(format_ms(gc[:time] * 1_000_000), "ms",
252
353
  "[Ruby] GC time (%s count: %s minor, %s major)" % [
@@ -391,7 +492,7 @@ module Rperf
391
492
  module_function
392
493
 
393
494
  def encode(data, top_n: 50, header: true)
394
- samples_raw = data[:samples]
495
+ samples_raw = data[:aggregated_samples]
395
496
  mode = data[:mode] || :cpu
396
497
  frequency = data[:frequency] || 0
397
498
 
@@ -433,8 +534,10 @@ module Rperf
433
534
  module_function
434
535
 
435
536
  def encode(data)
537
+ samples = data[:aggregated_samples]
538
+ return "" if !samples || samples.empty?
436
539
  merged = Hash.new(0)
437
- data[:samples].each do |frames, weight|
540
+ samples.each do |frames, weight|
438
541
  key = frames.reverse.map { |_, label| label }.join(";")
439
542
  merged[key] += weight
440
543
  end
@@ -451,7 +554,7 @@ module Rperf
451
554
  module_function
452
555
 
453
556
  def encode(data)
454
- samples_raw = data[:samples]
557
+ samples_raw = data[:aggregated_samples]
455
558
  frequency = data[:frequency]
456
559
  interval_ns = 1_000_000_000 / frequency
457
560
  mode = data[:mode] || :cpu
@@ -468,17 +571,30 @@ module Rperf
468
571
  end
469
572
  }
470
573
 
471
- # Convert string frames to index frames and merge identical stacks per thread
574
+ # Convert string frames to index frames and merge identical stacks per thread/label
472
575
  merged = Hash.new(0)
473
576
  thread_seq_key = intern.("thread_seq")
474
- samples_raw.each do |frames, weight, thread_seq|
475
- key = [frames.map { |path, label| [intern.(path), intern.(label)] }, thread_seq || 0]
577
+ label_sets = data[:label_sets] # Array of Hash (may be nil)
578
+ samples_raw.each do |frames, weight, thread_seq, label_set_id|
579
+ key = [frames.map { |path, label| [intern.(path), intern.(label)] }, thread_seq || 0, label_set_id || 0]
476
580
  merged[key] += weight
477
581
  end
478
582
  merged = merged.to_a
479
583
 
584
+ # Intern label set keys/values for pprof labels
585
+ label_key_indices = {} # String key → string_table index
586
+ if label_sets
587
+ label_sets.each do |ls|
588
+ ls.each do |k, v|
589
+ sk = k.to_s
590
+ label_key_indices[sk] ||= intern.(sk)
591
+ intern.(v.to_s) # ensure value is interned
592
+ end
593
+ end
594
+ end
595
+
480
596
  # Build location/function tables
481
- locations, functions = build_tables(merged.map { |(frames, _), w| [frames, w] })
597
+ locations, functions = build_tables(merged.map { |(frames, _, _), w| [frames, w] })
482
598
 
483
599
  # Intern type label and unit
484
600
  type_label = mode == :wall ? "wall" : "cpu"
@@ -491,8 +607,8 @@ module Rperf
491
607
  # field 1: sample_type (repeated ValueType)
492
608
  buf << encode_message(1, encode_value_type(type_idx, ns_idx))
493
609
 
494
- # field 2: sample (repeated Sample) with thread_seq label
495
- merged.each do |(frames, thread_seq), weight|
610
+ # field 2: sample (repeated Sample) with thread_seq + user labels
611
+ merged.each do |(frames, thread_seq, label_set_id), weight|
496
612
  sample_buf = "".b
497
613
  loc_ids = frames.map { |f| locations[f] }
498
614
  sample_buf << encode_packed_uint64(1, loc_ids)
@@ -503,6 +619,17 @@ module Rperf
503
619
  label_buf << encode_int64(3, thread_seq) # num
504
620
  sample_buf << encode_message(3, label_buf)
505
621
  end
622
+ if label_sets && label_set_id && label_set_id > 0
623
+ ls = label_sets[label_set_id]
624
+ if ls
625
+ ls.each do |k, v|
626
+ label_buf = "".b
627
+ label_buf << encode_int64(1, label_key_indices[k.to_s]) # key
628
+ label_buf << encode_int64(2, string_index[v.to_s]) # str
629
+ sample_buf << encode_message(3, label_buf)
630
+ end
631
+ end
632
+ end
506
633
  buf << encode_message(2, sample_buf)
507
634
  end
508
635
 
@@ -537,7 +664,7 @@ module Rperf
537
664
 
538
665
  # field 6: string_table (repeated string)
539
666
  string_table.each do |s|
540
- buf << encode_bytes(6, s.encode("UTF-8"))
667
+ buf << encode_bytes(6, s.encode("UTF-8", invalid: :replace, undef: :replace))
541
668
  end
542
669
 
543
670
  # field 9: time_nanos (int64)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rperf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Koichi Sasada
@@ -52,6 +52,9 @@ files:
52
52
  - ext/rperf/extconf.rb
53
53
  - ext/rperf/rperf.c
54
54
  - lib/rperf.rb
55
+ - lib/rperf/active_job.rb
56
+ - lib/rperf/middleware.rb
57
+ - lib/rperf/sidekiq.rb
55
58
  - lib/rperf/version.rb
56
59
  homepage: https://github.com/ko1/rperf
57
60
  licenses: