rperf 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -28
- data/docs/help.md +149 -7
- data/exe/rperf +33 -8
- data/ext/rperf/rperf.c +547 -264
- data/lib/rperf/active_job.rb +13 -0
- data/lib/rperf/middleware.rb +15 -0
- data/lib/rperf/sidekiq.rb +9 -0
- data/lib/rperf/version.rb +1 -1
- data/lib/rperf.rb +145 -18
- metadata +4 -1
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
require "rperf"
|
|
2
|
+
|
|
3
|
+
# Rack-style middleware that labels profiler samples with the request endpoint.
#
# Wraps any object responding to +call(env)+ and runs the downstream call
# inside an Rperf.label block, so the label is only in effect for the
# duration of that call.
class Rperf::Middleware
  # app::       downstream application; must respond to +call(env)+.
  # label_key:: label name under which the endpoint string is recorded
  #             (defaults to +:endpoint+).
  def initialize(app, label_key: :endpoint)
    @label_key = label_key
    @app = app
  end

  # Builds "<REQUEST_METHOD> <PATH_INFO>" from +env+ and forwards the request
  # to the wrapped app while that string is set as the label value.
  # Returns whatever the wrapped app returns.
  def call(env)
    verb = env["REQUEST_METHOD"]
    path = env["PATH_INFO"]
    Rperf.label(@label_key => "#{verb} #{path}") { @app.call(env) }
  end
end
|
data/lib/rperf/version.rb
CHANGED
data/lib/rperf.rb
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
require_relative "rperf/version"
|
|
2
2
|
require "zlib"
|
|
3
3
|
require "stringio"
|
|
4
4
|
|
|
@@ -24,14 +24,27 @@ module Rperf
|
|
|
24
24
|
# .txt → text report (human/AI readable flat + cumulative table)
|
|
25
25
|
# otherwise (.pb.gz etc) → pprof protobuf (gzip compressed)
|
|
26
26
|
def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true)
|
|
27
|
+
raise ArgumentError, "frequency must be a positive integer (got #{frequency.inspect})" unless frequency.is_a?(Integer) && frequency > 0
|
|
28
|
+
raise ArgumentError, "frequency must be <= 10000 (10KHz), got #{frequency}" if frequency > 10_000
|
|
29
|
+
raise ArgumentError, "mode must be :cpu or :wall, got #{mode.inspect}" unless %i[cpu wall].include?(mode)
|
|
30
|
+
c_mode = mode == :cpu ? 0 : 1
|
|
31
|
+
c_signal = signal.nil? ? -1 : (signal ? signal.to_i : 0)
|
|
32
|
+
if c_signal > 0
|
|
33
|
+
raise ArgumentError, "signal mode is only supported on Linux" unless RUBY_PLATFORM =~ /linux/
|
|
34
|
+
uncatchable = [Signal.list["KILL"], Signal.list["STOP"]].compact
|
|
35
|
+
if uncatchable.include?(c_signal)
|
|
36
|
+
name = Signal.signame(c_signal) rescue c_signal.to_s
|
|
37
|
+
raise ArgumentError, "signal #{c_signal} (#{name}) cannot be caught; use a different signal"
|
|
38
|
+
end
|
|
39
|
+
end
|
|
27
40
|
@verbose = verbose || ENV["RPERF_VERBOSE"] == "1"
|
|
28
41
|
@output = output
|
|
29
42
|
@format = format
|
|
30
43
|
@stat = stat
|
|
31
44
|
@stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC) if @stat
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
_c_start(
|
|
45
|
+
@label_set_table = nil
|
|
46
|
+
@label_set_index = nil
|
|
47
|
+
_c_start(frequency, c_mode, aggregate, c_signal)
|
|
35
48
|
|
|
36
49
|
if block_given?
|
|
37
50
|
begin
|
|
@@ -46,6 +59,21 @@ module Rperf
|
|
|
46
59
|
data = _c_stop
|
|
47
60
|
return unless data
|
|
48
61
|
|
|
62
|
+
# When aggregate: false, C extension returns :raw_samples but not
|
|
63
|
+
# :aggregated_samples. Build aggregated view so encoders always work.
|
|
64
|
+
if data[:raw_samples] && !data[:aggregated_samples]
|
|
65
|
+
merged = {}
|
|
66
|
+
data[:raw_samples].each do |frames, weight, thread_seq, label_set_id|
|
|
67
|
+
key = [frames, thread_seq || 0, label_set_id || 0]
|
|
68
|
+
if merged.key?(key)
|
|
69
|
+
merged[key] += weight
|
|
70
|
+
else
|
|
71
|
+
merged[key] = weight
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
data[:aggregated_samples] = merged.map { |(frames, ts, lsi), w| [frames, w, ts, lsi] }
|
|
75
|
+
end
|
|
76
|
+
|
|
49
77
|
print_stats(data) if @verbose
|
|
50
78
|
print_stat(data) if @stat
|
|
51
79
|
|
|
@@ -58,6 +86,77 @@ module Rperf
|
|
|
58
86
|
data
|
|
59
87
|
end
|
|
60
88
|
|
|
89
|
+
# Takes a snapshot of the profiling data collected so far; profiling keeps
# running afterwards.
#
# Only works in aggregate mode (the default). Returns nil if not profiling.
# The result has the same format as stop's return value, so it can be handed
# to save(), PProf.encode(), Collapsed.encode(), or Text.encode().
#
# +clear:+ when true, the aggregated data is reset after the snapshot is
# taken, so consecutive snapshots each cover only the interval since the
# previous clear.
def self.snapshot(clear: false)
  snapshot_data = _c_snapshot(clear)
  snapshot_data
end
|
|
100
|
+
|
|
101
|
+
# Label set management for per-context profiling.
|
|
102
|
+
# Label sets are stored as an Array of Hashes, indexed by label_set_id.
|
|
103
|
+
# Index 0 is reserved (no labels).
|
|
104
|
+
|
|
105
|
+
@label_set_table = nil # Array of frozen Hash
|
|
106
|
+
@label_set_index = nil # Hash → id (for dedup)
|
|
107
|
+
|
|
108
|
+
# Resets the label-set bookkeeping to its initial state.
#
# After this call the table holds exactly one entry, the empty label set at
# id 0 ("no labels"), and the dedup index maps that empty set back to id 0.
#
# The empty set is frozen and shared between the table and the index: table
# entries are handed out to callers as-is, so a mutable entry could be
# modified from outside and silently corrupt both the "no labels" set and
# its hash-key lookup.
def self._init_label_sets
  no_labels = {}.freeze
  @label_set_table = [no_labels]        # id 0 = no labels
  @label_set_index = { no_labels => 0 } # label set → id (for dedup)
end
|
|
112
|
+
|
|
113
|
+
# Returns the id of +hash+ in the label-set table, registering it first if
# an equal label set has not been seen before.
#
# +hash+ is frozen in place. When a new entry is appended, the whole table
# is passed to _c_set_label_sets before the id is recorded in the index.
def self._intern_label_set(hash)
  hash.freeze

  known_id = @label_set_index[hash]
  return known_id if known_id

  next_id = @label_set_table.size
  @label_set_table << hash
  _c_set_label_sets(@label_set_table)
  @label_set_index[hash] = next_id
end
|
|
122
|
+
|
|
123
|
+
# Annotates the current thread with profiling labels (key-value pairs that
# are written into pprof sample labels).
#
#   Rperf.label(request: "abc") { handle_request }  # scoped to the block
#   Rperf.label(request: "abc")                     # persistent set
#
# The given pairs are merged into the labels already in effect; a nil value
# removes that key. With a block, the previous label set is restored when the
# block exits (including via an exception) and the block's value is returned.
# Without a block, the labels stay set and nil is returned.
def self.label(**kw, &block)
  _init_label_sets unless @label_set_table

  previous_id = _c_get_label
  base_labels = @label_set_table[previous_id] || {}
  merged_labels = base_labels.merge(kw).compact
  _c_set_label(_intern_label_set(merged_labels))
  return unless block

  begin
    yield
  ensure
    # Always put the caller's label set back, even if the block raised.
    _c_set_label(previous_id)
  end
end
|
|
150
|
+
|
|
151
|
+
# Returns the labels currently set on this thread as a Hash.
# Yields an empty Hash when no labels are set or profiling is not running.
def self.labels
  table = @label_set_table
  return {} unless table

  table[_c_get_label] || {}
end
|
|
158
|
+
|
|
159
|
+
|
|
61
160
|
# Saves profiling data to a file.
|
|
62
161
|
# format: :pprof, :collapsed, or :text. nil = auto-detect from path extension
|
|
63
162
|
# .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
|
|
@@ -148,7 +247,7 @@ module Rperf
|
|
|
148
247
|
|
|
149
248
|
# Samples from C are now [[[path_str, label_str], ...], weight]
|
|
150
249
|
def self.print_top(data)
|
|
151
|
-
samples_raw = data[:
|
|
250
|
+
samples_raw = data[:aggregated_samples]
|
|
152
251
|
return if !samples_raw || samples_raw.empty?
|
|
153
252
|
|
|
154
253
|
result = compute_flat_cum(samples_raw)
|
|
@@ -180,7 +279,7 @@ module Rperf
|
|
|
180
279
|
private_constant :STAT_PCT_LINE, :STAT_LINE
|
|
181
280
|
|
|
182
281
|
def self.print_stat(data)
|
|
183
|
-
samples_raw = data[:
|
|
282
|
+
samples_raw = data[:aggregated_samples] || []
|
|
184
283
|
real_ns = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono) * 1_000_000_000).to_i
|
|
185
284
|
times = Process.times
|
|
186
285
|
user_ns = (times.utime * 1_000_000_000).to_i
|
|
@@ -198,7 +297,7 @@ module Rperf
|
|
|
198
297
|
if samples_raw.size > 0
|
|
199
298
|
breakdown, total_weight = compute_stat_breakdown(samples_raw)
|
|
200
299
|
print_stat_breakdown(breakdown, total_weight)
|
|
201
|
-
print_stat_runtime_info
|
|
300
|
+
print_stat_runtime_info(data)
|
|
202
301
|
print_stat_system_info
|
|
203
302
|
print_stat_report(data) if ENV["RPERF_STAT_REPORT"] == "1"
|
|
204
303
|
print_stat_footer(samples_raw, real_ns, data)
|
|
@@ -246,7 +345,9 @@ module Rperf
|
|
|
246
345
|
end
|
|
247
346
|
private_class_method :print_stat_breakdown
|
|
248
347
|
|
|
249
|
-
def self.print_stat_runtime_info
|
|
348
|
+
def self.print_stat_runtime_info(data)
|
|
349
|
+
thread_count = data[:detected_thread_count] || 0
|
|
350
|
+
$stderr.puts STAT_LINE.call(format_integer(thread_count), " ", "[Ruby] detected threads") if thread_count > 0
|
|
250
351
|
gc = GC.stat
|
|
251
352
|
$stderr.puts STAT_LINE.call(format_ms(gc[:time] * 1_000_000), "ms",
|
|
252
353
|
"[Ruby] GC time (%s count: %s minor, %s major)" % [
|
|
@@ -391,7 +492,7 @@ module Rperf
|
|
|
391
492
|
module_function
|
|
392
493
|
|
|
393
494
|
def encode(data, top_n: 50, header: true)
|
|
394
|
-
samples_raw = data[:
|
|
495
|
+
samples_raw = data[:aggregated_samples]
|
|
395
496
|
mode = data[:mode] || :cpu
|
|
396
497
|
frequency = data[:frequency] || 0
|
|
397
498
|
|
|
@@ -433,8 +534,10 @@ module Rperf
|
|
|
433
534
|
module_function
|
|
434
535
|
|
|
435
536
|
def encode(data)
|
|
537
|
+
samples = data[:aggregated_samples]
|
|
538
|
+
return "" if !samples || samples.empty?
|
|
436
539
|
merged = Hash.new(0)
|
|
437
|
-
|
|
540
|
+
samples.each do |frames, weight|
|
|
438
541
|
key = frames.reverse.map { |_, label| label }.join(";")
|
|
439
542
|
merged[key] += weight
|
|
440
543
|
end
|
|
@@ -451,7 +554,7 @@ module Rperf
|
|
|
451
554
|
module_function
|
|
452
555
|
|
|
453
556
|
def encode(data)
|
|
454
|
-
samples_raw = data[:
|
|
557
|
+
samples_raw = data[:aggregated_samples]
|
|
455
558
|
frequency = data[:frequency]
|
|
456
559
|
interval_ns = 1_000_000_000 / frequency
|
|
457
560
|
mode = data[:mode] || :cpu
|
|
@@ -468,17 +571,30 @@ module Rperf
|
|
|
468
571
|
end
|
|
469
572
|
}
|
|
470
573
|
|
|
471
|
-
# Convert string frames to index frames and merge identical stacks per thread
|
|
574
|
+
# Convert string frames to index frames and merge identical stacks per thread/label
|
|
472
575
|
merged = Hash.new(0)
|
|
473
576
|
thread_seq_key = intern.("thread_seq")
|
|
474
|
-
|
|
475
|
-
|
|
577
|
+
label_sets = data[:label_sets] # Array of Hash (may be nil)
|
|
578
|
+
samples_raw.each do |frames, weight, thread_seq, label_set_id|
|
|
579
|
+
key = [frames.map { |path, label| [intern.(path), intern.(label)] }, thread_seq || 0, label_set_id || 0]
|
|
476
580
|
merged[key] += weight
|
|
477
581
|
end
|
|
478
582
|
merged = merged.to_a
|
|
479
583
|
|
|
584
|
+
# Intern label set keys/values for pprof labels
|
|
585
|
+
label_key_indices = {} # String key → string_table index
|
|
586
|
+
if label_sets
|
|
587
|
+
label_sets.each do |ls|
|
|
588
|
+
ls.each do |k, v|
|
|
589
|
+
sk = k.to_s
|
|
590
|
+
label_key_indices[sk] ||= intern.(sk)
|
|
591
|
+
intern.(v.to_s) # ensure value is interned
|
|
592
|
+
end
|
|
593
|
+
end
|
|
594
|
+
end
|
|
595
|
+
|
|
480
596
|
# Build location/function tables
|
|
481
|
-
locations, functions = build_tables(merged.map { |(frames, _), w| [frames, w] })
|
|
597
|
+
locations, functions = build_tables(merged.map { |(frames, _, _), w| [frames, w] })
|
|
482
598
|
|
|
483
599
|
# Intern type label and unit
|
|
484
600
|
type_label = mode == :wall ? "wall" : "cpu"
|
|
@@ -491,8 +607,8 @@ module Rperf
|
|
|
491
607
|
# field 1: sample_type (repeated ValueType)
|
|
492
608
|
buf << encode_message(1, encode_value_type(type_idx, ns_idx))
|
|
493
609
|
|
|
494
|
-
# field 2: sample (repeated Sample) with thread_seq
|
|
495
|
-
merged.each do |(frames, thread_seq), weight|
|
|
610
|
+
# field 2: sample (repeated Sample) with thread_seq + user labels
|
|
611
|
+
merged.each do |(frames, thread_seq, label_set_id), weight|
|
|
496
612
|
sample_buf = "".b
|
|
497
613
|
loc_ids = frames.map { |f| locations[f] }
|
|
498
614
|
sample_buf << encode_packed_uint64(1, loc_ids)
|
|
@@ -503,6 +619,17 @@ module Rperf
|
|
|
503
619
|
label_buf << encode_int64(3, thread_seq) # num
|
|
504
620
|
sample_buf << encode_message(3, label_buf)
|
|
505
621
|
end
|
|
622
|
+
if label_sets && label_set_id && label_set_id > 0
|
|
623
|
+
ls = label_sets[label_set_id]
|
|
624
|
+
if ls
|
|
625
|
+
ls.each do |k, v|
|
|
626
|
+
label_buf = "".b
|
|
627
|
+
label_buf << encode_int64(1, label_key_indices[k.to_s]) # key
|
|
628
|
+
label_buf << encode_int64(2, string_index[v.to_s]) # str
|
|
629
|
+
sample_buf << encode_message(3, label_buf)
|
|
630
|
+
end
|
|
631
|
+
end
|
|
632
|
+
end
|
|
506
633
|
buf << encode_message(2, sample_buf)
|
|
507
634
|
end
|
|
508
635
|
|
|
@@ -537,7 +664,7 @@ module Rperf
|
|
|
537
664
|
|
|
538
665
|
# field 6: string_table (repeated string)
|
|
539
666
|
string_table.each do |s|
|
|
540
|
-
buf << encode_bytes(6, s.encode("UTF-8"))
|
|
667
|
+
buf << encode_bytes(6, s.encode("UTF-8", invalid: :replace, undef: :replace))
|
|
541
668
|
end
|
|
542
669
|
|
|
543
670
|
# field 9: time_nanos (int64)
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rperf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Koichi Sasada
|
|
@@ -52,6 +52,9 @@ files:
|
|
|
52
52
|
- ext/rperf/extconf.rb
|
|
53
53
|
- ext/rperf/rperf.c
|
|
54
54
|
- lib/rperf.rb
|
|
55
|
+
- lib/rperf/active_job.rb
|
|
56
|
+
- lib/rperf/middleware.rb
|
|
57
|
+
- lib/rperf/sidekiq.rb
|
|
55
58
|
- lib/rperf/version.rb
|
|
56
59
|
homepage: https://github.com/ko1/rperf
|
|
57
60
|
licenses:
|