rperf 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +71 -47
- data/docs/help.md +184 -34
- data/docs/logo.svg +25 -0
- data/exe/rperf +121 -26
- data/ext/rperf/rperf.c +250 -103
- data/lib/rperf/active_job.rb +1 -1
- data/lib/rperf/rack.rb +37 -0
- data/lib/rperf/sidekiq.rb +1 -1
- data/lib/rperf/version.rb +1 -1
- data/lib/rperf/viewer.rb +798 -0
- data/lib/rperf.rb +200 -51
- metadata +7 -5
- data/lib/rperf/middleware.rb +0 -15
data/lib/rperf.rb
CHANGED
|
@@ -19,15 +19,19 @@ module Rperf
|
|
|
19
19
|
@stat_start_mono = nil
|
|
20
20
|
|
|
21
21
|
# Starts profiling.
|
|
22
|
-
# format: :pprof, :collapsed, or :text. nil = auto-detect from output extension
|
|
22
|
+
# format: :json, :pprof, :collapsed, or :text. nil = auto-detect from output extension
|
|
23
|
+
# .json.gz → json (rperf native, default)
|
|
23
24
|
# .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
|
|
24
25
|
# .txt → text report (human/AI readable flat + cumulative table)
|
|
25
|
-
#
|
|
26
|
-
def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true)
|
|
26
|
+
# .pb.gz → pprof protobuf (gzip compressed)
|
|
27
|
+
def self.start(frequency: 1000, mode: :cpu, output: nil, verbose: false, format: nil, stat: false, signal: nil, aggregate: true, defer: false)
|
|
27
28
|
raise ArgumentError, "frequency must be a positive integer (got #{frequency.inspect})" unless frequency.is_a?(Integer) && frequency > 0
|
|
28
29
|
raise ArgumentError, "frequency must be <= 10000 (10KHz), got #{frequency}" if frequency > 10_000
|
|
29
30
|
raise ArgumentError, "mode must be :cpu or :wall, got #{mode.inspect}" unless %i[cpu wall].include?(mode)
|
|
30
31
|
c_mode = mode == :cpu ? 0 : 1
|
|
32
|
+
unless signal.nil? || signal == false || signal.is_a?(Integer)
|
|
33
|
+
raise ArgumentError, "signal must be nil, false, or an Integer, got #{signal.inspect}"
|
|
34
|
+
end
|
|
31
35
|
c_signal = signal.nil? ? -1 : (signal ? signal.to_i : 0)
|
|
32
36
|
if c_signal > 0
|
|
33
37
|
raise ArgumentError, "signal mode is only supported on Linux" unless RUBY_PLATFORM =~ /linux/
|
|
@@ -41,20 +45,33 @@ module Rperf
|
|
|
41
45
|
@output = output
|
|
42
46
|
@format = format
|
|
43
47
|
@stat = stat
|
|
44
|
-
|
|
48
|
+
if @stat
|
|
49
|
+
@stat_start_mono = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
50
|
+
@stat_start_times = Process.times
|
|
51
|
+
end
|
|
45
52
|
@label_set_table = nil
|
|
46
53
|
@label_set_index = nil
|
|
47
|
-
_c_start(frequency, c_mode, aggregate, c_signal)
|
|
54
|
+
_c_start(frequency, c_mode, aggregate, c_signal, defer)
|
|
48
55
|
|
|
49
56
|
if block_given?
|
|
50
57
|
begin
|
|
51
58
|
yield
|
|
52
59
|
ensure
|
|
53
|
-
|
|
60
|
+
result = stop
|
|
54
61
|
end
|
|
62
|
+
result
|
|
55
63
|
end
|
|
56
64
|
end
|
|
57
65
|
|
|
66
|
+
# VM state integer → label value mapping.
|
|
67
|
+
# Each entry is a [label key, label value] pair; the label key is "%GVL" or "%GC".
|
|
68
|
+
VM_STATE_LABELS = {
|
|
69
|
+
1 => ["%GVL", "blocked"],
|
|
70
|
+
2 => ["%GVL", "wait"],
|
|
71
|
+
3 => ["%GC", "mark"],
|
|
72
|
+
4 => ["%GC", "sweep"],
|
|
73
|
+
}.freeze
|
|
74
|
+
|
|
58
75
|
def self.stop
|
|
59
76
|
data = _c_stop
|
|
60
77
|
return unless data
|
|
@@ -63,17 +80,19 @@ module Rperf
|
|
|
63
80
|
# :aggregated_samples. Build aggregated view so encoders always work.
|
|
64
81
|
if data[:raw_samples] && !data[:aggregated_samples]
|
|
65
82
|
merged = {}
|
|
66
|
-
data[:raw_samples].each do |frames, weight, thread_seq, label_set_id|
|
|
67
|
-
key = [frames, thread_seq || 0, label_set_id || 0]
|
|
83
|
+
data[:raw_samples].each do |frames, weight, thread_seq, label_set_id, vm_state|
|
|
84
|
+
key = [frames, thread_seq || 0, label_set_id || 0, vm_state || 0]
|
|
68
85
|
if merged.key?(key)
|
|
69
86
|
merged[key] += weight
|
|
70
87
|
else
|
|
71
88
|
merged[key] = weight
|
|
72
89
|
end
|
|
73
90
|
end
|
|
74
|
-
data[:aggregated_samples] = merged.map { |(frames, ts, lsi), w| [frames, w, ts, lsi] }
|
|
91
|
+
data[:aggregated_samples] = merged.map { |(frames, ts, lsi, vs), w| [frames, w, ts, lsi, vs] }
|
|
75
92
|
end
|
|
76
93
|
|
|
94
|
+
merge_vm_state_labels!(data)
|
|
95
|
+
|
|
77
96
|
print_stats(data) if @verbose
|
|
78
97
|
print_stat(data) if @stat
|
|
79
98
|
|
|
@@ -95,7 +114,10 @@ module Rperf
|
|
|
95
114
|
# This allows interval-based profiling where each snapshot covers only
|
|
96
115
|
# the period since the last clear.
|
|
97
116
|
def self.snapshot(clear: false)
|
|
98
|
-
_c_snapshot(clear)
|
|
117
|
+
data = _c_snapshot(clear)
|
|
118
|
+
return unless data
|
|
119
|
+
merge_vm_state_labels!(data)
|
|
120
|
+
data
|
|
99
121
|
end
|
|
100
122
|
|
|
101
123
|
# Label set management for per-context profiling.
|
|
@@ -130,6 +152,9 @@ module Rperf
|
|
|
130
152
|
#
|
|
131
153
|
# Values of nil remove that key. Existing labels are merged.
|
|
132
154
|
def self.label(**kw, &block)
|
|
155
|
+
return yield if block && !_c_running?
|
|
156
|
+
return unless _c_running?
|
|
157
|
+
|
|
133
158
|
_init_label_sets unless @label_set_table
|
|
134
159
|
|
|
135
160
|
cur_id = _c_get_label
|
|
@@ -148,6 +173,38 @@ module Rperf
|
|
|
148
173
|
end
|
|
149
174
|
end
|
|
150
175
|
|
|
176
|
+
# Profiles the given block: activates timer sampling for the duration
|
|
177
|
+
# and optionally applies labels. Use with start(defer: true) to profile
|
|
178
|
+
# only specific sections of code.
|
|
179
|
+
#
|
|
180
|
+
# Rperf.start(defer: true, mode: :wall)
|
|
181
|
+
# Rperf.profile(endpoint: "/users") { handle_request }
|
|
182
|
+
# data = Rperf.stop
|
|
183
|
+
#
|
|
184
|
+
# Nesting is supported: timer stays active until the outermost profile exits.
|
|
185
|
+
# Requires a block. Raises if profiling is not started.
|
|
186
|
+
def self.profile(**kw, &block)
|
|
187
|
+
raise ArgumentError, "Rperf.profile requires a block" unless block
|
|
188
|
+
raise RuntimeError, "Rperf is not started" unless _c_running?
|
|
189
|
+
|
|
190
|
+
_init_label_sets unless @label_set_table
|
|
191
|
+
|
|
192
|
+
cur_id = _c_get_label
|
|
193
|
+
cur_labels = @label_set_table[cur_id] || {}
|
|
194
|
+
new_labels = cur_labels.merge(kw).reject { |_, v| v.nil? }
|
|
195
|
+
new_id = _intern_label_set(new_labels)
|
|
196
|
+
_c_set_label(new_id)
|
|
197
|
+
|
|
198
|
+
_c_profile_inc
|
|
199
|
+
|
|
200
|
+
begin
|
|
201
|
+
yield
|
|
202
|
+
ensure
|
|
203
|
+
_c_profile_dec
|
|
204
|
+
_c_set_label(cur_id)
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
|
|
151
208
|
# Returns the current thread's labels as a Hash.
|
|
152
209
|
# Returns an empty Hash if no labels are set or profiling is not running.
|
|
153
210
|
def self.labels
|
|
@@ -157,11 +214,53 @@ module Rperf
|
|
|
157
214
|
end
|
|
158
215
|
|
|
159
216
|
|
|
217
|
+
# Merge vm_state from C samples into label_sets under the "%GVL" / "%GC" label keys (see VM_STATE_LABELS).
|
|
218
|
+
# Mutates data in place: updates label_set_id on each sample, strips vm_state,
|
|
219
|
+
# and extends label_sets with new entries as needed.
|
|
220
|
+
def self.merge_vm_state_labels!(data)
|
|
221
|
+
samples_key = data[:aggregated_samples] ? :aggregated_samples : :raw_samples
|
|
222
|
+
samples = data[samples_key]
|
|
223
|
+
return unless samples
|
|
224
|
+
|
|
225
|
+
orig_label_sets = data[:label_sets]
|
|
226
|
+
label_sets = (orig_label_sets || [{}]).dup
|
|
227
|
+
mapping = {} # [original_label_set_id, vm_state] => new_label_set_id
|
|
228
|
+
modified = false
|
|
229
|
+
|
|
230
|
+
samples.each do |sample|
|
|
231
|
+
vm_state = sample[4] || 0
|
|
232
|
+
next if vm_state == 0
|
|
233
|
+
next unless VM_STATE_LABELS.key?(vm_state)
|
|
234
|
+
|
|
235
|
+
label_set_id = sample[3] || 0
|
|
236
|
+
cache_key = [label_set_id, vm_state]
|
|
237
|
+
new_id = mapping[cache_key]
|
|
238
|
+
unless new_id
|
|
239
|
+
base = label_sets[label_set_id] || {}
|
|
240
|
+
key, value = VM_STATE_LABELS[vm_state]
|
|
241
|
+
new_ls = base.merge(key => value).freeze
|
|
242
|
+
new_id = label_sets.size
|
|
243
|
+
label_sets << new_ls
|
|
244
|
+
mapping[cache_key] = new_id
|
|
245
|
+
end
|
|
246
|
+
sample[3] = new_id
|
|
247
|
+
modified = true
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# Strip vm_state (5th element) from all samples
|
|
251
|
+
samples.each { |s| s.pop if s.size > 4 }
|
|
252
|
+
|
|
253
|
+
# Only set label_sets if they were already present or we added vm_state labels
|
|
254
|
+
data[:label_sets] = label_sets if orig_label_sets || modified
|
|
255
|
+
end
|
|
256
|
+
private_class_method :merge_vm_state_labels!
|
|
257
|
+
|
|
160
258
|
# Saves profiling data to a file.
|
|
161
|
-
# format: :pprof, :collapsed, or :text. nil = auto-detect from path extension
|
|
259
|
+
# format: :json, :pprof, :collapsed, or :text. nil = auto-detect from path extension
|
|
260
|
+
# .json.gz → json (rperf native, default)
|
|
162
261
|
# .collapsed → collapsed stacks (FlameGraph / speedscope compatible)
|
|
163
262
|
# .txt → text report (human/AI readable flat + cumulative table)
|
|
164
|
-
#
|
|
263
|
+
# .pb.gz → pprof protobuf (gzip compressed)
|
|
165
264
|
def self.save(path, data, format: nil)
|
|
166
265
|
write_data(path, data, format)
|
|
167
266
|
end
|
|
@@ -173,17 +272,38 @@ module Rperf
|
|
|
173
272
|
File.write(path, Collapsed.encode(data))
|
|
174
273
|
when :text
|
|
175
274
|
File.write(path, Text.encode(data))
|
|
275
|
+
when :json
|
|
276
|
+
require "json"
|
|
277
|
+
File.binwrite(path, gzip(JSON.generate(data.merge(rperf_version: VERSION))))
|
|
176
278
|
else
|
|
177
279
|
File.binwrite(path, gzip(PProf.encode(data)))
|
|
178
280
|
end
|
|
179
281
|
end
|
|
180
282
|
private_class_method :write_data
|
|
181
283
|
|
|
284
|
+
# Load a profile saved by rperf record (.json.gz).
|
|
285
|
+
# Returns the data hash (same format as Rperf.stop / Rperf.snapshot).
|
|
286
|
+
# Warns to stderr if the file was saved by a different rperf version.
|
|
287
|
+
def self.load(path)
|
|
288
|
+
compressed = File.binread(path)
|
|
289
|
+
raw = Zlib::GzipReader.new(StringIO.new(compressed)).read
|
|
290
|
+
require "json"
|
|
291
|
+
data = JSON.parse(raw, symbolize_names: true)
|
|
292
|
+
saved_version = data.delete(:rperf_version)
|
|
293
|
+
if saved_version && saved_version != VERSION
|
|
294
|
+
$stderr.puts "rperf: warning: file was saved by rperf #{saved_version} (current: #{VERSION})"
|
|
295
|
+
elsif saved_version.nil?
|
|
296
|
+
$stderr.puts "rperf: warning: file has no version info (may be from an older rperf)"
|
|
297
|
+
end
|
|
298
|
+
data
|
|
299
|
+
end
|
|
300
|
+
|
|
182
301
|
def self.detect_format(path, format)
|
|
183
302
|
return format.to_sym if format
|
|
184
303
|
case path.to_s
|
|
185
|
-
when /\.collapsed\z/
|
|
186
|
-
when /\.txt\z/
|
|
304
|
+
when /\.collapsed\z/ then :collapsed
|
|
305
|
+
when /\.txt\z/ then :text
|
|
306
|
+
when /\.json(\.gz)?\z/ then :json
|
|
187
307
|
else :pprof
|
|
188
308
|
end
|
|
189
309
|
end
|
|
@@ -201,16 +321,15 @@ module Rperf
|
|
|
201
321
|
def self.print_stats(data)
|
|
202
322
|
count = data[:sampling_count] || 0
|
|
203
323
|
total_ns = data[:sampling_time_ns] || 0
|
|
204
|
-
sample_count = data[:sampling_count] || 0
|
|
205
324
|
mode = data[:mode] || :cpu
|
|
206
325
|
frequency = data[:frequency] || 0
|
|
207
326
|
|
|
208
327
|
total_ms = total_ns / 1_000_000.0
|
|
209
328
|
avg_us = count > 0 ? total_ns / count / 1000.0 : 0.0
|
|
210
329
|
|
|
211
|
-
$stderr.puts "[
|
|
212
|
-
$stderr.puts "[
|
|
213
|
-
$stderr.puts "[
|
|
330
|
+
$stderr.puts "[Rperf] mode=#{mode} frequency=#{frequency}Hz"
|
|
331
|
+
$stderr.puts "[Rperf] sampling: #{count} calls, #{format("%.2f", total_ms)}ms total, #{format("%.1f", avg_us)}us/call avg"
|
|
332
|
+
$stderr.puts "[Rperf] samples recorded: #{count}"
|
|
214
333
|
|
|
215
334
|
print_top(data)
|
|
216
335
|
end
|
|
@@ -259,13 +378,13 @@ module Rperf
|
|
|
259
378
|
|
|
260
379
|
def self.print_top_table(kind, table, total_weight)
|
|
261
380
|
top = table.sort_by { |_, w| -w }.first(TOP_N)
|
|
262
|
-
$stderr.puts "[
|
|
381
|
+
$stderr.puts "[Rperf] top #{top.size} by #{kind}:"
|
|
263
382
|
top.each do |key, weight|
|
|
264
383
|
label, path = key
|
|
265
384
|
ms = weight / 1_000_000.0
|
|
266
385
|
pct = total_weight > 0 ? weight * 100.0 / total_weight : 0.0
|
|
267
386
|
loc = path.empty? ? "" : " (#{path})"
|
|
268
|
-
$stderr.puts format("[
|
|
387
|
+
$stderr.puts format("[Rperf] %8.1fms %5.1f%% %s%s", ms, pct, label, loc)
|
|
269
388
|
end
|
|
270
389
|
end
|
|
271
390
|
|
|
@@ -282,8 +401,9 @@ module Rperf
|
|
|
282
401
|
samples_raw = data[:aggregated_samples] || []
|
|
283
402
|
real_ns = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @stat_start_mono) * 1_000_000_000).to_i
|
|
284
403
|
times = Process.times
|
|
285
|
-
|
|
286
|
-
|
|
404
|
+
start_times = @stat_start_times || Struct.new(:utime, :stime).new(0.0, 0.0)
|
|
405
|
+
user_ns = ((times.utime - start_times.utime) * 1_000_000_000).to_i
|
|
406
|
+
sys_ns = ((times.stime - start_times.stime) * 1_000_000_000).to_i
|
|
287
407
|
|
|
288
408
|
command = ENV["RPERF_STAT_COMMAND"] || "(unknown)"
|
|
289
409
|
|
|
@@ -295,7 +415,7 @@ module Rperf
|
|
|
295
415
|
$stderr.puts format(" %14s ms real", format_ms(real_ns))
|
|
296
416
|
|
|
297
417
|
if samples_raw.size > 0
|
|
298
|
-
breakdown, total_weight = compute_stat_breakdown(samples_raw)
|
|
418
|
+
breakdown, total_weight = compute_stat_breakdown(samples_raw, data[:label_sets])
|
|
299
419
|
print_stat_breakdown(breakdown, total_weight)
|
|
300
420
|
print_stat_runtime_info(data)
|
|
301
421
|
print_stat_system_info
|
|
@@ -306,20 +426,25 @@ module Rperf
|
|
|
306
426
|
$stderr.puts
|
|
307
427
|
end
|
|
308
428
|
|
|
309
|
-
def self.compute_stat_breakdown(samples_raw)
|
|
429
|
+
def self.compute_stat_breakdown(samples_raw, label_sets)
|
|
310
430
|
breakdown = Hash.new(0)
|
|
311
431
|
total_weight = 0
|
|
312
432
|
|
|
313
|
-
samples_raw.each do |frames, weight|
|
|
433
|
+
samples_raw.each do |frames, weight, _thread_seq, label_set_id|
|
|
314
434
|
total_weight += weight
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
435
|
+
category = :cpu_execution
|
|
436
|
+
if label_sets && label_set_id && label_set_id > 0
|
|
437
|
+
ls = label_sets[label_set_id]
|
|
438
|
+
if ls
|
|
439
|
+
gvl = ls["%GVL"]
|
|
440
|
+
gc = ls["%GC"]
|
|
441
|
+
if gvl == "blocked" then category = :gvl_blocked
|
|
442
|
+
elsif gvl == "wait" then category = :gvl_wait
|
|
443
|
+
elsif gc == "mark" then category = :gc_marking
|
|
444
|
+
elsif gc == "sweep" then category = :gc_sweeping
|
|
445
|
+
end
|
|
446
|
+
end
|
|
447
|
+
end
|
|
323
448
|
breakdown[category] += weight
|
|
324
449
|
end
|
|
325
450
|
|
|
@@ -331,11 +456,11 @@ module Rperf
|
|
|
331
456
|
$stderr.puts
|
|
332
457
|
|
|
333
458
|
[
|
|
334
|
-
[:cpu_execution, "CPU execution"],
|
|
335
|
-
[:gvl_blocked, "[
|
|
336
|
-
[:gvl_wait, "[
|
|
337
|
-
[:gc_marking, "[
|
|
338
|
-
[:gc_sweeping, "[
|
|
459
|
+
[:cpu_execution, "[Rperf] CPU execution"],
|
|
460
|
+
[:gvl_blocked, "[Rperf] GVL blocked (I/O, sleep)"],
|
|
461
|
+
[:gvl_wait, "[Rperf] GVL wait (contention)"],
|
|
462
|
+
[:gc_marking, "[Rperf] GC marking"],
|
|
463
|
+
[:gc_sweeping, "[Rperf] GC sweeping"],
|
|
339
464
|
].each do |key, label|
|
|
340
465
|
w = breakdown[key]
|
|
341
466
|
next if w == 0
|
|
@@ -346,20 +471,20 @@ module Rperf
|
|
|
346
471
|
private_class_method :print_stat_breakdown
|
|
347
472
|
|
|
348
473
|
def self.print_stat_runtime_info(data)
|
|
349
|
-
thread_count = data[:detected_thread_count] || 0
|
|
350
|
-
$stderr.puts STAT_LINE.call(format_integer(thread_count), " ", "[Ruby] detected threads") if thread_count > 0
|
|
351
474
|
gc = GC.stat
|
|
352
475
|
$stderr.puts STAT_LINE.call(format_ms(gc[:time] * 1_000_000), "ms",
|
|
353
|
-
"[Ruby] GC time (%s count: %s minor, %s major)" % [
|
|
476
|
+
"[Ruby ] GC time (%s count: %s minor, %s major)" % [
|
|
354
477
|
format_integer(gc[:count]),
|
|
355
478
|
format_integer(gc[:minor_gc_count]),
|
|
356
479
|
format_integer(gc[:major_gc_count])])
|
|
357
|
-
$stderr.puts STAT_LINE.call(format_integer(gc[:total_allocated_objects]), " ", "[Ruby] allocated objects")
|
|
358
|
-
$stderr.puts STAT_LINE.call(format_integer(gc[:total_freed_objects]), " ", "[Ruby] freed objects")
|
|
480
|
+
$stderr.puts STAT_LINE.call(format_integer(gc[:total_allocated_objects]), " ", "[Ruby ] allocated objects")
|
|
481
|
+
$stderr.puts STAT_LINE.call(format_integer(gc[:total_freed_objects]), " ", "[Ruby ] freed objects")
|
|
482
|
+
thread_count = data[:detected_thread_count] || 0
|
|
483
|
+
$stderr.puts STAT_LINE.call(format_integer(thread_count), " ", "[Ruby ] detected threads") if thread_count > 0
|
|
359
484
|
if defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled?
|
|
360
485
|
yjit = RubyVM::YJIT.runtime_stats
|
|
361
486
|
if yjit[:ratio_in_yjit]
|
|
362
|
-
$stderr.puts STAT_LINE.call(format("%.1f%%", yjit[:ratio_in_yjit] * 100), " ", "[Ruby] YJIT code execution ratio")
|
|
487
|
+
$stderr.puts STAT_LINE.call(format("%.1f%%", yjit[:ratio_in_yjit] * 100), " ", "[Ruby ] YJIT code execution ratio")
|
|
363
488
|
end
|
|
364
489
|
end
|
|
365
490
|
end
|
|
@@ -369,12 +494,20 @@ module Rperf
|
|
|
369
494
|
sys_stats = get_system_stats
|
|
370
495
|
maxrss_kb = sys_stats[:maxrss_kb]
|
|
371
496
|
if maxrss_kb
|
|
372
|
-
$stderr.puts STAT_LINE.call(format_integer((maxrss_kb / 1024.0).round), "MB", "[OS] peak memory (maxrss)")
|
|
497
|
+
$stderr.puts STAT_LINE.call(format_integer((maxrss_kb / 1024.0).round), "MB", "[OS ] peak memory (maxrss)")
|
|
498
|
+
end
|
|
499
|
+
if sys_stats[:page_faults_minor]
|
|
500
|
+
minor = sys_stats[:page_faults_minor]
|
|
501
|
+
major = sys_stats[:page_faults_major]
|
|
502
|
+
$stderr.puts STAT_LINE.call(
|
|
503
|
+
format_integer(minor + major), " ",
|
|
504
|
+
"[OS ] page faults (%s minor, %s major)" % [
|
|
505
|
+
format_integer(minor), format_integer(major)])
|
|
373
506
|
end
|
|
374
507
|
if sys_stats[:ctx_voluntary]
|
|
375
508
|
$stderr.puts STAT_LINE.call(
|
|
376
509
|
format_integer(sys_stats[:ctx_voluntary] + sys_stats[:ctx_involuntary]), " ",
|
|
377
|
-
"[OS] context switches (%s voluntary, %s involuntary)" % [
|
|
510
|
+
"[OS ] context switches (%s voluntary, %s involuntary)" % [
|
|
378
511
|
format_integer(sys_stats[:ctx_voluntary]),
|
|
379
512
|
format_integer(sys_stats[:ctx_involuntary])])
|
|
380
513
|
end
|
|
@@ -383,7 +516,7 @@ module Rperf
|
|
|
383
516
|
w = sys_stats[:io_write_bytes]
|
|
384
517
|
$stderr.puts STAT_LINE.call(
|
|
385
518
|
format_integer(((r + w) / 1024.0 / 1024.0).round), "MB",
|
|
386
|
-
"[OS] disk I/O (%s MB read, %s MB write)" % [
|
|
519
|
+
"[OS ] disk I/O (%s MB read, %s MB write)" % [
|
|
387
520
|
format_integer((r / 1024.0 / 1024.0).round),
|
|
388
521
|
format_integer((w / 1024.0 / 1024.0).round)])
|
|
389
522
|
end
|
|
@@ -404,6 +537,10 @@ module Rperf
|
|
|
404
537
|
samples = data[:sampling_count] || samples_raw.size
|
|
405
538
|
$stderr.puts format(" %d samples / %d triggers, %.1f%% profiler overhead",
|
|
406
539
|
samples, triggers, overhead_pct)
|
|
540
|
+
dropped = data[:dropped_samples] || 0
|
|
541
|
+
if dropped > 0
|
|
542
|
+
$stderr.puts format(" WARNING: %d samples dropped due to memory allocation failure", dropped)
|
|
543
|
+
end
|
|
407
544
|
end
|
|
408
545
|
private_class_method :print_stat_footer
|
|
409
546
|
|
|
@@ -445,6 +582,12 @@ module Rperf
|
|
|
445
582
|
stats[:maxrss_kb] = rss if rss && rss > 0
|
|
446
583
|
end
|
|
447
584
|
|
|
585
|
+
if File.readable?("/proc/self/stat")
|
|
586
|
+
fields = File.read("/proc/self/stat").split
|
|
587
|
+
stats[:page_faults_minor] = fields[9].to_i
|
|
588
|
+
stats[:page_faults_major] = fields[11].to_i
|
|
589
|
+
end
|
|
590
|
+
|
|
448
591
|
if File.readable?("/proc/self/io")
|
|
449
592
|
# Linux: parse /proc/self/io
|
|
450
593
|
File.read("/proc/self/io").each_line do |line|
|
|
@@ -468,16 +611,22 @@ module Rperf
|
|
|
468
611
|
raise ArgumentError, "RPERF_MODE must be 'cpu' or 'wall', got: #{_rperf_mode_str.inspect}"
|
|
469
612
|
end
|
|
470
613
|
_rperf_mode = _rperf_mode_str == "wall" ? :wall : :cpu
|
|
471
|
-
_rperf_format =
|
|
614
|
+
_rperf_format = if ENV["RPERF_FORMAT"]
|
|
615
|
+
unless %w[pprof collapsed text json].include?(ENV["RPERF_FORMAT"])
|
|
616
|
+
raise ArgumentError, "RPERF_FORMAT must be one of pprof, collapsed, text, json, got: #{ENV["RPERF_FORMAT"].inspect}"
|
|
617
|
+
end
|
|
618
|
+
ENV["RPERF_FORMAT"].to_sym
|
|
619
|
+
end
|
|
472
620
|
_rperf_stat = ENV["RPERF_STAT"] == "1"
|
|
473
621
|
_rperf_signal = case ENV["RPERF_SIGNAL"]
|
|
474
622
|
when nil then nil
|
|
475
623
|
when "false" then false
|
|
476
|
-
|
|
624
|
+
when /\A\d+\z/ then ENV["RPERF_SIGNAL"].to_i
|
|
625
|
+
else raise ArgumentError, "RPERF_SIGNAL must be a signal number or 'false', got: #{ENV["RPERF_SIGNAL"].inspect}"
|
|
477
626
|
end
|
|
478
627
|
_rperf_aggregate = ENV["RPERF_AGGREGATE"] != "0"
|
|
479
628
|
_rperf_start_opts = { frequency: (ENV["RPERF_FREQUENCY"] || 1000).to_i, mode: _rperf_mode,
|
|
480
|
-
output: _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.
|
|
629
|
+
output: _rperf_stat ? ENV["RPERF_OUTPUT"] : (ENV["RPERF_OUTPUT"] || "rperf.json.gz"),
|
|
481
630
|
verbose: ENV["RPERF_VERBOSE"] == "1",
|
|
482
631
|
format: _rperf_format,
|
|
483
632
|
stat: _rperf_stat,
|
|
@@ -660,7 +809,7 @@ module Rperf
|
|
|
660
809
|
intern.("frequency: #{frequency}Hz"),
|
|
661
810
|
intern.("ruby: #{RUBY_DESCRIPTION}"),
|
|
662
811
|
]
|
|
663
|
-
doc_url_idx = intern.("https://ko1.github.io/rperf/help.html")
|
|
812
|
+
doc_url_idx = intern.("https://ko1.github.io/rperf/docs/help.html")
|
|
664
813
|
|
|
665
814
|
# field 6: string_table (repeated string)
|
|
666
815
|
string_table.each do |s|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rperf
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.6.0
|
|
4
|
+
version: 0.8.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Koichi Sasada
|
|
@@ -38,8 +38,8 @@ dependencies:
|
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
39
|
version: '3.6'
|
|
40
40
|
description: A safepoint-based sampling performance profiler that uses thread CPU
|
|
41
|
-
time deltas as weights to correct safepoint bias. Outputs pprof, collapsed
|
|
42
|
-
or text report.
|
|
41
|
+
time deltas as weights to correct safepoint bias. Outputs JSON, pprof, collapsed
|
|
42
|
+
stacks, or text report.
|
|
43
43
|
executables:
|
|
44
44
|
- rperf
|
|
45
45
|
extensions:
|
|
@@ -48,14 +48,16 @@ extra_rdoc_files: []
|
|
|
48
48
|
files:
|
|
49
49
|
- README.md
|
|
50
50
|
- docs/help.md
|
|
51
|
+
- docs/logo.svg
|
|
51
52
|
- exe/rperf
|
|
52
53
|
- ext/rperf/extconf.rb
|
|
53
54
|
- ext/rperf/rperf.c
|
|
54
55
|
- lib/rperf.rb
|
|
55
56
|
- lib/rperf/active_job.rb
|
|
56
|
-
- lib/rperf/middleware.rb
|
|
57
|
+
- lib/rperf/rack.rb
|
|
57
58
|
- lib/rperf/sidekiq.rb
|
|
58
59
|
- lib/rperf/version.rb
|
|
60
|
+
- lib/rperf/viewer.rb
|
|
59
61
|
homepage: https://github.com/ko1/rperf
|
|
60
62
|
licenses:
|
|
61
63
|
- MIT
|
|
@@ -74,7 +76,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
74
76
|
- !ruby/object:Gem::Version
|
|
75
77
|
version: '0'
|
|
76
78
|
requirements: []
|
|
77
|
-
rubygems_version: 4.0.
|
|
79
|
+
rubygems_version: 4.0.6
|
|
78
80
|
specification_version: 4
|
|
79
81
|
summary: Safepoint-based sampling performance profiler for Ruby
|
|
80
82
|
test_files: []
|
data/lib/rperf/middleware.rb
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
require "rperf"
|
|
2
|
-
|
|
3
|
-
class Rperf::Middleware
|
|
4
|
-
def initialize(app, label_key: :endpoint)
|
|
5
|
-
@app = app
|
|
6
|
-
@label_key = label_key
|
|
7
|
-
end
|
|
8
|
-
|
|
9
|
-
def call(env)
|
|
10
|
-
endpoint = "#{env["REQUEST_METHOD"]} #{env["PATH_INFO"]}"
|
|
11
|
-
Rperf.label(@label_key => endpoint) do
|
|
12
|
-
@app.call(env)
|
|
13
|
-
end
|
|
14
|
-
end
|
|
15
|
-
end
|