rperf 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/docs/help.md +47 -16
- data/exe/rperf +58 -10
- data/ext/rperf/rperf.c +875 -236
- data/lib/rperf/version.rb +1 -1
- data/lib/rperf.rb +65 -50
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3413c4c6ed0cdc0897428bf01fc0fec17a4d14f1c2883e9e5afa0cff110247dc
|
|
4
|
+
data.tar.gz: '097b06203ce4648a860f2816635d6dfac52f8e5987aa381653cec874d52abf7c'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 37065071f049a27eb1bab9f859ed39499022489a19aa8ecd91b3dc35cb6052ffb6b2fbc02c67ea46a94e8dba7644f2b23760d72d2dda7b998ccf3c61c304e225
|
|
7
|
+
data.tar.gz: 686ab430d58e5dd5163ae65a2bd330a76e57cf0dd72e7eac2b7c61621a03007bd724cac20b1e452766870ff33de325855e199bc3d873d004344f7b26b9b6614f
|
data/docs/help.md
CHANGED
|
@@ -10,6 +10,7 @@ POSIX systems (Linux, macOS). Requires Ruby >= 3.4.0.
|
|
|
10
10
|
|
|
11
11
|
rperf record [options] command [args...]
|
|
12
12
|
rperf stat [options] command [args...]
|
|
13
|
+
rperf exec [options] command [args...]
|
|
13
14
|
rperf report [options] [file]
|
|
14
15
|
rperf help
|
|
15
16
|
|
|
@@ -19,22 +20,41 @@ POSIX systems (Linux, macOS). Requires Ruby >= 3.4.0.
|
|
|
19
20
|
-f, --frequency HZ Sampling frequency in Hz (default: 1000)
|
|
20
21
|
-m, --mode MODE cpu or wall (default: cpu)
|
|
21
22
|
--format FORMAT pprof, collapsed, or text (default: auto from extension)
|
|
23
|
+
-p, --print Print text profile to stdout
|
|
24
|
+
(same as --format=text --output=/dev/stdout)
|
|
22
25
|
--signal VALUE Timer signal (Linux only): signal number, or 'false'
|
|
23
26
|
for nanosleep thread (default: auto)
|
|
24
27
|
-v, --verbose Print sampling statistics to stderr
|
|
25
28
|
|
|
26
29
|
### stat: Run command and print performance summary to stderr.
|
|
27
30
|
|
|
28
|
-
|
|
31
|
+
Uses wall mode by default. No file output by default.
|
|
29
32
|
|
|
30
33
|
-o, --output PATH Also save profile to file (default: none)
|
|
31
34
|
-f, --frequency HZ Sampling frequency in Hz (default: 1000)
|
|
35
|
+
-m, --mode MODE cpu or wall (default: wall)
|
|
36
|
+
--report Include flat/cumulative profile tables in output
|
|
32
37
|
--signal VALUE Timer signal (Linux only): signal number, or 'false'
|
|
33
38
|
for nanosleep thread (default: auto)
|
|
34
39
|
-v, --verbose Print additional sampling statistics
|
|
35
40
|
|
|
36
41
|
Shows: user/sys/real time, time breakdown (CPU execution, GVL blocked,
|
|
37
|
-
GVL wait, GC marking, GC sweeping),
|
|
42
|
+
GVL wait, GC marking, GC sweeping), GC/memory/OS stats, and profiler overhead.
|
|
43
|
+
Use --report to add flat and cumulative top-50 function tables.
|
|
44
|
+
|
|
45
|
+
### exec: Run command and print full profile report to stderr.
|
|
46
|
+
|
|
47
|
+
Like `stat --report`. Uses wall mode by default. No file output by default.
|
|
48
|
+
|
|
49
|
+
-o, --output PATH Also save profile to file (default: none)
|
|
50
|
+
-f, --frequency HZ Sampling frequency in Hz (default: 1000)
|
|
51
|
+
-m, --mode MODE cpu or wall (default: wall)
|
|
52
|
+
--signal VALUE Timer signal (Linux only): signal number, or 'false'
|
|
53
|
+
for nanosleep thread (default: auto)
|
|
54
|
+
-v, --verbose Print additional sampling statistics
|
|
55
|
+
|
|
56
|
+
Shows: user/sys/real time, time breakdown, GC/memory/OS stats, profiler overhead,
|
|
57
|
+
and flat/cumulative top-50 function tables.
|
|
38
58
|
|
|
39
59
|
### report: Open pprof profile with go tool pprof. Requires Go.
|
|
40
60
|
|
|
@@ -58,8 +78,12 @@ Default (no flag): opens diff in browser.
|
|
|
58
78
|
rperf record -m wall -f 500 -o profile.pb.gz ruby server.rb
|
|
59
79
|
rperf record -o profile.collapsed ruby app.rb
|
|
60
80
|
rperf record -o profile.txt ruby app.rb
|
|
81
|
+
rperf record -p ruby app.rb
|
|
61
82
|
rperf stat ruby app.rb
|
|
83
|
+
rperf stat --report ruby app.rb
|
|
62
84
|
rperf stat -o profile.pb.gz ruby app.rb
|
|
85
|
+
rperf exec ruby app.rb
|
|
86
|
+
rperf exec -m cpu ruby app.rb
|
|
63
87
|
rperf report
|
|
64
88
|
rperf report --top profile.pb.gz
|
|
65
89
|
rperf diff before.pb.gz after.pb.gz
|
|
@@ -99,16 +123,22 @@ Rperf.save("profile.txt", data)
|
|
|
99
123
|
nil if profiler was not running; otherwise a Hash:
|
|
100
124
|
|
|
101
125
|
```ruby
|
|
102
|
-
{ mode: :cpu,
|
|
126
|
+
{ mode: :cpu, # or :wall
|
|
103
127
|
frequency: 500,
|
|
104
128
|
sampling_count: 1234,
|
|
105
129
|
sampling_time_ns: 56789,
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
130
|
+
detected_thread_count: 4, # threads seen during profiling
|
|
131
|
+
start_time_ns: 17740..., # CLOCK_REALTIME epoch nanos
|
|
132
|
+
duration_ns: 10000000, # profiling duration in nanos
|
|
133
|
+
aggregated_samples: [ # when aggregate: true (default)
|
|
134
|
+
[frames, weight, seq], # frames: [[path, label], ...] deepest-first
|
|
135
|
+
... # weight: Integer (nanoseconds, merged per unique stack)
|
|
136
|
+
], # seq: Integer (thread sequence, 1-based)
|
|
137
|
+
# --- OR ---
|
|
138
|
+
raw_samples: [ # when aggregate: false
|
|
139
|
+
[frames, weight, seq], # one entry per timer sample (not merged)
|
|
140
|
+
...
|
|
141
|
+
] }
|
|
112
142
|
```
|
|
113
143
|
|
|
114
144
|
### Rperf.save(path, data, format: nil)
|
|
@@ -168,14 +198,14 @@ Example output:
|
|
|
168
198
|
Total: 1523.4ms (cpu)
|
|
169
199
|
Samples: 4820, Frequency: 500Hz
|
|
170
200
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
201
|
+
Flat:
|
|
202
|
+
820.3 ms 53.8% Array#each (app/models/user.rb)
|
|
203
|
+
312.1 ms 20.5% JSON.parse (lib/json/parser.rb)
|
|
204
|
+
...
|
|
175
205
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
206
|
+
Cumulative:
|
|
207
|
+
1,401.2 ms 92.0% UsersController#index (app/controllers/users_controller.rb)
|
|
208
|
+
...
|
|
179
209
|
|
|
180
210
|
### Format auto-detection
|
|
181
211
|
|
|
@@ -281,6 +311,7 @@ Used internally by the CLI to pass options to the auto-started profiler:
|
|
|
281
311
|
RPERF_VERBOSE=1 Print statistics
|
|
282
312
|
RPERF_SIGNAL=N|false Timer signal number or 'false' for nanosleep (Linux only)
|
|
283
313
|
RPERF_STAT=1 Enable stat mode (used by rperf stat)
|
|
314
|
+
RPERF_STAT_REPORT=1 Include profile tables in stat output
|
|
284
315
|
|
|
285
316
|
## TIPS
|
|
286
317
|
|
data/exe/rperf
CHANGED
|
@@ -72,6 +72,7 @@ HELP_TEXT = File.read(File.expand_path("../docs/help.md", __dir__))
|
|
|
72
72
|
|
|
73
73
|
USAGE = "Usage: rperf record [options] command [args...]\n" \
|
|
74
74
|
" rperf stat [options] command [args...]\n" \
|
|
75
|
+
" rperf exec [options] command [args...]\n" \
|
|
75
76
|
" rperf report [options] [file]\n" \
|
|
76
77
|
" rperf diff [options] base.pb.gz target.pb.gz\n" \
|
|
77
78
|
" rperf help\n"
|
|
@@ -120,7 +121,7 @@ when "diff"
|
|
|
120
121
|
else exec("go", "tool", "pprof", "-http=localhost:#{find_available_port}", "-diff_base=#{base_file}", target_file)
|
|
121
122
|
end
|
|
122
123
|
end
|
|
123
|
-
when "record", "stat"
|
|
124
|
+
when "record", "stat", "exec"
|
|
124
125
|
# continue below
|
|
125
126
|
else
|
|
126
127
|
$stderr.puts "Unknown subcommand: #{subcommand.inspect}" if subcommand
|
|
@@ -128,17 +129,23 @@ else
|
|
|
128
129
|
exit 1
|
|
129
130
|
end
|
|
130
131
|
|
|
131
|
-
output = (subcommand == "
|
|
132
|
+
output = (subcommand == "record") ? "rperf.data" : nil
|
|
132
133
|
frequency = 1000
|
|
133
|
-
mode = (subcommand == "
|
|
134
|
+
mode = (subcommand == "record") ? "cpu" : "wall"
|
|
134
135
|
format = nil
|
|
135
136
|
signal = nil
|
|
136
137
|
verbose = false
|
|
138
|
+
aggregate = true
|
|
139
|
+
stat_report = (subcommand == "exec")
|
|
137
140
|
|
|
138
141
|
parser = OptionParser.new do |opts|
|
|
139
|
-
opts.banner =
|
|
142
|
+
opts.banner = case subcommand
|
|
143
|
+
when "record" then "Usage: rperf record [options] command [args...]"
|
|
144
|
+
when "stat" then "Usage: rperf stat [options] command [args...]"
|
|
145
|
+
when "exec" then "Usage: rperf exec [options] command [args...]"
|
|
146
|
+
end
|
|
140
147
|
|
|
141
|
-
opts.on("-o", "--output PATH", "Output file#{subcommand == '
|
|
148
|
+
opts.on("-o", "--output PATH", "Output file#{subcommand == 'record' ? ' (default: rperf.data)' : ' (default: none)'}") do |v|
|
|
142
149
|
output = v
|
|
143
150
|
end
|
|
144
151
|
|
|
@@ -146,21 +153,37 @@ parser = OptionParser.new do |opts|
|
|
|
146
153
|
frequency = v
|
|
147
154
|
end
|
|
148
155
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
156
|
+
default_mode = (subcommand == "record") ? "cpu" : "wall"
|
|
157
|
+
opts.on("-m", "--mode MODE", %w[cpu wall], "Profiling mode: cpu or wall (default: #{default_mode})") do |v|
|
|
158
|
+
mode = v
|
|
159
|
+
end
|
|
153
160
|
|
|
161
|
+
if subcommand == "record"
|
|
154
162
|
opts.on("--format FORMAT", %w[pprof collapsed text],
|
|
155
163
|
"Output format: pprof, collapsed, or text (default: auto from extension)") do |v|
|
|
156
164
|
format = v
|
|
157
165
|
end
|
|
166
|
+
|
|
167
|
+
opts.on("-p", "--print", "Print text profile to stdout (same as --format=text --output=/dev/stdout)") do
|
|
168
|
+
format = "text"
|
|
169
|
+
output = "/dev/stdout"
|
|
170
|
+
end
|
|
158
171
|
end
|
|
159
172
|
|
|
160
173
|
opts.on("--signal VALUE", "Timer signal (Linux only): signal number, or 'false' for nanosleep thread") do |v|
|
|
161
174
|
signal = (v == "false") ? "false" : v
|
|
162
175
|
end
|
|
163
176
|
|
|
177
|
+
opts.on("--no-aggregate", "Disable sample aggregation (keep raw samples)") do
|
|
178
|
+
aggregate = false
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
if subcommand == "stat"
|
|
182
|
+
opts.on("--report", "Include flat/cumulative profile tables in output") do
|
|
183
|
+
stat_report = true
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
|
|
164
187
|
opts.on("-v", "--verbose", "Print sampling statistics to stderr") do
|
|
165
188
|
verbose = true
|
|
166
189
|
end
|
|
@@ -187,6 +210,29 @@ if ARGV.empty?
|
|
|
187
210
|
exit 1
|
|
188
211
|
end
|
|
189
212
|
|
|
213
|
+
if frequency <= 0
|
|
214
|
+
$stderr.puts "Error: frequency must be a positive integer (got #{frequency})"
|
|
215
|
+
exit 1
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
if frequency > 10_000
|
|
219
|
+
$stderr.puts "Error: frequency must be <= 10000 (10KHz), got #{frequency}"
|
|
220
|
+
exit 1
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
if signal && signal != "false"
|
|
224
|
+
unless RUBY_PLATFORM =~ /linux/
|
|
225
|
+
$stderr.puts "Error: signal mode is only supported on Linux"
|
|
226
|
+
exit 1
|
|
227
|
+
end
|
|
228
|
+
sig_num = signal.to_i
|
|
229
|
+
uncatchable = [Signal.list["KILL"], Signal.list["STOP"]].compact
|
|
230
|
+
if uncatchable.include?(sig_num)
|
|
231
|
+
$stderr.puts "Error: signal #{sig_num} (#{Signal.signame(sig_num)}) cannot be caught; use a different signal"
|
|
232
|
+
exit 1
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
|
|
190
236
|
# Add lib dir to RUBYLIB so -rrperf can find the extension
|
|
191
237
|
lib_dir = File.expand_path("../lib", __dir__)
|
|
192
238
|
ENV["RUBYLIB"] = [lib_dir, ENV["RUBYLIB"]].compact.join(File::PATH_SEPARATOR)
|
|
@@ -198,10 +244,12 @@ ENV["RPERF_MODE"] = mode
|
|
|
198
244
|
ENV["RPERF_FORMAT"] = format if format
|
|
199
245
|
ENV["RPERF_VERBOSE"] = "1" if verbose
|
|
200
246
|
ENV["RPERF_SIGNAL"] = signal if signal
|
|
247
|
+
ENV["RPERF_AGGREGATE"] = "0" unless aggregate
|
|
201
248
|
|
|
202
|
-
if subcommand == "stat"
|
|
249
|
+
if subcommand == "stat" || subcommand == "exec"
|
|
203
250
|
ENV["RPERF_STAT"] = "1"
|
|
204
251
|
ENV["RPERF_STAT_COMMAND"] = ARGV.join(" ")
|
|
252
|
+
ENV["RPERF_STAT_REPORT"] = "1" if stat_report
|
|
205
253
|
end
|
|
206
254
|
|
|
207
255
|
exec(*ARGV)
|