fluent-plugin-perf-tools 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rubocop.yml +26 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +43 -0
- data/Rakefile +17 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/fluent-plugin-perf-tools.gemspec +48 -0
- data/lib/fluent/plugin/in_perf_tools.rb +42 -0
- data/lib/fluent/plugin/perf_tools/cachestat.rb +65 -0
- data/lib/fluent/plugin/perf_tools/command.rb +30 -0
- data/lib/fluent/plugin/perf_tools/version.rb +9 -0
- data/lib/fluent/plugin/perf_tools.rb +11 -0
- data/perf-tools/LICENSE +339 -0
- data/perf-tools/README.md +205 -0
- data/perf-tools/bin/bitesize +1 -0
- data/perf-tools/bin/cachestat +1 -0
- data/perf-tools/bin/execsnoop +1 -0
- data/perf-tools/bin/funccount +1 -0
- data/perf-tools/bin/funcgraph +1 -0
- data/perf-tools/bin/funcslower +1 -0
- data/perf-tools/bin/functrace +1 -0
- data/perf-tools/bin/iolatency +1 -0
- data/perf-tools/bin/iosnoop +1 -0
- data/perf-tools/bin/killsnoop +1 -0
- data/perf-tools/bin/kprobe +1 -0
- data/perf-tools/bin/opensnoop +1 -0
- data/perf-tools/bin/perf-stat-hist +1 -0
- data/perf-tools/bin/reset-ftrace +1 -0
- data/perf-tools/bin/syscount +1 -0
- data/perf-tools/bin/tcpretrans +1 -0
- data/perf-tools/bin/tpoint +1 -0
- data/perf-tools/bin/uprobe +1 -0
- data/perf-tools/deprecated/README.md +1 -0
- data/perf-tools/deprecated/execsnoop-proc +150 -0
- data/perf-tools/deprecated/execsnoop-proc.8 +80 -0
- data/perf-tools/deprecated/execsnoop-proc_example.txt +46 -0
- data/perf-tools/disk/bitesize +175 -0
- data/perf-tools/examples/bitesize_example.txt +63 -0
- data/perf-tools/examples/cachestat_example.txt +58 -0
- data/perf-tools/examples/execsnoop_example.txt +153 -0
- data/perf-tools/examples/funccount_example.txt +126 -0
- data/perf-tools/examples/funcgraph_example.txt +2178 -0
- data/perf-tools/examples/funcslower_example.txt +110 -0
- data/perf-tools/examples/functrace_example.txt +341 -0
- data/perf-tools/examples/iolatency_example.txt +350 -0
- data/perf-tools/examples/iosnoop_example.txt +302 -0
- data/perf-tools/examples/killsnoop_example.txt +62 -0
- data/perf-tools/examples/kprobe_example.txt +379 -0
- data/perf-tools/examples/opensnoop_example.txt +47 -0
- data/perf-tools/examples/perf-stat-hist_example.txt +149 -0
- data/perf-tools/examples/reset-ftrace_example.txt +88 -0
- data/perf-tools/examples/syscount_example.txt +297 -0
- data/perf-tools/examples/tcpretrans_example.txt +93 -0
- data/perf-tools/examples/tpoint_example.txt +210 -0
- data/perf-tools/examples/uprobe_example.txt +321 -0
- data/perf-tools/execsnoop +292 -0
- data/perf-tools/fs/cachestat +167 -0
- data/perf-tools/images/perf-tools_2016.png +0 -0
- data/perf-tools/iolatency +296 -0
- data/perf-tools/iosnoop +296 -0
- data/perf-tools/kernel/funccount +146 -0
- data/perf-tools/kernel/funcgraph +259 -0
- data/perf-tools/kernel/funcslower +248 -0
- data/perf-tools/kernel/functrace +192 -0
- data/perf-tools/kernel/kprobe +270 -0
- data/perf-tools/killsnoop +263 -0
- data/perf-tools/man/man8/bitesize.8 +70 -0
- data/perf-tools/man/man8/cachestat.8 +111 -0
- data/perf-tools/man/man8/execsnoop.8 +104 -0
- data/perf-tools/man/man8/funccount.8 +76 -0
- data/perf-tools/man/man8/funcgraph.8 +166 -0
- data/perf-tools/man/man8/funcslower.8 +129 -0
- data/perf-tools/man/man8/functrace.8 +123 -0
- data/perf-tools/man/man8/iolatency.8 +116 -0
- data/perf-tools/man/man8/iosnoop.8 +169 -0
- data/perf-tools/man/man8/killsnoop.8 +100 -0
- data/perf-tools/man/man8/kprobe.8 +162 -0
- data/perf-tools/man/man8/opensnoop.8 +113 -0
- data/perf-tools/man/man8/perf-stat-hist.8 +111 -0
- data/perf-tools/man/man8/reset-ftrace.8 +49 -0
- data/perf-tools/man/man8/syscount.8 +96 -0
- data/perf-tools/man/man8/tcpretrans.8 +93 -0
- data/perf-tools/man/man8/tpoint.8 +140 -0
- data/perf-tools/man/man8/uprobe.8 +168 -0
- data/perf-tools/misc/perf-stat-hist +223 -0
- data/perf-tools/net/tcpretrans +311 -0
- data/perf-tools/opensnoop +280 -0
- data/perf-tools/syscount +192 -0
- data/perf-tools/system/tpoint +232 -0
- data/perf-tools/tools/reset-ftrace +123 -0
- data/perf-tools/user/uprobe +390 -0
- metadata +349 -0
@@ -0,0 +1,93 @@
|
|
1
|
+
.TH tcpretrans 8 "2014-07-31" "USER COMMANDS"
|
2
|
+
.SH NAME
|
3
|
+
tcpretrans \- show TCP retransmits, with address and other details. Uses Linux ftrace.
|
4
|
+
.SH SYNOPSIS
|
5
|
+
.B tcpretrans
|
6
|
+
[\-hls]
|
7
|
+
.SH DESCRIPTION
|
8
|
+
This traces TCP retransmits that are sent by the system tcpretrans is executed
|
9
|
+
from, showing address, port, and TCP state information,
|
10
|
+
and sometimes the PID (although usually not, since retransmits are usually
|
11
|
+
sent by the kernel on timeout events). To keep overhead low, only
|
12
|
+
tcp_retransmit_skb() kernel calls are traced (this does not trace every packet).
|
13
|
+
|
14
|
+
This was written as a proof of concept for ftrace, for older Linux systems,
|
15
|
+
and without kernel debuginfo. It uses dynamic tracing of tcp_retransmit_skb(),
|
16
|
+
and reads /proc/net/tcp for socket details. Its use of dynamic tracing and
|
17
|
+
CPU registers is an unstable platform-specific workaround, and may require
|
18
|
+
modifications to work on different kernels and platforms. This would be better
|
19
|
+
written using a tracer such as SystemTap, and will likely be rewritten in the
|
20
|
+
future when certain tracing features are added to the Linux kernel.
|
21
|
+
|
22
|
+
When \-l is used, this also uses dynamic tracing of tcp_send_loss_probe() and
|
23
|
+
a register.
|
24
|
+
|
25
|
+
Currently only IPv4 is supported, on x86_64. If you try this on a different
|
26
|
+
architecture, you'll likely need to adjust the register locations (search
|
27
|
+
for %di).
|
28
|
+
|
29
|
+
Since this uses ftrace, only the root user can use this tool.
|
30
|
+
.SH REQUIREMENTS
|
31
|
+
FTRACE and KPROBE CONFIG, tcp_retransmit_skb() kernel function.
|
32
|
+
You may already have these on recent kernels. And Perl.
|
33
|
+
TCP tail loss probes were added in Linux 3.10.
|
34
|
+
.SH OPTIONS
|
35
|
+
.TP
|
36
|
+
\-h
|
37
|
+
Print usage message.
|
38
|
+
.TP
|
39
|
+
\-s
|
40
|
+
Include kernel stack traces.
|
41
|
+
.TP
|
42
|
+
\-l
|
43
|
+
Include TCP tail loss probes.
|
44
|
+
.SH EXAMPLES
|
45
|
+
.TP
|
46
|
+
Trace TCP retransmits
|
47
|
+
#
|
48
|
+
.B tcpretrans
|
49
|
+
.TP
|
50
|
+
TIME
|
51
|
+
Time of retransmit (may be rounded up to the nearest second).
|
52
|
+
.TP
|
53
|
+
PID
|
54
|
+
Process ID that was on-CPU. This is less useful than it might sound, as it
|
55
|
+
may usually be 0, for the kernel, for timer-based retransmits.
|
56
|
+
.TP
|
57
|
+
LADDR
|
58
|
+
Local address.
|
59
|
+
.TP
|
60
|
+
LPORT
|
61
|
+
Local port.
|
62
|
+
.TP
|
63
|
+
\-\-
|
64
|
+
Packet type: "R>" for retransmit, and "L>" for tail loss probe.
|
65
|
+
.TP
|
66
|
+
RADDR
|
67
|
+
Remote address.
|
68
|
+
.TP
|
69
|
+
RPORT
|
70
|
+
Remote port.
|
71
|
+
.TP
|
72
|
+
STATE
|
73
|
+
TCP session state.
|
74
|
+
.SH OVERHEAD
|
75
|
+
The CPU overhead is relative to the rate of TCP retransmits, and is
|
76
|
+
designed to be low as this does not examine every packet. Once per second the
|
77
|
+
/proc/net/tcp file is read, and a buffer of retransmit trace events is
|
78
|
+
retrieved from the kernel and processed.
|
79
|
+
.SH SOURCE
|
80
|
+
This is from the perf-tools collection.
|
81
|
+
.IP
|
82
|
+
https://github.com/brendangregg/perf-tools
|
83
|
+
.PP
|
84
|
+
Also look under the examples directory for a text file containing example
|
85
|
+
usage, output, and commentary for this tool.
|
86
|
+
.SH OS
|
87
|
+
Linux
|
88
|
+
.SH STABILITY
|
89
|
+
Unstable - in development.
|
90
|
+
.SH AUTHOR
|
91
|
+
Brendan Gregg
|
92
|
+
.SH SEE ALSO
|
93
|
+
tcpdump(1)
|
@@ -0,0 +1,140 @@
|
|
1
|
+
.TH tpoint 8 "2014-07-20" "USER COMMANDS"
|
2
|
+
.SH NAME
|
3
|
+
tpoint \- trace a given tracepoint. Static tracing. Uses Linux ftrace.
|
4
|
+
.SH SYNOPSIS
|
5
|
+
.B tpoint
|
6
|
+
[\-hHsv] [\-d secs] [\-p PID] [\-L TID] tracepoint [filter]
|
7
|
+
|
8
|
+
.B tpoint
|
9
|
+
\-l
|
10
|
+
.SH DESCRIPTION
|
11
|
+
This will enable a given tracepoint, print events, then disable the tracepoint
|
12
|
+
when the program ends. This is like a simple version of the "perf" command for
|
13
|
+
printing live tracepoint events only. Wildcards are currently not supported.
|
14
|
+
If for any reason tpoint(8) is insufficient, use the more powerful perf
|
15
|
+
command for tracing tracepoints instead.
|
16
|
+
|
17
|
+
Beware of feedback loops: tracing tcp functions over an ssh session,
|
18
|
+
or writing ext4 events to an ext4 file system. For the former, tcp
|
19
|
+
trace data could be redirected to a file (as in the usage message). For
|
20
|
+
the latter, trace to the screen or a different file system.
|
21
|
+
|
22
|
+
Since this uses ftrace, only the root user can use this tool.
|
23
|
+
.SH REQUIREMENTS
|
24
|
+
FTRACE CONFIG and tracepoints, which you may already have enabled and available
|
25
|
+
on recent kernels.
|
26
|
+
.SH OPTIONS
|
27
|
+
.TP
|
28
|
+
\-d seconds
|
29
|
+
Set the duration of tracing, in seconds. Trace output will be buffered and
|
30
|
+
printed at the end. This also reduces overheads by buffering in-kernel,
|
31
|
+
instead of printing events as they occur.
|
32
|
+
|
33
|
+
The ftrace buffer has a fixed size per-CPU (see
|
34
|
+
/sys/kernel/debug/tracing/buffer_size_kb). If you think events are missing,
|
35
|
+
try increasing that size.
|
36
|
+
.TP
|
37
|
+
\-h
|
38
|
+
Print usage message.
|
39
|
+
.TP
|
40
|
+
\-H
|
41
|
+
Print column headers.
|
42
|
+
.TP
|
43
|
+
\-l
|
44
|
+
List tracepoints only.
|
45
|
+
.TP
|
46
|
+
\-s
|
47
|
+
Print kernel stack traces after each event.
|
48
|
+
.TP
|
49
|
+
\-v
|
50
|
+
Show the tpoint format file only (do not trace), identifying possible variables
|
51
|
+
for use in a custom filter.
|
52
|
+
.TP
|
53
|
+
\-p PID
|
54
|
+
Only trace kernel functions when this process ID is on-CPU.
|
55
|
+
.TP
|
56
|
+
\-L TID
|
57
|
+
Only trace kernel functions when this thread ID is on-CPU.
|
58
|
+
.TP
|
59
|
+
tracepoint
|
60
|
+
A tracepoint name. Eg, block:block_rq_issue. See the EXAMPLES section.
|
61
|
+
.TP
|
62
|
+
filter
|
63
|
+
An ftrace filter definition.
|
64
|
+
.SH EXAMPLES
|
65
|
+
.TP
|
66
|
+
List tracepoints containing "open":
|
67
|
+
#
|
68
|
+
.B tpoint -l | grep open
|
69
|
+
.TP
|
70
|
+
Trace open() syscall entry:
|
71
|
+
#
|
72
|
+
.B tpoint syscalls:sys_enter_open
|
73
|
+
.TP
|
74
|
+
Trace open() syscall entry, showing column headers:
|
75
|
+
#
|
76
|
+
.B tpoint -H syscalls:sys_enter_open
|
77
|
+
.TP
|
78
|
+
Trace block I/O issue:
|
79
|
+
#
|
80
|
+
.B tpoint block:block_rq_issue
|
81
|
+
.TP
|
82
|
+
Trace block I/O issue with stack traces:
|
83
|
+
#
|
84
|
+
.B tpoint \-s block:block_rq_issue
|
85
|
+
.SH FIELDS
|
86
|
+
The output format depends on the kernel version, and headings can be printed
|
87
|
+
using \-H. The format is the same as the ftrace function trace format, described
|
88
|
+
in the kernel source under Documentation/trace/ftrace.txt.
|
89
|
+
|
90
|
+
Typical fields are:
|
91
|
+
.TP
|
92
|
+
TASK-PID
|
93
|
+
The process name (which could include dashes), a dash, and the process ID.
|
94
|
+
.TP
|
95
|
+
CPU#
|
96
|
+
The CPU ID, in brackets.
|
97
|
+
.TP
|
98
|
+
||||
|
99
|
+
Kernel state flags. For example, on Linux 3.16 these are for irqs-off,
|
100
|
+
need-resched, hardirq/softirq, and preempt-depth.
|
101
|
+
.TP
|
102
|
+
TIMESTAMP
|
103
|
+
Time of event, in seconds.
|
104
|
+
.TP
|
105
|
+
FUNCTION
|
106
|
+
Kernel function name.
|
107
|
+
.SH OVERHEAD
|
108
|
+
This can generate a lot of trace data quickly, depending on the
|
109
|
+
frequency of the traced events. Such data will cause performance overheads.
|
110
|
+
This also works without buffering by default, printing function events
|
111
|
+
as they happen (uses trace_pipe), context switching and consuming CPU to do
|
112
|
+
so. If needed, you can try the "\-d secs" option, which buffers events
|
113
|
+
instead, reducing overhead. If you think the buffer option is losing events,
|
114
|
+
try increasing the buffer size (buffer_size_kb).
|
115
|
+
|
116
|
+
Before using tpoint(8), you can use perf_events to count the rate of events
|
117
|
+
for the tracepoint of interest, to gauge overhead. For example:
|
118
|
+
|
119
|
+
.B perf stat \-e block:block_rq_issue \-a sleep 5
|
120
|
+
|
121
|
+
That counts the occurrences of the block:block_rq_issue tracepoint for
|
122
|
+
5 seconds.
|
123
|
+
|
124
|
+
Also consider using perf_events, which manages buffers differently and more
|
125
|
+
efficiently, for higher frequency applications.
|
126
|
+
.SH SOURCE
|
127
|
+
This is from the perf-tools collection:
|
128
|
+
.IP
|
129
|
+
https://github.com/brendangregg/perf-tools
|
130
|
+
.PP
|
131
|
+
Also look under the examples directory for a text file containing example
|
132
|
+
usage, output, and commentary for this tool.
|
133
|
+
.SH OS
|
134
|
+
Linux
|
135
|
+
.SH STABILITY
|
136
|
+
Unstable - in development.
|
137
|
+
.SH AUTHOR
|
138
|
+
Brendan Gregg
|
139
|
+
.SH SEE ALSO
|
140
|
+
functrace(8), funccount(8), perf(1)
|
@@ -0,0 +1,168 @@
|
|
1
|
+
.TH uprobe 8 "2014-07-20" "USER COMMANDS"
|
2
|
+
.SH NAME
|
3
|
+
uprobe \- trace a given uprobe definition. User-level dynamic tracing. Uses Linux ftrace. EXPERIMENTAL.
|
4
|
+
.SH SYNOPSIS
|
5
|
+
.B uprobe
|
6
|
+
[\-FhHsv] [\-d secs] [\-p PID] [\-L TID] {\-l target | uprobe_definition [filter]}
|
7
|
+
.SH DESCRIPTION
|
8
|
+
This will create, trace, then destroy a given uprobe definition. See
|
9
|
+
Documentation/trace/uprobetracer.txt in the Linux kernel source for the
|
10
|
+
syntax of a uprobe definition, and "uprobe -h" for examples. With this tool,
|
11
|
+
the probe alias is optional (it will default to something meaningful).
|
12
|
+
|
13
|
+
WARNING: This uses dynamic tracing of user-level functions, using some
|
14
|
+
relatively new kernel code. I have seen this cause target processes to fail,
|
15
|
+
either entering endless spin loops or crashing on illegal instructions. I
|
16
|
+
believe newer kernels (post 4.0) are relatively safer, but use caution. Test
|
17
|
+
in a lab environment, and know what you are doing, before use. Also consider
|
18
|
+
other (more developed) user-level tracers (perf_events, LTTng, etc.).
|
19
|
+
|
20
|
+
Use extreme caution with the raw address mode: eg, "p:libc:0xbf130". uprobe
|
21
|
+
does not check for instruction alignment, so tracing the wrong address (eg,
|
22
|
+
mid-way through a multi-byte instruction) will corrupt the target's memory.
|
23
|
+
Other tracers (eg, perf_events with debuginfo) check alignment.
|
24
|
+
|
25
|
+
Also beware of widespread tracing that interferes with the operation of the
|
26
|
+
system, eg, tracing libc:malloc, which by-default will trace _all_ processes.
|
27
|
+
|
28
|
+
I wrote this because I kept testing different custom uprobes at the command
|
29
|
+
line, and wanted a way to automate the steps. For generic user-level
|
30
|
+
tracing, use perf_events directly.
|
31
|
+
|
32
|
+
Since this uses ftrace, only the root user can use this tool.
|
33
|
+
.SH REQUIREMENTS
|
34
|
+
FTRACE and UPROBE CONFIG, which you may already have on recent
|
35
|
+
kernel versions, file(1), ldconfig(8), objdump(1), and some version of awk.
|
36
|
+
Also, currently only executes on Linux 4.0+ (see WARNING) unless -F is used.
|
37
|
+
.SH OPTIONS
|
38
|
+
.TP
|
39
|
+
\-F
|
40
|
+
Force. Trace despite kernel version warnings. Use on older kernels may expose
|
41
|
+
you to (since fixed) bugs, which can lock up or crash target processes, which
|
42
|
+
could also lock up the entire system. Test in a lab environment before use,
|
43
|
+
and consider other more developed user-level tracers (perf_events, LTTng,
|
44
|
+
etc.).
|
45
|
+
.TP
|
46
|
+
\-d seconds
|
47
|
+
Set the duration of tracing, in seconds. Trace output will be buffered and
|
48
|
+
printed at the end. This also reduces overheads by buffering in-kernel,
|
49
|
+
instead of printing events as they occur.
|
50
|
+
|
51
|
+
The ftrace buffer has a fixed size per-CPU (see
|
52
|
+
/sys/kernel/debug/tracing/buffer_size_kb). If you think events are missing,
|
53
|
+
try increasing that size.
|
54
|
+
.TP
|
55
|
+
\-h
|
56
|
+
Print usage message.
|
57
|
+
.TP
|
58
|
+
\-H
|
59
|
+
Print column headers.
|
60
|
+
.TP
|
61
|
+
\-s
|
62
|
+
Print user-level stack traces after each event. These are currently printed
|
63
|
+
in hex, and need post-processing to see user-level symbols (eg, addr2line;
|
64
|
+
I should automate that).
|
65
|
+
.TP
|
66
|
+
\-v
|
67
|
+
Show the uprobe format file only (do not trace), identifying possible variables
|
68
|
+
for use in a custom filter.
|
69
|
+
.TP
|
70
|
+
\-p PID
|
71
|
+
Only trace user-level functions when this process ID is on-CPU.
|
72
|
+
.TP
|
73
|
+
\-L TID
|
74
|
+
Only trace user-level functions when this thread ID is on-CPU.
|
75
|
+
.TP
|
76
|
+
uprobe_definition
|
77
|
+
A full uprobe definition, as documented by Documentation/trace/uprobetracer.txt
|
78
|
+
in the Linux kernel source. Note that the probe alias name is optional with
|
79
|
+
uprobe(8), and if not specified, it will default to something meaningful.
|
80
|
+
See the EXAMPLES section.
|
81
|
+
.TP
|
82
|
+
filter
|
83
|
+
An ftrace filter definition.
|
84
|
+
.SH EXAMPLES
|
85
|
+
These examples may need modification to match your target software function
|
86
|
+
names and platform's register usage. If using platform specific registers
|
87
|
+
becomes too painful in practice, consider a debuginfo-based tracer,
|
88
|
+
which can trace variable names instead (eg, perf_events).
|
89
|
+
.TP
|
90
|
+
trace readline() calls in all running "bash" executables:
|
91
|
+
#
|
92
|
+
.B uprobe p:bash:readline
|
93
|
+
.TP
|
94
|
+
trace readline() with explicit executable path:
|
95
|
+
#
|
96
|
+
.B uprobe p:/bin/bash:readline
|
97
|
+
.TP
|
98
|
+
trace the return of readline() with return value as a string:
|
99
|
+
#
|
100
|
+
.B uprobe 'r:bash:readline +0($retval):string'
|
101
|
+
.TP
|
102
|
+
trace sleep() calls in all running libc shared libraries:
|
103
|
+
#
|
104
|
+
.B uprobe p:libc:sleep
|
105
|
+
.TP
|
106
|
+
trace sleep() with register %di (x86):
|
107
|
+
#
|
108
|
+
.B uprobe 'p:libc:sleep %di'
|
109
|
+
.TP
|
110
|
+
trace this address (use caution: must be instruction aligned):
|
111
|
+
#
|
112
|
+
.B uprobe p:libc:0xbf130
|
113
|
+
.TP
|
114
|
+
trace gettimeofday() for PID 1182 only:
|
115
|
+
#
|
116
|
+
.B uprobe -p 1182 p:libc:gettimeofday
|
117
|
+
.TP
|
118
|
+
trace the return of fopen() only when it returns NULL:
|
119
|
+
#
|
120
|
+
.B uprobe 'r:libc:fopen file=$retval' 'file == 0'
|
121
|
+
.SH FIELDS
|
122
|
+
The output format depends on the kernel version, and headings can be printed
|
123
|
+
using \-H. The format is the same as the ftrace function trace format, described
|
124
|
+
in the kernel source under Documentation/trace/ftrace.txt.
|
125
|
+
|
126
|
+
Typical fields are:
|
127
|
+
.TP
|
128
|
+
TASK-PID
|
129
|
+
The process name (which could include dashes), a dash, and the process ID.
|
130
|
+
.TP
|
131
|
+
CPU#
|
132
|
+
The CPU ID, in brackets.
|
133
|
+
.TP
|
134
|
+
||||
|
135
|
+
Kernel state flags. For example, on Linux 3.16 these are for irqs-off,
|
136
|
+
need-resched, hardirq/softirq, and preempt-depth.
|
137
|
+
.TP
|
138
|
+
TIMESTAMP
|
139
|
+
Time of event, in seconds.
|
140
|
+
.TP
|
141
|
+
FUNCTION
|
142
|
+
User-level function name.
|
143
|
+
.SH OVERHEAD
|
144
|
+
This can generate a lot of trace data quickly, depending on the
|
145
|
+
frequency of the traced events. Such data will cause performance overheads.
|
146
|
+
This also works without buffering by default, printing function events
|
147
|
+
as they happen (uses trace_pipe), context switching and consuming CPU to do
|
148
|
+
so. If needed, you can try the "\-d secs" option, which buffers events
|
149
|
+
instead, reducing overhead. If you think the buffer option is losing events,
|
150
|
+
try increasing the buffer size (buffer_size_kb).
|
151
|
+
|
152
|
+
If you find a use for uprobe(8) where the overhead is prohibitive, consider
|
153
|
+
enabling the same probe using perf_events, where overhead should be reduced.
|
154
|
+
.SH SOURCE
|
155
|
+
This is from the perf-tools collection:
|
156
|
+
.IP
|
157
|
+
https://github.com/brendangregg/perf-tools
|
158
|
+
.PP
|
159
|
+
Also look under the examples directory for a text file containing example
|
160
|
+
usage, output, and commentary for this tool.
|
161
|
+
.SH OS
|
162
|
+
Linux
|
163
|
+
.SH STABILITY
|
164
|
+
Unstable - in development.
|
165
|
+
.SH AUTHOR
|
166
|
+
Brendan Gregg
|
167
|
+
.SH SEE ALSO
|
168
|
+
kprobe(8)
|
@@ -0,0 +1,223 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
#
|
3
|
+
# perf-stat-hist - perf_events stat histogram hack.
|
4
|
+
# Written using Linux perf_events (aka "perf").
|
5
|
+
#
|
6
|
+
# This is a proof-of-concept showing in-kernel histogram summaries of a
|
7
|
+
# tracepoint variable.
|
8
|
+
#
|
9
|
+
# USAGE: perf-stat-hist [-h] [-b buckets|-P power] [-m max] tracepoint
|
10
|
+
# variable [seconds]
|
11
|
+
#
|
12
|
+
# Run "perf-stat-hist -h" for full usage.
|
13
|
+
#
|
14
|
+
# This uses multiple counting tracepoints with different filters, one for each
|
15
|
+
# histogram bucket. While this is summarized in-kernel, the use of multiple
|
16
|
+
# tracepoints does add additional overhead, which is more evident if you change
|
17
|
+
# the power-of size from 4 to 2 (which creates more buckets). Hopefully, in the
|
18
|
+
# future this functionality will be provided in an efficient way from
|
19
|
+
# perf_events itself, at which point this tool can be rewritten.
|
20
|
+
#
|
21
|
+
# From perf-tools: https://github.com/brendangregg/perf-tools
|
22
|
+
#
|
23
|
+
# COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
|
24
|
+
#
|
25
|
+
# This program is free software; you can redistribute it and/or
|
26
|
+
# modify it under the terms of the GNU General Public License
|
27
|
+
# as published by the Free Software Foundation; either version 2
|
28
|
+
# of the License, or (at your option) any later version.
|
29
|
+
#
|
30
|
+
# This program is distributed in the hope that it will be useful,
|
31
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
32
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
33
|
+
# GNU General Public License for more details.
|
34
|
+
#
|
35
|
+
# You should have received a copy of the GNU General Public License
|
36
|
+
# along with this program; if not, write to the Free Software Foundation,
|
37
|
+
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
38
|
+
#
|
39
|
+
# (http://www.gnu.org/copyleft/gpl.html)
|
40
|
+
#
|
41
|
+
# 30-Jun-2014 Brendan Gregg Created this.
|
42
|
+
|
43
|
+
opt_buckets=0; buckets=; opt_power=0; power=4; opt_max=0; max=$((1024 * 1024))
|
44
|
+
opt_filter=0; filter=; duration=0; debug=0
|
45
|
+
trap ':' INT QUIT TERM PIPE HUP
|
46
|
+
|
47
|
+
function usage {
|
48
|
+
cat <<-END >&2
|
49
|
+
USAGE: perf-stat-hist [-h] [-b buckets|-P power] [-m max] [-f filter]
|
50
|
+
tracepoint variable [seconds]
|
51
|
+
-b buckets # specify histogram bucket points
|
52
|
+
-P power # power-of (default is 4)
|
53
|
+
-m max # max value for power-of
|
54
|
+
-f filter # specify a filter
|
55
|
+
-h # this usage message
|
56
|
+
eg,
|
57
|
+
perf-stat-hist syscalls:sys_enter_read count 5
|
58
|
+
# read() request histogram, 5 seconds
|
59
|
+
perf-stat-hist syscalls:sys_exit_read ret 5
|
60
|
+
# read() return histogram, 5 seconds
|
61
|
+
perf-stat-hist -P 10 syscalls:sys_exit_read ret 5
|
62
|
+
# ... use power-of-10
|
63
|
+
perf-stat-hist -P 2 -m 1024 syscalls:sys_exit_read ret 5
|
64
|
+
# ... use power-of-2, max 1024
|
65
|
+
perf-stat-hist -b "10 50 100 500" syscalls:sys_exit_read ret 5
|
66
|
+
# ... histogram based on these bucket ranges
|
67
|
+
perf-stat-hist -b 10 syscalls:sys_exit_read ret 5
|
68
|
+
# ... bifurcate by the value 10 (lowest overhead)
|
69
|
+
perf-stat-hist -f 'rwbs == "WS"' block:block_rq_complete nr_sector 5
|
70
|
+
# ... synchronous writes histogram, 5 seconds
|
71
|
+
|
72
|
+
See the man page and example file for more info.
|
73
|
+
END
|
74
|
+
exit
|
75
|
+
}
|
76
|
+
|
77
|
+
function die {
|
78
|
+
echo >&2 "$@"
|
79
|
+
exit 1
|
80
|
+
}
|
81
|
+
|
82
|
+
### process options
|
83
|
+
while getopts b:hm:P:f: opt
|
84
|
+
do
|
85
|
+
case $opt in
|
86
|
+
b) opt_buckets=1; buckets=($OPTARG) ;;
|
87
|
+
P) opt_power=1; power=$OPTARG ;;
|
88
|
+
m) opt_max=1; max=$OPTARG ;;
|
89
|
+
f) opt_filter=1; filter="$OPTARG && " ;;
|
90
|
+
h|?) usage ;;
|
91
|
+
esac
|
92
|
+
done
|
93
|
+
shift $(( $OPTIND - 1 ))
|
94
|
+
(( $# < 2 )) && usage
|
95
|
+
tpoint=$1 # tracepoint
|
96
|
+
var=$2 # variable for histogram
|
97
|
+
duration=${3}
|
98
|
+
|
99
|
+
### option logic
|
100
|
+
(( opt_buckets && opt_power )) && die "ERROR: use either -b or -P"
|
101
|
+
(( opt_power && power < 2 )) && die "ERROR: -P power must be 2 or higher"
|
102
|
+
|
103
|
+
### check that tracepoint exists
|
104
|
+
if ! grep "^$tpoint\$" /sys/kernel/debug/tracing/available_events > /dev/null
|
105
|
+
then
|
106
|
+
echo >&2 "ERROR: tracepoint \"$tpoint\" not found. Exiting..."
|
107
|
+
[[ "$USER" != "root" ]] && echo >&2 "Not root user?"
|
108
|
+
exit 1
|
109
|
+
fi
|
110
|
+
|
111
|
+
### auto build power-of buckets
|
112
|
+
if (( !opt_buckets )); then
|
113
|
+
b=0
|
114
|
+
s=1
|
115
|
+
while (( s <= max )); do
|
116
|
+
b="$b $s"
|
117
|
+
(( s *= power ))
|
118
|
+
done
|
119
|
+
buckets=($b)
|
120
|
+
fi
|
121
|
+
|
122
|
+
### build list of tracepoints and filters for each histogram bucket
|
123
|
+
max=${buckets[${#buckets[@]} - 1]} # last element
|
124
|
+
((max_i = ${#buckets[*]} - 1))
|
125
|
+
tpoints="-e $tpoint --filter \"$filter $var < ${buckets[0]}\""
|
126
|
+
awkarray=
|
127
|
+
i=0
|
128
|
+
while (( i < max_i )); do
|
129
|
+
if (( i && ${buckets[$i]} <= ${buckets[$i - 1]} )); then
|
130
|
+
die "ERROR: bucket list must increase in size."
|
131
|
+
fi
|
132
|
+
tpoints="$tpoints -e $tpoint --filter \"$filter $var >= ${buckets[$i]} && "
|
133
|
+
tpoints="$tpoints $var < ${buckets[$i + 1]}\""
|
134
|
+
awkarray="$awkarray buckets[$i]=${buckets[$i]};"
|
135
|
+
(( i++ ))
|
136
|
+
done
|
137
|
+
awkarray="$awkarray buckets[$max_i]=${buckets[$max_i]};"
|
138
|
+
tpoints="$tpoints -e $tpoint --filter \"$filter $var >= ${buckets[$max_i]}\""
|
139
|
+
|
140
|
+
if (( debug )); then
|
141
|
+
echo buckets: ${buckets[*]}
|
142
|
+
echo tracepoints: $tpoints
|
143
|
+
echo awkarray: ${awkarray[*]}
|
144
|
+
fi
|
145
|
+
|
146
|
+
### prepare to run
|
147
|
+
if (( duration )); then
|
148
|
+
etext="for $duration seconds"
|
149
|
+
cmd="sleep $duration"
|
150
|
+
else
|
151
|
+
etext="until Ctrl-C"
|
152
|
+
cmd="sleep 999999"
|
153
|
+
fi
|
154
|
+
|
155
|
+
p_tpoint=$tpoint
|
156
|
+
if [ -n "$filter" ]; then
|
157
|
+
p_tpoint="$tpoint (Filter: ${filter%????})"
|
158
|
+
fi
|
159
|
+
|
160
|
+
if (( opt_buckets )); then
|
161
|
+
echo "Tracing $p_tpoint, specified buckets, $etext..."
|
162
|
+
else
|
163
|
+
echo "Tracing $p_tpoint, power-of-$power, max $max, $etext..."
|
164
|
+
fi
|
165
|
+
|
166
|
+
### run perf
|
167
|
+
out="-o /dev/stdout" # a workaround needed in linux 3.2; not by 3.4.15
|
168
|
+
stat=$(eval perf stat $tpoints -a $out $cmd 2>&1)
|
169
|
+
if (( $? != 0 )); then
|
170
|
+
echo >&2 "ERROR running perf:"
|
171
|
+
echo >&2 "$stat"
|
172
|
+
# perf failed: exit non-zero so callers see the failure (a bare "exit" would return the status of the preceding echo, i.e. 0).
exit 1
|
173
|
+
fi
|
174
|
+
|
175
|
+
if (( debug )); then
|
176
|
+
echo raw output:
|
177
|
+
echo "$stat"
|
178
|
+
echo
|
179
|
+
fi
|
180
|
+
|
181
|
+
### find max value for ASCII histogram
|
182
|
+
most=$(echo "$stat" | awk -v tpoint=$tpoint '
|
183
|
+
$2 == tpoint { gsub(/,/, ""); if ($1 > m) { m = $1 } }
|
184
|
+
END { print m }'
|
185
|
+
)
|
186
|
+
|
187
|
+
### process output
|
188
|
+
echo
|
189
|
+
echo "$stat" | awk -v tpoint=$tpoint -v max_i=$max_i -v most=$most '
|
190
|
+
function star(sval, smax, swidth) {
|
191
|
+
stars = ""
|
192
|
+
if (smax == 0) return ""
|
193
|
+
for (si = 0; si < (swidth * sval / smax); si++) {
|
194
|
+
stars = stars "#"
|
195
|
+
}
|
196
|
+
return stars
|
197
|
+
}
|
198
|
+
BEGIN {
|
199
|
+
'"$awkarray"'
|
200
|
+
printf(" %-15s: %-8s %s\n", "Range", "Count",
|
201
|
+
"Distribution")
|
202
|
+
}
|
203
|
+
/Performance counter stats/ { i = -1 }
|
204
|
+
# reverse order of rule set is important
|
205
|
+
{ ok = 0 }
|
206
|
+
$2 == tpoint { num = $1; gsub(/,/, "", num); ok = 1 }
|
207
|
+
ok && i >= max_i {
|
208
|
+
printf(" %10d -> %-10s: %-8s |%-38s|\n", buckets[i],
|
209
|
+
"", num, star(num, most, 38))
|
210
|
+
next
|
211
|
+
}
|
212
|
+
ok && i >= 0 && i < max_i {
|
213
|
+
printf(" %10d -> %-10d: %-8s |%-38s|\n", buckets[i],
|
214
|
+
buckets[i+1] - 1, num, star(num, most, 38))
|
215
|
+
i++
|
216
|
+
next
|
217
|
+
}
|
218
|
+
ok && i == -1 {
|
219
|
+
printf(" %10s -> %-10d: %-8s |%-38s|\n", "",
|
220
|
+
buckets[0] - 1, num, star(num, most, 38))
|
221
|
+
i++
|
222
|
+
}
|
223
|
+
'
|