fluent-plugin-perf-tools 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rubocop.yml +26 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +43 -0
- data/Rakefile +17 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/fluent-plugin-perf-tools.gemspec +48 -0
- data/lib/fluent/plugin/in_perf_tools.rb +42 -0
- data/lib/fluent/plugin/perf_tools/cachestat.rb +65 -0
- data/lib/fluent/plugin/perf_tools/command.rb +30 -0
- data/lib/fluent/plugin/perf_tools/version.rb +9 -0
- data/lib/fluent/plugin/perf_tools.rb +11 -0
- data/perf-tools/LICENSE +339 -0
- data/perf-tools/README.md +205 -0
- data/perf-tools/bin/bitesize +1 -0
- data/perf-tools/bin/cachestat +1 -0
- data/perf-tools/bin/execsnoop +1 -0
- data/perf-tools/bin/funccount +1 -0
- data/perf-tools/bin/funcgraph +1 -0
- data/perf-tools/bin/funcslower +1 -0
- data/perf-tools/bin/functrace +1 -0
- data/perf-tools/bin/iolatency +1 -0
- data/perf-tools/bin/iosnoop +1 -0
- data/perf-tools/bin/killsnoop +1 -0
- data/perf-tools/bin/kprobe +1 -0
- data/perf-tools/bin/opensnoop +1 -0
- data/perf-tools/bin/perf-stat-hist +1 -0
- data/perf-tools/bin/reset-ftrace +1 -0
- data/perf-tools/bin/syscount +1 -0
- data/perf-tools/bin/tcpretrans +1 -0
- data/perf-tools/bin/tpoint +1 -0
- data/perf-tools/bin/uprobe +1 -0
- data/perf-tools/deprecated/README.md +1 -0
- data/perf-tools/deprecated/execsnoop-proc +150 -0
- data/perf-tools/deprecated/execsnoop-proc.8 +80 -0
- data/perf-tools/deprecated/execsnoop-proc_example.txt +46 -0
- data/perf-tools/disk/bitesize +175 -0
- data/perf-tools/examples/bitesize_example.txt +63 -0
- data/perf-tools/examples/cachestat_example.txt +58 -0
- data/perf-tools/examples/execsnoop_example.txt +153 -0
- data/perf-tools/examples/funccount_example.txt +126 -0
- data/perf-tools/examples/funcgraph_example.txt +2178 -0
- data/perf-tools/examples/funcslower_example.txt +110 -0
- data/perf-tools/examples/functrace_example.txt +341 -0
- data/perf-tools/examples/iolatency_example.txt +350 -0
- data/perf-tools/examples/iosnoop_example.txt +302 -0
- data/perf-tools/examples/killsnoop_example.txt +62 -0
- data/perf-tools/examples/kprobe_example.txt +379 -0
- data/perf-tools/examples/opensnoop_example.txt +47 -0
- data/perf-tools/examples/perf-stat-hist_example.txt +149 -0
- data/perf-tools/examples/reset-ftrace_example.txt +88 -0
- data/perf-tools/examples/syscount_example.txt +297 -0
- data/perf-tools/examples/tcpretrans_example.txt +93 -0
- data/perf-tools/examples/tpoint_example.txt +210 -0
- data/perf-tools/examples/uprobe_example.txt +321 -0
- data/perf-tools/execsnoop +292 -0
- data/perf-tools/fs/cachestat +167 -0
- data/perf-tools/images/perf-tools_2016.png +0 -0
- data/perf-tools/iolatency +296 -0
- data/perf-tools/iosnoop +296 -0
- data/perf-tools/kernel/funccount +146 -0
- data/perf-tools/kernel/funcgraph +259 -0
- data/perf-tools/kernel/funcslower +248 -0
- data/perf-tools/kernel/functrace +192 -0
- data/perf-tools/kernel/kprobe +270 -0
- data/perf-tools/killsnoop +263 -0
- data/perf-tools/man/man8/bitesize.8 +70 -0
- data/perf-tools/man/man8/cachestat.8 +111 -0
- data/perf-tools/man/man8/execsnoop.8 +104 -0
- data/perf-tools/man/man8/funccount.8 +76 -0
- data/perf-tools/man/man8/funcgraph.8 +166 -0
- data/perf-tools/man/man8/funcslower.8 +129 -0
- data/perf-tools/man/man8/functrace.8 +123 -0
- data/perf-tools/man/man8/iolatency.8 +116 -0
- data/perf-tools/man/man8/iosnoop.8 +169 -0
- data/perf-tools/man/man8/killsnoop.8 +100 -0
- data/perf-tools/man/man8/kprobe.8 +162 -0
- data/perf-tools/man/man8/opensnoop.8 +113 -0
- data/perf-tools/man/man8/perf-stat-hist.8 +111 -0
- data/perf-tools/man/man8/reset-ftrace.8 +49 -0
- data/perf-tools/man/man8/syscount.8 +96 -0
- data/perf-tools/man/man8/tcpretrans.8 +93 -0
- data/perf-tools/man/man8/tpoint.8 +140 -0
- data/perf-tools/man/man8/uprobe.8 +168 -0
- data/perf-tools/misc/perf-stat-hist +223 -0
- data/perf-tools/net/tcpretrans +311 -0
- data/perf-tools/opensnoop +280 -0
- data/perf-tools/syscount +192 -0
- data/perf-tools/system/tpoint +232 -0
- data/perf-tools/tools/reset-ftrace +123 -0
- data/perf-tools/user/uprobe +390 -0
- metadata +349 -0
@@ -0,0 +1,321 @@
|
|
1
|
+
Demonstrations of uprobe, the Linux ftrace version.
|
2
|
+
|
3
|
+
Trace the readline() function from all processes named "bash":
|
4
|
+
|
5
|
+
# ./uprobe p:bash:readline
|
6
|
+
Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
|
7
|
+
bash-11886 [003] d... 19601233.618462: readline: (0x48db60)
|
8
|
+
bash-11886 [003] d... 19601235.152067: readline: (0x48db60)
|
9
|
+
bash-11915 [003] d... 19601238.976244: readline: (0x48db60)
|
10
|
+
^C
|
11
|
+
Ending tracing...
|
12
|
+
|
13
|
+
readline() is the bash shell's function for reading interactive input, and
|
14
|
+
a line is printed each time I entered commands in separate bash shells.
|
15
|
+
The line contains default ftrace columns: the process name, "-", and PID;
|
16
|
+
the CPU, flags, a timestamp (in units of seconds), the probe name, then
|
17
|
+
other arguments. These columns are documented in the kernel source, under
|
18
|
+
Documentation/trace/ftrace.txt.
|
19
|
+
|
20
|
+
The first line of output is informational, and shows what uprobe is really
|
21
|
+
doing: it turned "bash" into "/bin/bash", using a $PATH lookup (via which(1)).
|
22
|
+
It then turned the "readline" symbol into 0x8db60, using objdump(1) for
|
23
|
+
symbol lookups.
|
24
|
+
|
25
|
+
Note that this traces _all_ bash processes simultaneously.
|
26
|
+
|
27
|
+
|
28
|
+
Tracing PID 11886 only:
|
29
|
+
|
30
|
+
# ./uprobe -p 11886 p:bash:readline
|
31
|
+
Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
|
32
|
+
bash-11886 [002] d... 19601657.753893: readline: (0x48db60)
|
33
|
+
bash-11886 [002] d... 19601658.246613: readline: (0x48db60)
|
34
|
+
bash-11886 [002] d... 19601658.386666: readline: (0x48db60)
|
35
|
+
bash-11886 [002] d... 19601661.415952: readline: (0x48db60)
|
36
|
+
^C
|
37
|
+
Ending tracing...
|
38
|
+
|
39
|
+
This may be important if you are tracing shared library functions, and only care
|
40
|
+
about one target process.
|
41
|
+
|
42
|
+
|
43
|
+
You can specify the full path to a binary to trace:
|
44
|
+
|
45
|
+
# ./uprobe p:/bin/bash:readline
|
46
|
+
Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
|
47
|
+
bash-11886 [002] d... 19601746.902461: readline: (0x48db60)
|
48
|
+
bash-11886 [002] d... 19601749.543485: readline: (0x48db60)
|
49
|
+
bash-11886 [001] d... 19601749.702369: readline: (0x48db60)
|
50
|
+
^C
|
51
|
+
Ending tracing...
|
52
|
+
|
53
|
+
This might be useful if uprobe picked the wrong binary to trace, as shown by
|
54
|
+
the informational line, and you wanted to specify it directly. It is also useful
|
55
|
+
for tracing binaries not in the $PATH, which uprobe can't otherwise find.
|
56
|
+
|
57
|
+
|
58
|
+
Use -l to list symbols available to trace; eg, searching for functions
|
59
|
+
containing "readline" in bash:
|
60
|
+
|
61
|
+
# ./uprobe -l bash | grep readline
|
62
|
+
initialize_readline
|
63
|
+
pcomp_set_readline_variables
|
64
|
+
posix_readline_initialize
|
65
|
+
readline
|
66
|
+
readline_internal_char
|
67
|
+
readline_internal_setup
|
68
|
+
readline_internal_teardown
|
69
|
+
|
70
|
+
|
71
|
+
Tracing the return of readline() with return value as a string:
|
72
|
+
|
73
|
+
# ./uprobe 'r:bash:readline +0($retval):string'
|
74
|
+
Tracing uprobe readline (r:readline /bin/bash:0x8db60 +0($retval):string). Ctrl-C to end.
|
75
|
+
bash-11886 [003] d... 19601837.001935: readline: (0x41e876 <- 0x48db60) arg1="ls -l"
|
76
|
+
bash-11886 [002] d... 19601851.008409: readline: (0x41e876 <- 0x48db60) arg1="echo "hello world""
|
77
|
+
bash-11886 [002] d... 19601854.099730: readline: (0x41e876 <- 0x48db60) arg1="df -h"
|
78
|
+
bash-11886 [002] d... 19601858.805740: readline: (0x41e876 <- 0x48db60) arg1="cd .."
|
79
|
+
bash-11886 [003] d... 19601898.378753: readline: (0x41e876 <- 0x48db60) arg1="foo bar"
|
80
|
+
^C
|
81
|
+
Ending tracing...
|
82
|
+
|
83
|
+
Now I can see the commands entered. Note that this traces what bash reads in,
|
84
|
+
even if the command eventually fails. Eg, the last command "foo bar" didn't
|
85
|
+
work (No command 'foo' found).
|
86
|
+
|
87
|
+
Note that this invocation now uses "r:" at the start of the probe description,
|
88
|
+
instead of "p:". r is for return probes, p for entry probes.
|
89
|
+
|
90
|
+
|
91
|
+
Tracing sleep() calls in all running libc shared libraries:
|
92
|
+
|
93
|
+
# ./uprobe p:libc:sleep
|
94
|
+
Tracing uprobe sleep (p:sleep /lib/x86_64-linux-gnu/libc-2.15.so:0xbf130). Ctrl-C to end.
|
95
|
+
svscan-2134 [000] d... 19602402.959904: sleep: (0x7f2dba562130)
|
96
|
+
cron-923 [000] d... 19602404.640507: sleep: (0x7f3e26d9e130)
|
97
|
+
cron-923 [002] d... 19602404.655232: sleep: (0x7f3e26d9e130)
|
98
|
+
cron-923 [002] d... 19602405.189271: sleep: (0x7f3e26d9e130)
|
99
|
+
svscan-2134 [000] d... 19602407.959947: sleep: (0x7f2dba562130)
|
100
|
+
[...]
|
101
|
+
|
102
|
+
This shows different programs calling sleep -- likely threads waiting for work.
|
103
|
+
|
104
|
+
I ran a "sleep 1" command in a bash shell, which wasn't seen above: probably
|
105
|
+
using a different sleep library call, which I'd need to trace separately.
|
106
|
+
|
107
|
+
|
108
|
+
Including headers (-H):
|
109
|
+
|
110
|
+
# ./uprobe -H p:libc:sleep
|
111
|
+
Tracing uprobe sleep (p:sleep /lib/x86_64-linux-gnu/libc-2.15.so:0xbf130). Ctrl-C to end.
|
112
|
+
# tracer: nop
|
113
|
+
#
|
114
|
+
# entries-in-buffer/entries-written: 0/0 #P:4
|
115
|
+
#
|
116
|
+
# _-----=> irqs-off
|
117
|
+
# / _----=> need-resched
|
118
|
+
# | / _---=> hardirq/softirq
|
119
|
+
# || / _--=> preempt-depth
|
120
|
+
# ||| / delay
|
121
|
+
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
|
122
|
+
# | | | |||| | |
|
123
|
+
svscan-2134 [000] d... 19603052.976770: sleep: (0x7f2dba562130)
|
124
|
+
svscan-2134 [002] d... 19603057.976927: sleep: (0x7f2dba562130)
|
125
|
+
[...]
|
126
|
+
|
127
|
+
These are documented in Documentation/trace/ftrace.txt.
|
128
|
+
|
129
|
+
|
130
|
+
Tracing sleep() with its argument (seconds):
|
131
|
+
|
132
|
+
# ./uprobe 'p:libc:sleep %di'
|
133
|
+
Tracing uprobe sleep (p:sleep /lib/x86_64-linux-gnu/libc-2.15.so:0xbf130 %di). Ctrl-C to end.
|
134
|
+
svscan-2134 [002] d... 19602517.962925: sleep: (0x7f2dba562130) arg1=0x5
|
135
|
+
svscan-2134 [002] d... 19602522.963082: sleep: (0x7f2dba562130) arg1=0x5
|
136
|
+
cron-923 [002] d... 19602524.187733: sleep: (0x7f3e26d9e130) arg1=0x3c
|
137
|
+
svscan-2134 [002] d... 19602527.963267: sleep: (0x7f2dba562130) arg1=0x5
|
138
|
+
[...]
|
139
|
+
|
140
|
+
So svscan was sleeping for 5 seconds, and cron for 60 seconds (0x3c = 60).
|
141
|
+
|
142
|
+
The argument is specified by its register, %di. This is platform dependent: %di
|
143
|
+
may only be meaningful on x86. If you're on a different architecture (eg, ARM),
|
144
|
+
you will probably need to use something else.
|
145
|
+
|
146
|
+
If working with registers is not for you, then consider tracing this using
|
147
|
+
perf_events with debuginfo installed: in which case you can use the variable
|
148
|
+
names. Or consider a different tracer.
|
149
|
+
|
150
|
+
|
151
|
+
Here is an example of the optional filter expression, to only trace the return
|
152
|
+
of fopen() when it failed and returned NULL (0):
|
153
|
+
|
154
|
+
# ./uprobe 'r:libc:fopen file=$retval' 'file == 0'
|
155
|
+
Tracing uprobe fopen (r:fopen /lib/x86_64-linux-gnu/libc-2.15.so:0x6e540 file=$retval). Ctrl-C to end.
|
156
|
+
prog1-23982 [000] d... 19602894.346872: fopen: (0x40051e <- 0x7f637867f540) file=0x0
|
157
|
+
^C
|
158
|
+
Ending tracing...
|
159
|
+
|
160
|
+
The argument $retval was given a vanity name "file", which was then tested in
|
161
|
+
the filter expression "file == 0".
|
162
|
+
|
163
|
+
|
164
|
+
Here's an example of tracing the MySQL server dispatch_command() function, along
|
165
|
+
with the query string (note: the %dx register is only valid for this
|
166
|
+
architecture and this software build):
|
167
|
+
|
168
|
+
# ./uprobe 'p:dispatch_command /opt/mysql/bin/mysqld:_Z16dispatch_command19enum_server_commandP3THDPcj +0(%dx):string'
|
169
|
+
Tracing uprobe dispatch_command (p:dispatch_command /opt/mysql/bin/mysqld:0x2dbd40 +0(%dx):string). Ctrl-C to end.
|
170
|
+
mysqld-2855 [001] d... 19956674.509085: dispatch_command: (0x6dbd40) arg1="show tables"
|
171
|
+
mysqld-2855 [001] d... 19956675.541155: dispatch_command: (0x6dbd40) arg1="SELECT * FROM numbers where number > 32000"
|
172
|
+
^C
|
173
|
+
Ending tracing...
|
174
|
+
|
175
|
+
The function name, "_Z16dispatch_command19enum_server_commandP3THDPcj", is the
|
176
|
+
C++ mangled symbol.
|
177
|
+
|
178
|
+
I can name the query string argument "cmd" then test it in a filter; eg, to only
|
179
|
+
match queries that begin with "SELECT":
|
180
|
+
|
181
|
+
# ./uprobe 'p:dispatch_command /opt/mysql/bin/mysqld:_Z16dispatch_command19enum_server_commandP3THDPcj cmd=+0(%dx):string' 'cmd ~ "SELECT*"'
|
182
|
+
Tracing uprobe dispatch_command (p:dispatch_command /opt/mysql/bin/mysqld:0x2dbd40 cmd=+0(%dx):string). Ctrl-C to end.
|
183
|
+
mysqld-2855 [001] d... 19956754.619958: dispatch_command: (0x6dbd40) cmd="SELECT * FROM numbers where number > 32000"
|
184
|
+
mysqld-2855 [001] d... 19956755.060125: dispatch_command: (0x6dbd40) cmd="SELECT * FROM numbers where number > 32000"
|
185
|
+
^C
|
186
|
+
Ending tracing...
|
187
|
+
|
188
|
+
|
189
|
+
Overhead is relative to the rate of events: a higher rate of traced events
|
190
|
+
means uprobe costs higher overhead. If you are unsure of the rate of events,
|
191
|
+
you can capture a set number only, or trace for a limited duration only (covered
|
192
|
+
in the next example). To trace a set number only, you can pipe into head, eg:
|
193
|
+
|
194
|
+
# ./uprobe -p 11982 p:bash:sh_malloc | head -15
|
195
|
+
Tracing uprobe sh_malloc (p:sh_malloc /bin/bash:0xaafa0). Ctrl-C to end.
|
196
|
+
bash-11982 [001] d... 19643121.529484: sh_malloc: (0x4aafa0)
|
197
|
+
bash-11982 [001] d... 19643121.529493: sh_malloc: (0x4aafa0)
|
198
|
+
bash-11982 [001] d... 19643121.529506: sh_malloc: (0x4aafa0)
|
199
|
+
bash-11982 [001] d... 19643121.529510: sh_malloc: (0x4aafa0)
|
200
|
+
bash-11982 [001] d... 19643121.529519: sh_malloc: (0x4aafa0)
|
201
|
+
bash-11982 [001] d... 19643121.529521: sh_malloc: (0x4aafa0)
|
202
|
+
bash-11982 [001] d... 19643121.529523: sh_malloc: (0x4aafa0)
|
203
|
+
bash-11982 [001] d... 19643121.529525: sh_malloc: (0x4aafa0)
|
204
|
+
bash-11982 [001] d... 19643121.529531: sh_malloc: (0x4aafa0)
|
205
|
+
bash-11982 [001] d... 19643121.529533: sh_malloc: (0x4aafa0)
|
206
|
+
bash-11982 [001] d... 19643121.529536: sh_malloc: (0x4aafa0)
|
207
|
+
bash-11982 [001] d... 19643121.529541: sh_malloc: (0x4aafa0)
|
208
|
+
bash-11982 [001] d... 19643121.529546: sh_malloc: (0x4aafa0)
|
209
|
+
bash-11982 [001] d... 19643121.529549: sh_malloc: (0x4aafa0)
|
210
|
+
|
211
|
+
uprobe traps SIGPIPE, so that it properly exits and cleans up probes when used
|
212
|
+
in this fashion.
|
213
|
+
|
214
|
+
Note the timestamps: by examining the rate at which they increase, you can have
|
215
|
+
some estimation for the rate of events. In this case, the 14 events all
|
216
|
+
happened within the same millisecond (the timestamp column is in units of
|
217
|
+
seconds), which suggests these are frequent events.
|
218
|
+
|
219
|
+
|
220
|
+
The -d option can be used to specify a duration for tracing, which also causes
|
221
|
+
uprobe to perform in-kernel buffering, which reduces the overhead of tracing:
|
222
|
+
|
223
|
+
# ./uprobe -d 5 p:libc:gettimeofday
|
224
|
+
Tracing uprobe gettimeofday for 5 seconds (buffered)...
|
225
|
+
sleep-12743 [001] d... 19642858.943440: gettimeofday: (0x7f400138ac10)
|
226
|
+
rotatelog-12744 [000] d... 19642858.955665: gettimeofday: (0x7f0ba34ebc10)
|
227
|
+
rotatelog-12745 [003] d... 19642858.956425: gettimeofday: (0x7f1e6db20c10)
|
228
|
+
rotatelog-12744 [000] d... 19642858.956924: gettimeofday: (0x7f0ba34ebc10)
|
229
|
+
rotatelog-12745 [003] d... 19642858.957608: gettimeofday: (0x7f1e6db20c10)
|
230
|
+
rotatelog-12744 [001] d... 19642858.958005: gettimeofday: (0x7fd8a1d64c10)
|
231
|
+
rotatelog-12744 [003] d... 19642858.959496: gettimeofday: (0x7f9531acdc10)
|
232
|
+
mkdir-12746 [002] d... 19642858.959542: gettimeofday: (0x7fd539474c10)
|
233
|
+
chown-12747 [001] d... 19642858.961455: gettimeofday: (0x7ff5646afc10)
|
234
|
+
rotatelog-12745 [000] d... 19642858.963065: gettimeofday: (0x7f406aca7c10)
|
235
|
+
rotatelog-12745 [001] d... 19642858.964280: gettimeofday: (0x7f6548debc10)
|
236
|
+
rotatelog-12749 [000] d... 19642859.977462: gettimeofday: (0x7fecaf7e1c10)
|
237
|
+
rotatelog-12750 [003] d... 19642859.977697: gettimeofday: (0x7f821eb3cc10)
|
238
|
+
rotatelog-12749 [000] d... 19642859.978707: gettimeofday: (0x7fecaf7e1c10)
|
239
|
+
[...]
|
240
|
+
|
241
|
+
You will not see live output during the -d mode, as it is being buffered
|
242
|
+
in-kernel.
|
243
|
+
|
244
|
+
|
245
|
+
Tracing func_c() in my func_abc test program, and including user-level stacks:
|
246
|
+
|
247
|
+
# ./uprobe -s p:/root/func_abc:func_c
|
248
|
+
Tracing uprobe func_c (p:func_c /root/func_abc:0x4f4). Ctrl-C to end.
|
249
|
+
func_abc-25394 [000] d... 19603250.054040: func_c: (0x4004f4)
|
250
|
+
func_abc-25394 [000] d... 19603250.054056: <user stack trace>
|
251
|
+
=> <00000000004004f4>
|
252
|
+
=> <0000000000400527>
|
253
|
+
=> <0000000000400537>
|
254
|
+
=> <00007fca9f0e376d>
|
255
|
+
func_abc-25394 [000] d... 19603251.054250: func_c: (0x4004f4)
|
256
|
+
func_abc-25394 [000] d... 19603251.054266: <user stack trace>
|
257
|
+
=> <00000000004004f4>
|
258
|
+
=> <0000000000400527>
|
259
|
+
=> <0000000000400537>
|
260
|
+
=> <00007fca9f0e376d>
|
261
|
+
^C
|
262
|
+
Ending tracing...
|
263
|
+
|
264
|
+
The output has the raw hex addresses. If this is too much of a nuisance, then
|
265
|
+
try tracing this using perf_events which should automate the translation.
|
266
|
+
|
267
|
+
It can get worse, eg:
|
268
|
+
|
269
|
+
# ./uprobe -s p:bash:readline
|
270
|
+
Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
|
271
|
+
bash-11886 [002] d... 19603434.397818: readline: (0x48db60)
|
272
|
+
bash-11886 [002] d... 19603434.397832: <user stack trace>
|
273
|
+
=> <000000000048db60>
|
274
|
+
bash-11886 [002] d... 19603434.592500: readline: (0x48db60)
|
275
|
+
bash-11886 [002] d... 19603434.592510: <user stack trace>
|
276
|
+
=> <000000000048db60>
|
277
|
+
^C
|
278
|
+
Ending tracing...
|
279
|
+
|
280
|
+
Here the stack trace is missing (0x48db60 is the traced function, transposed
|
281
|
+
from the base load address). This is due to compiler optimizations. It can be
|
282
|
+
fixed by recompiling with -fno-omit-frame-pointer, or, using perf_events and
|
283
|
+
a different method of stack walking.
|
284
|
+
|
285
|
+
|
286
|
+
Use -h to print the USAGE message:
|
287
|
+
|
288
|
+
# ./uprobe -h
|
289
|
+
USAGE: uprobe [-FhHsv] [-d secs] [-p PID] [-L TID] {-l target |
|
290
|
+
uprobe_definition [filter]}
|
291
|
+
-F # force. trace despite warnings.
|
292
|
+
-d seconds # trace duration, and use buffers
|
293
|
+
-l target # list functions from this executable
|
294
|
+
-p PID # PID to match on events
|
295
|
+
-L TID # thread id to match on events
|
296
|
+
-v # view format file (don't trace)
|
297
|
+
-H # include column headers
|
298
|
+
-s # show user stack traces
|
299
|
+
-h # this usage message
|
300
|
+
|
301
|
+
Note that these examples may need modification to match your kernel
|
302
|
+
version's function names and platform's register usage.
|
303
|
+
eg,
|
304
|
+
# trace readline() calls in all running "bash" executables:
|
305
|
+
uprobe p:bash:readline
|
306
|
+
# trace readline() with explicit executable path:
|
307
|
+
uprobe p:/bin/bash:readline
|
308
|
+
# trace the return of readline() with return value as a string:
|
309
|
+
uprobe 'r:bash:readline +0($retval):string'
|
310
|
+
# trace sleep() calls in all running libc shared libraries:
|
311
|
+
uprobe p:libc:sleep
|
312
|
+
# trace sleep() with register %di (x86):
|
313
|
+
uprobe 'p:libc:sleep %di'
|
314
|
+
# trace this address (use caution: must be instruction aligned):
|
315
|
+
uprobe p:libc:0xbf130
|
316
|
+
# trace gettimeofday() for PID 1182 only:
|
317
|
+
uprobe -p 1182 p:libc:gettimeofday
|
318
|
+
# trace the return of fopen() only when it returns NULL:
|
319
|
+
uprobe 'r:libc:fopen file=$retval' 'file == 0'
|
320
|
+
|
321
|
+
See the man page and example file for more info.
|
@@ -0,0 +1,292 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
#
|
3
|
+
# execsnoop - trace process exec() with arguments.
|
4
|
+
# Written using Linux ftrace.
|
5
|
+
#
|
6
|
+
# This shows the execution of new processes, especially short-lived ones that
|
7
|
+
# can be missed by sampling tools such as top(1).
|
8
|
+
#
|
9
|
+
# USAGE: ./execsnoop [-hrt] [-a argc] [-d secs] [name]
|
10
|
+
#
|
11
|
+
# REQUIREMENTS: FTRACE and KPROBE CONFIG, sched:sched_process_fork tracepoint,
|
12
|
+
# and either the sys_execve, stub_execve or do_execve kernel function. You may
|
13
|
+
# already have these on recent kernels. And awk.
|
14
|
+
#
|
15
|
+
# This traces exec() from the fork()->exec() sequence, which means it won't
|
16
|
+
# catch new processes that only fork(). With the -r option, it will also catch
|
17
|
+
# processes that re-exec. It makes a best-effort attempt to retrieve the program
|
18
|
+
# arguments and PPID; if these are unavailable, 0 and "[?]" are printed
|
19
|
+
# respectively. There is also a limit to the number of arguments printed (by
|
20
|
+
# default, 8), which can be increased using -a.
|
21
|
+
#
|
22
|
+
# This implementation is designed to work on older kernel versions, and without
|
23
|
+
# kernel debuginfo. It works by dynamic tracing an execve kernel function to
|
24
|
+
# read the arguments from the %si register. The sys_execve function is tried
|
25
|
+
# first, then stub_execve and do_execve. The sched:sched_process_fork
|
26
|
+
# tracepoint is used to get the PPID. This program is a workaround that should be
|
27
|
+
# improved in the future when other kernel capabilities are made available. If
|
28
|
+
# you need a more reliable tool now, then consider other tracing alternatives
|
29
|
+
# (eg, SystemTap). This tool is really a proof of concept to see what ftrace can
|
30
|
+
# currently do.
|
31
|
+
#
|
32
|
+
# From perf-tools: https://github.com/brendangregg/perf-tools
|
33
|
+
#
|
34
|
+
# See the execsnoop(8) man page (in perf-tools) for more info.
|
35
|
+
#
|
36
|
+
# COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
|
37
|
+
#
|
38
|
+
# This program is free software; you can redistribute it and/or
|
39
|
+
# modify it under the terms of the GNU General Public License
|
40
|
+
# as published by the Free Software Foundation; either version 2
|
41
|
+
# of the License, or (at your option) any later version.
|
42
|
+
#
|
43
|
+
# This program is distributed in the hope that it will be useful,
|
44
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
45
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
46
|
+
# GNU General Public License for more details.
|
47
|
+
#
|
48
|
+
# You should have received a copy of the GNU General Public License
|
49
|
+
# along with this program; if not, write to the Free Software Foundation,
|
50
|
+
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
51
|
+
#
|
52
|
+
# (http://www.gnu.org/copyleft/gpl.html)
|
53
|
+
#
|
54
|
+
# 07-Jul-2014 Brendan Gregg Created this.
|
55
|
+
|
56
|
+
### default variables
|
57
|
+
# Location of the ftrace control files and of the lock file that marks
# ftrace as in use.
tracing=/sys/kernel/debug/tracing
flock=/var/tmp/.ftrace-lock
wroteflock=0

# Option flags (0 means "not given") and the values that go with them.
opt_duration=0
duration=
opt_name=0
name=
opt_time=0
opt_reexec=0
opt_argc=0
argc=8
max_argc=16
ftext=

# A no-op handler keeps the shell alive on these signals, so execution
# carries on to the end tracing section.
trap ':' INT QUIT TERM PIPE HUP
|
62
|
+
|
63
|
+
# usage - print the help text on stderr, then exit.
# No exit code is given, so the exit status is that of the preceding cat.
usage() {
	cat >&2 <<-END
USAGE: execsnoop [-hrt] [-a argc] [-d secs] [name]
-d seconds # trace duration, and use buffers
-a argc # max args to show (default 8)
-r # include re-execs
-t # include time (seconds)
-h # this usage message
name # process name to match (REs allowed)
eg,
execsnoop # watch exec()s live (unbuffered)
execsnoop -d 1 # trace 1 sec (buffered)
execsnoop grep # trace process names containing grep
execsnoop 'udevd$' # process names ending in "udevd"

See the man page and example file for more info.
END
	exit
}
|
82
|
+
|
83
|
+
# warn - evaluate the given command string; if it fails, report the failure
# on stderr and carry on instead of aborting.
warn() {
	eval "$@" || echo >&2 "WARNING: command failed \"$@\""
}
|
88
|
+
|
89
|
+
# end - disable tracing: switch off both events, remove the kprobe, clear the
# trace buffer, and remove the lock file if this process wrote it.
# Each cleanup command is run through warn, which reports a failure and
# returns, so the remaining steps still run.
end() {
	local step
	echo 2>/dev/null
	echo "Ending tracing..." 2>/dev/null
	cd $tracing
	for step in \
	    "echo 0 > events/kprobes/$kname/enable" \
	    "echo 0 > events/sched/sched_process_fork/enable" \
	    "echo -:$kname >> kprobe_events" \
	    "echo > trace"
	do
		warn "$step"
	done
	(( wroteflock )) && warn "rm $flock"
}
|
100
|
+
|
101
|
+
# die - print the given message on stderr and exit with status 1.
die() {
	echo "$@" >&2
	exit 1
}
|
105
|
+
|
106
|
+
# edie - die with a quiet end(): print the message on stderr, silence all
# further output, run the end() cleanup, then exit with status 1.
edie() {
	echo "$@" >&2
	# from here on both stdout and stderr are discarded
	exec &>/dev/null
	end
	exit 1
}
|
113
|
+
|
114
|
+
### process options
|
115
|
+
# Parse the flags; -h and any unrecognised flag print the usage message.
while getopts a:d:hrt opt; do
	case $opt in
	a)
		opt_argc=1
		argc=$OPTARG
		;;
	d)
		opt_duration=1
		duration=$OPTARG
		;;
	r)
		opt_reexec=1
		;;
	t)
		opt_time=1
		;;
	h|?)
		usage
		;;
	esac
done
shift $(( OPTIND - 1 ))

# An optional first operand is the process-name pattern; anything left over
# after it is a usage error.
if [[ $# -gt 0 ]]; then
	name=$1
	opt_name=1
	shift
fi
if [[ $# -gt 0 ]]; then
	usage
fi
|
132
|
+
|
133
|
+
### option logic
|
134
|
+
# Validate -a before it reaches any arithmetic context: it must be a plain
# decimal integer no larger than max_argc.
if (( opt_argc )); then
	[[ $argc =~ ^[0-9]+$ ]] || die "ERROR: -a argc must be a non-negative integer."
	argc=$(( 10#$argc ))	# force base 10 so "08" is not read as octal
	(( argc > max_argc )) && die "ERROR: max -a argc is $max_argc."
fi

# Describe the process-name filter, if one was given, in the startup banner.
# (The former -p/-n/file checks tested variables this script never sets and
# have been dropped.)
(( opt_name )) && ftext=" issued by process name \"$name\""
if (( opt_duration )); then
	echo "Tracing exec()s$ftext for $duration seconds (buffered)..."
else
	echo "Tracing exec()s$ftext. Ctrl-C to end."
fi
|
144
|
+
|
145
|
+
### select awk
|
146
|
+
# Pick the awk to run. Plain awk is the fallback. In buffered (-d) mode mawk
# is used when installed; in live mode gawk is used when installed, otherwise
# mawk with "-W interactive" (workarounds for mawk/gawk fflush behavior).
awk=awk
if (( opt_duration )); then
	if [[ -x /usr/bin/mawk ]]; then
		awk=mawk
	fi
elif [[ -x /usr/bin/gawk ]]; then
	awk=gawk
elif [[ -x /usr/bin/mawk ]]; then
	awk="mawk -W interactive"
fi
|
158
|
+
|
159
|
+
### check permissions
|
160
|
+
# Every later control-file path is relative, so the script must be inside the
# tracing directory; give up with a hint if it cannot be entered.
if ! cd $tracing; then
	die "ERROR: accessing tracing. Root user? Kernel has FTRACE?
debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"
fi
|
162
|
+
|
163
|
+
### ftrace lock
|
164
|
+
# Take the ftrace lock atomically. With noclobber set, the redirection will
# not overwrite an existing file, so two tracers can no longer both pass a
# separate "does it exist?" test and then each write the lock. The subshell
# keeps noclobber from leaking into the rest of the script; $$ is still the
# PID of the main shell there.
if ! ( set -o noclobber; echo $$ > "$flock" ) 2>/dev/null; then
	[[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock"
	die "ERROR: unable to write $flock."
fi
wroteflock=1
|
167
|
+
|
168
|
+
### build probe
|
169
|
+
# Determine the word size in bits: ask getconf when it is installed, otherwise
# guess from the machine name (i* means 32, anything else 64). offset is the
# resulting size in bytes.
if [[ ! -x /usr/bin/getconf ]]; then
	case $(uname -m) in
	i*)	bits=32 ;;
	*)	bits=64 ;;
	esac
else
	bits=$(getconf LONG_BIT)
fi
offset=$(( bits / 8 ))
|
176
|
+
# makeprobe FUNC - build the kprobe definition for kernel function FUNC.
# Sets the globals func, kname (execsnoop_FUNC) and kprobe. The definition
# fetches argc + 1 strings through %si, one pointer slot (offset bytes) apart:
#   p:kname do_execve +0(+0(%si)):string +0(+8(%si)):string ...
makeprobe() {
	func=$1
	kname=execsnoop_$func
	kprobe="p:$kname $func"
	for (( i = 0; i <= argc; i++ )); do
		kprobe+=" +0(+$(( i * offset ))(%si)):string"
	done
}
|
187
|
+
# try in this order: sys_execve, stub_execve, do_execve
|
188
|
+
### setup and begin tracing
echo nop > current_tracer

# Register the kprobe on the first candidate function the kernel accepts.
probe_added=0
for candidate in sys_execve stub_execve do_execve; do
	makeprobe $candidate
	if echo $kprobe >> kprobe_events 2>/dev/null; then
		probe_added=1
		break
	fi
done
if (( ! probe_added )); then
	edie "ERROR: adding a kprobe for execve. Exiting."
fi

# Enable the kprobe and the fork tracepoint; on failure edie cleans up.
echo 1 > events/kprobes/$kname/enable ||
    edie "ERROR: enabling kprobe for execve. Exiting."
echo 1 > events/sched/sched_process_fork/enable ||
    edie "ERROR: enabling sched:sched_process_fork tracepoint. Exiting."

# Report which function was instrumented, then print the column header.
echo "Instrumenting $func"
if (( opt_time )); then
	printf "%-16s " "TIMEs"
fi
printf "%6s %6s %s\n" "PID" "PPID" "ARGS"
|
210
|
+
|
211
|
+
#
|
212
|
+
# Determine output format. It may be one of the following (newest first):
|
213
|
+
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
|
214
|
+
# TASK-PID CPU# TIMESTAMP FUNCTION
|
215
|
+
# To differentiate between them, the number of header fields is counted,
|
216
|
+
# and an offset set, to skip the extra column when needed.
|
217
|
+
#
|
218
|
+
# Inspect the first line of the trace file whose second field mentions TASK:
# a "#" header with six fields gives offset 1, any other gives 0. Nothing is
# printed (offset is empty) when no such line exists.
offset=$($awk '$2 ~ /TASK/ {
		print (($1 == "#" && NF == 6) ? 1 : 0)
		exit
	}' trace)
|
221
|
+
|
222
|
+
### print trace buffer
|
223
|
+
# Clear stale events, then feed the trace to awk. The awk program records the
# parent PID from each sched_process_fork event and prints one line per execve
# kprobe event: optional timestamp, PID, PPID and the argument strings.
# The name pattern is passed quoted so that spaces or glob characters in it
# reach awk intact.
warn "echo > trace"
( if (( opt_duration )); then
	# wait then dump buffer
	sleep $duration
	cat -v trace
else
	# print buffer live
	cat -v trace_pipe
fi ) | $awk -v o=$offset -v opt_name=$opt_name -v name="$name" \
    -v opt_duration=$opt_duration -v opt_time=$opt_time -v kname=$kname \
    -v opt_reexec=$opt_reexec '
	# common fields: $1 is "comm-PID"
	$1 != "#" {
		# task name can contain dashes, so strip only the trailing
		# "-PID"; an unanchored match would cut "-4" out of "gcc-4.8-123"
		comm = pid = $1
		sub(/-[0-9][0-9]*$/, "", comm)
		sub(/.*-/, "", pid)
	}

	# fork event: remember the parent of the new child, and allow the
	# forking PID to be reported again
	$1 != "#" && $(4+o) ~ /sched_process_fork/ {
		cpid=$0
		sub(/.* child_pid=/, "", cpid)
		sub(/ .*/, "", cpid)
		getppid[cpid] = pid
		delete seen[pid]
	}

	# execve kprobe event
	$1 != "#" && $(4+o) ~ kname {
		if (seen[pid])
			next
		if (opt_name && comm !~ name)
			next

		#
		# examples:
		# ... arg1="/bin/echo" arg2="1" arg3="2" arg4="3" ...
		# ... arg1="sleep" arg2="2" arg3=(fault) arg4="" ...
		# ... arg1="" arg2=(fault) arg3="" arg4="" ...
		# the last example is uncommon, and may be a race.
		#
		if ($0 ~ /arg1=""/) {
			args = comm " [?]"
		} else {
			args=$0
			sub(/ arg[0-9]*=\(fault\).*/, "", args)
			sub(/.*arg1="/, "", args)
			gsub(/" arg[0-9]*="/, " ", args)
			sub(/"$/, "", args)
			# no fault seen: every fetched slot held a string, so
			# the argument list may have been cut short
			if ($0 !~ /\(fault\)/)
				args = args " [...]"
		}

		if (opt_time) {
			time = $(3+o); sub(":", "", time)
			printf "%-16s ", time
		}
		printf "%6s %6d %s\n", pid, getppid[pid], args
		if (!opt_duration)
			fflush()
		# unless -r was given, report each PID once until it forks again
		if (!opt_reexec) {
			seen[pid] = 1
			delete getppid[pid]
		}
	}

	$0 ~ /LOST.*EVENT[S]/ { print "WARNING: " $0 > "/dev/stderr" }
'

### end tracing
end
|