fluent-plugin-perf-tools 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rubocop.yml +26 -0
  4. data/.ruby-version +1 -0
  5. data/CHANGELOG.md +5 -0
  6. data/CODE_OF_CONDUCT.md +84 -0
  7. data/Gemfile +5 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +43 -0
  10. data/Rakefile +17 -0
  11. data/bin/console +15 -0
  12. data/bin/setup +8 -0
  13. data/fluent-plugin-perf-tools.gemspec +48 -0
  14. data/lib/fluent/plugin/in_perf_tools.rb +42 -0
  15. data/lib/fluent/plugin/perf_tools/cachestat.rb +65 -0
  16. data/lib/fluent/plugin/perf_tools/command.rb +30 -0
  17. data/lib/fluent/plugin/perf_tools/version.rb +9 -0
  18. data/lib/fluent/plugin/perf_tools.rb +11 -0
  19. data/perf-tools/LICENSE +339 -0
  20. data/perf-tools/README.md +205 -0
  21. data/perf-tools/bin/bitesize +1 -0
  22. data/perf-tools/bin/cachestat +1 -0
  23. data/perf-tools/bin/execsnoop +1 -0
  24. data/perf-tools/bin/funccount +1 -0
  25. data/perf-tools/bin/funcgraph +1 -0
  26. data/perf-tools/bin/funcslower +1 -0
  27. data/perf-tools/bin/functrace +1 -0
  28. data/perf-tools/bin/iolatency +1 -0
  29. data/perf-tools/bin/iosnoop +1 -0
  30. data/perf-tools/bin/killsnoop +1 -0
  31. data/perf-tools/bin/kprobe +1 -0
  32. data/perf-tools/bin/opensnoop +1 -0
  33. data/perf-tools/bin/perf-stat-hist +1 -0
  34. data/perf-tools/bin/reset-ftrace +1 -0
  35. data/perf-tools/bin/syscount +1 -0
  36. data/perf-tools/bin/tcpretrans +1 -0
  37. data/perf-tools/bin/tpoint +1 -0
  38. data/perf-tools/bin/uprobe +1 -0
  39. data/perf-tools/deprecated/README.md +1 -0
  40. data/perf-tools/deprecated/execsnoop-proc +150 -0
  41. data/perf-tools/deprecated/execsnoop-proc.8 +80 -0
  42. data/perf-tools/deprecated/execsnoop-proc_example.txt +46 -0
  43. data/perf-tools/disk/bitesize +175 -0
  44. data/perf-tools/examples/bitesize_example.txt +63 -0
  45. data/perf-tools/examples/cachestat_example.txt +58 -0
  46. data/perf-tools/examples/execsnoop_example.txt +153 -0
  47. data/perf-tools/examples/funccount_example.txt +126 -0
  48. data/perf-tools/examples/funcgraph_example.txt +2178 -0
  49. data/perf-tools/examples/funcslower_example.txt +110 -0
  50. data/perf-tools/examples/functrace_example.txt +341 -0
  51. data/perf-tools/examples/iolatency_example.txt +350 -0
  52. data/perf-tools/examples/iosnoop_example.txt +302 -0
  53. data/perf-tools/examples/killsnoop_example.txt +62 -0
  54. data/perf-tools/examples/kprobe_example.txt +379 -0
  55. data/perf-tools/examples/opensnoop_example.txt +47 -0
  56. data/perf-tools/examples/perf-stat-hist_example.txt +149 -0
  57. data/perf-tools/examples/reset-ftrace_example.txt +88 -0
  58. data/perf-tools/examples/syscount_example.txt +297 -0
  59. data/perf-tools/examples/tcpretrans_example.txt +93 -0
  60. data/perf-tools/examples/tpoint_example.txt +210 -0
  61. data/perf-tools/examples/uprobe_example.txt +321 -0
  62. data/perf-tools/execsnoop +292 -0
  63. data/perf-tools/fs/cachestat +167 -0
  64. data/perf-tools/images/perf-tools_2016.png +0 -0
  65. data/perf-tools/iolatency +296 -0
  66. data/perf-tools/iosnoop +296 -0
  67. data/perf-tools/kernel/funccount +146 -0
  68. data/perf-tools/kernel/funcgraph +259 -0
  69. data/perf-tools/kernel/funcslower +248 -0
  70. data/perf-tools/kernel/functrace +192 -0
  71. data/perf-tools/kernel/kprobe +270 -0
  72. data/perf-tools/killsnoop +263 -0
  73. data/perf-tools/man/man8/bitesize.8 +70 -0
  74. data/perf-tools/man/man8/cachestat.8 +111 -0
  75. data/perf-tools/man/man8/execsnoop.8 +104 -0
  76. data/perf-tools/man/man8/funccount.8 +76 -0
  77. data/perf-tools/man/man8/funcgraph.8 +166 -0
  78. data/perf-tools/man/man8/funcslower.8 +129 -0
  79. data/perf-tools/man/man8/functrace.8 +123 -0
  80. data/perf-tools/man/man8/iolatency.8 +116 -0
  81. data/perf-tools/man/man8/iosnoop.8 +169 -0
  82. data/perf-tools/man/man8/killsnoop.8 +100 -0
  83. data/perf-tools/man/man8/kprobe.8 +162 -0
  84. data/perf-tools/man/man8/opensnoop.8 +113 -0
  85. data/perf-tools/man/man8/perf-stat-hist.8 +111 -0
  86. data/perf-tools/man/man8/reset-ftrace.8 +49 -0
  87. data/perf-tools/man/man8/syscount.8 +96 -0
  88. data/perf-tools/man/man8/tcpretrans.8 +93 -0
  89. data/perf-tools/man/man8/tpoint.8 +140 -0
  90. data/perf-tools/man/man8/uprobe.8 +168 -0
  91. data/perf-tools/misc/perf-stat-hist +223 -0
  92. data/perf-tools/net/tcpretrans +311 -0
  93. data/perf-tools/opensnoop +280 -0
  94. data/perf-tools/syscount +192 -0
  95. data/perf-tools/system/tpoint +232 -0
  96. data/perf-tools/tools/reset-ftrace +123 -0
  97. data/perf-tools/user/uprobe +390 -0
  98. metadata +349 -0
@@ -0,0 +1,321 @@
1
+ Demonstrations of uprobe, the Linux ftrace version.
2
+
3
+ Trace the readline() function from all processes named "bash":
4
+
5
+ # ./uprobe p:bash:readline
6
+ Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
7
+ bash-11886 [003] d... 19601233.618462: readline: (0x48db60)
8
+ bash-11886 [003] d... 19601235.152067: readline: (0x48db60)
9
+ bash-11915 [003] d... 19601238.976244: readline: (0x48db60)
10
+ ^C
11
+ Ending tracing...
12
+
13
+ readline() is the bash shell's function for reading interactive input, and
14
+ a line is printed each time I entered commands in separate bash shells.
15
+ The line contains default ftrace columns: the process name, "-", and PID;
16
+ the CPU, flags, a timestamp (in units of seconds), the probe name, then
17
+ other arguments. These columns are documented in the kernel source, under
18
+ Documentation/trace/ftrace.txt.
19
+
20
+ The first line of output is informational, and shows what uprobe is really
21
+ doing: it turned "bash" into "/bin/bash", using a $PATH lookup (via which(1)).
22
+ It then turned the "readline" symbol into 0x8db60, using objdump(1) for
23
+ symbol lookups.
24
+
25
+ Note that this traces _all_ bash processes simultaneously.
26
+
27
+
28
+ Tracing PID 11886 only:
29
+
30
+ # ./uprobe -p 11886 p:bash:readline
31
+ Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
32
+ bash-11886 [002] d... 19601657.753893: readline: (0x48db60)
33
+ bash-11886 [002] d... 19601658.246613: readline: (0x48db60)
34
+ bash-11886 [002] d... 19601658.386666: readline: (0x48db60)
35
+ bash-11886 [002] d... 19601661.415952: readline: (0x48db60)
36
+ ^C
37
+ Ending tracing...
38
+
39
+ This may be important if you are tracing shared library functions, and only care
40
+ about one target process.
41
+
42
+
43
+ You can specify the full path to a binary to trace:
44
+
45
+ # ./uprobe p:/bin/bash:readline
46
+ Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
47
+ bash-11886 [002] d... 19601746.902461: readline: (0x48db60)
48
+ bash-11886 [002] d... 19601749.543485: readline: (0x48db60)
49
+ bash-11886 [001] d... 19601749.702369: readline: (0x48db60)
50
+ ^C
51
+ Ending tracing...
52
+
53
+ This might be useful if uprobe picked the wrong binary to trace, as shown by
54
+ the informational line, and you wanted to specify it directly. It is also useful
55
+ for tracing binaries not in the $PATH, which uprobe can't otherwise find.
56
+
57
+
58
+ Use -l to list symbols available to trace; eg, searching for functions
59
+ containing "readline" in bash:
60
+
61
+ # ./uprobe -l bash | grep readline
62
+ initialize_readline
63
+ pcomp_set_readline_variables
64
+ posix_readline_initialize
65
+ readline
66
+ readline_internal_char
67
+ readline_internal_setup
68
+ readline_internal_teardown
69
+
70
+
71
+ Tracing the return of readline() with return value as a string:
72
+
73
+ # ./uprobe 'r:bash:readline +0($retval):string'
74
+ Tracing uprobe readline (r:readline /bin/bash:0x8db60 +0($retval):string). Ctrl-C to end.
75
+ bash-11886 [003] d... 19601837.001935: readline: (0x41e876 <- 0x48db60) arg1="ls -l"
76
+ bash-11886 [002] d... 19601851.008409: readline: (0x41e876 <- 0x48db60) arg1="echo "hello world""
77
+ bash-11886 [002] d... 19601854.099730: readline: (0x41e876 <- 0x48db60) arg1="df -h"
78
+ bash-11886 [002] d... 19601858.805740: readline: (0x41e876 <- 0x48db60) arg1="cd .."
79
+ bash-11886 [003] d... 19601898.378753: readline: (0x41e876 <- 0x48db60) arg1="foo bar"
80
+ ^C
81
+ Ending tracing...
82
+
83
+ Now I can see the commands entered. Note that this traces what bash reads in,
84
+ even if the command eventually fails. Eg, the last command "foo bar" didn't
85
+ work (No command 'foo' found).
86
+
87
+ Note that this invocation now uses "r:" at the start of the probe description,
88
+ instead of "p:". r is for return probes, p for entry probes.
89
+
90
+
91
+ Tracing sleep() calls in all running libc shared libraries:
92
+
93
+ # ./uprobe p:libc:sleep
94
+ Tracing uprobe sleep (p:sleep /lib/x86_64-linux-gnu/libc-2.15.so:0xbf130). Ctrl-C to end.
95
+ svscan-2134 [000] d... 19602402.959904: sleep: (0x7f2dba562130)
96
+ cron-923 [000] d... 19602404.640507: sleep: (0x7f3e26d9e130)
97
+ cron-923 [002] d... 19602404.655232: sleep: (0x7f3e26d9e130)
98
+ cron-923 [002] d... 19602405.189271: sleep: (0x7f3e26d9e130)
99
+ svscan-2134 [000] d... 19602407.959947: sleep: (0x7f2dba562130)
100
+ [...]
101
+
102
+ This shows different programs calling sleep -- likely threads waiting for work.
103
+
104
+ I ran a "sleep 1" command in a bash shell, which wasn't seen above: probably
105
+ using a different sleep library call, which I'd need to trace separately.
106
+
107
+
108
+ Including headers (-H):
109
+
110
+ # ./uprobe -H p:libc:sleep
111
+ Tracing uprobe sleep (p:sleep /lib/x86_64-linux-gnu/libc-2.15.so:0xbf130). Ctrl-C to end.
112
+ # tracer: nop
113
+ #
114
+ # entries-in-buffer/entries-written: 0/0 #P:4
115
+ #
116
+ # _-----=> irqs-off
117
+ # / _----=> need-resched
118
+ # | / _---=> hardirq/softirq
119
+ # || / _--=> preempt-depth
120
+ # ||| / delay
121
+ # TASK-PID CPU# |||| TIMESTAMP FUNCTION
122
+ # | | | |||| | |
123
+ svscan-2134 [000] d... 19603052.976770: sleep: (0x7f2dba562130)
124
+ svscan-2134 [002] d... 19603057.976927: sleep: (0x7f2dba562130)
125
+ [...]
126
+
127
+ These are documented in Documentation/trace/ftrace.txt.
128
+
129
+
130
+ Tracing sleep() with its argument (seconds):
131
+
132
+ # ./uprobe 'p:libc:sleep %di'
133
+ Tracing uprobe sleep (p:sleep /lib/x86_64-linux-gnu/libc-2.15.so:0xbf130 %di). Ctrl-C to end.
134
+ svscan-2134 [002] d... 19602517.962925: sleep: (0x7f2dba562130) arg1=0x5
135
+ svscan-2134 [002] d... 19602522.963082: sleep: (0x7f2dba562130) arg1=0x5
136
+ cron-923 [002] d... 19602524.187733: sleep: (0x7f3e26d9e130) arg1=0x3c
137
+ svscan-2134 [002] d... 19602527.963267: sleep: (0x7f2dba562130) arg1=0x5
138
+ [...]
139
+
140
+ So svscan was sleeping for 5 seconds, and cron for 60 seconds (0x3c = 60).
141
+
142
+ The argument is specified by its register, %di. This is platform dependent: %di
143
+ may only be meaningful on x86. If you're on a different architecture (eg, ARM),
144
+ you will probably need to use something else.
145
+
146
+ If working with registers is not for you, then consider tracing this using
147
+ perf_events with debuginfo installed: in which case you can use the variable
148
+ names. Or consider a different tracer.
149
+
150
+
151
+ Here is an example of the optional filter expression, to only trace the return
152
+ of fopen() when it failed and returned NULL (0):
153
+
154
+ # ./uprobe 'r:libc:fopen file=$retval' 'file == 0'
155
+ Tracing uprobe fopen (r:fopen /lib/x86_64-linux-gnu/libc-2.15.so:0x6e540 file=$retval). Ctrl-C to end.
156
+ prog1-23982 [000] d... 19602894.346872: fopen: (0x40051e <- 0x7f637867f540) file=0x0
157
+ ^C
158
+ Ending tracing...
159
+
160
+ The argument $retval was given a vanity name "file", which was then tested in
161
+ the filter expression "file == 0".
162
+
163
+
164
+ Here's an example of tracing the MySQL server dispatch_command() function, along
165
+ with the query string (note: the %dx register is only valid for this
166
+ architecture and this software build):
167
+
168
+ # ./uprobe 'p:dispatch_command /opt/mysql/bin/mysqld:_Z16dispatch_command19enum_server_commandP3THDPcj +0(%dx):string'
169
+ Tracing uprobe dispatch_command (p:dispatch_command /opt/mysql/bin/mysqld:0x2dbd40 +0(%dx):string). Ctrl-C to end.
170
+ mysqld-2855 [001] d... 19956674.509085: dispatch_command: (0x6dbd40) arg1="show tables"
171
+ mysqld-2855 [001] d... 19956675.541155: dispatch_command: (0x6dbd40) arg1="SELECT * FROM numbers where number > 32000"
172
+ ^C
173
+ Ending tracing...
174
+
175
+ The function name, "_Z16dispatch_command19enum_server_commandP3THDPcj", is the
176
+ C++ mangled symbol.
177
+
178
+ I can name the query string argument "cmd" then test it in a filter; eg, to only
179
+ match queries that begin with "SELECT":
180
+
181
+ # ./uprobe 'p:dispatch_command /opt/mysql/bin/mysqld:_Z16dispatch_command19enum_server_commandP3THDPcj cmd=+0(%dx):string' 'cmd ~ "SELECT*"'
182
+ Tracing uprobe dispatch_command (p:dispatch_command /opt/mysql/bin/mysqld:0x2dbd40 cmd=+0(%dx):string). Ctrl-C to end.
183
+ mysqld-2855 [001] d... 19956754.619958: dispatch_command: (0x6dbd40) cmd="SELECT * FROM numbers where number > 32000"
184
+ mysqld-2855 [001] d... 19956755.060125: dispatch_command: (0x6dbd40) cmd="SELECT * FROM numbers where number > 32000"
185
+ ^C
186
+ Ending tracing...
187
+
188
+
189
+ Overhead is relative to the rate of events: a higher rate of traced events
190
+ means uprobe costs higher overhead. If you are unsure of the rate of events,
191
+ you can capture a set number only, or trace for a limited duration only (covered
192
+ in the next example). To trace a set number only, you can pipe into head, eg:
193
+
194
+ # ./uprobe -p 11982 p:bash:sh_malloc | head -15
195
+ Tracing uprobe sh_malloc (p:sh_malloc /bin/bash:0xaafa0). Ctrl-C to end.
196
+ bash-11982 [001] d... 19643121.529484: sh_malloc: (0x4aafa0)
197
+ bash-11982 [001] d... 19643121.529493: sh_malloc: (0x4aafa0)
198
+ bash-11982 [001] d... 19643121.529506: sh_malloc: (0x4aafa0)
199
+ bash-11982 [001] d... 19643121.529510: sh_malloc: (0x4aafa0)
200
+ bash-11982 [001] d... 19643121.529519: sh_malloc: (0x4aafa0)
201
+ bash-11982 [001] d... 19643121.529521: sh_malloc: (0x4aafa0)
202
+ bash-11982 [001] d... 19643121.529523: sh_malloc: (0x4aafa0)
203
+ bash-11982 [001] d... 19643121.529525: sh_malloc: (0x4aafa0)
204
+ bash-11982 [001] d... 19643121.529531: sh_malloc: (0x4aafa0)
205
+ bash-11982 [001] d... 19643121.529533: sh_malloc: (0x4aafa0)
206
+ bash-11982 [001] d... 19643121.529536: sh_malloc: (0x4aafa0)
207
+ bash-11982 [001] d... 19643121.529541: sh_malloc: (0x4aafa0)
208
+ bash-11982 [001] d... 19643121.529546: sh_malloc: (0x4aafa0)
209
+ bash-11982 [001] d... 19643121.529549: sh_malloc: (0x4aafa0)
210
+
211
+ uprobe traps SIGPIPE, so that it properly exits and cleans up probes when used
212
+ in this fashion.
213
+
214
+ Note the timestamps: by examining the rate at which they increase, you can have
215
+ some estimation for the rate of events. In this case, the 15 events all
216
+ happened within the same millisecond (the timestamp column is in units of
217
+ seconds), which suggests these are frequent events.
218
+
219
+
220
+ The -d option can be used to specify a duration for tracing, which also causes
221
+ uprobe to perform in-kernel buffering, which reduces the overhead of tracing:
222
+
223
+ # ./uprobe -d 5 p:libc:gettimeofday
224
+ Tracing uprobe gettimeofday for 5 seconds (buffered)...
225
+ sleep-12743 [001] d... 19642858.943440: gettimeofday: (0x7f400138ac10)
226
+ rotatelog-12744 [000] d... 19642858.955665: gettimeofday: (0x7f0ba34ebc10)
227
+ rotatelog-12745 [003] d... 19642858.956425: gettimeofday: (0x7f1e6db20c10)
228
+ rotatelog-12744 [000] d... 19642858.956924: gettimeofday: (0x7f0ba34ebc10)
229
+ rotatelog-12745 [003] d... 19642858.957608: gettimeofday: (0x7f1e6db20c10)
230
+ rotatelog-12744 [001] d... 19642858.958005: gettimeofday: (0x7fd8a1d64c10)
231
+ rotatelog-12744 [003] d... 19642858.959496: gettimeofday: (0x7f9531acdc10)
232
+ mkdir-12746 [002] d... 19642858.959542: gettimeofday: (0x7fd539474c10)
233
+ chown-12747 [001] d... 19642858.961455: gettimeofday: (0x7ff5646afc10)
234
+ rotatelog-12745 [000] d... 19642858.963065: gettimeofday: (0x7f406aca7c10)
235
+ rotatelog-12745 [001] d... 19642858.964280: gettimeofday: (0x7f6548debc10)
236
+ rotatelog-12749 [000] d... 19642859.977462: gettimeofday: (0x7fecaf7e1c10)
237
+ rotatelog-12750 [003] d... 19642859.977697: gettimeofday: (0x7f821eb3cc10)
238
+ rotatelog-12749 [000] d... 19642859.978707: gettimeofday: (0x7fecaf7e1c10)
239
+ [...]
240
+
241
+ You will not see live output during the -d mode, as it is being buffered
242
+ in-kernel.
243
+
244
+
245
+ Tracing func_c() in my func_abc test program, and including user-level stacks:
246
+
247
+ # ./uprobe -s p:/root/func_abc:func_c
248
+ Tracing uprobe func_c (p:func_c /root/func_abc:0x4f4). Ctrl-C to end.
249
+ func_abc-25394 [000] d... 19603250.054040: func_c: (0x4004f4)
250
+ func_abc-25394 [000] d... 19603250.054056: <user stack trace>
251
+ => <00000000004004f4>
252
+ => <0000000000400527>
253
+ => <0000000000400537>
254
+ => <00007fca9f0e376d>
255
+ func_abc-25394 [000] d... 19603251.054250: func_c: (0x4004f4)
256
+ func_abc-25394 [000] d... 19603251.054266: <user stack trace>
257
+ => <00000000004004f4>
258
+ => <0000000000400527>
259
+ => <0000000000400537>
260
+ => <00007fca9f0e376d>
261
+ ^C
262
+ Ending tracing...
263
+
264
+ The output has the raw hex addresses. If this is too much of a nuisance, then
265
+ try tracing this using perf_events which should automate the translation.
266
+
267
+ It can get worse, eg:
268
+
269
+ # ./uprobe -s p:bash:readline
270
+ Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
271
+ bash-11886 [002] d... 19603434.397818: readline: (0x48db60)
272
+ bash-11886 [002] d... 19603434.397832: <user stack trace>
273
+ => <000000000048db60>
274
+ bash-11886 [002] d... 19603434.592500: readline: (0x48db60)
275
+ bash-11886 [002] d... 19603434.592510: <user stack trace>
276
+ => <000000000048db60>
277
+ ^C
278
+ Ending tracing...
279
+
280
+ Here the stack trace is missing (0x48db60 is the traced function, transposed
281
+ from the base load address). This is due to compiler optimizations. It can be
282
+ fixed by recompiling with -fno-omit-frame-pointer, or, using perf_events and
283
+ a different method of stack walking.
284
+
285
+
286
+ Use -h to print the USAGE message:
287
+
288
+ # ./uprobe -h
289
+ USAGE: uprobe [-FhHsv] [-d secs] [-p PID] [-L TID] {-l target |
290
+ uprobe_definition [filter]}
291
+ -F # force. trace despite warnings.
292
+ -d seconds # trace duration, and use buffers
293
+ -l target # list functions from this executable
294
+ -p PID # PID to match on events
295
+ -L TID # thread id to match on events
296
+ -v # view format file (don't trace)
297
+ -H # include column headers
298
+ -s # show user stack traces
299
+ -h # this usage message
300
+
301
+ Note that these examples may need modification to match your kernel
302
+ version's function names and platform's register usage.
303
+ eg,
304
+ # trace readline() calls in all running "bash" executables:
305
+ uprobe p:bash:readline
306
+ # trace readline() with explicit executable path:
307
+ uprobe p:/bin/bash:readline
308
+ # trace the return of readline() with return value as a string:
309
+ uprobe 'r:bash:readline +0($retval):string'
310
+ # trace sleep() calls in all running libc shared libraries:
311
+ uprobe p:libc:sleep
312
+ # trace sleep() with register %di (x86):
313
+ uprobe 'p:libc:sleep %di'
314
+ # trace this address (use caution: must be instruction aligned):
315
+ uprobe p:libc:0xbf130
316
+ # trace gettimeofday() for PID 1182 only:
317
+ uprobe -p 1182 p:libc:gettimeofday
318
+ # trace the return of fopen() only when it returns NULL:
319
+ uprobe 'r:libc:fopen file=$retval' 'file == 0'
320
+
321
+ See the man page and example file for more info.
@@ -0,0 +1,292 @@
1
+ #!/bin/bash
2
+ #
3
+ # execsnoop - trace process exec() with arguments.
4
+ # Written using Linux ftrace.
5
+ #
6
+ # This shows the execution of new processes, especially short-lived ones that
7
+ # can be missed by sampling tools such as top(1).
8
+ #
9
+ # USAGE: ./execsnoop [-hrt] [-a argc] [-d secs] [name]
10
+ #
11
+ # REQUIREMENTS: FTRACE and KPROBE CONFIG, sched:sched_process_fork tracepoint,
12
+ # and either the sys_execve, stub_execve or do_execve kernel function. You may
13
+ # already have these on recent kernels. And awk.
14
+ #
15
+ # This traces exec() from the fork()->exec() sequence, which means it won't
16
+ # catch new processes that only fork(). With the -r option, it will also catch
17
+ # processes that re-exec. It makes a best-effort attempt to retrieve the program
18
+ # arguments and PPID; if these are unavailable, "[?]" and 0 are printed
19
+ # respectively. There is also a limit to the number of arguments printed (by
20
+ # default, 8), which can be increased using -a.
21
+ #
22
+ # This implementation is designed to work on older kernel versions, and without
23
+ # kernel debuginfo. It works by dynamic tracing an execve kernel function to
24
+ # read the arguments from the %si register. The sys_execve function is tried
25
+ # first, then stub_execve and do_execve. The sched:sched_process_fork
26
+ # tracepoint is used to get the PPID. This program is a workaround that should be
27
+ # improved in the future when other kernel capabilities are made available. If
28
+ # you need a more reliable tool now, then consider other tracing alternatives
29
+ # (eg, SystemTap). This tool is really a proof of concept to see what ftrace can
30
+ # currently do.
31
+ #
32
+ # From perf-tools: https://github.com/brendangregg/perf-tools
33
+ #
34
+ # See the execsnoop(8) man page (in perf-tools) for more info.
35
+ #
36
+ # COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
37
+ #
38
+ # This program is free software; you can redistribute it and/or
39
+ # modify it under the terms of the GNU General Public License
40
+ # as published by the Free Software Foundation; either version 2
41
+ # of the License, or (at your option) any later version.
42
+ #
43
+ # This program is distributed in the hope that it will be useful,
44
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
45
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46
+ # GNU General Public License for more details.
47
+ #
48
+ # You should have received a copy of the GNU General Public License
49
+ # along with this program; if not, write to the Free Software Foundation,
50
+ # Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
51
+ #
52
+ # (http://www.gnu.org/copyleft/gpl.html)
53
+ #
54
+ # 07-Jul-2014 Brendan Gregg Created this.
55
+
56
+ ### default variables
57
+ tracing=/sys/kernel/debug/tracing
58
+ flock=/var/tmp/.ftrace-lock; wroteflock=0
59
+ opt_duration=0; duration=; opt_name=0; name=; opt_time=0; opt_reexec=0
60
+ opt_argc=0; argc=8; max_argc=16; ftext=
61
+ trap ':' INT QUIT TERM PIPE HUP # sends execution to end tracing section
62
+
63
+ function usage {
64
+ cat <<-END >&2
65
+ USAGE: execsnoop [-hrt] [-a argc] [-d secs] [name]
66
+ -d seconds # trace duration, and use buffers
67
+ -a argc # max args to show (default 8)
68
+ -r # include re-execs
69
+ -t # include time (seconds)
70
+ -h # this usage message
71
+ name # process name to match (REs allowed)
72
+ eg,
73
+ execsnoop # watch exec()s live (unbuffered)
74
+ execsnoop -d 1 # trace 1 sec (buffered)
75
+ execsnoop grep # trace process names containing grep
76
+ execsnoop 'udevd$' # process names ending in "udevd"
77
+
78
+ See the man page and example file for more info.
79
+ END
80
+ exit
81
+ }
82
+
83
+ function warn {
84
+ if ! eval "$@"; then
85
+ echo >&2 "WARNING: command failed \"$@\""
86
+ fi
87
+ }
88
+
89
+ function end {
90
+ # disable tracing
91
+ echo 2>/dev/null
92
+ echo "Ending tracing..." 2>/dev/null
93
+ cd $tracing
94
+ warn "echo 0 > events/kprobes/$kname/enable"
95
+ warn "echo 0 > events/sched/sched_process_fork/enable"
96
+ warn "echo -:$kname >> kprobe_events"
97
+ warn "echo > trace"
98
+ (( wroteflock )) && warn "rm $flock"
99
+ }
100
+
101
+ function die {
102
+ echo >&2 "$@"
103
+ exit 1
104
+ }
105
+
106
+ function edie {
107
+ # die with a quiet end()
108
+ echo >&2 "$@"
109
+ exec >/dev/null 2>&1
110
+ end
111
+ exit 1
112
+ }
113
+
114
+ ### process options
115
+ while getopts a:d:hrt opt
116
+ do
117
+ case $opt in
118
+ a) opt_argc=1; argc=$OPTARG ;;
119
+ d) opt_duration=1; duration=$OPTARG ;;
120
+ r) opt_reexec=1 ;;
121
+ t) opt_time=1 ;;
122
+ h|?) usage ;;
123
+ esac
124
+ done
125
+ shift $(( $OPTIND - 1 ))
126
+ if (( $# )); then
127
+ opt_name=1
128
+ name=$1
129
+ shift
130
+ fi
131
+ (( $# )) && usage
132
+
133
+ ### option logic
134
+ (( opt_pid && opt_name )) && die "ERROR: use either -p or -n."
135
+ (( opt_pid )) && ftext=" issued by PID $pid"
136
+ (( opt_name )) && ftext=" issued by process name \"$name\""
137
+ (( opt_file )) && ftext="$ftext for filenames containing \"$file\""
138
+ (( opt_argc && argc > max_argc )) && die "ERROR: max -a argc is $max_argc."
139
+ if (( opt_duration )); then
140
+ echo "Tracing exec()s$ftext for $duration seconds (buffered)..."
141
+ else
142
+ echo "Tracing exec()s$ftext. Ctrl-C to end."
143
+ fi
144
+
145
+ ### select awk
146
+ if (( opt_duration )); then
147
+ [[ -x /usr/bin/mawk ]] && awk=mawk || awk=awk
148
+ else
149
+ # workarounds for mawk/gawk fflush behavior
150
+ if [[ -x /usr/bin/gawk ]]; then
151
+ awk=gawk
152
+ elif [[ -x /usr/bin/mawk ]]; then
153
+ awk="mawk -W interactive"
154
+ else
155
+ awk=awk
156
+ fi
157
+ fi
158
+
159
+ ### check permissions
160
+ cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE?
161
+ debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"
162
+
163
+ ### ftrace lock
164
+ [[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock"
165
+ echo $$ > $flock || die "ERROR: unable to write $flock."
166
+ wroteflock=1
167
+
168
+ ### build probe
169
+ if [[ -x /usr/bin/getconf ]]; then
170
+ bits=$(getconf LONG_BIT)
171
+ else
172
+ bits=64
173
+ [[ $(uname -m) == i* ]] && bits=32
174
+ fi
175
+ (( offset = bits / 8 ))
176
+ function makeprobe {
177
+ func=$1
178
+ kname=execsnoop_$func
179
+ kprobe="p:$kname $func"
180
+ i=0
181
+ while (( i < argc + 1 )); do
182
+ # p:kname do_execve +0(+0(%si)):string +0(+8(%si)):string ...
183
+ kprobe="$kprobe +0(+$(( i * offset ))(%si)):string"
184
+ (( i++ ))
185
+ done
186
+ }
187
+ # try in this order: sys_execve, stub_execve, do_execve
188
+ makeprobe sys_execve
189
+
190
+ ### setup and begin tracing
191
+ echo nop > current_tracer
192
+ if ! echo $kprobe >> kprobe_events 2>/dev/null; then
193
+ makeprobe stub_execve
194
+ if ! echo $kprobe >> kprobe_events 2>/dev/null; then
195
+ makeprobe do_execve
196
+ if ! echo $kprobe >> kprobe_events 2>/dev/null; then
197
+ edie "ERROR: adding a kprobe for execve. Exiting."
198
+ fi
199
+ fi
200
+ fi
201
+ if ! echo 1 > events/kprobes/$kname/enable; then
202
+ edie "ERROR: enabling kprobe for execve. Exiting."
203
+ fi
204
+ if ! echo 1 > events/sched/sched_process_fork/enable; then
205
+ edie "ERROR: enabling sched:sched_process_fork tracepoint. Exiting."
206
+ fi
207
+ echo "Instrumenting $func"
208
+ (( opt_time )) && printf "%-16s " "TIMEs"
209
+ printf "%6s %6s %s\n" "PID" "PPID" "ARGS"
210
+
211
+ #
212
+ # Determine output format. It may be one of the following (newest first):
213
+ # TASK-PID CPU# |||| TIMESTAMP FUNCTION
214
+ # TASK-PID CPU# TIMESTAMP FUNCTION
215
+ # To differentiate between them, the number of header fields is counted,
216
+ # and an offset set, to skip the extra column when needed.
217
+ #
218
+ offset=$($awk 'BEGIN { o = 0; }
219
+ $1 == "#" && $2 ~ /TASK/ && NF == 6 { o = 1; }
220
+ $2 ~ /TASK/ { print o; exit }' trace)
221
+
222
+ ### print trace buffer
223
+ warn "echo > trace"
224
+ ( if (( opt_duration )); then
225
+ # wait then dump buffer
226
+ sleep $duration
227
+ cat -v trace
228
+ else
229
+ # print buffer live
230
+ cat -v trace_pipe
231
+ fi ) | $awk -v o=$offset -v opt_name=$opt_name -v name=$name \
232
+ -v opt_duration=$opt_duration -v opt_time=$opt_time -v kname=$kname \
233
+ -v opt_reexec=$opt_reexec '
234
+ # common fields
235
+ $1 != "#" {
236
+ # task name can contain dashes
237
+ comm = pid = $1
238
+ sub(/-[0-9][0-9]*/, "", comm)
239
+ sub(/.*-/, "", pid)
240
+ }
241
+
242
+ $1 != "#" && $(4+o) ~ /sched_process_fork/ {
243
+ cpid=$0
244
+ sub(/.* child_pid=/, "", cpid)
245
+ sub(/ .*/, "", cpid)
246
+ getppid[cpid] = pid
247
+ delete seen[pid]
248
+ }
249
+
250
+ $1 != "#" && $(4+o) ~ kname {
251
+ if (seen[pid])
252
+ next
253
+ if (opt_name && comm !~ name)
254
+ next
255
+
256
+ #
257
+ # examples:
258
+ # ... arg1="/bin/echo" arg2="1" arg3="2" arg4="3" ...
259
+ # ... arg1="sleep" arg2="2" arg3=(fault) arg4="" ...
260
+ # ... arg1="" arg2=(fault) arg3="" arg4="" ...
261
+ # the last example is uncommon, and may be a race.
262
+ #
263
+ if ($0 ~ /arg1=""/) {
264
+ args = comm " [?]"
265
+ } else {
266
+ args=$0
267
+ sub(/ arg[0-9]*=\(fault\).*/, "", args)
268
+ sub(/.*arg1="/, "", args)
269
+ gsub(/" arg[0-9]*="/, " ", args)
270
+ sub(/"$/, "", args)
271
+ if ($0 !~ /\(fault\)/)
272
+ args = args " [...]"
273
+ }
274
+
275
+ if (opt_time) {
276
+ time = $(3+o); sub(":", "", time)
277
+ printf "%-16s ", time
278
+ }
279
+ printf "%6s %6d %s\n", pid, getppid[pid], args
280
+ if (!opt_duration)
281
+ fflush()
282
+ if (!opt_reexec) {
283
+ seen[pid] = 1
284
+ delete getppid[pid]
285
+ }
286
+ }
287
+
288
+ $0 ~ /LOST.*EVENT[S]/ { print "WARNING: " $0 > "/dev/stderr" }
289
+ '
290
+
291
+ ### end tracing
292
+ end