fluent-plugin-perf-tools 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rubocop.yml +26 -0
  4. data/.ruby-version +1 -0
  5. data/CHANGELOG.md +5 -0
  6. data/CODE_OF_CONDUCT.md +84 -0
  7. data/Gemfile +5 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +43 -0
  10. data/Rakefile +17 -0
  11. data/bin/console +15 -0
  12. data/bin/setup +8 -0
  13. data/fluent-plugin-perf-tools.gemspec +48 -0
  14. data/lib/fluent/plugin/in_perf_tools.rb +42 -0
  15. data/lib/fluent/plugin/perf_tools/cachestat.rb +65 -0
  16. data/lib/fluent/plugin/perf_tools/command.rb +30 -0
  17. data/lib/fluent/plugin/perf_tools/version.rb +9 -0
  18. data/lib/fluent/plugin/perf_tools.rb +11 -0
  19. data/perf-tools/LICENSE +339 -0
  20. data/perf-tools/README.md +205 -0
  21. data/perf-tools/bin/bitesize +1 -0
  22. data/perf-tools/bin/cachestat +1 -0
  23. data/perf-tools/bin/execsnoop +1 -0
  24. data/perf-tools/bin/funccount +1 -0
  25. data/perf-tools/bin/funcgraph +1 -0
  26. data/perf-tools/bin/funcslower +1 -0
  27. data/perf-tools/bin/functrace +1 -0
  28. data/perf-tools/bin/iolatency +1 -0
  29. data/perf-tools/bin/iosnoop +1 -0
  30. data/perf-tools/bin/killsnoop +1 -0
  31. data/perf-tools/bin/kprobe +1 -0
  32. data/perf-tools/bin/opensnoop +1 -0
  33. data/perf-tools/bin/perf-stat-hist +1 -0
  34. data/perf-tools/bin/reset-ftrace +1 -0
  35. data/perf-tools/bin/syscount +1 -0
  36. data/perf-tools/bin/tcpretrans +1 -0
  37. data/perf-tools/bin/tpoint +1 -0
  38. data/perf-tools/bin/uprobe +1 -0
  39. data/perf-tools/deprecated/README.md +1 -0
  40. data/perf-tools/deprecated/execsnoop-proc +150 -0
  41. data/perf-tools/deprecated/execsnoop-proc.8 +80 -0
  42. data/perf-tools/deprecated/execsnoop-proc_example.txt +46 -0
  43. data/perf-tools/disk/bitesize +175 -0
  44. data/perf-tools/examples/bitesize_example.txt +63 -0
  45. data/perf-tools/examples/cachestat_example.txt +58 -0
  46. data/perf-tools/examples/execsnoop_example.txt +153 -0
  47. data/perf-tools/examples/funccount_example.txt +126 -0
  48. data/perf-tools/examples/funcgraph_example.txt +2178 -0
  49. data/perf-tools/examples/funcslower_example.txt +110 -0
  50. data/perf-tools/examples/functrace_example.txt +341 -0
  51. data/perf-tools/examples/iolatency_example.txt +350 -0
  52. data/perf-tools/examples/iosnoop_example.txt +302 -0
  53. data/perf-tools/examples/killsnoop_example.txt +62 -0
  54. data/perf-tools/examples/kprobe_example.txt +379 -0
  55. data/perf-tools/examples/opensnoop_example.txt +47 -0
  56. data/perf-tools/examples/perf-stat-hist_example.txt +149 -0
  57. data/perf-tools/examples/reset-ftrace_example.txt +88 -0
  58. data/perf-tools/examples/syscount_example.txt +297 -0
  59. data/perf-tools/examples/tcpretrans_example.txt +93 -0
  60. data/perf-tools/examples/tpoint_example.txt +210 -0
  61. data/perf-tools/examples/uprobe_example.txt +321 -0
  62. data/perf-tools/execsnoop +292 -0
  63. data/perf-tools/fs/cachestat +167 -0
  64. data/perf-tools/images/perf-tools_2016.png +0 -0
  65. data/perf-tools/iolatency +296 -0
  66. data/perf-tools/iosnoop +296 -0
  67. data/perf-tools/kernel/funccount +146 -0
  68. data/perf-tools/kernel/funcgraph +259 -0
  69. data/perf-tools/kernel/funcslower +248 -0
  70. data/perf-tools/kernel/functrace +192 -0
  71. data/perf-tools/kernel/kprobe +270 -0
  72. data/perf-tools/killsnoop +263 -0
  73. data/perf-tools/man/man8/bitesize.8 +70 -0
  74. data/perf-tools/man/man8/cachestat.8 +111 -0
  75. data/perf-tools/man/man8/execsnoop.8 +104 -0
  76. data/perf-tools/man/man8/funccount.8 +76 -0
  77. data/perf-tools/man/man8/funcgraph.8 +166 -0
  78. data/perf-tools/man/man8/funcslower.8 +129 -0
  79. data/perf-tools/man/man8/functrace.8 +123 -0
  80. data/perf-tools/man/man8/iolatency.8 +116 -0
  81. data/perf-tools/man/man8/iosnoop.8 +169 -0
  82. data/perf-tools/man/man8/killsnoop.8 +100 -0
  83. data/perf-tools/man/man8/kprobe.8 +162 -0
  84. data/perf-tools/man/man8/opensnoop.8 +113 -0
  85. data/perf-tools/man/man8/perf-stat-hist.8 +111 -0
  86. data/perf-tools/man/man8/reset-ftrace.8 +49 -0
  87. data/perf-tools/man/man8/syscount.8 +96 -0
  88. data/perf-tools/man/man8/tcpretrans.8 +93 -0
  89. data/perf-tools/man/man8/tpoint.8 +140 -0
  90. data/perf-tools/man/man8/uprobe.8 +168 -0
  91. data/perf-tools/misc/perf-stat-hist +223 -0
  92. data/perf-tools/net/tcpretrans +311 -0
  93. data/perf-tools/opensnoop +280 -0
  94. data/perf-tools/syscount +192 -0
  95. data/perf-tools/system/tpoint +232 -0
  96. data/perf-tools/tools/reset-ftrace +123 -0
  97. data/perf-tools/user/uprobe +390 -0
  98. metadata +349 -0
@@ -0,0 +1,321 @@
1
+ Demonstrations of uprobe, the Linux ftrace version.
2
+
3
+ Trace the readline() function from all processes named "bash":
4
+
5
+ # ./uprobe p:bash:readline
6
+ Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
7
+ bash-11886 [003] d... 19601233.618462: readline: (0x48db60)
8
+ bash-11886 [003] d... 19601235.152067: readline: (0x48db60)
9
+ bash-11915 [003] d... 19601238.976244: readline: (0x48db60)
10
+ ^C
11
+ Ending tracing...
12
+
13
+ readline() is the bash shell's function for reading interactive input, and
14
+ a line is printed each time I entered commands in separate bash shells.
15
+ The line contains default ftrace columns: the process name, "-", and PID;
16
+ the CPU, flags, a timestamp (in units of seconds), the probe name, then
17
+ other arguments. These columns are documented in the kernel source, under
18
+ Documentation/trace/ftrace.txt.
19
+
20
+ The first line of output is informational, and shows what uprobe is really
21
+ doing: it turned "bash" into "/bin/bash", using a $PATH lookup (via which(1)).
22
+ It then turned the "readline" symbol into 0x8db60, using objdump(1) for
23
+ symbol lookups.
24
+
25
+ Note that this traces _all_ bash processes simultaneously.
26
+
27
+
28
+ Tracing PID 11886 only:
29
+
30
+ # ./uprobe -p 11886 p:bash:readline
31
+ Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
32
+ bash-11886 [002] d... 19601657.753893: readline: (0x48db60)
33
+ bash-11886 [002] d... 19601658.246613: readline: (0x48db60)
34
+ bash-11886 [002] d... 19601658.386666: readline: (0x48db60)
35
+ bash-11886 [002] d... 19601661.415952: readline: (0x48db60)
36
+ ^C
37
+ Ending tracing...
38
+
39
+ This may be important if you are tracing shared library functions, and only care
40
+ about one target process.
41
+
42
+
43
+ You can specify the full path to a binary to trace:
44
+
45
+ # ./uprobe p:/bin/bash:readline
46
+ Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
47
+ bash-11886 [002] d... 19601746.902461: readline: (0x48db60)
48
+ bash-11886 [002] d... 19601749.543485: readline: (0x48db60)
49
+ bash-11886 [001] d... 19601749.702369: readline: (0x48db60)
50
+ ^C
51
+ Ending tracing...
52
+
53
+ This might be useful if uprobe picked the wrong binary to trace, as shown by
54
+ the informational line, and you wanted to specify it directly. It is also useful
55
+ for tracing binaries not in the $PATH, which uprobe can't otherwise find.
56
+
57
+
58
+ Use -l to list symbols available to trace; eg, searching for functions
59
+ containing "readline" in bash:
60
+
61
+ # ./uprobe -l bash | grep readline
62
+ initialize_readline
63
+ pcomp_set_readline_variables
64
+ posix_readline_initialize
65
+ readline
66
+ readline_internal_char
67
+ readline_internal_setup
68
+ readline_internal_teardown
69
+
70
+
71
+ Tracing the return of readline() with return value as a string:
72
+
73
+ # ./uprobe 'r:bash:readline +0($retval):string'
74
+ Tracing uprobe readline (r:readline /bin/bash:0x8db60 +0($retval):string). Ctrl-C to end.
75
+ bash-11886 [003] d... 19601837.001935: readline: (0x41e876 <- 0x48db60) arg1="ls -l"
76
+ bash-11886 [002] d... 19601851.008409: readline: (0x41e876 <- 0x48db60) arg1="echo "hello world""
77
+ bash-11886 [002] d... 19601854.099730: readline: (0x41e876 <- 0x48db60) arg1="df -h"
78
+ bash-11886 [002] d... 19601858.805740: readline: (0x41e876 <- 0x48db60) arg1="cd .."
79
+ bash-11886 [003] d... 19601898.378753: readline: (0x41e876 <- 0x48db60) arg1="foo bar"
80
+ ^C
81
+ Ending tracing...
82
+
83
+ Now I can see the commands entered. Note that this traces what bash reads in,
84
+ even if the command eventually fails. Eg, the last command "foo bar" didn't
85
+ work (No command 'foo' found).
86
+
87
+ Note that this invocation now uses "r:" at the start of the probe description,
88
+ instead of "p:". r is for return probes, p for entry probes.
89
+
90
+
91
+ Tracing sleep() calls in all running libc shared libraries:
92
+
93
+ # ./uprobe p:libc:sleep
94
+ Tracing uprobe sleep (p:sleep /lib/x86_64-linux-gnu/libc-2.15.so:0xbf130). Ctrl-C to end.
95
+ svscan-2134 [000] d... 19602402.959904: sleep: (0x7f2dba562130)
96
+ cron-923 [000] d... 19602404.640507: sleep: (0x7f3e26d9e130)
97
+ cron-923 [002] d... 19602404.655232: sleep: (0x7f3e26d9e130)
98
+ cron-923 [002] d... 19602405.189271: sleep: (0x7f3e26d9e130)
99
+ svscan-2134 [000] d... 19602407.959947: sleep: (0x7f2dba562130)
100
+ [...]
101
+
102
+ This shows different programs calling sleep -- likely threads waiting for work.
103
+
104
+ I ran a "sleep 1" command in a bash shell, which wasn't seen above: probably
105
+ using a different sleep library call, which I'd need to trace separately.
106
+
107
+
108
+ Including headers (-H):
109
+
110
+ # ./uprobe -H p:libc:sleep
111
+ Tracing uprobe sleep (p:sleep /lib/x86_64-linux-gnu/libc-2.15.so:0xbf130). Ctrl-C to end.
112
+ # tracer: nop
113
+ #
114
+ # entries-in-buffer/entries-written: 0/0 #P:4
115
+ #
116
+ # _-----=> irqs-off
117
+ # / _----=> need-resched
118
+ # | / _---=> hardirq/softirq
119
+ # || / _--=> preempt-depth
120
+ # ||| / delay
121
+ # TASK-PID CPU# |||| TIMESTAMP FUNCTION
122
+ # | | | |||| | |
123
+ svscan-2134 [000] d... 19603052.976770: sleep: (0x7f2dba562130)
124
+ svscan-2134 [002] d... 19603057.976927: sleep: (0x7f2dba562130)
125
+ [...]
126
+
127
+ These are documented in Documentation/trace/ftrace.txt.
128
+
129
+
130
+ Tracing sleep() with its argument (seconds):
131
+
132
+ # ./uprobe 'p:libc:sleep %di'
133
+ Tracing uprobe sleep (p:sleep /lib/x86_64-linux-gnu/libc-2.15.so:0xbf130 %di). Ctrl-C to end.
134
+ svscan-2134 [002] d... 19602517.962925: sleep: (0x7f2dba562130) arg1=0x5
135
+ svscan-2134 [002] d... 19602522.963082: sleep: (0x7f2dba562130) arg1=0x5
136
+ cron-923 [002] d... 19602524.187733: sleep: (0x7f3e26d9e130) arg1=0x3c
137
+ svscan-2134 [002] d... 19602527.963267: sleep: (0x7f2dba562130) arg1=0x5
138
+ [...]
139
+
140
+ So svscan was sleeping for 5 seconds, and cron for 60 seconds (0x3c = 60).
141
+
142
+ The argument is specified by its register, %di. This is platform dependent: %di
143
+ may only be meaningful on x86. If you're on a different architecture (eg, ARM),
144
+ you will probably need to use something else.
145
+
146
+ If working with registers is not for you, then consider tracing this using
147
+ perf_events with debuginfo installed: in which case you can use the variable
148
+ names. Or consider a different tracer.
149
+
150
+
151
+ Here is an example of the optional filter expression, to only trace the return
152
+ of fopen() when it failed and returned NULL (0):
153
+
154
+ # ./uprobe 'r:libc:fopen file=$retval' 'file == 0'
155
+ Tracing uprobe fopen (r:fopen /lib/x86_64-linux-gnu/libc-2.15.so:0x6e540 file=$retval). Ctrl-C to end.
156
+ prog1-23982 [000] d... 19602894.346872: fopen: (0x40051e <- 0x7f637867f540) file=0x0
157
+ ^C
158
+ Ending tracing...
159
+
160
+ The argument $retval was given a vanity name "file", which was then tested in
161
+ the filter expression "file == 0".
162
+
163
+
164
+ Here's an example of tracing the MySQL server dispatch_command() function, along
165
+ with the query string (note: the %dx register is only valid for this
166
+ architecture and this software build):
167
+
168
+ # ./uprobe 'p:dispatch_command /opt/mysql/bin/mysqld:_Z16dispatch_command19enum_server_commandP3THDPcj +0(%dx):string'
169
+ Tracing uprobe dispatch_command (p:dispatch_command /opt/mysql/bin/mysqld:0x2dbd40 +0(%dx):string). Ctrl-C to end.
170
+ mysqld-2855 [001] d... 19956674.509085: dispatch_command: (0x6dbd40) arg1="show tables"
171
+ mysqld-2855 [001] d... 19956675.541155: dispatch_command: (0x6dbd40) arg1="SELECT * FROM numbers where number > 32000"
172
+ ^C
173
+ Ending tracing...
174
+
175
+ The function name, "_Z16dispatch_command19enum_server_commandP3THDPcj", is the
176
+ C++ mangled symbol.
177
+
178
+ I can name the query string argument "cmd" then test it in a filter; eg, to only
179
+ match queries that begin with "SELECT":
180
+
181
+ # ./uprobe 'p:dispatch_command /opt/mysql/bin/mysqld:_Z16dispatch_command19enum_server_commandP3THDPcj cmd=+0(%dx):string' 'cmd ~ "SELECT*"'
182
+ Tracing uprobe dispatch_command (p:dispatch_command /opt/mysql/bin/mysqld:0x2dbd40 cmd=+0(%dx):string). Ctrl-C to end.
183
+ mysqld-2855 [001] d... 19956754.619958: dispatch_command: (0x6dbd40) cmd="SELECT * FROM numbers where number > 32000"
184
+ mysqld-2855 [001] d... 19956755.060125: dispatch_command: (0x6dbd40) cmd="SELECT * FROM numbers where number > 32000"
185
+ ^C
186
+ Ending tracing...
187
+
188
+
189
+ Overhead is relative to the rate of events: a higher rate of traced events,
190
+ means uprobe costs higher overhead. If you are unsure of the rate of events,
191
+ you can capture a set number only, or trace for a limited duration only (covered
192
+ in the next example). To trace a set number only, you can pipe into head, eg:
193
+
194
+ # ./uprobe -p 11982 p:bash:sh_malloc | head -15
195
+ Tracing uprobe sh_malloc (p:sh_malloc /bin/bash:0xaafa0). Ctrl-C to end.
196
+ bash-11982 [001] d... 19643121.529484: sh_malloc: (0x4aafa0)
197
+ bash-11982 [001] d... 19643121.529493: sh_malloc: (0x4aafa0)
198
+ bash-11982 [001] d... 19643121.529506: sh_malloc: (0x4aafa0)
199
+ bash-11982 [001] d... 19643121.529510: sh_malloc: (0x4aafa0)
200
+ bash-11982 [001] d... 19643121.529519: sh_malloc: (0x4aafa0)
201
+ bash-11982 [001] d... 19643121.529521: sh_malloc: (0x4aafa0)
202
+ bash-11982 [001] d... 19643121.529523: sh_malloc: (0x4aafa0)
203
+ bash-11982 [001] d... 19643121.529525: sh_malloc: (0x4aafa0)
204
+ bash-11982 [001] d... 19643121.529531: sh_malloc: (0x4aafa0)
205
+ bash-11982 [001] d... 19643121.529533: sh_malloc: (0x4aafa0)
206
+ bash-11982 [001] d... 19643121.529536: sh_malloc: (0x4aafa0)
207
+ bash-11982 [001] d... 19643121.529541: sh_malloc: (0x4aafa0)
208
+ bash-11982 [001] d... 19643121.529546: sh_malloc: (0x4aafa0)
209
+ bash-11982 [001] d... 19643121.529549: sh_malloc: (0x4aafa0)
210
+
211
+ uprobe traps SIGPIPE, so that it properly exits and cleans up probes when used
212
+ in this fashion.
213
+
214
+ Note the timestamps: by examining how quickly they increase, you can have
215
+ some estimation for the rate of events. In this case, the 14 events shown all
216
+ happened within the same millisecond (the timestamp column is in units of
217
+ seconds), which suggests these are frequent events.
218
+
219
+
220
+ The -d option can be used to specify a duration for tracing, which also causes
221
+ uprobe to perform in-kernel buffering, which reduces the overhead of tracing:
222
+
223
+ # ./uprobe -d 5 p:libc:gettimeofday
224
+ Tracing uprobe gettimeofday for 5 seconds (buffered)...
225
+ sleep-12743 [001] d... 19642858.943440: gettimeofday: (0x7f400138ac10)
226
+ rotatelog-12744 [000] d... 19642858.955665: gettimeofday: (0x7f0ba34ebc10)
227
+ rotatelog-12745 [003] d... 19642858.956425: gettimeofday: (0x7f1e6db20c10)
228
+ rotatelog-12744 [000] d... 19642858.956924: gettimeofday: (0x7f0ba34ebc10)
229
+ rotatelog-12745 [003] d... 19642858.957608: gettimeofday: (0x7f1e6db20c10)
230
+ rotatelog-12744 [001] d... 19642858.958005: gettimeofday: (0x7fd8a1d64c10)
231
+ rotatelog-12744 [003] d... 19642858.959496: gettimeofday: (0x7f9531acdc10)
232
+ mkdir-12746 [002] d... 19642858.959542: gettimeofday: (0x7fd539474c10)
233
+ chown-12747 [001] d... 19642858.961455: gettimeofday: (0x7ff5646afc10)
234
+ rotatelog-12745 [000] d... 19642858.963065: gettimeofday: (0x7f406aca7c10)
235
+ rotatelog-12745 [001] d... 19642858.964280: gettimeofday: (0x7f6548debc10)
236
+ rotatelog-12749 [000] d... 19642859.977462: gettimeofday: (0x7fecaf7e1c10)
237
+ rotatelog-12750 [003] d... 19642859.977697: gettimeofday: (0x7f821eb3cc10)
238
+ rotatelog-12749 [000] d... 19642859.978707: gettimeofday: (0x7fecaf7e1c10)
239
+ [...]
240
+
241
+ You will not see live output during the -d mode, as it is being buffered
242
+ in-kernel.
243
+
244
+
245
+ Tracing func_c() in my test program, func_abc, and including user-level stacks:
246
+
247
+ # ./uprobe -s p:/root/func_abc:func_c
248
+ Tracing uprobe func_c (p:func_c /root/func_abc:0x4f4). Ctrl-C to end.
249
+ func_abc-25394 [000] d... 19603250.054040: func_c: (0x4004f4)
250
+ func_abc-25394 [000] d... 19603250.054056: <user stack trace>
251
+ => <00000000004004f4>
252
+ => <0000000000400527>
253
+ => <0000000000400537>
254
+ => <00007fca9f0e376d>
255
+ func_abc-25394 [000] d... 19603251.054250: func_c: (0x4004f4)
256
+ func_abc-25394 [000] d... 19603251.054266: <user stack trace>
257
+ => <00000000004004f4>
258
+ => <0000000000400527>
259
+ => <0000000000400537>
260
+ => <00007fca9f0e376d>
261
+ ^C
262
+ Ending tracing...
263
+
264
+ The output has the raw hex addresses. If this is too much of a nuisance, then
265
+ try tracing this using perf_events which should automate the translation.
266
+
267
+ It can get worse, eg:
268
+
269
+ # ./uprobe -s p:bash:readline
270
+ Tracing uprobe readline (p:readline /bin/bash:0x8db60). Ctrl-C to end.
271
+ bash-11886 [002] d... 19603434.397818: readline: (0x48db60)
272
+ bash-11886 [002] d... 19603434.397832: <user stack trace>
273
+ => <000000000048db60>
274
+ bash-11886 [002] d... 19603434.592500: readline: (0x48db60)
275
+ bash-11886 [002] d... 19603434.592510: <user stack trace>
276
+ => <000000000048db60>
277
+ ^C
278
+ Ending tracing...
279
+
280
+ Here the stack trace is missing (0x48db60 is the traced function, transposed
281
+ from the base load address). This is due to compiler optimizations. It can be
282
+ fixed by recompiling with -fno-omit-frame-pointer, or, using perf_events and
283
+ a different method of stack walking.
284
+
285
+
286
+ Use -h to print the USAGE message:
287
+
288
+ # ./uprobe -h
289
+ USAGE: uprobe [-FhHsv] [-d secs] [-p PID] [-L TID] {-l target |
290
+ uprobe_definition [filter]}
291
+ -F # force. trace despite warnings.
292
+ -d seconds # trace duration, and use buffers
293
+ -l target # list functions from this executable
294
+ -p PID # PID to match on events
295
+ -L TID # thread id to match on events
296
+ -v # view format file (don't trace)
297
+ -H # include column headers
298
+ -s # show user stack traces
299
+ -h # this usage message
300
+
301
+ Note that these examples may need modification to match your kernel
302
+ version's function names and platform's register usage.
303
+ eg,
304
+ # trace readline() calls in all running "bash" executables:
305
+ uprobe p:bash:readline
306
+ # trace readline() with explicit executable path:
307
+ uprobe p:/bin/bash:readline
308
+ # trace the return of readline() with return value as a string:
309
+ uprobe 'r:bash:readline +0($retval):string'
310
+ # trace sleep() calls in all running libc shared libraries:
311
+ uprobe p:libc:sleep
312
+ # trace sleep() with register %di (x86):
313
+ uprobe 'p:libc:sleep %di'
314
+ # trace this address (use caution: must be instruction aligned):
315
+ uprobe p:libc:0xbf130
316
+ # trace gettimeofday() for PID 1182 only:
317
+ uprobe -p 1182 p:libc:gettimeofday
318
+ # trace the return of fopen() only when it returns NULL:
319
+ uprobe 'r:libc:fopen file=$retval' 'file == 0'
320
+
321
+ See the man page and example file for more info.
@@ -0,0 +1,292 @@
1
+ #!/bin/bash
2
+ #
3
+ # execsnoop - trace process exec() with arguments.
4
+ # Written using Linux ftrace.
5
+ #
6
+ # This shows the execution of new processes, especially short-lived ones that
7
+ # can be missed by sampling tools such as top(1).
8
+ #
9
+ # USAGE: ./execsnoop [-hrt] [-a argc] [-d secs] [name]
10
+ #
11
+ # REQUIREMENTS: FTRACE and KPROBE CONFIG, sched:sched_process_fork tracepoint,
12
+ # and either the sys_execve, stub_execve or do_execve kernel function. You may
13
+ # already have these on recent kernels. And awk.
14
+ #
15
+ # This traces exec() from the fork()->exec() sequence, which means it won't
16
+ # catch new processes that only fork(). With the -r option, it will also catch
17
+ # processes that re-exec. It makes a best-effort attempt to retrieve the program
18
+ # arguments and PPID; if these are unavailable, "[?]" and 0 are printed
19
+ # respectively. There is also a limit to the number of arguments printed (by
20
+ # default, 8), which can be increased using -a.
21
+ #
22
+ # This implementation is designed to work on older kernel versions, and without
23
+ # kernel debuginfo. It works by dynamic tracing an execve kernel function to
24
+ # read the arguments from the %si register. The sys_execve function is tried
25
+ # first, then stub_execve and do_execve. The sched:sched_process_fork
26
+ # tracepoint is used to get the PPID. This program is a workaround that should be
27
+ # improved in the future when other kernel capabilities are made available. If
28
+ # you need a more reliable tool now, then consider other tracing alternatives
29
+ # (eg, SystemTap). This tool is really a proof of concept to see what ftrace can
30
+ # currently do.
31
+ #
32
+ # From perf-tools: https://github.com/brendangregg/perf-tools
33
+ #
34
+ # See the execsnoop(8) man page (in perf-tools) for more info.
35
+ #
36
+ # COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
37
+ #
38
+ # This program is free software; you can redistribute it and/or
39
+ # modify it under the terms of the GNU General Public License
40
+ # as published by the Free Software Foundation; either version 2
41
+ # of the License, or (at your option) any later version.
42
+ #
43
+ # This program is distributed in the hope that it will be useful,
44
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
45
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
46
+ # GNU General Public License for more details.
47
+ #
48
+ # You should have received a copy of the GNU General Public License
49
+ # along with this program; if not, write to the Free Software Foundation,
50
+ # Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
51
+ #
52
+ # (http://www.gnu.org/copyleft/gpl.html)
53
+ #
54
+ # 07-Jul-2014 Brendan Gregg Created this.
55
+
56
+ ### default variables: ftrace paths, lock-file state, option flags and their values
57
+ tracing=/sys/kernel/debug/tracing  # ftrace control directory (under debugfs)
58
+ flock=/var/tmp/.ftrace-lock; wroteflock=0  # lock file path; wroteflock becomes 1 once this script has written it
59
+ opt_duration=0; duration=; opt_name=0; name=; opt_time=0; opt_reexec=0
60
+ opt_argc=0; argc=8; max_argc=16; ftext=  # argc: max args to show; ftext: filter description used in the banner
61
+ trap ':' INT QUIT TERM PIPE HUP # no-op handler: these signals do not kill the script, so execution continues to the end tracing section
62
+
63
+ function usage {  # print the usage message to stderr, then exit
64
+ cat <<-END >&2
65
+ USAGE: execsnoop [-hrt] [-a argc] [-d secs] [name]
66
+ -d seconds # trace duration, and use buffers
67
+ -a argc # max args to show (default 8)
68
+ -r # include re-execs
69
+ -t # include time (seconds)
70
+ -h # this usage message
71
+ name # process name to match (REs allowed)
72
+ eg,
73
+ execsnoop # watch exec()s live (unbuffered)
74
+ execsnoop -d 1 # trace 1 sec (buffered)
75
+ execsnoop grep # trace process names containing grep
76
+ execsnoop 'udevd$' # process names ending in "udevd"
77
+
78
+ See the man page and example file for more info.
79
+ END
80
+ exit  # no explicit status: exits with the status of the preceding cat
81
+ }
82
+
83
+ function warn {  # run a command string via eval; on failure print a warning to stderr and carry on
84
+ if ! eval "$@"; then
85
+ echo >&2 "WARNING: command failed \"$@\""
86
+ fi
87
+ }
88
+
89
+ function end {
90
+ # disable tracing: switch off both events, remove the kprobe, clear the trace buffer and remove the lock file; each step only warns if it fails
91
+ echo 2>/dev/null
92
+ echo "Ending tracing..." 2>/dev/null
93
+ cd $tracing
94
+ warn "echo 0 > events/kprobes/$kname/enable"
95
+ warn "echo 0 > events/sched/sched_process_fork/enable"
96
+ warn "echo -:$kname >> kprobe_events"  # the "-:" prefix deletes the probe definition
97
+ warn "echo > trace"
98
+ (( wroteflock )) && warn "rm $flock"  # only remove the lock if this script created it
99
+ }
100
+
101
+ function die {  # print the given message to stderr and exit with status 1 (no cleanup)
102
+ echo >&2 "$@"
103
+ exit 1
104
+ }
105
+
106
+ function edie {
107
+ # die with a quiet end(): print the message, then silence stdout and stderr so the cleanup in end() emits nothing, and exit 1
108
+ echo >&2 "$@"
109
+ exec >/dev/null 2>&1
110
+ end
111
+ exit 1
112
+ }
113
+
114
+ ### process options: flags are parsed with getopts; one optional positional argument is the process name filter
115
+ while getopts a:d:hrt opt
116
+ do
117
+ case $opt in
118
+ a) opt_argc=1; argc=$OPTARG ;;
119
+ d) opt_duration=1; duration=$OPTARG ;;
120
+ r) opt_reexec=1 ;;
121
+ t) opt_time=1 ;;
122
+ h|?) usage ;;  # -h, or an option getopts rejected (opt is then "?")
123
+ esac
124
+ done
125
+ shift $(( $OPTIND - 1 ))
126
+ if (( $# )); then
127
+ opt_name=1
128
+ name=$1
129
+ shift
130
+ fi
131
+ (( $# )) && usage  # any further argument is a usage error
132
+
133
+ ### option logic: validate -a and build the banner text (this tool has no PID or filename filter, only the optional name)
134
+ # -a must be plain decimal digits: bash arithmetic would otherwise read the text as a variable name, or a leading zero as octal
135
+ [[ $argc =~ ^[0-9]+$ ]] || die "ERROR: -a argc must be a non-negative integer."
136
+ argc=$(( 10#$argc ))
137
+ (( opt_name )) && ftext=" issued by process name \"$name\""
138
+ (( opt_argc && argc > max_argc )) && die "ERROR: max -a argc is $max_argc."
139
+ if (( opt_duration )); then
140
+ echo "Tracing exec()s$ftext for $duration seconds (buffered)..."
141
+ else
142
+ echo "Tracing exec()s$ftext. Ctrl-C to end."
143
+ fi
144
+
145
+ ### select awk: buffered (-d) mode prefers mawk; live mode prefers gawk, then mawk in interactive mode, then plain awk
146
+ if (( opt_duration )); then
147
+ [[ -x /usr/bin/mawk ]] && awk=mawk || awk=awk
148
+ else
149
+ # workarounds for mawk/gawk fflush behavior, so that live output is not held back
150
+ if [[ -x /usr/bin/gawk ]]; then
151
+ awk=gawk
152
+ elif [[ -x /usr/bin/mawk ]]; then
153
+ awk="mawk -W interactive"
154
+ else
155
+ awk=awk
156
+ fi
157
+ fi
158
+
159
+ ### check permissions: enter the tracing directory; all later relative paths are resolved inside it
160
+ cd $tracing || die "ERROR: accessing tracing. Root user? Kernel has FTRACE?
161
+ debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"
162
+
163
+ ### ftrace lock: refuse to run if the lock file exists, otherwise record our PID in it (check-then-create, not atomic)
164
+ [[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat $flock) $flock"
165
+ echo $$ > $flock || die "ERROR: unable to write $flock."
166
+ wroteflock=1  # tells end() that the lock file is ours to remove
167
+
168
+ ### build probe: one string fetch per argument slot, read via %si and spaced by the word size
169
+ if [[ -x /usr/bin/getconf ]]; then
170
+ bits=$(getconf LONG_BIT)
171
+ else
172
+ bits=64
173
+ [[ $(uname -m) == i* ]] && bits=32
174
+ fi
175
+ (( offset = bits / 8 ))  # bytes per slot (LONG_BIT / 8)
176
+ function makeprobe {  # set func, kname and kprobe (the kprobe_events definition) for the kernel function named by $1
177
+ func=$1
178
+ kname=execsnoop_$func
179
+ kprobe="p:$kname $func"
180
+ i=0
181
+ while (( i < argc + 1 )); do  # argc + 1 fetches: one slot more than the number of args to show
182
+ # p:kname do_execve +0(+0(%si)):string +0(+8(%si)):string ...
183
+ kprobe="$kprobe +0(+$(( i * offset ))(%si)):string"
184
+ (( i++ ))
185
+ done
186
+ }
187
+ # try in this order: sys_execve, stub_execve, do_execve (the fallbacks happen when adding the probe fails)
188
+ makeprobe sys_execve
189
+
190
+ ### setup and begin tracing: register the kprobe (falling back to stub_execve, then do_execve), then enable it and the fork tracepoint
191
+ echo nop > current_tracer
192
+ if ! echo $kprobe >> kprobe_events 2>/dev/null; then
193
+ makeprobe stub_execve
194
+ if ! echo $kprobe >> kprobe_events 2>/dev/null; then
195
+ makeprobe do_execve
196
+ if ! echo $kprobe >> kprobe_events 2>/dev/null; then
197
+ edie "ERROR: adding a kprobe for execve. Exiting."
198
+ fi
199
+ fi
200
+ fi
201
+ if ! echo 1 > events/kprobes/$kname/enable; then
202
+ edie "ERROR: enabling kprobe for execve. Exiting."
203
+ fi
204
+ if ! echo 1 > events/sched/sched_process_fork/enable; then
205
+ edie "ERROR: enabling sched:sched_process_fork tracepoint. Exiting."
206
+ fi
207
+ echo "Instrumenting $func"
208
+ (( opt_time )) && printf "%-16s " "TIMEs"
209
+ printf "%6s %6s %s\n" "PID" "PPID" "ARGS"
210
+
211
+ #
212
+ # Determine output format. It may be one of the following (newest first):
213
+ # TASK-PID CPU# |||| TIMESTAMP FUNCTION
214
+ # TASK-PID CPU# TIMESTAMP FUNCTION
215
+ # To differentiate between them, the header fields are counted: six fields
216
+ # means the extra flags column is present, so the offset is set to 1 to skip it.
217
+ #
218
+ offset=$($awk 'BEGIN { o = 0; }
219
+ $1 == "#" && $2 ~ /TASK/ && NF == 6 { o = 1; }
220
+ $2 ~ /TASK/ { print o; exit }' trace)
221
+
222
+ ### print trace buffer: clear it, then feed either a delayed dump or the live pipe through awk, which pairs fork events (for PPID) with execve kprobe events (for args)
223
+ warn "echo > trace"
224
+ ( if (( opt_duration )); then
225
+ # wait then dump buffer
226
+ sleep $duration
227
+ cat -v trace
228
+ else
229
+ # print buffer live
230
+ cat -v trace_pipe
231
+ fi ) | $awk -v o="$offset" -v opt_name=$opt_name -v name="$name" \
232
+ -v opt_duration=$opt_duration -v opt_time=$opt_time -v kname=$kname \
233
+ -v opt_reexec=$opt_reexec '
234
+ # common fields: split the first column (TASK-PID) into comm and pid
235
+ $1 != "#" {
236
+ # task name can contain dashes and digits, so strip only the trailing -PID
237
+ comm = pid = $1
238
+ sub(/-[0-9][0-9]*$/, "", comm)
239
+ sub(/.*-/, "", pid)
240
+ }
241
+
242
+ $1 != "#" && $(4+o) ~ /sched_process_fork/ {
243
+ cpid=$0
244
+ sub(/.* child_pid=/, "", cpid)
245
+ sub(/ .*/, "", cpid)
246
+ getppid[cpid] = pid
247
+ delete seen[cpid]  # the child may reuse a PID that was already reported
248
+ }
249
+
250
+ $1 != "#" && $(4+o) ~ kname {
251
+ if (seen[pid])
252
+ next
253
+ if (opt_name && comm !~ name)
254
+ next
255
+
256
+ #
257
+ # examples:
258
+ # ... arg1="/bin/echo" arg2="1" arg3="2" arg4="3" ...
259
+ # ... arg1="sleep" arg2="2" arg3=(fault) arg4="" ...
260
+ # ... arg1="" arg2=(fault) arg3="" arg4="" ...
261
+ # the last example is uncommon, and may be a race.
262
+ #
263
+ if ($0 ~ /arg1=""/) {
264
+ args = comm " [?]"
265
+ } else {
266
+ args=$0
267
+ sub(/ arg[0-9]*=\(fault\).*/, "", args)
268
+ sub(/.*arg1="/, "", args)
269
+ gsub(/" arg[0-9]*="/, " ", args)
270
+ sub(/"$/, "", args)
271
+ if ($0 !~ /\(fault\)/)  # no fault seen: every fetched slot held a string, so the list may be truncated
272
+ args = args " [...]"
273
+ }
274
+
275
+ if (opt_time) {
276
+ time = $(3+o); sub(":", "", time)
277
+ printf "%-16s ", time
278
+ }
279
+ printf "%6s %6d %s\n", pid, getppid[pid], args  # an unknown PPID prints as 0
280
+ if (!opt_duration)
281
+ fflush()
282
+ if (!opt_reexec) {
283
+ seen[pid] = 1
284
+ delete getppid[pid]
285
+ }
286
+ }
287
+
288
+ $0 ~ /LOST.*EVENT[S]/ { print "WARNING: " $0 > "/dev/stderr" }
289
+ '
290
+
291
+ ### end tracing
292
+ end