fluent-plugin-perf-tools 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rubocop.yml +26 -0
  4. data/.ruby-version +1 -0
  5. data/CHANGELOG.md +5 -0
  6. data/CODE_OF_CONDUCT.md +84 -0
  7. data/Gemfile +5 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +43 -0
  10. data/Rakefile +17 -0
  11. data/bin/console +15 -0
  12. data/bin/setup +8 -0
  13. data/fluent-plugin-perf-tools.gemspec +48 -0
  14. data/lib/fluent/plugin/in_perf_tools.rb +42 -0
  15. data/lib/fluent/plugin/perf_tools/cachestat.rb +65 -0
  16. data/lib/fluent/plugin/perf_tools/command.rb +30 -0
  17. data/lib/fluent/plugin/perf_tools/version.rb +9 -0
  18. data/lib/fluent/plugin/perf_tools.rb +11 -0
  19. data/perf-tools/LICENSE +339 -0
  20. data/perf-tools/README.md +205 -0
  21. data/perf-tools/bin/bitesize +1 -0
  22. data/perf-tools/bin/cachestat +1 -0
  23. data/perf-tools/bin/execsnoop +1 -0
  24. data/perf-tools/bin/funccount +1 -0
  25. data/perf-tools/bin/funcgraph +1 -0
  26. data/perf-tools/bin/funcslower +1 -0
  27. data/perf-tools/bin/functrace +1 -0
  28. data/perf-tools/bin/iolatency +1 -0
  29. data/perf-tools/bin/iosnoop +1 -0
  30. data/perf-tools/bin/killsnoop +1 -0
  31. data/perf-tools/bin/kprobe +1 -0
  32. data/perf-tools/bin/opensnoop +1 -0
  33. data/perf-tools/bin/perf-stat-hist +1 -0
  34. data/perf-tools/bin/reset-ftrace +1 -0
  35. data/perf-tools/bin/syscount +1 -0
  36. data/perf-tools/bin/tcpretrans +1 -0
  37. data/perf-tools/bin/tpoint +1 -0
  38. data/perf-tools/bin/uprobe +1 -0
  39. data/perf-tools/deprecated/README.md +1 -0
  40. data/perf-tools/deprecated/execsnoop-proc +150 -0
  41. data/perf-tools/deprecated/execsnoop-proc.8 +80 -0
  42. data/perf-tools/deprecated/execsnoop-proc_example.txt +46 -0
  43. data/perf-tools/disk/bitesize +175 -0
  44. data/perf-tools/examples/bitesize_example.txt +63 -0
  45. data/perf-tools/examples/cachestat_example.txt +58 -0
  46. data/perf-tools/examples/execsnoop_example.txt +153 -0
  47. data/perf-tools/examples/funccount_example.txt +126 -0
  48. data/perf-tools/examples/funcgraph_example.txt +2178 -0
  49. data/perf-tools/examples/funcslower_example.txt +110 -0
  50. data/perf-tools/examples/functrace_example.txt +341 -0
  51. data/perf-tools/examples/iolatency_example.txt +350 -0
  52. data/perf-tools/examples/iosnoop_example.txt +302 -0
  53. data/perf-tools/examples/killsnoop_example.txt +62 -0
  54. data/perf-tools/examples/kprobe_example.txt +379 -0
  55. data/perf-tools/examples/opensnoop_example.txt +47 -0
  56. data/perf-tools/examples/perf-stat-hist_example.txt +149 -0
  57. data/perf-tools/examples/reset-ftrace_example.txt +88 -0
  58. data/perf-tools/examples/syscount_example.txt +297 -0
  59. data/perf-tools/examples/tcpretrans_example.txt +93 -0
  60. data/perf-tools/examples/tpoint_example.txt +210 -0
  61. data/perf-tools/examples/uprobe_example.txt +321 -0
  62. data/perf-tools/execsnoop +292 -0
  63. data/perf-tools/fs/cachestat +167 -0
  64. data/perf-tools/images/perf-tools_2016.png +0 -0
  65. data/perf-tools/iolatency +296 -0
  66. data/perf-tools/iosnoop +296 -0
  67. data/perf-tools/kernel/funccount +146 -0
  68. data/perf-tools/kernel/funcgraph +259 -0
  69. data/perf-tools/kernel/funcslower +248 -0
  70. data/perf-tools/kernel/functrace +192 -0
  71. data/perf-tools/kernel/kprobe +270 -0
  72. data/perf-tools/killsnoop +263 -0
  73. data/perf-tools/man/man8/bitesize.8 +70 -0
  74. data/perf-tools/man/man8/cachestat.8 +111 -0
  75. data/perf-tools/man/man8/execsnoop.8 +104 -0
  76. data/perf-tools/man/man8/funccount.8 +76 -0
  77. data/perf-tools/man/man8/funcgraph.8 +166 -0
  78. data/perf-tools/man/man8/funcslower.8 +129 -0
  79. data/perf-tools/man/man8/functrace.8 +123 -0
  80. data/perf-tools/man/man8/iolatency.8 +116 -0
  81. data/perf-tools/man/man8/iosnoop.8 +169 -0
  82. data/perf-tools/man/man8/killsnoop.8 +100 -0
  83. data/perf-tools/man/man8/kprobe.8 +162 -0
  84. data/perf-tools/man/man8/opensnoop.8 +113 -0
  85. data/perf-tools/man/man8/perf-stat-hist.8 +111 -0
  86. data/perf-tools/man/man8/reset-ftrace.8 +49 -0
  87. data/perf-tools/man/man8/syscount.8 +96 -0
  88. data/perf-tools/man/man8/tcpretrans.8 +93 -0
  89. data/perf-tools/man/man8/tpoint.8 +140 -0
  90. data/perf-tools/man/man8/uprobe.8 +168 -0
  91. data/perf-tools/misc/perf-stat-hist +223 -0
  92. data/perf-tools/net/tcpretrans +311 -0
  93. data/perf-tools/opensnoop +280 -0
  94. data/perf-tools/syscount +192 -0
  95. data/perf-tools/system/tpoint +232 -0
  96. data/perf-tools/tools/reset-ftrace +123 -0
  97. data/perf-tools/user/uprobe +390 -0
  98. metadata +349 -0
@@ -0,0 +1,88 @@
1
+ Demonstrations of reset-ftrace, the Linux ftrace tool.
2
+
3
+
4
+ You will probably never need this tool. If you kill -9 an ftrace-based tool,
5
+ leaving the kernel in a tracing enabled state, you could try using this tool
6
+ to reset ftrace and disable tracing. Make sure no other ftrace sessions are
7
+ in use on your system, or it will kill those.
8
+
9
+ Here's an example:
10
+
11
+ # ./opensnoop
12
+ Tracing open()s. Ctrl-C to end.
13
+ ERROR: ftrace may be in use by PID 2197 /var/tmp/.ftrace-lock
14
+
15
+ I tried to run opensnoop, but there's a lock file for PID 2197. Checking if it
16
+ exists:
17
+
18
+ # ps -fp 2197
19
+ UID PID PPID C STIME TTY TIME CMD
20
+ #
21
+
22
+ No.
23
+
24
+ I also know that no one is using ftrace on this system. So I'll use reset-ftrace
25
+ to clean up this lock file and ftrace state:
26
+
27
+ # ./reset-ftrace
28
+ ERROR: ftrace lock (/var/tmp/.ftrace-lock) exists. It shows ftrace may be in use by PID 2197.
29
+ Double check to see if that PID is still active. If not, consider using -f to force a reset. Exiting.
30
+
31
+ ... except it's complaining about the lock file too. I'm already sure that this
32
+ PID doesn't exist, so I'll add the -f option:
33
+
34
+ # ./reset-ftrace -f
35
+ Reseting ftrace state...
36
+
37
+ current_tracer, before:
38
+ 1 nop
39
+ current_tracer, after:
40
+ 1 nop
41
+
42
+ set_ftrace_filter, before:
43
+ 1 #### all functions enabled ####
44
+ set_ftrace_filter, after:
45
+ 1 #### all functions enabled ####
46
+
47
+ set_ftrace_pid, before:
48
+ 1 no pid
49
+ set_ftrace_pid, after:
50
+ 1 no pid
51
+
52
+ kprobe_events, before:
53
+ kprobe_events, after:
54
+
55
+ Done.
56
+
57
+ The output shows what has been reset, including the before and after state of
58
+ these files.
59
+
60
+ Now I can try tracing again, this time with iosnoop:
61
+
62
+ # ./iosnoop
63
+ Tracing block I/O. Ctrl-C to end.
64
+ COMM PID TYPE DEV BLOCK BYTES LATms
65
+ supervise 1689 W 202,1 17039664 4096 0.58
66
+ supervise 1689 W 202,1 17039672 4096 0.47
67
+ supervise 1694 W 202,1 17039744 4096 0.98
68
+ supervise 1694 W 202,1 17039752 4096 0.74
69
+ supervise 1684 W 202,1 17039760 4096 0.63
70
+ [...]
71
+
72
+ Fixed.
73
+
74
+ Note that reset-ftrace currently only resets a few methods of enabling
75
+ tracing, such as set_ftrace_filter and kprobe_events. Static tracepoints could
76
+ be enabled individually, and this script currently doesn't find and disable
77
+ those.
78
+
79
+
80
+ Use -h to print the USAGE message:
81
+
82
+ # ./reset-ftrace -h
83
+ USAGE: reset-ftrace [-fhq]
84
+ -f # force: delete ftrace lock file
85
+ -q # quiet: reset, but say nothing
86
+ -h # this usage message
87
+ eg,
88
+ reset-ftrace # disable active ftrace session
@@ -0,0 +1,297 @@
1
+ Demonstrations of syscount, the Linux perf_events version.
2
+
3
+
4
+ The first mode I use is "-c", where it behaves like "strace -c", but for the
5
+ entire system (all processes) and with much lower overhead:
6
+
7
+ # ./syscount -c
8
+ Tracing... Ctrl-C to end.
9
+ ^Csleep: Interrupt
10
+ SYSCALL COUNT
11
+ accept 1
12
+ getsockopt 1
13
+ setsid 1
14
+ chdir 2
15
+ getcwd 2
16
+ getpeername 2
17
+ getsockname 2
18
+ setgid 2
19
+ setgroups 2
20
+ setpgid 2
21
+ setuid 2
22
+ getpgrp 4
23
+ getpid 4
24
+ rename 4
25
+ setitimer 4
26
+ setrlimit 4
27
+ setsockopt 4
28
+ statfs 4
29
+ set_tid_address 5
30
+ readlink 6
31
+ set_robust_list 6
32
+ nanosleep 7
33
+ newuname 7
34
+ faccessat 8
35
+ futex 10
36
+ clock_gettime 16
37
+ newlstat 20
38
+ pipe 20
39
+ epoll_wait 24
40
+ getrlimit 25
41
+ socket 27
42
+ connect 29
43
+ exit_group 30
44
+ getppid 31
45
+ dup2 34
46
+ wait4 51
47
+ fcntl 58
48
+ getegid 72
49
+ getgid 72
50
+ getuid 72
51
+ geteuid 75
52
+ perf_event_open 100
53
+ munmap 121
54
+ gettimeofday 216
55
+ access 266
56
+ ioctl 340
57
+ poll 348
58
+ sendto 374
59
+ mprotect 414
60
+ brk 597
61
+ rt_sigaction 632
62
+ recvfrom 664
63
+ lseek 749
64
+ newfstatat 2922
65
+ openat 2925
66
+ newfstat 3229
67
+ newstat 4334
68
+ open 4534
69
+ fchdir 5845
70
+ getdents 5854
71
+ read 7673
72
+ close 7728
73
+ select 9633
74
+ rt_sigprocmask 19886
75
+ write 34581
76
+
77
+ While tracing, the write() syscall was executed 34,581 times.
78
+
79
+ This mode uses "perf stat" to count the syscalls:* tracepoints in-kernel.
80
+
81
+
82
+ You can add a duration (-d) and limit the number shown (-t):
83
+
84
+ # ./syscount -cd 5 -t 10
85
+ Tracing for 5 seconds. Top 10 only...
86
+ SYSCALL COUNT
87
+ gettimeofday 1009
88
+ write 3583
89
+ read 8174
90
+ openat 21550
91
+ newfstat 21558
92
+ open 21824
93
+ fchdir 43098
94
+ getdents 43106
95
+ close 43694
96
+ newfstatat 110936
97
+
98
+ While tracing for 5 seconds, the newfstatat() syscall was executed 110,936
99
+ times.
100
+
101
+
102
+ Without the -c, syscount shows syscalls by process name:
103
+
104
+ # ./syscount -d 5 -t 10
105
+ Tracing for 5 seconds. Top 10 only...
106
+ [ perf record: Woken up 66 times to write data ]
107
+ [ perf record: Captured and wrote 16.513 MB perf.data (~721455 samples) ]
108
+ COMM COUNT
109
+ stat 450
110
+ perl 537
111
+ catalina.sh 1700
112
+ postgres 2094
113
+ run 2362
114
+ :6946 4764
115
+ ps 5961
116
+ sshd 45796
117
+ find 61039
118
+
119
+ So processes named "find" called 61,039 syscalls during the 5 seconds of
120
+ tracing.
121
+
122
+ Note that this mode writes a perf.data file. This is higher overhead for a
123
+ few reasons:
124
+
125
+ - all data is passed from kernel to user space, which eats CPU for the memory
126
+ copy. Note that it is buffered in an efficient way by perf_events, which
127
+ wakes up and context switches only a small number of times: 66 in this case,
128
+ to hand 16 Mbytes of trace data to user space.
129
+ - data is post-processed in user space, eating more CPU.
130
+ - data is stored on the file system in the perf.data file, consuming available
131
+ storage.
132
+
133
+ This will be improved in future kernels, but it is difficult to improve this
134
+ much further in existing kernels. For example, using a pipe to "perf script"
135
+ instead of writing perf.data can have issues with feedback loops, where
136
+ perf traces itself. This syscount version goes to lengths to avoid tracing
137
+ its own perf, but
138
+ right now with existing functionality in older kernels. The trip via perf.data
139
+ is necessary
140
+
141
+
142
+ Running without options shows syscalls by process name until Ctrl-C:
143
+
144
+ # ./syscount
145
+ Tracing... Ctrl-C to end.
146
+ ^C[ perf record: Woken up 39 times to write data ]
147
+ [ perf record: Captured and wrote 9.644 MB perf.data (~421335 samples) ]
148
+ COMM COUNT
149
+ apache2 8
150
+ apacheLogParser 13
151
+ platformservice 16
152
+ snmpd 16
153
+ ntpd 21
154
+ multilog 66
155
+ supervise 84
156
+ dirname 102
157
+ echo 102
158
+ svstat 108
159
+ cut 111
160
+ bash 113
161
+ grep 132
162
+ xargs 132
163
+ redis-server 190
164
+ sed 192
165
+ setuidgid 294
166
+ stat 450
167
+ perl 537
168
+ catalina.sh 1275
169
+ postgres 1736
170
+ run 2352
171
+ :7396 4527
172
+ ps 5925
173
+ sshd 20154
174
+ find 28700
175
+
176
+ Note again it is writing a perf.data file to do this.
177
+
178
+
179
+ The -v option adds process IDs:
180
+
181
+ # ./syscount -v
182
+ Tracing... Ctrl-C to end.
183
+ ^C[ perf record: Woken up 48 times to write data ]
184
+ [ perf record: Captured and wrote 12.114 MB perf.data (~529276 samples) ]
185
+ PID COMM COUNT
186
+ 3599 apacheLogParser 3
187
+ 7977 xargs 3
188
+ 7982 supervise 3
189
+ 7993 xargs 3
190
+ 3575 apache2 4
191
+ 1311 ntpd 6
192
+ 3135 postgres 6
193
+ 3600 apacheLogParser 6
194
+ 3210 platformservice 8
195
+ 6503 sshd 9
196
+ 7978 :7978 9
197
+ 7994 run 9
198
+ 7968 :7968 11
199
+ 7984 run 11
200
+ 1451 snmpd 16
201
+ 3040 svscan 17
202
+ 3066 postgres 17
203
+ 3133 postgres 24
204
+ 3134 postgres 24
205
+ 3136 postgres 24
206
+ 3061 multilog 29
207
+ 3055 supervise 30
208
+ 7979 bash 31
209
+ 7977 echo 34
210
+ 7981 dirname 34
211
+ 7993 echo 34
212
+ 7968 svstat 36
213
+ 7984 svstat 36
214
+ 7975 cut 37
215
+ 7991 cut 37
216
+ 9857 bash 37
217
+ 7967 :7967 40
218
+ 7983 run 40
219
+ 7972 :7972 41
220
+ 7976 xargs 41
221
+ 7988 run 41
222
+ 7992 xargs 41
223
+ 7969 :7969 42
224
+ 7976 :7976 42
225
+ 7985 run 42
226
+ 7992 run 42
227
+ 7973 :7973 43
228
+ 7974 :7974 43
229
+ 7989 run 43
230
+ 7990 run 43
231
+ 7973 grep 44
232
+ 7989 grep 44
233
+ 7975 :7975 45
234
+ 7991 run 45
235
+ 7970 :7970 51
236
+ 7986 run 51
237
+ 7981 catalina.sh 52
238
+ 7974 sed 64
239
+ 7990 sed 64
240
+ 3455 postgres 66
241
+ 7971 :7971 66
242
+ 7987 run 66
243
+ 7966 :7966 96
244
+ 7966 setuidgid 98
245
+ 3064 redis-server 110
246
+ 7970 stat 150
247
+ 7986 stat 150
248
+ 7969 perl 179
249
+ 7985 perl 179
250
+ 7982 run 341
251
+ 7966 catalina.sh 373
252
+ 7980 postgres 432
253
+ 7972 ps 1971
254
+ 7988 ps 1983
255
+ 9832 sshd 37511
256
+ 7979 find 51040
257
+
258
+ Once you've found a process ID of interest, you can use "-c" and "-p PID" to
259
+ show syscall names. This also switches to "perf stat" mode for in-kernel
260
+ counts, and lower overhead:
261
+
262
+ # ./syscount -cp 7979
263
+ Tracing PID 7979... Ctrl-C to end.
264
+ ^CSYSCALL COUNT
265
+ brk 10
266
+ newfstat 2171
267
+ open 2171
268
+ newfstatat 2175
269
+ openat 2175
270
+ close 4346
271
+ fchdir 4346
272
+ getdents 4351
273
+ write 25482
274
+
275
+ So the most frequent syscall by PID 7979 was write().
276
+
277
+
278
+ Use -h to print the USAGE message:
279
+
280
+ # ./syscount -h
281
+ USAGE: syscount [-chv] [-t top] {-p PID|-d seconds|command}
282
+ syscount # count by process name
283
+ -c # show counts by syscall name
284
+ -h # this usage message
285
+ -v # verbose: shows PID
286
+ -p PID # trace this PID only
287
+ -d seconds # duration of trace
288
+ -t num # show top number only
289
+ command # run and trace this command
290
+ eg,
291
+ syscount # syscalls by process name
292
+ syscount -c # syscalls by syscall name
293
+ syscount -d 5 # trace for 5 seconds
294
+ syscount -cp 923 # syscall names for PID 923
295
+ syscount -c ls # syscall names for "ls"
296
+
297
+ See the man page and example file for more info.
@@ -0,0 +1,93 @@
1
+ Demonstrations of tcpretrans, the Linux ftrace version.
2
+
3
+
4
+ Tracing TCP retransmits on a busy server:
5
+
6
+ # ./tcpretrans
7
+ TIME PID LADDR:LPORT -- RADDR:RPORT STATE
8
+ 05:16:44 3375 10.150.18.225:53874 R> 10.105.152.3:6001 ESTABLISHED
9
+ 05:16:44 3375 10.150.18.225:53874 R> 10.105.152.3:6001 ESTABLISHED
10
+ 05:16:54 4028 10.150.18.225:6002 R> 10.150.30.249:1710 ESTABLISHED
11
+ 05:16:54 4028 10.150.18.225:6002 R> 10.150.30.249:1710 ESTABLISHED
12
+ 05:16:54 4028 10.150.18.225:6002 R> 10.150.30.249:1710 ESTABLISHED
13
+ 05:16:54 4028 10.150.18.225:6002 R> 10.150.30.249:1710 ESTABLISHED
14
+ 05:16:54 4028 10.150.18.225:6002 R> 10.150.30.249:1710 ESTABLISHED
15
+ 05:16:54 4028 10.150.18.225:6002 R> 10.150.30.249:1710 ESTABLISHED
16
+ 05:16:54 4028 10.150.18.225:6002 R> 10.150.30.249:1710 ESTABLISHED
17
+ 05:16:55 0 10.150.18.225:47115 R> 10.71.171.158:6001 ESTABLISHED
18
+ 05:16:58 0 10.150.18.225:44388 R> 10.103.130.120:6001 ESTABLISHED
19
+ 05:16:58 0 10.150.18.225:44388 R> 10.103.130.120:6001 ESTABLISHED
20
+ 05:16:58 0 10.150.18.225:44388 R> 10.103.130.120:6001 ESTABLISHED
21
+ 05:16:59 0 10.150.18.225:56086 R> 10.150.32.107:6001 ESTABLISHED
22
+ 05:16:59 0 10.150.18.225:56086 R> 10.150.32.107:6001 ESTABLISHED
23
+ ^C
24
+ Ending tracing...
25
+
26
+ This shows TCP retransmits by dynamically tracing the kernel function that does
27
+ the retransmit. This is a low overhead approach.
28
+
29
+ The PID may or may not make sense: it's showing the PID that was on-CPU,
30
+ however, retransmits are often timer-based, where it's the kernel that is
31
+ on-CPU.
32
+
33
+ The STATE column shows the TCP state for the socket performing the retransmit.
34
+ The "--" column is the packet type. "R>" for retransmit.
35
+
36
+
37
+ Kernel stack traces can be included with -s, which may show the type of
38
+ retransmit:
39
+
40
+ # ./tcpretrans -s
41
+ TIME PID LADDR:LPORT -- RADDR:RPORT STATE
42
+ 06:21:10 19516 10.144.107.151:22 R> 10.13.106.251:32167 ESTABLISHED
43
+ => tcp_fastretrans_alert
44
+ => tcp_ack
45
+ => tcp_rcv_established
46
+ => tcp_v4_do_rcv
47
+ => tcp_v4_rcv
48
+ => ip_local_deliver_finish
49
+ => ip_local_deliver
50
+ => ip_rcv_finish
51
+ => ip_rcv
52
+ => __netif_receive_skb
53
+ => netif_receive_skb
54
+ => handle_incoming_queue
55
+ => xennet_poll
56
+ => net_rx_action
57
+ => __do_softirq
58
+ => call_softirq
59
+ => do_softirq
60
+ => irq_exit
61
+ => xen_evtchn_do_upcall
62
+ => xen_do_hypervisor_callback
63
+
64
+ This looks like a fast retransmit (inclusion of tcp_fastretrans_alert(), and
65
+ being based on receiving an ACK, rather than a timer).
66
+
67
+
68
+ The -l option will include TCP tail loss probe events (TLP; see
69
+ http://lwn.net/Articles/542642/). Eg:
70
+
71
+ # ./tcpretrans -l
72
+ TIME PID LADDR:LPORT -- RADDR:RPORT STATE
73
+ 21:56:06 0 10.100.155.200:22 R> 10.10.237.72:18554 LAST_ACK
74
+ 21:56:08 0 10.100.155.200:22 R> 10.10.237.72:18554 LAST_ACK
75
+ 21:56:10 16452 10.100.155.200:22 R> 10.10.237.72:18554 LAST_ACK
76
+ 21:56:10 0 10.100.155.200:22 L> 10.10.237.72:46408 LAST_ACK
77
+ 21:56:10 0 10.100.155.200:22 R> 10.10.237.72:46408 LAST_ACK
78
+ 21:56:12 0 10.100.155.200:22 R> 10.10.237.72:46408 LAST_ACK
79
+ 21:56:13 0 10.100.155.200:22 R> 10.10.237.72:46408 LAST_ACK
80
+ ^C
81
+ Ending tracing...
82
+
83
+ Look for "L>" in the type column ("--") for TLP events.
84
+
85
+
86
+ Use -h to print the USAGE message:
87
+
88
+ # ./tcpretrans -h
89
+ USAGE: tcpretrans [-hs]
90
+ -h # help message
91
+ -s # print stack traces
92
+ eg,
93
+ tcpretrans # trace TCP retransmits
@@ -0,0 +1,210 @@
1
+ Demonstrations of tpoint, the Linux ftrace version.
2
+
3
+
4
+ Let's trace block:block_rq_issue, to see block device (disk) I/O requests:
5
+
6
+ # ./tpoint block:block_rq_issue
7
+ Tracing block:block_rq_issue. Ctrl-C to end.
8
+ supervise-1692 [001] d... 7269912.982162: block_rq_issue: 202,1 W 0 () 17039656 + 8 [supervise]
9
+ supervise-1696 [000] d... 7269912.982243: block_rq_issue: 202,1 W 0 () 12862264 + 8 [supervise]
10
+ cksum-12994 [000] d... 7269913.317924: block_rq_issue: 202,1 R 0 () 9357056 + 72 [cksum]
11
+ cksum-12994 [000] d... 7269913.319013: block_rq_issue: 202,1 R 0 () 2977536 + 144 [cksum]
12
+ cksum-12994 [000] d... 7269913.320217: block_rq_issue: 202,1 R 0 () 2986240 + 216 [cksum]
13
+ cksum-12994 [000] d... 7269913.321677: block_rq_issue: 202,1 R 0 () 620344 + 56 [cksum]
14
+ cksum-12994 [001] d... 7269913.329309: block_rq_issue: 202,1 R 0 () 9107912 + 88 [cksum]
15
+ cksum-12994 [001] d... 7269913.340133: block_rq_issue: 202,1 R 0 () 3147008 + 248 [cksum]
16
+ cksum-12994 [001] d... 7269913.354551: block_rq_issue: 202,1 R 0 () 11583488 + 256 [cksum]
17
+ cksum-12994 [001] d... 7269913.379904: block_rq_issue: 202,1 R 0 () 11583744 + 256 [cksum]
18
+ [...]
19
+ ^C
20
+ Ending tracing...
21
+
22
+ Great, that was easy!
23
+
24
+ perf_events can do this as well, and is better in many ways, including a more
25
+ efficient buffering strategy, and multi-user access. It's not that easy to do
26
+ this one-liner in perf_events, however. An equivalent for recent kernels is:
27
+
28
+ perf record --no-buffer -e block:block_rq_issue -a -o - | PAGER=cat stdbuf -oL perf script -i -
29
+
30
+ On older kernels, use -D instead of --no-buffer. Even better is to set the buffer
31
+ page size to a sufficient grouping (using -m), to minimize overheads, at the
32
+ expense of liveliness of updates. Note that stack traces (-g) don't work on
33
+ my systems with this perf one-liner, however, they do work with tpoint -s.
34
+
35
+
36
+ Column headings can be printed using -H:
37
+
38
+ # ./tpoint -H block:block_rq_issue
39
+ Tracing block:block_rq_issue. Ctrl-C to end.
40
+ # tracer: nop
41
+ #
42
+ # entries-in-buffer/entries-written: 0/0 #P:2
43
+ #
44
+ # _-----=> irqs-off
45
+ # / _----=> need-resched
46
+ # | / _---=> hardirq/softirq
47
+ # || / _--=> preempt-depth
48
+ # ||| / delay
49
+ # TASK-PID CPU# |||| TIMESTAMP FUNCTION
50
+ # | | | |||| | |
51
+ supervise-1697 [000] d... 7270545.340856: block_rq_issue: 202,1 W 0 () 12862464 + 8 [supervise]
52
+ supervise-1697 [000] d... 7270545.341256: block_rq_issue: 202,1 W 0 () 12862472 + 8 [supervise]
53
+ supervise-1690 [000] d... 7270545.342363: block_rq_issue: 202,1 W 0 () 17040368 + 8 [supervise]
54
+ [...]
55
+
56
+ They are also documented in the Linux kernel source under:
57
+ Documentation/trace/ftrace.txt.
58
+
59
+
60
+ How about stacks traces for those block_rq_issue events? Adding -s:
61
+
62
+ # ./tpoint -s block:block_rq_issue
63
+ Tracing block:block_rq_issue. Ctrl-C to end.
64
+ supervise-1691 [000] d... 7269511.079179: block_rq_issue: 202,1 W 0 () 17040232 + 8 [supervise]
65
+ supervise-1691 [000] d... 7269511.079188: <stack trace>
66
+ => blk_peek_request
67
+ => do_blkif_request
68
+ => __blk_run_queue
69
+ => queue_unplugged
70
+ => blk_flush_plug_list
71
+ => blk_finish_plug
72
+ => ext4_writepages
73
+ => do_writepages
74
+ => __filemap_fdatawrite_range
75
+ => filemap_flush
76
+ => ext4_alloc_da_blocks
77
+ => ext4_rename
78
+ => vfs_rename
79
+ => SYSC_renameat2
80
+ => SyS_renameat2
81
+ => SyS_rename
82
+ => system_call_fastpath
83
+ cksum-7428 [000] d... 7269511.331778: block_rq_issue: 202,1 R 0 () 9006848 + 208 [cksum]
84
+ cksum-7428 [000] d... 7269511.331784: <stack trace>
85
+ => blk_peek_request
86
+ => do_blkif_request
87
+ => __blk_run_queue
88
+ => queue_unplugged
89
+ => blk_flush_plug_list
90
+ => blk_finish_plug
91
+ => __do_page_cache_readahead
92
+ => ondemand_readahead
93
+ => page_cache_async_readahead
94
+ => generic_file_read_iter
95
+ => new_sync_read
96
+ => vfs_read
97
+ => SyS_read
98
+ => system_call_fastpath
99
+ cksum-7428 [000] d... 7269511.332631: block_rq_issue: 202,1 R 0 () 620992 + 200 [cksum]
100
+ cksum-7428 [000] d... 7269511.332639: <stack trace>
101
+ => blk_peek_request
102
+ => do_blkif_request
103
+ => __blk_run_queue
104
+ => queue_unplugged
105
+ => blk_flush_plug_list
106
+ => blk_finish_plug
107
+ => __do_page_cache_readahead
108
+ => ondemand_readahead
109
+ => page_cache_sync_readahead
110
+ => generic_file_read_iter
111
+ => new_sync_read
112
+ => vfs_read
113
+ => SyS_read
114
+ => system_call_fastpath
115
+ ^C
116
+ Ending tracing...
117
+
118
+ Easy. Now I can read the ancestry to understand what actually led to issuing
119
+ a block device (disk) I/O.
120
+
121
+
122
+ Here's insertion onto the block I/O queue (better matches processes):
123
+
124
+ # ./tpoint -s block:block_rq_insert
125
+ Tracing block:block_rq_insert. Ctrl-C to end.
126
+ cksum-11908 [000] d... 7269834.882517: block_rq_insert: 202,1 R 0 () 736304 + 256 [cksum]
127
+ cksum-11908 [000] d... 7269834.882528: <stack trace>
128
+ => __elv_add_request
129
+ => blk_flush_plug_list
130
+ => blk_finish_plug
131
+ => __do_page_cache_readahead
132
+ => ondemand_readahead
133
+ => page_cache_sync_readahead
134
+ => generic_file_read_iter
135
+ => new_sync_read
136
+ => vfs_read
137
+ => SyS_read
138
+ => system_call_fastpath
139
+ [...]
140
+
141
+
142
+ You can also add tracepoint filters. To see what variables you can use, use -v:
143
+
144
+ # ./tpoint -v block:block_rq_issue
145
+ name: block_rq_issue
146
+ ID: 942
147
+ format:
148
+ field:unsigned short common_type; offset:0; size:2; signed:0;
149
+ field:unsigned char common_flags; offset:2; size:1; signed:0;
150
+ field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
151
+ field:int common_pid; offset:4; size:4; signed:1;
152
+
153
+ field:dev_t dev; offset:8; size:4; signed:0;
154
+ field:sector_t sector; offset:16; size:8; signed:0;
155
+ field:unsigned int nr_sector; offset:24; size:4; signed:0;
156
+ field:unsigned int bytes; offset:28; size:4; signed:0;
157
+ field:char rwbs[8]; offset:32; size:8; signed:1;
158
+ field:char comm[16]; offset:40; size:16; signed:1;
159
+ field:__data_loc char[] cmd; offset:56; size:4; signed:1;
160
+
161
+ print fmt: "%d,%d %s %u (%s) %llu + %u [%s]", ((unsigned int) ((REC->dev) >> 20)), ((unsigned int) ((REC->dev) & ((1U << 20) - 1))), REC->rwbs, REC->bytes, __get_str(cmd), (unsigned long long)REC->sector, REC->nr_sector, REC->comm
162
+
163
+
164
+ Now I'll add a filter to check that the rwbs field (I/O type) includes an "R",
165
+ making it a read:
166
+
167
+ # ./tpoint -s block:block_rq_insert 'rwbs ~ "*R*"'
168
+ cksum-11908 [000] d... 7269839.919098: block_rq_insert: 202,1 R 0 () 736560 + 136 [cksum]
169
+ cksum-11908 [000] d... 7269839.919107: <stack trace>
170
+ => __elv_add_request
171
+ => blk_flush_plug_list
172
+ => blk_finish_plug
173
+ => __do_page_cache_readahead
174
+ => ondemand_readahead
175
+ => page_cache_async_readahead
176
+ => generic_file_read_iter
177
+ => new_sync_read
178
+ => vfs_read
179
+ => SyS_read
180
+ => system_call_fastpath
181
+ [...]
182
+
183
+
184
+ Use -h to print the USAGE message:
185
+
186
+ # ./tpoint -h
187
+ USAGE: tpoint [-hHsv] [-d secs] [-p PID] [-L TID] tracepoint [filter]
188
+ tpoint -l
189
+ -d seconds # trace duration, and use buffers
190
+ -p PID # PID to match on events
191
+ -L TID # thread id to match on events
192
+ -v # view format file (don't trace)
193
+ -H # include column headers
194
+ -l # list all tracepoints
195
+ -s # show kernel stack traces
196
+ -h # this usage message
197
+
198
+ Note that these examples may need modification to match your kernel
199
+ version's function names and platform's register usage.
200
+ eg,
201
+ tpoint -l | grep open
202
+ # find tracepoints containing "open"
203
+ tpoint syscalls:sys_enter_open
204
+ # trace open() syscall entry
205
+ tpoint block:block_rq_issue
206
+ # trace block I/O issue
207
+ tpoint -s block:block_rq_issue
208
+ # show kernel stacks
209
+
210
+ See the man page and example file for more info.