fluent-plugin-perf-tools 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rubocop.yml +26 -0
  4. data/.ruby-version +1 -0
  5. data/CHANGELOG.md +5 -0
  6. data/CODE_OF_CONDUCT.md +84 -0
  7. data/Gemfile +5 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +43 -0
  10. data/Rakefile +17 -0
  11. data/bin/console +15 -0
  12. data/bin/setup +8 -0
  13. data/fluent-plugin-perf-tools.gemspec +48 -0
  14. data/lib/fluent/plugin/in_perf_tools.rb +42 -0
  15. data/lib/fluent/plugin/perf_tools/cachestat.rb +65 -0
  16. data/lib/fluent/plugin/perf_tools/command.rb +30 -0
  17. data/lib/fluent/plugin/perf_tools/version.rb +9 -0
  18. data/lib/fluent/plugin/perf_tools.rb +11 -0
  19. data/perf-tools/LICENSE +339 -0
  20. data/perf-tools/README.md +205 -0
  21. data/perf-tools/bin/bitesize +1 -0
  22. data/perf-tools/bin/cachestat +1 -0
  23. data/perf-tools/bin/execsnoop +1 -0
  24. data/perf-tools/bin/funccount +1 -0
  25. data/perf-tools/bin/funcgraph +1 -0
  26. data/perf-tools/bin/funcslower +1 -0
  27. data/perf-tools/bin/functrace +1 -0
  28. data/perf-tools/bin/iolatency +1 -0
  29. data/perf-tools/bin/iosnoop +1 -0
  30. data/perf-tools/bin/killsnoop +1 -0
  31. data/perf-tools/bin/kprobe +1 -0
  32. data/perf-tools/bin/opensnoop +1 -0
  33. data/perf-tools/bin/perf-stat-hist +1 -0
  34. data/perf-tools/bin/reset-ftrace +1 -0
  35. data/perf-tools/bin/syscount +1 -0
  36. data/perf-tools/bin/tcpretrans +1 -0
  37. data/perf-tools/bin/tpoint +1 -0
  38. data/perf-tools/bin/uprobe +1 -0
  39. data/perf-tools/deprecated/README.md +1 -0
  40. data/perf-tools/deprecated/execsnoop-proc +150 -0
  41. data/perf-tools/deprecated/execsnoop-proc.8 +80 -0
  42. data/perf-tools/deprecated/execsnoop-proc_example.txt +46 -0
  43. data/perf-tools/disk/bitesize +175 -0
  44. data/perf-tools/examples/bitesize_example.txt +63 -0
  45. data/perf-tools/examples/cachestat_example.txt +58 -0
  46. data/perf-tools/examples/execsnoop_example.txt +153 -0
  47. data/perf-tools/examples/funccount_example.txt +126 -0
  48. data/perf-tools/examples/funcgraph_example.txt +2178 -0
  49. data/perf-tools/examples/funcslower_example.txt +110 -0
  50. data/perf-tools/examples/functrace_example.txt +341 -0
  51. data/perf-tools/examples/iolatency_example.txt +350 -0
  52. data/perf-tools/examples/iosnoop_example.txt +302 -0
  53. data/perf-tools/examples/killsnoop_example.txt +62 -0
  54. data/perf-tools/examples/kprobe_example.txt +379 -0
  55. data/perf-tools/examples/opensnoop_example.txt +47 -0
  56. data/perf-tools/examples/perf-stat-hist_example.txt +149 -0
  57. data/perf-tools/examples/reset-ftrace_example.txt +88 -0
  58. data/perf-tools/examples/syscount_example.txt +297 -0
  59. data/perf-tools/examples/tcpretrans_example.txt +93 -0
  60. data/perf-tools/examples/tpoint_example.txt +210 -0
  61. data/perf-tools/examples/uprobe_example.txt +321 -0
  62. data/perf-tools/execsnoop +292 -0
  63. data/perf-tools/fs/cachestat +167 -0
  64. data/perf-tools/images/perf-tools_2016.png +0 -0
  65. data/perf-tools/iolatency +296 -0
  66. data/perf-tools/iosnoop +296 -0
  67. data/perf-tools/kernel/funccount +146 -0
  68. data/perf-tools/kernel/funcgraph +259 -0
  69. data/perf-tools/kernel/funcslower +248 -0
  70. data/perf-tools/kernel/functrace +192 -0
  71. data/perf-tools/kernel/kprobe +270 -0
  72. data/perf-tools/killsnoop +263 -0
  73. data/perf-tools/man/man8/bitesize.8 +70 -0
  74. data/perf-tools/man/man8/cachestat.8 +111 -0
  75. data/perf-tools/man/man8/execsnoop.8 +104 -0
  76. data/perf-tools/man/man8/funccount.8 +76 -0
  77. data/perf-tools/man/man8/funcgraph.8 +166 -0
  78. data/perf-tools/man/man8/funcslower.8 +129 -0
  79. data/perf-tools/man/man8/functrace.8 +123 -0
  80. data/perf-tools/man/man8/iolatency.8 +116 -0
  81. data/perf-tools/man/man8/iosnoop.8 +169 -0
  82. data/perf-tools/man/man8/killsnoop.8 +100 -0
  83. data/perf-tools/man/man8/kprobe.8 +162 -0
  84. data/perf-tools/man/man8/opensnoop.8 +113 -0
  85. data/perf-tools/man/man8/perf-stat-hist.8 +111 -0
  86. data/perf-tools/man/man8/reset-ftrace.8 +49 -0
  87. data/perf-tools/man/man8/syscount.8 +96 -0
  88. data/perf-tools/man/man8/tcpretrans.8 +93 -0
  89. data/perf-tools/man/man8/tpoint.8 +140 -0
  90. data/perf-tools/man/man8/uprobe.8 +168 -0
  91. data/perf-tools/misc/perf-stat-hist +223 -0
  92. data/perf-tools/net/tcpretrans +311 -0
  93. data/perf-tools/opensnoop +280 -0
  94. data/perf-tools/syscount +192 -0
  95. data/perf-tools/system/tpoint +232 -0
  96. data/perf-tools/tools/reset-ftrace +123 -0
  97. data/perf-tools/user/uprobe +390 -0
  98. metadata +349 -0
@@ -0,0 +1,166 @@
1
+ .TH funcgraph 8 "2014-07-29" "USER COMMANDS"
2
+ .SH NAME
3
+ funcgraph \- trace kernel function graph, showing child function calls and times. Uses Linux ftrace.
4
+ .SH SYNOPSIS
5
+ .B funcgraph
6
+ [\-aCDhHPtT] [\-m maxdepth] [\-p PID] [\-L TID] [\-d secs] funcstring
7
+ .SH DESCRIPTION
8
+ This is an exploratory tool that shows the graph of child function calls
9
+ for a given kernel function. This can cost moderate overhead to execute, and
10
+ should only be used to understand kernel behavior before using other, lower
11
+ overhead tools. This is a proof of concept using Linux ftrace capabilities
12
+ on older kernels.
13
+
14
+ The output format is the same as the ftrace function graph trace format,
15
+ described in the kernel source under Documentation/trace/ftrace.txt.
16
+ Note that the output may be shuffled when different CPU buffers are read;
17
+ check the CPU column for changes, or include timestamps (-t) and post sort.
18
+
19
+ The "-d duration" mode leaves the trace data in the kernel buffer, and
20
+ only reads it at the end. If the trace data is large, beware of exhausting
21
+ buffer space (/sys/kernel/debug/tracing/buffer_size_kb) and losing data.
22
+
23
+ Also beware of feedback loops: tracing tcp* functions over an ssh session,
24
+ or writing ext4* functions to an ext4 file system. For the former, tcp
25
+ trace data could be redirected to a file (as in the usage message). For
26
+ the latter, trace to the screen or a different file system.
27
+
28
+ WARNING: This uses dynamic tracing of kernel functions, and could cause
29
+ kernel panics or freezes. Test, and know what you are doing, before use.
30
+ Also see the OVERHEAD section.
31
+
32
+ Since this uses ftrace, only the root user can use this tool.
33
+ .SH REQUIREMENTS
34
+ FTRACE CONFIG, which you may already have enabled and available on recent
35
+ kernels.
36
+ .SH OPTIONS
37
+ .TP
38
+ \-a
39
+ All info. Same as \-HPt. (But no -T, which isn't available in older kernels.)
40
+ .TP
41
+ \-C
42
+ Function durations measure on-CPU time only (exclude sleep time).
43
+ .TP
44
+ \-d seconds
45
+ Set the duration of tracing, in seconds. Trace output will be buffered and
46
+ printed at the end. This also reduces overheads by buffering in-kernel,
47
+ instead of printing events as they occur.
48
+
49
+ The ftrace buffer has a fixed size per-CPU (see
50
+ /sys/kernel/debug/tracing/buffer_size_kb). If you think events are missing,
51
+ try increasing that size.
52
+ .TP
53
+ \-D
54
+ Do not show function duration times.
55
+ .TP
56
+ \-h
57
+ Print usage message.
58
+ .TP
59
+ \-H
60
+ Print column headers.
61
+ .TP
62
+ \-m
63
+ Max depth to trace functions. By default, unlimited (0). This feature is only
64
+ available for newer Linux kernel versions.
65
+ .TP
66
+ \-p PID
67
+ Only trace kernel functions when this process ID is on-CPU.
68
+ .TP
69
+ \-L TID
70
+ Only trace kernel functions when this thread ID is on-CPU.
71
+ .TP
72
+ \-P
73
+ Show process names and process IDs with every line of output.
74
+ .TP
75
+ \-t
76
+ Show timestamps on every line of output.
77
+ .TP
78
+ \-T
79
+ Tail mode: decorate function return lines with the name of the function. This
80
+ option may not be available for older kernels.
81
+ .TP
82
+ funcstring
83
+ A function name to trace, which may include file glob style wildcards ("*") at
84
+ the beginning or ending of a string only. Eg, "vfs*" means match "vfs" followed
85
+ by anything. Since the output is verbose, you probably only want to trace
86
+ single functions, and not use wildcards.
87
+ .SH EXAMPLES
88
+ .TP
89
+ Trace calls to do_nanosleep(), showing child functions and durations:
90
+ #
91
+ .B funcgraph do_nanosleep
92
+ .TP
93
+ Same as above, but include column headers:
94
+ #
95
+ .B funcgraph -H do_nanosleep
96
+ .TP
97
+ Same as above, but include timestamps and process names as well:
98
+ #
99
+ .B funcgraph -HtP do_nanosleep
100
+ .TP
101
+ Trace all vfs_read() kernel function calls, and child functions, for PID 198 only:
102
+ #
103
+ .B funcgraph \-p 198 vfs_read
104
+ .TP
105
+ Trace all vfs_read() kernel function calls, and child functions, for 1 second, then write to a file:
106
+ #
107
+ .B funcgraph \-d 1 vfs_read > out
108
+ .SH FIELDS
109
+ The output format depends on the kernel version, and headings can be printed
110
+ using \-H. The format is the same as the ftrace function trace format, described
111
+ in the kernel source under Documentation/trace/ftrace.txt.
112
+
113
+ Typical fields are:
114
+ .TP
115
+ TIME
116
+ (Shown with \-t.) Time of event, in seconds.
117
+ .TP
118
+ CPU
119
+ The CPU this event occurred on.
120
+ .TP
121
+ TASK/PID
122
+ (Shown with \-P.) The process name (which could include dashes), a dash, and the process ID.
123
+ .TP
124
+ DURATION
125
+ Elapsed time during the function call, inclusive of children. This is also
126
+ inclusive of sleep time, unless -C is used. The time is either displayed on
127
+ the return of a function ("}"), or for a leaf function (no children), on the
128
+ same line.
129
+
130
+ If the trace output begins with some returns that lack entries, their durations
131
+ may not be trusted. This is usually only the case for the first dozen or so
132
+ lines.
133
+ .TP
134
+ FUNCTION CALLS
135
+ Entries and returns from kernel functions.
136
+ .SH OVERHEAD
137
+ This tool causes moderate to high overheads. Use with caution for
138
+ exploratory purposes, then switch to lower overhead techniques based on
139
+ findings. It's expected that the kernel will run at least 50% slower while
140
+ this tool is running -- even while no output is being generated. This is
141
+ because ALL kernel functions are traced, and filtered based on the function
142
+ of interest. When output is generated, it can generate many lines quickly
143
+ depending on the traced event. Such data will cause performance overheads.
144
+ This also works without buffering by default, printing function events
145
+ as they happen (uses trace_pipe), context switching and consuming CPU to do
146
+ so. If needed, you can try the "-d secs" option, which buffers events
147
+ instead, reducing overhead. If you think the buffer option is losing events,
148
+ try increasing the buffer size (buffer_size_kb).
149
+
150
+ It's a good idea to use funccount(8) first, which is lower overhead, to
151
+ help you select which functions you may want to trace using funcgraph(8).
152
+ .SH SOURCE
153
+ This is from the perf-tools collection:
154
+ .IP
155
+ https://github.com/brendangregg/perf-tools
156
+ .PP
157
+ Also look under the examples directory for a text file containing example
158
+ usage, output, and commentary for this tool.
159
+ .SH OS
160
+ Linux
161
+ .SH STABILITY
162
+ Unstable - in development.
163
+ .SH AUTHOR
164
+ Brendan Gregg
165
+ .SH SEE ALSO
166
+ funccount(8), functrace(8), kprobe(8)
@@ -0,0 +1,129 @@
1
+ .TH funcslower 8 "2014-07-30" "USER COMMANDS"
2
+ .SH NAME
3
+ funcslower \- trace kernel functions slower than a threshold (microseconds). Uses Linux ftrace.
4
+ .SH SYNOPSIS
5
+ .B funcslower
6
+ [\-aChHPt] [\-p PID] [\-L TID] [\-d secs] funcstring latency_us
7
+ .SH DESCRIPTION
8
+ This uses the Linux ftrace function graph profiler to time kernel functions
9
+ and filter them based on a latency threshold. Latency outliers can be studied
10
+ this way, confirming their presence, duration, and rate. This tool
11
+ is a proof of concept using Linux ftrace capabilities on older kernels.
12
+
13
+ The output format is based on the ftrace function graph trace format,
14
+ described in the kernel source under Documentation/trace/ftrace.txt. Use the
15
+ \-H option to print column headings.
16
+ Note that the output may be shuffled when different CPU buffers are read;
17
+ check the CPU column for changes, or include timestamps (-t) and post sort.
18
+
19
+ WARNING: This uses dynamic tracing of kernel functions, and could cause
20
+ kernel panics or freezes. Test, and know what you are doing, before use.
21
+
22
+ Since this uses ftrace, only the root user can use this tool.
23
+ .SH REQUIREMENTS
24
+ FTRACE function graph, which you may already have enabled and available on
25
+ recent kernels. And awk.
26
+ .SH OPTIONS
27
+ .TP
28
+ \-a
29
+ All info. Same as \-HPt.
30
+ .TP
31
+ \-C
32
+ Function durations measure on-CPU time only (exclude sleep time).
33
+ .TP
34
+ \-d seconds
35
+ Set the duration of tracing, in seconds. Trace output will be buffered and
36
+ printed at the end. This also reduces overheads by buffering in-kernel,
37
+ instead of printing events as they occur.
38
+
39
+ The ftrace buffer has a fixed size per-CPU (see
40
+ /sys/kernel/debug/tracing/buffer_size_kb). If you think events are missing,
41
+ try increasing that size.
42
+ .TP
43
+ \-h
44
+ Print usage message.
45
+ .TP
46
+ \-H
47
+ Print column headers.
48
+ .TP
49
+ \-p PID
50
+ Only trace kernel functions when this process ID is on-CPU.
51
+ .TP
52
+ \-L TID
53
+ Only trace kernel functions when this thread ID is on-CPU.
54
+ .TP
55
+ \-P
56
+ Show process names and process IDs with every line of output.
57
+ .TP
58
+ \-t
59
+ Show timestamps on every line of output.
60
+ .TP
61
+ funcstring
62
+ A function name to trace, which may include file glob style wildcards ("*") at
63
+ the beginning or ending of a string only. Eg, "vfs*" means match "vfs" followed
64
+ by anything. Since the output is verbose, you probably only want to trace
65
+ single functions, and not use wildcards.
66
+ .TP
67
+ latency_us
68
+ Minimum function duration to trace, in units of microseconds. This is filtered
69
+ in-kernel.
70
+ .SH EXAMPLES
71
+ .TP
72
+ Trace calls to vfs_read(), showing events slower than 10 ms:
73
+ #
74
+ .B funcslower vfs_read 10000
75
+ .TP
76
+ Same as above, but include column headers, event timestamps, and process names:
77
+ #
78
+ .B funcslower -HPt vfs_read 10000
79
+ .TP
80
+ Trace slow vfs_read()s for PID 198 only:
81
+ #
82
+ .B funcslower \-p 198 vfs_read 10000
83
+ .SH FIELDS
84
+ The output format depends on the kernel version, and headings can be printed
85
+ using \-H. The format is the same as the ftrace function trace format, described
86
+ in the kernel source under Documentation/trace/ftrace.txt.
87
+
88
+ Typical fields are:
89
+ .TP
90
+ TIME
91
+ (Shown with \-t.) Time of event, in seconds.
92
+ .TP
93
+ CPU
94
+ The CPU this event occurred on.
95
+ .TP
96
+ TASK/PID
97
+ (Shown with \-P.) The process name (which could include dashes), a dash, and the process ID.
98
+ .TP
99
+ DURATION
100
+ Elapsed time during the function call, inclusive of children. This is also
101
+ inclusive of sleep time, unless -C is used.
102
+ .TP
103
+ FUNCTION CALLS
104
+ Kernel function returns.
105
+ .SH OVERHEAD
106
+ OVERHEADS: Timing and filtering is performed in-kernel context, costing
107
+ lower overheads than post-processing in user space. If you trace frequent
108
+ events (eg, pick a common function and a low threshold), you might want to
109
+ try the "-d secs" option, which buffers events in-kernel instead of printing
110
+ them live.
111
+
112
+ It's a good idea to start with a high threshold (eg, "100000" for 100 ms) then
113
+ to decrease it. If you start low instead, you may start printing too many
114
+ events.
115
+ .SH SOURCE
116
+ This is from the perf-tools collection:
117
+ .IP
118
+ https://github.com/brendangregg/perf-tools
119
+ .PP
120
+ Also look under the examples directory for a text file containing example
121
+ usage, output, and commentary for this tool.
122
+ .SH OS
123
+ Linux
124
+ .SH STABILITY
125
+ Unstable - in development.
126
+ .SH AUTHOR
127
+ Brendan Gregg
128
+ .SH SEE ALSO
129
+ funccount(8), functrace(8), funcgraph(8), kprobe(8)
@@ -0,0 +1,123 @@
1
+ .TH functrace 8 "2014-07-20" "USER COMMANDS"
2
+ .SH NAME
3
+ functrace \- trace kernel function calls matching specified wildcards. Uses Linux ftrace.
4
+ .SH SYNOPSIS
5
+ .B functrace
6
+ [\-hH] [\-p PID] [\-L TID] [\-d secs] funcstring
7
+ .SH DESCRIPTION
8
+ This tool provides a quick way to capture the execution of kernel functions,
9
+ showing basic details including the process ID, timestamp, and calling
10
+ function.
11
+
12
+ WARNING: This uses dynamic tracing of (what can be many) kernel functions,
13
+ and could cause kernel panics or freezes. Test, and know what you are doing,
14
+ before use.
15
+
16
+ Also beware of feedback loops: tracing tcp* functions over an ssh session,
17
+ or writing ext4* functions to an ext4 file system. For the former, tcp
18
+ trace data could be redirected to a file (as in the usage message). For
19
+ the latter, trace to the screen or a different file system.
20
+
21
+ SEE ALSO: kprobe(8), which can dynamically trace a single function call or
22
+ return, and examine CPU registers and return values.
23
+
24
+ Since this uses ftrace, only the root user can use this tool.
25
+ .SH REQUIREMENTS
26
+ FTRACE CONFIG, which you may already have enabled and available on recent
27
+ kernels.
28
+ .SH OPTIONS
29
+ .TP
30
+ \-d seconds
31
+ Set the duration of tracing, in seconds. Trace output will be buffered and
32
+ printed at the end. This also reduces overheads by buffering in-kernel,
33
+ instead of printing events as they occur.
34
+
35
+ The ftrace buffer has a fixed size per-CPU (see
36
+ /sys/kernel/debug/tracing/buffer_size_kb). If you think events are missing,
37
+ try increasing that size.
38
+ .TP
39
+ \-h
40
+ Print usage message.
41
+ .TP
42
+ \-H
43
+ Print column headers.
44
+ .TP
45
+ \-p PID
46
+ Only trace kernel functions when this process ID is on-CPU.
47
+ .TP
48
+ \-L TID
49
+ Only trace kernel functions when this thread ID is on-CPU.
50
+ .TP
51
+ funcstring
52
+ A function name to trace, which may include file glob style wildcards ("*") at
53
+ the beginning or ending of a string only. Eg, "vfs*" means match "vfs" followed
54
+ by anything.
55
+ .SH EXAMPLES
56
+ .TP
57
+ Trace calls to do_nanosleep():
58
+ #
59
+ .B functrace do_nanosleep
60
+ .TP
61
+ Trace calls to all kernel functions ending in "*sleep":
62
+ #
63
+ .B functrace '*sleep'
64
+ .TP
65
+ Trace all "vfs*" kernel function calls for PID 198:
66
+ #
67
+ .B functrace \-p 198 'vfs*'
68
+ .TP
69
+ Trace all "tcp*" kernel function calls, and output to a file until Ctrl-C:
70
+ #
71
+ .B functrace 'tcp*' > out
72
+ .TP
73
+ Trace all "tcp*" kernel function calls, output to a file, for 1 second (buffered):
74
+ #
75
+ .B functrace \-d 1 'tcp*' > out
76
+ .SH FIELDS
77
+ The output format depends on the kernel version, and headings can be printed
78
+ using \-H. The format is the same as the ftrace function trace format, described
79
+ in the kernel source under Documentation/trace/ftrace.txt.
80
+
81
+ Typical fields are:
82
+ .TP
83
+ TASK-PID
84
+ The process name (which could include dashes), a dash, and the process ID.
85
+ .TP
86
+ CPU#
87
+ The CPU ID, in brackets.
88
+ .TP
89
+ ||||
90
+ Kernel state flags. For example, on Linux 3.16 these are for irqs-off,
91
+ need-resched, hardirq/softirq, and preempt-depth.
92
+ .TP
93
+ TIMESTAMP
94
+ Time of event, in seconds.
95
+ .TP
96
+ FUNCTION
97
+ Kernel function name.
98
+ .SH OVERHEAD
99
+ This can generate a lot of trace data quickly, depending on the
100
+ frequency of the traced events. Such data will cause performance overheads.
101
+ This also works without buffering by default, printing function events
102
+ as they happen (uses trace_pipe), context switching and consuming CPU to do
103
+ so. If needed, you can try the "\-d secs" option, which buffers events
104
+ instead, reducing overhead. If you think the buffer option is losing events,
105
+ try increasing the buffer size (buffer_size_kb).
106
+
107
+ It's a good idea to use funccount(8) first, which is lower overhead, to
108
+ help you select which functions you may want to trace using functrace(8).
109
+ .SH SOURCE
110
+ This is from the perf-tools collection:
111
+ .IP
112
+ https://github.com/brendangregg/perf-tools
113
+ .PP
114
+ Also look under the examples directory for a text file containing example
115
+ usage, output, and commentary for this tool.
116
+ .SH OS
117
+ Linux
118
+ .SH STABILITY
119
+ Unstable - in development.
120
+ .SH AUTHOR
121
+ Brendan Gregg
122
+ .SH SEE ALSO
123
+ funccount(8), kprobe(8)
@@ -0,0 +1,116 @@
1
+ .TH iolatency 8 "2014-07-12" "USER COMMANDS"
2
+ .SH NAME
3
+ iolatency \- summarize block device I/O latency as a histogram. Uses Linux ftrace.
4
+ .SH SYNOPSIS
5
+ .B iolatency
6
+ [\-hQT] [\-d device] [\-i iotype] [interval [count]]
7
+ .SH DESCRIPTION
8
+ This shows the distribution of latency, allowing modes and latency outliers
9
+ to be identified and studied. For more details of block device I/O, use
10
+ iosnoop(8).
11
+
12
+ This is a proof of concept tool using ftrace, and involves user space
13
+ processing and related overheads. See the OVERHEAD section.
14
+
15
+ NOTE: Due to the way trace buffers are switched per interval, there is the
16
+ possibility of losing a small number of I/O (usually less than 1%). The
17
+ summary therefore shows the general distribution, but may be slightly
18
+ incomplete. If 100% of I/O must be studied, use iosnoop(8) and post-process.
19
+ Also note that I/O may be missed when the trace buffer is full: see the
20
+ interval section in OPTIONS.
21
+
22
+ Since this uses ftrace, only the root user can use this tool.
23
+ .SH REQUIREMENTS
24
+ FTRACE CONFIG, and the tracepoints block:block_rq_issue and
25
+ block:block_rq_complete, which you may already have enabled and available on
26
+ recent Linux kernels. And awk.
27
+ .SH OPTIONS
28
+ .TP
29
+ \-d device
30
+ Only show I/O issued by this device. (eg, "202,1"). This matches the DEV
31
+ column in the iolatency output, and is filtered in-kernel.
32
+ .TP
33
+ \-i iotype
34
+ Only show I/O issued that matches this I/O type. This matches the TYPE column
35
+ in the iolatency output, and wildcards ("*") can be used at the beginning or
36
+ end (only). Eg, "*R*" matches all reads. This is filtered in-kernel.
37
+ .TP
38
+ \-h
39
+ Print usage message.
40
+ .TP
41
+ \-Q
42
+ Include block I/O queueing time. This uses block I/O queue insertion as the
43
+ start tracepoint (block:block_rq_insert), instead of block I/O issue
44
+ (block:block_rq_issue).
45
+ .TP
46
+ \-T
47
+ Include timestamps with each summary output.
48
+ .TP
49
+ interval
50
+ Interval between summary histograms, in seconds.
51
+
52
+ During the interval, trace output will be buffered in-kernel, which is then
53
+ read and processed for the summary. This buffer has a fixed size per-CPU (see
54
+ /sys/kernel/debug/tracing/buffer_size_kb). If you think events are missing,
55
+ try increasing that size (the bufsize_kb setting in iolatency). With the
56
+ default setting (4 Mbytes), I'd expect this to happen around 50k I/O per
57
+ summary.
58
+ .TP
59
+ count
60
+ Number of summaries to print.
61
+ .SH EXAMPLES
62
+ .TP
63
+ Default output, print a summary of block I/O latency every 1 second:
64
+ #
65
+ .B iolatency
66
+ .TP
67
+ Include block I/O queue time:
68
+ .B iolatency \-Q
69
+ .TP
70
+ Print 5 x 1 second summaries:
71
+ #
72
+ .B iolatency 1 5
73
+ .TP
74
+ Trace reads only:
75
+ #
76
+ .B iolatency \-i '*R*'
77
+ .TP
78
+ Trace I/O issued to device 202,1 only:
79
+ #
80
+ .B iolatency \-d 202,1
81
+ .SH FIELDS
82
+ .TP
83
+ >=(ms)
84
+ Latency was greater than or equal-to this value, in milliseconds.
85
+ .TP
86
+ <(ms)
87
+ Latency was less than this value, in milliseconds.
88
+ .TP
89
+ I/O
90
+ Number of block device I/O in this latency range, during the interval.
91
+ .TP
92
+ Distribution
93
+ ASCII histogram representation of the I/O column.
94
+ .SH OVERHEAD
95
+ Block device I/O issue and completion events are traced and buffered
96
+ in-kernel, then processed and summarized in user space. There may be
97
+ measurable overhead with this approach, relative to the block device IOPS.
98
+
99
+ The overhead may be acceptable in many situations. If it isn't, this tool
100
+ can be reimplemented in C, or using a different tracer (eg, perf_events,
101
+ SystemTap, ktap.)
102
+ .SH SOURCE
103
+ This is from the perf-tools collection.
104
+ .IP
105
+ https://github.com/brendangregg/perf-tools
106
+ .PP
107
+ Also look under the examples directory for a text file containing example
108
+ usage, output, and commentary for this tool.
109
+ .SH OS
110
+ Linux
111
+ .SH STABILITY
112
+ Unstable - in development.
113
+ .SH AUTHOR
114
+ Brendan Gregg
115
+ .SH SEE ALSO
116
+ iosnoop(8), iostat(1)
@@ -0,0 +1,169 @@
1
+ .TH iosnoop 8 "2014-07-12" "USER COMMANDS"
2
+ .SH NAME
3
+ iosnoop \- trace block I/O events as they occur. Uses Linux ftrace.
4
+ .SH SYNOPSIS
5
+ .B iosnoop
6
+ [\-hQst] [\-d device] [\-i iotype] [\-p pid] [\-n name] [duration]
7
+ .SH DESCRIPTION
8
+ iosnoop prints block device I/O events as they happen, with useful details such
9
+ as PID, device, I/O type, block number, I/O size, and latency.
10
+
11
+ This traces disk I/O at the block device interface, using the block:
12
+ tracepoints. This can help characterize the I/O requested for the storage
13
+ devices and their resulting performance. I/O completions can also be studied
14
+ event-by-event for debugging disk and controller I/O scheduling issues.
15
+
16
+ NOTE: Use of a duration buffers I/O, which reduces overheads, but this also
17
+ introduces a limit to the number of I/O that will be captured. See the duration
18
+ section in OPTIONS.
19
+
20
+ Since this uses ftrace, only the root user can use this tool.
21
+ .SH REQUIREMENTS
22
+ FTRACE CONFIG, and the tracepoints block:block_rq_insert, block:block_rq_issue,
23
+ and block:block_rq_complete, which you may already have enabled and available on
24
+ recent Linux kernels. And awk.
25
+ .SH OPTIONS
26
+ .TP
27
+ \-d device
28
+ Only show I/O issued by this device. (eg, "202,1"). This matches the DEV
29
+ column in the iosnoop output, and is filtered in-kernel.
30
+ .TP
31
+ \-i iotype
32
+ Only show I/O issued that matches this I/O type. This matches the TYPE column
33
+ in the iosnoop output, and wildcards ("*") can be used at the beginning or
34
+ end (only). Eg, "*R*" matches all reads. This is filtered in-kernel.
35
+ .TP
36
+ \-p PID
37
+ Only show I/O issued by this PID. This filters in-kernel. Note that I/O may be
38
+ issued indirectly; for example, as the result of a memory allocation, causing
39
+ dirty buffers (maybe from another PID) to be written to storage.
40
+
41
+ With the \-Q
42
+ option, the identified PID is more accurate, however, LATms now includes
43
+ queueing time (see the \-Q option).
44
+ .TP
45
+ \-n name
46
+ Only show I/O issued by processes with this name. Partial strings and regular
47
+ expressions are allowed. This is a post-filter, so all I/O is traced and then
48
+ filtered in user space. As with PID, this includes indirectly issued I/O,
49
+ and \-Q can be used to improve accuracy (see the \-Q option).
50
+ .TP
51
+ \-h
52
+ Print usage message.
53
+ .TP
54
+ \-Q
55
+ Use block I/O queue insertion as the start tracepoint (block:block_rq_insert),
56
+ instead of block I/O issue (block:block_rq_issue). This makes the following
57
+ changes: COMM and PID are more likely to identify the origin process, as are
58
+ \-p PID and \-n name; STARTs shows queue insert; and LATms shows I/O
59
+ time including time spent on the block I/O queue.
60
+ .TP
61
+ \-s
62
+ Include a column for the start time (issue time) of the I/O, in seconds.
63
+ If the \-Q option is used, this is the time the I/O is inserted on the block
64
+ I/O queue.
65
+ .TP
66
+ \-t
67
+ Include a column for the completion time of the I/O, in seconds.
68
+ .TP
69
+ duration
70
+ Set the duration of tracing, in seconds. Trace output will be buffered and
71
+ printed at the end. This also reduces overheads by buffering in-kernel,
72
+ instead of printing events as they occur.
73
+
74
+ The ftrace buffer has a fixed size per-CPU (see
75
+ /sys/kernel/debug/tracing/buffer_size_kb). If you think events are missing,
76
+ try increasing that size (the bufsize_kb setting in iosnoop). With the
77
+ default setting (4 Mbytes), I'd expect this to happen around 50k I/O.
78
+ .SH EXAMPLES
79
+ .TP
80
+ Default output, print I/O activity as it occurs:
81
+ #
82
+ .B iosnoop
83
+ .TP
84
+ Buffer for 5 seconds (lower overhead) and write to a file:
85
+ #
86
+ .B iosnoop 5 > outfile
87
+ .TP
88
+ Trace based on block I/O queue insertion, showing queueing time:
89
+ #
90
+ .B iosnoop -Q
91
+ .TP
92
+ Trace reads only:
93
+ #
94
+ .B iosnoop \-i '*R*'
95
+ .TP
96
+ Trace I/O issued to device 202,1 only:
97
+ #
98
+ .B iosnoop \-d 202,1
99
+ .TP
100
+ Include I/O start and completion timestamps:
101
+ #
102
+ .B iosnoop \-ts
103
+ .TP
104
+ Include I/O queueing and completion timestamps:
105
+ #
106
+ .B iosnoop \-Qts
107
+ .TP
108
+ Trace I/O issued when PID 181 was on-CPU only:
109
+ #
110
+ .B iosnoop \-p 181
111
+ .TP
112
+ Trace I/O queued when PID 181 was on-CPU (more accurate), and include queue time:
113
+ #
114
+ .B iosnoop \-Qp 181
115
+ .SH FIELDS
116
+ .TP
117
+ COMM
118
+ Process name (command) for the PID that was on-CPU when the I/O was issued, or
119
+ inserted if \-Q is used. See PID. This column is truncated to 12 characters.
120
+ .TP
121
+ PID
122
+ Process ID which was on-CPU when the I/O was issued, or inserted if \-Q is
123
+ used. This will usually be the
124
+ process directly requesting I/O, however, it may also include indirect I/O. For
125
+ example, a memory allocation by this PID which causes dirty memory from another
126
+ PID to be flushed to disk.
127
+ .TP
128
+ TYPE
129
+ Type of I/O. R=read, W=write, M=metadata, S=sync, A=readahead, F=flush or FUA (force unit access), D=discard, E=secure, N=null (not RWFD).
130
+ .TP
131
+ DEV
132
+ Storage device ID.
133
+ .TP
134
+ BLOCK
135
+ Disk block for the operation (location, relative to this device).
136
+ .TP
137
+ BYTES
138
+ Size of the I/O, in bytes.
139
+ .TP
140
+ LATms
141
+ Latency (time) for the I/O, in milliseconds.
142
+ .SH OVERHEAD
143
+ By default, iosnoop works without buffering, printing I/O events
144
+ as they happen (uses trace_pipe), context switching and consuming CPU to do
145
+ so. This has a limit of about 10,000 IOPS (depending on your platform), at
146
+ which point iosnoop will be consuming 1 CPU. The duration mode uses buffering,
147
+ and can handle much higher IOPS rates, however, the buffer has a limit of
148
+ about 50,000 I/O, after which events will be dropped. You can tune this with
149
+ bufsize_kb, which is per-CPU. Also note that the "-n" option is currently
150
+ post-filtered, so all events are traced.
151
+
152
+ The overhead may be acceptable in many situations. If it isn't, this tool
153
+ can be reimplemented in C, or using a different tracer (eg, perf_events,
154
+ SystemTap, ktap.)
155
+ .SH SOURCE
156
+ This is from the perf-tools collection.
157
+ .IP
158
+ https://github.com/brendangregg/perf-tools
159
+ .PP
160
+ Also look under the examples directory for a text file containing example
161
+ usage, output, and commentary for this tool.
162
+ .SH OS
163
+ Linux
164
+ .SH STABILITY
165
+ Unstable - in development.
166
+ .SH AUTHOR
167
+ Brendan Gregg
168
+ .SH SEE ALSO
169
+ iolatency(8), iostat(1), lsblk(8)