fluent-plugin-perf-tools 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rubocop.yml +26 -0
  4. data/.ruby-version +1 -0
  5. data/CHANGELOG.md +5 -0
  6. data/CODE_OF_CONDUCT.md +84 -0
  7. data/Gemfile +5 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +43 -0
  10. data/Rakefile +17 -0
  11. data/bin/console +15 -0
  12. data/bin/setup +8 -0
  13. data/fluent-plugin-perf-tools.gemspec +48 -0
  14. data/lib/fluent/plugin/in_perf_tools.rb +42 -0
  15. data/lib/fluent/plugin/perf_tools/cachestat.rb +65 -0
  16. data/lib/fluent/plugin/perf_tools/command.rb +30 -0
  17. data/lib/fluent/plugin/perf_tools/version.rb +9 -0
  18. data/lib/fluent/plugin/perf_tools.rb +11 -0
  19. data/perf-tools/LICENSE +339 -0
  20. data/perf-tools/README.md +205 -0
  21. data/perf-tools/bin/bitesize +1 -0
  22. data/perf-tools/bin/cachestat +1 -0
  23. data/perf-tools/bin/execsnoop +1 -0
  24. data/perf-tools/bin/funccount +1 -0
  25. data/perf-tools/bin/funcgraph +1 -0
  26. data/perf-tools/bin/funcslower +1 -0
  27. data/perf-tools/bin/functrace +1 -0
  28. data/perf-tools/bin/iolatency +1 -0
  29. data/perf-tools/bin/iosnoop +1 -0
  30. data/perf-tools/bin/killsnoop +1 -0
  31. data/perf-tools/bin/kprobe +1 -0
  32. data/perf-tools/bin/opensnoop +1 -0
  33. data/perf-tools/bin/perf-stat-hist +1 -0
  34. data/perf-tools/bin/reset-ftrace +1 -0
  35. data/perf-tools/bin/syscount +1 -0
  36. data/perf-tools/bin/tcpretrans +1 -0
  37. data/perf-tools/bin/tpoint +1 -0
  38. data/perf-tools/bin/uprobe +1 -0
  39. data/perf-tools/deprecated/README.md +1 -0
  40. data/perf-tools/deprecated/execsnoop-proc +150 -0
  41. data/perf-tools/deprecated/execsnoop-proc.8 +80 -0
  42. data/perf-tools/deprecated/execsnoop-proc_example.txt +46 -0
  43. data/perf-tools/disk/bitesize +175 -0
  44. data/perf-tools/examples/bitesize_example.txt +63 -0
  45. data/perf-tools/examples/cachestat_example.txt +58 -0
  46. data/perf-tools/examples/execsnoop_example.txt +153 -0
  47. data/perf-tools/examples/funccount_example.txt +126 -0
  48. data/perf-tools/examples/funcgraph_example.txt +2178 -0
  49. data/perf-tools/examples/funcslower_example.txt +110 -0
  50. data/perf-tools/examples/functrace_example.txt +341 -0
  51. data/perf-tools/examples/iolatency_example.txt +350 -0
  52. data/perf-tools/examples/iosnoop_example.txt +302 -0
  53. data/perf-tools/examples/killsnoop_example.txt +62 -0
  54. data/perf-tools/examples/kprobe_example.txt +379 -0
  55. data/perf-tools/examples/opensnoop_example.txt +47 -0
  56. data/perf-tools/examples/perf-stat-hist_example.txt +149 -0
  57. data/perf-tools/examples/reset-ftrace_example.txt +88 -0
  58. data/perf-tools/examples/syscount_example.txt +297 -0
  59. data/perf-tools/examples/tcpretrans_example.txt +93 -0
  60. data/perf-tools/examples/tpoint_example.txt +210 -0
  61. data/perf-tools/examples/uprobe_example.txt +321 -0
  62. data/perf-tools/execsnoop +292 -0
  63. data/perf-tools/fs/cachestat +167 -0
  64. data/perf-tools/images/perf-tools_2016.png +0 -0
  65. data/perf-tools/iolatency +296 -0
  66. data/perf-tools/iosnoop +296 -0
  67. data/perf-tools/kernel/funccount +146 -0
  68. data/perf-tools/kernel/funcgraph +259 -0
  69. data/perf-tools/kernel/funcslower +248 -0
  70. data/perf-tools/kernel/functrace +192 -0
  71. data/perf-tools/kernel/kprobe +270 -0
  72. data/perf-tools/killsnoop +263 -0
  73. data/perf-tools/man/man8/bitesize.8 +70 -0
  74. data/perf-tools/man/man8/cachestat.8 +111 -0
  75. data/perf-tools/man/man8/execsnoop.8 +104 -0
  76. data/perf-tools/man/man8/funccount.8 +76 -0
  77. data/perf-tools/man/man8/funcgraph.8 +166 -0
  78. data/perf-tools/man/man8/funcslower.8 +129 -0
  79. data/perf-tools/man/man8/functrace.8 +123 -0
  80. data/perf-tools/man/man8/iolatency.8 +116 -0
  81. data/perf-tools/man/man8/iosnoop.8 +169 -0
  82. data/perf-tools/man/man8/killsnoop.8 +100 -0
  83. data/perf-tools/man/man8/kprobe.8 +162 -0
  84. data/perf-tools/man/man8/opensnoop.8 +113 -0
  85. data/perf-tools/man/man8/perf-stat-hist.8 +111 -0
  86. data/perf-tools/man/man8/reset-ftrace.8 +49 -0
  87. data/perf-tools/man/man8/syscount.8 +96 -0
  88. data/perf-tools/man/man8/tcpretrans.8 +93 -0
  89. data/perf-tools/man/man8/tpoint.8 +140 -0
  90. data/perf-tools/man/man8/uprobe.8 +168 -0
  91. data/perf-tools/misc/perf-stat-hist +223 -0
  92. data/perf-tools/net/tcpretrans +311 -0
  93. data/perf-tools/opensnoop +280 -0
  94. data/perf-tools/syscount +192 -0
  95. data/perf-tools/system/tpoint +232 -0
  96. data/perf-tools/tools/reset-ftrace +123 -0
  97. data/perf-tools/user/uprobe +390 -0
  98. metadata +349 -0
@@ -0,0 +1,296 @@
1
+ #!/bin/bash
2
+ #
3
+ # iosnoop - trace block device I/O.
4
+ # Written using Linux ftrace.
5
+ #
6
+ # This traces disk I/O at the block device interface, using the block:
7
+ # tracepoints. This can help characterize the I/O requested for the storage
8
+ # devices and their resulting performance. I/O completions can also be studied
9
+ # event-by-event for debugging disk and controller I/O scheduling issues.
10
+ #
11
+ # USAGE: ./iosnoop [-hQst] [-d device] [-i iotype] [-p pid] [-n name] [duration]
12
+ #
13
+ # Run "iosnoop -h" for full usage.
14
+ #
15
+ # REQUIREMENTS: FTRACE CONFIG, block:block_rq_* tracepoints (you may
16
+ # already have these on recent kernels).
17
+ #
18
+ # OVERHEAD: By default, iosnoop works without buffering, printing I/O events
19
+ # as they happen (uses trace_pipe), context switching and consuming CPU to do
20
+ # so. This has a limit of about 10,000 IOPS (depending on your platform), at
21
+ # which point iosnoop will be consuming 1 CPU. The duration mode uses buffering,
22
+ # and can handle much higher IOPS rates, however, the buffer has a limit of
23
+ # about 50,000 I/O, after which events will be dropped. You can tune this with
24
+ # bufsize_kb, which is per-CPU. Also note that the "-n" option is currently
25
+ # post-filtered, so all events are traced.
26
+ #
27
+ # This was written as a proof of concept for ftrace. It would be better written
28
+ # using perf_events (after some capabilities are added), which has a better
29
+ # buffering policy, or a tracer such as SystemTap or ktap.
30
+ #
31
+ # From perf-tools: https://github.com/brendangregg/perf-tools
32
+ #
33
+ # See the iosnoop(8) man page (in perf-tools) for more info.
34
+ #
35
+ # COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
36
+ #
37
+ # This program is free software; you can redistribute it and/or
38
+ # modify it under the terms of the GNU General Public License
39
+ # as published by the Free Software Foundation; either version 2
40
+ # of the License, or (at your option) any later version.
41
+ #
42
+ # This program is distributed in the hope that it will be useful,
43
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
44
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45
+ # GNU General Public License for more details.
46
+ #
47
+ # You should have received a copy of the GNU General Public License
48
+ # along with this program; if not, write to the Free Software Foundation,
49
+ # Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
50
+ #
51
+ # (http://www.gnu.org/copyleft/gpl.html)
52
+ #
53
+ # 12-Jul-2014 Brendan Gregg Created this.
54
+
55
### default variables
# ftrace control directory, and the lock file used to detect concurrent use.
tracing=/sys/kernel/debug/tracing
flock=/var/tmp/.ftrace-lock
# Trace buffer size (KB, per-CPU) written to buffer_size_kb during setup.
bufsize_kb=4096

# Option flags start at 0 (off); their argument values start empty.
opt_duration=0
duration=
opt_name=0
name=
opt_pid=0
pid=
ftext=
opt_start=0
opt_end=0
opt_device=0
device=
opt_iotype=0
iotype=
opt_queue=0

trap ':' INT QUIT TERM PIPE HUP    # sends execution to end tracing section
63
+
64
# Print the usage message to stderr, then exit (status is that of cat).
# The here-document delimiter is quoted so the text is emitted literally.
function usage {
    cat >&2 <<'END'
USAGE: iosnoop [-hQst] [-d device] [-i iotype] [-p PID] [-n name]
[duration]
-d device # device string (eg, "202,1")
-i iotype # match type (eg, '*R*' for all reads)
-n name # process name to match on I/O issue
-p PID # PID to match on I/O issue
-Q # use queue insert as start time
-s # include start time of I/O (s)
-t # include completion time of I/O (s)
-h # this usage message
duration # duration seconds, and use buffers
eg,
iosnoop # watch block I/O live (unbuffered)
iosnoop 1 # trace 1 sec (buffered)
iosnoop -Q # include queueing time in LATms
iosnoop -ts # include start and end timestamps
iosnoop -i '*R*' # trace reads
iosnoop -p 91 # show I/O issued when PID 91 is on-CPU
iosnoop -Qp 91 # show I/O queued by PID 91, queue time

See the man page and example file for more info.
END
    exit
}
90
+
91
# Evaluate the given command string; if it fails, report it on stderr.
# Never exits and always returns 0, so callers continue regardless.
function warn {
    eval "$@" && return 0
    echo >&2 "WARNING: command failed \"$@\""
}
96
+
97
# Undo the tracing setup: disable the start and completion tracepoints,
# reset their filters when any filter option (-d, -i, -p) was used, clear
# the trace buffer, and remove the lock file when wroteflock is set.
# Each step goes through warn, so a failure is reported but not fatal.
function end {
    local event

    # disable tracing
    echo 2>/dev/null
    echo "Ending tracing..." 2>/dev/null
    cd $tracing

    for event in "$b_start" block_rq_complete; do
        warn "echo 0 > events/block/$event/enable"
    done
    if (( opt_device || opt_iotype || opt_pid )); then
        for event in "$b_start" block_rq_complete; do
            warn "echo 0 > events/block/$event/filter"
        done
    fi
    warn "echo > trace"
    (( wroteflock )) && warn "rm $flock"
}
111
+
112
# Print the arguments to stderr and exit with status 1.
# Does not call end(), so it is only suitable before tracing is set up.
function die {
    echo "$@" 1>&2
    exit 1
}
116
+
117
# Print the arguments to stderr, then run end() with all further output
# discarded, and exit with status 1.
function edie {
    # die with a quiet end()
    echo "$@" 1>&2
    exec > /dev/null 2>&1
    end
    exit 1
}
124
+
125
### process options
# Flags set opt_* to 1; options with arguments also record the value.
# "h|?" must stay last: "?" is a glob that matches any single character,
# which is also how getopts reports an unknown option.
while getopts d:hi:n:p:Qst opt; do
    case $opt in
    d)  opt_device=1; device=$OPTARG ;;
    i)  opt_iotype=1; iotype=$OPTARG ;;
    n)  opt_name=1; name=$OPTARG ;;
    p)  opt_pid=1; pid=$OPTARG ;;
    Q)  opt_queue=1 ;;
    s)  opt_start=1 ;;
    t)  opt_end=1 ;;
    h|?) usage ;;
    esac
done
shift $(( OPTIND - 1 ))

# An optional trailing argument is the trace duration (buffered mode).
if (( $# )); then
    opt_duration=1
    duration=$1
    shift
fi

if (( opt_device )); then
    # The value is evaluated by shell arithmetic below, so insist on the
    # documented "major,minor" form before using it.
    [[ $device =~ ^[0-9]+,[0-9]+$ ]] ||
        die "ERROR: -d device must be \"major,minor\" (eg, \"202,1\")."
    major=${device%,*}
    minor=${device#*,}
    # 10# forces base 10 so a leading zero is not parsed as octal.
    # dev is the number later compared in the "dev == ..." filter.
    dev=$(( (10#$major << 20) + 10#$minor ))
fi
150
+
151
### option logic
if (( opt_pid && opt_name )); then
    die "ERROR: use either -p or -n."
fi

# ftext describes the active process match in the banner, if any.
if (( opt_pid )); then
    ftext=" issued by PID $pid"
fi
if (( opt_name )); then
    ftext=" issued by process name \"$name\""
fi

if (( ! opt_duration )); then
    echo "Tracing block I/O$ftext. Ctrl-C to end."
else
    echo "Tracing block I/O$ftext for $duration seconds (buffered)..."
fi

# Tracepoint treated as the start of an I/O: queue insert with -Q,
# otherwise request issue.
b_start=block_rq_issue
if (( opt_queue )); then
    b_start=block_rq_insert
fi

### select awk
# workaround for mawk fflush(): mawk for duration mode, gawk for live mode
if (( opt_duration )); then
    use=mawk
else
    use=gawk
fi
# fall back to plain awk when the preferred one is not in /usr/bin
if [[ -x /usr/bin/$use ]]; then
    awk=$use
else
    awk=awk
fi
wroteflock=1
170
+
171
### check permissions
cd "$tracing" || die "ERROR: accessing tracing. Root user? Kernel has FTRACE?
debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"

### ftrace lock
# The lock file holds the PID of the process that created it.
[[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat "$flock") $flock"
echo $$ > "$flock" || die "ERROR: unable to write $flock."

### setup and begin tracing
echo nop > current_tracer
warn "echo $bufsize_kb > buffer_size_kb"

# Two filter strings are built:
#   filter   - written to the completion tracepoint
#   filter_i - written to the start tracepoint ($b_start); identical to
#              filter, plus the common_pid match when -p is given
filter=
if (( opt_iotype )); then
    filter="rwbs ~ \"$iotype\""
fi
if (( opt_device )); then
    [[ "$filter" != "" ]] && filter="$filter && "
    filter="${filter}dev == $dev"
fi
filter_i=$filter
if (( opt_pid )); then
    [[ "$filter_i" != "" ]] && filter_i="$filter_i && "
    filter_i="${filter_i}common_pid == $pid"
    # -p alone leaves no completion filter; write 0 rather than an empty
    # string (0 is also what end() writes to reset a filter)
    [[ "$filter" == "" ]] && filter=0
fi
if (( opt_iotype || opt_device || opt_pid )); then
    if ! echo "$filter_i" > "events/block/$b_start/filter" || \
        ! echo "$filter" > events/block/block_rq_complete/filter
    then
        # the filter is built from -d, -i and -p; -t only adds a column
        edie "ERROR: setting -d, -i or -p filter. Exiting."
    fi
fi
if ! echo 1 > "events/block/$b_start/enable" || \
    ! echo 1 > events/block/block_rq_complete/enable; then
    edie "ERROR: enabling block I/O tracepoints. Exiting."
fi

# column headers; the optional timestamp columns come first
(( opt_start )) && printf "%-15s " "STARTs"
(( opt_end )) && printf "%-15s " "ENDs"
printf "%-12.12s %-6s %-4s %-8s %-12s %-6s %8s\n" \
    "COMM" "PID" "TYPE" "DEV" "BLOCK" "BYTES" "LATms"
211
+
212
#
# Determine output format. It may be one of the following (newest first):
#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
#           TASK-PID    CPU#    TIMESTAMP  FUNCTION
# To differentiate between them, the number of header fields is counted,
# and an offset set, to skip the extra column when needed.
#
# Only the first header line whose second field contains TASK is examined:
# it yields 1 when that line starts with "#" and has six fields, else 0.
offset=$($awk '
    $2 ~ /TASK/ {
        print (($1 == "#" && NF == 6) ? 1 : 0)
        exit
    }' trace)
222
+
223
### print trace buffer
# Clear the buffer, then feed trace lines to awk, which pairs each start
# event with its completion (keyed by device and block location) and prints
# one line per completed I/O. Values are passed with quoted -v assignments
# so a process name containing spaces cannot split the awk command line.
warn "echo > trace"
(
    if (( opt_duration )); then
        # wait then dump buffer
        sleep "$duration"
        cat trace
    else
        # print buffer live
        cat trace_pipe
    fi
) | "$awk" -v o="$offset" -v opt_name="$opt_name" -v name="$name" \
    -v opt_duration="$opt_duration" -v opt_start="$opt_start" \
    -v opt_end="$opt_end" -v b_start="$b_start" '
    # common fields
    $1 != "#" {
        # The first field is COMM-PID. The task name can itself contain
        # dashes and digits (eg, "jbd2/xvda1-8"), so strip only the
        # trailing "-PID" by anchoring the match at the end.
        comm = pid = $1
        sub(/-[0-9][0-9]*$/, "", comm)
        sub(/.*-/, "", pid)
        time = $(3+o); sub(":", "", time)
        dev = $(5+o)
    }

    # block I/O request
    $1 != "#" && $0 ~ b_start {
        if (opt_name && match(comm, name) == 0)
            next
        #
        # example: (fields1..4+o) 202,1 W 0 () 12862264 + 8 [tar]
        # The cmd field "()" might contain multiple words (hex),
        # hence stepping from the right (NF-3).
        #
        loc = $(NF-3)
        starts[dev, loc] = time
        comms[dev, loc] = comm
        pids[dev, loc] = pid
        next
    }

    # block I/O completion
    $1 != "#" && $0 ~ /rq_complete/ {
        #
        # example: (fields1..4+o) 202,1 W () 12862256 + 8 [0]
        #
        dir = $(6+o)
        loc = $(NF-3)
        nsec = $(NF-1)

        # Test membership with "in": merely referencing starts[dev, loc]
        # would create an empty entry for every completion that has no
        # recorded start, growing the array without bound in live mode.
        if ((dev, loc) in starts) {
            latency = sprintf("%.2f",
                1000 * (time - starts[dev, loc]))
            comm = comms[dev, loc]
            pid = pids[dev, loc]

            if (opt_start)
                printf "%-15s ", starts[dev, loc]
            if (opt_end)
                printf "%-15s ", time
            # nsec is multiplied by 512 to report bytes
            printf "%-12.12s %-6s %-4s %-8s %-12s %-6s %8s\n",
                comm, pid, dir, dev, loc, nsec * 512, latency
            if (!opt_duration)
                fflush()

            delete starts[dev, loc]
            delete comms[dev, loc]
            delete pids[dev, loc]
        }
        next
    }

    $0 ~ /LOST.*EVENTS/ { print "WARNING: " $0 > "/dev/stderr" }
'

### end tracing
end
@@ -0,0 +1,146 @@
1
+ #!/bin/bash
2
+ #
3
+ # funccount - count kernel function calls matching specified wildcards.
4
+ # Uses Linux ftrace.
5
+ #
6
+ # This is a proof of concept using Linux ftrace capabilities on older kernels,
7
+ # and works by using function profiling: in-kernel counters.
8
+ #
9
+ # USAGE: funccount [-hT] [-i secs] [-d secs] [-t top] funcstring
10
+ # eg,
11
+ # funccount 'ext3*' # count all ext3* kernel function calls
12
+ #
13
+ # Run "funccount -h" for full usage.
14
+ #
15
+ # WARNING: This uses dynamic tracing of kernel functions, and could cause
16
+ # kernel panics or freezes. Test, and know what you are doing, before use.
17
+ #
18
+ # REQUIREMENTS: CONFIG_FUNCTION_PROFILER, awk.
19
+ #
20
+ # From perf-tools: https://github.com/brendangregg/perf-tools
21
+ #
22
+ # COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
23
+ #
24
+ # This program is free software; you can redistribute it and/or
25
+ # modify it under the terms of the GNU General Public License
26
+ # as published by the Free Software Foundation; either version 2
27
+ # of the License, or (at your option) any later version.
28
+ #
29
+ # This program is distributed in the hope that it will be useful,
30
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
31
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32
+ # GNU General Public License for more details.
33
+ #
34
+ # You should have received a copy of the GNU General Public License
35
+ # along with this program; if not, write to the Free Software Foundation,
36
+ # Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
37
+ #
38
+ # (http://www.gnu.org/copyleft/gpl.html)
39
+ #
40
+ # 12-Jul-2014 Brendan Gregg Created this.
41
+
42
### default variables
tracing=/sys/kernel/debug/tracing

# Option flags start at 0 (off).
opt_duration=0
duration=
opt_interval=0
# length of each summary round when neither -i nor -d overrides it
interval=999999
opt_timestamp=0
opt_tail=0
# last stage of the report pipeline; cat passes every row through
tcmd=cat
ttext=

# A signal only sets quit, so the summary loop can finish its round.
trap 'quit=1' INT QUIT TERM PIPE HUP    # sends execution to end tracing section
47
+
48
# Print the usage message to stderr, then exit (status is that of cat).
function usage {
    cat >&2 <<'END'
USAGE: funccount [-hT] [-i secs] [-d secs] [-t top] funcstring
-d seconds # total duration of trace
-h # this usage message
-i seconds # interval summary
-t top # show top num entries only
-T # include timestamp (for -i)
eg,
funccount 'vfs*' # trace all funcs that match "vfs*"
funccount -d 5 'tcp*' # trace "tcp*" funcs for 5 seconds
funccount -t 10 'ext3*' # show top 10 "ext3*" funcs
funccount -i 1 'ext3*' # summary every 1 second
funccount -i 1 -d 5 'ext3*' # 5 x 1 second summaries

See the man page and example file for more info.
END
    exit
}
67
+
68
# Evaluate the given command string and report a failure on stderr.
# The failure is only reported; the function itself returns 0.
function warn {
    eval "$@" || echo >&2 "WARNING: command failed \"$@\""
}
73
+
74
# Write the arguments to stderr and terminate the script with status 1.
function die {
    echo "$@" >&2
    exit 1
}
78
+
79
### process options
# "h|?" stays last: "?" is a glob matching any single character, which is
# also how getopts reports an unknown option.
while getopts d:hi:t:T opt; do
    case "$opt" in
    T)
        opt_timestamp=1
        ;;
    d)
        opt_duration=1
        duration=$OPTARG
        ;;
    i)
        opt_interval=1
        interval=$OPTARG
        ;;
    t)
        opt_tail=1
        tnum=$OPTARG
        ;;
    h|?)
        usage
        ;;
    esac
done
# drop the parsed options, leaving the function pattern as $1
shift $(( OPTIND - 1 ))
91
+
92
### option logic
# a function pattern is mandatory
if (( $# == 0 )); then
    usage
fi
funcs="$1"

# -t: keep only the last tnum rows of the count-sorted report
if (( opt_tail )); then
    tcmd="tail -$tnum"
    ttext=" Top $tnum only."
fi

if (( ! opt_duration )); then
    echo "Tracing \"$funcs\".$ttext.. Ctrl-C to end."
else
    echo "Tracing \"$funcs\" for $duration seconds.$ttext.."
fi

# -d without -i: a single summary covering the whole duration
if (( opt_duration && !opt_interval )); then
    interval=$duration
fi
105
+
106
### check permissions
cd "$tracing" || die "ERROR: accessing tracing. Root user? Kernel has FTRACE?
debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"

### enable tracing
sysctl -q kernel.ftrace_enabled=1    # doesn't set exit status
# restrict the profiler to the requested function pattern
echo "$funcs" > set_ftrace_filter || die "ERROR: enabling \"$funcs\". Exiting."
warn "echo nop > current_tracer"
if ! echo 1 > function_profile_enabled; then
    # undo the filter written above before bailing out
    echo > set_ftrace_filter
    # two separate arguments, so echo joins them with a space
    die "ERROR: enabling function profiling." \
        "Have CONFIG_FUNCTION_PROFILER? Exiting."
fi
119
+
120
### summarize
# One report per round: toggle the profiler off and on, sleep for the
# interval, then sum the hit column per function across the
# trace_stat/function* files and print the totals sorted by count.
# The loop stops once a signal has set quit, or when -d was given and
# the accumulated seconds reach the duration.
quit=0
secs=0
until (( quit || (opt_duration && secs >= duration) )); do
    (( secs += interval ))
    echo 0 > function_profile_enabled
    echo 1 > function_profile_enabled
    sleep $interval

    echo
    if (( opt_timestamp )); then
        date
    fi
    printf "%-30s %8s\n" "FUNC" "COUNT"

    cat trace_stat/function* | awk '
        # skip headers by matching on the numeric hit column
        $2 ~ /[0-9]/ { hits[$1] += $2 }
        END {
            for (name in hits)
                printf "%-30s %8d\n", name, hits[name]
        }' | sort -n -k2 | $tcmd
done

### end tracing
echo 2>/dev/null
echo "Ending tracing..." 2>/dev/null
warn "echo 0 > function_profile_enabled"
warn "echo > set_ftrace_filter"
@@ -0,0 +1,259 @@
1
+ #!/bin/bash
2
+ #
3
+ # funcgraph - trace kernel function graph, showing child function calls.
4
+ # Uses Linux ftrace.
5
+ #
6
+ # This is an exploratory tool that shows the graph of child function calls
7
+ # for a given kernel function. This can cost moderate overhead to execute, and
8
+ # should only be used to understand kernel behavior for a given function before
9
+ # using other, lower overhead tools. This is a proof of concept using Linux
10
+ # ftrace capabilities on older kernels.
11
+ #
12
+ # USAGE: funcgraph [-aCDhHPtT] [-m maxdepth] [-p PID] [-L TID] [-d secs] funcstring
13
+ #
14
+ # Run "funcgraph -h" for full usage.
15
+ #
16
+ # The output format is the same as the ftrace function graph trace format,
17
+ # described in the kernel source under Documentation/trace/ftrace.txt.
18
+ # Note that the output may be shuffled when different CPU buffers are read;
19
+ # check the CPU column for changes, or include timestamps (-t) and post sort.
20
+ #
21
+ # The "-d duration" mode leaves the trace data in the kernel buffer, and
22
+ # only reads it at the end. If the trace data is large, beware of exhausting
23
+ # buffer space (/sys/kernel/debug/tracing/buffer_size_kb) and losing data.
24
+ #
25
+ # Also beware of feedback loops: tracing tcp* functions over an ssh session,
26
+ # or writing ext4* functions to an ext4 file system. For the former, tcp
27
+ # trace data could be redirected to a file (as in the usage message). For
28
+ # the latter, trace to the screen or a different file system.
29
+ #
30
+ # WARNING: This uses dynamic tracing of kernel functions, and could cause
31
+ # kernel panics or freezes. Test, and know what you are doing, before use.
32
+ #
33
+ # OVERHEADS: This tool causes moderate to high overheads. Use with caution for
34
+ # exploratory purposes, then switch to lower overhead techniques based on
35
+ # findings. It's expected that the kernel will run at least 50% slower while
36
+ # this tool is running -- even while no output is being generated. This is
37
+ # because ALL kernel functions are traced, and filtered based on the function
38
+ # of interest. When output is generated, it can generate many lines quickly
39
+ # depending on the traced event. Such data will cause performance overheads.
40
+ # This also works without buffering by default, printing function events
41
+ # as they happen (uses trace_pipe), context switching and consuming CPU to do
42
+ # so. If needed, you can try the "-d secs" option, which buffers events
43
+ # instead, reducing overhead. If you think the buffer option is losing events,
44
+ # try increasing the buffer size (buffer_size_kb).
45
+ #
46
+ # From perf-tools: https://github.com/brendangregg/perf-tools
47
+ #
48
+ # COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
49
+ #
50
+ # This program is free software; you can redistribute it and/or
51
+ # modify it under the terms of the GNU General Public License
52
+ # as published by the Free Software Foundation; either version 2
53
+ # of the License, or (at your option) any later version.
54
+ #
55
+ # This program is distributed in the hope that it will be useful,
56
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
57
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
58
+ # GNU General Public License for more details.
59
+ #
60
+ # You should have received a copy of the GNU General Public License
61
+ # along with this program; if not, write to the Free Software Foundation,
62
+ # Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
63
+ #
64
+ # (http://www.gnu.org/copyleft/gpl.html)
65
+ #
66
+ # 12-Jul-2014 Brendan Gregg Created this.
67
+
68
### default variables
# ftrace control directory, and the lock file used to detect concurrent use.
tracing=/sys/kernel/debug/tracing
flock=/var/tmp/.ftrace-lock

# Option flags start at 0 (off); their argument values start empty.
opt_duration=0
duration=
opt_pid=0
pid=
opt_tid=0
tid=
pidtext=
opt_headers=0
opt_proc=0
opt_time=0
opt_tail=0
opt_nodur=0
opt_cpu=0
opt_max=0
max=0

trap ':' INT QUIT TERM PIPE HUP    # sends execution to end tracing section
75
+
76
# Print the usage message to stderr, then exit (status is that of cat).
# The here-document delimiter is quoted so the text is emitted literally.
function usage {
    cat >&2 <<'END'
USAGE: funcgraph [-aCDhHPtT] [-m maxdepth] [-p PID] [-L TID] [-d secs] funcstring
-a # all info (same as -HPt)
-C # measure on-CPU time only
-d seconds # trace duration, and use buffers
-D # do not show function duration
-h # this usage message
-H # include column headers
-m maxdepth # max stack depth to show
-p PID # trace when this pid is on-CPU
-L TID # trace when this thread is on-CPU
-P # show process names & PIDs
-t # show timestamps
-T # comment function tails
eg,
funcgraph do_nanosleep # trace do_nanosleep() and children
funcgraph -m 3 do_sys_open # trace do_sys_open() to 3 levels only
funcgraph -a do_sys_open # include timestamps and process name
funcgraph -p 198 do_sys_open # trace do_sys_open() for PID 198 only
funcgraph -d 1 do_sys_open >out # trace 1 sec, then write to file

See the man page and example file for more info.
END
    exit
}
102
+
103
# Evaluate the given command string. A non-zero status is reported on
# stderr but otherwise ignored; the function returns 0 either way.
function warn {
    if eval "$@"; then
        return 0
    fi
    echo >&2 "WARNING: command failed \"$@\""
}
108
+
109
# Undo the tracing setup: write back the opposite of each trace_options
# flag that the chosen options turned on or off, return the tracer to nop,
# clear the PID filter, depth limit, graph function and trace buffer, and
# remove the lock file when wroteflock is set. Each step goes through
# warn, so a failure is reported but not fatal.
function end {
    # disable tracing
    echo 2>/dev/null
    echo "Ending tracing..." 2>/dev/null
    cd $tracing

    if (( opt_time )); then
        warn "echo nofuncgraph-abstime > trace_options"
    fi
    if (( opt_proc )); then
        warn "echo nofuncgraph-proc > trace_options"
    fi
    if (( opt_tail )); then
        warn "echo nofuncgraph-tail > trace_options"
    fi
    if (( opt_nodur )); then
        warn "echo funcgraph-duration > trace_options"
    fi
    if (( opt_cpu )); then
        warn "echo sleep-time > trace_options"
    fi

    warn "echo nop > current_tracer"
    if (( opt_pid || opt_tid )); then
        warn "echo > set_ftrace_pid"
    fi
    if (( opt_max )); then
        warn "echo 0 > max_graph_depth"
    fi
    warn "echo > set_graph_function"
    warn "echo > trace"

    (( wroteflock )) && warn "rm $flock"
}
129
+
130
# Report the arguments on stderr and exit with status 1, without running
# end(); intended for failures before any tracing state has been changed.
function die {
    1>&2 echo "$@"
    exit 1
}
134
+
135
# Report the arguments on stderr, silence all further output, run end()
# to undo the tracing setup, and exit with status 1.
function edie {
    # die with a quiet end()
    echo "$@" 1>&2
    exec > /dev/null 2>&1
    end
    exit 1
}
142
+
143
### process options
# "h|?" stays last: "?" is a glob matching any single character, which is
# also how getopts reports an unknown option.
while getopts aCd:DhHm:p:L:PtT opt; do
    case "$opt" in
    # plain flags
    C)  opt_cpu=1 ;;
    D)  opt_nodur=1 ;;
    H)  opt_headers=1 ;;
    P)  opt_proc=1 ;;
    t)  opt_time=1 ;;
    T)  opt_tail=1 ;;
    # -a is shorthand for -H -P -t
    a)
        opt_headers=1
        opt_proc=1
        opt_time=1
        ;;
    # options that carry a value
    d)  opt_duration=1; duration=$OPTARG ;;
    m)  opt_max=1; max=$OPTARG ;;
    p)  opt_pid=1; pid=$OPTARG ;;
    L)  opt_tid=1; tid=$OPTARG ;;
    h|?) usage ;;
    esac
done
# drop the parsed options, leaving the function name as $1
shift $(( OPTIND - 1 ))
162
+
163
### option logic
# a function name is mandatory
(( $# == 0 )) && usage
# Use die, not edie, here: nothing has been set up yet and the ftrace lock
# has not been checked, so running end() at this point would reset tracer
# state that may belong to another tool.
(( opt_pid && opt_tid )) && die "ERROR: You can use -p or -L but not both."
funcs="$1"
# pidtext describes the process/thread restriction in the banner, if any
(( opt_pid )) && pidtext=" for PID $pid"
(( opt_tid )) && pidtext=" for TID $tid"
if (( opt_duration )); then
    echo "Tracing \"$funcs\"$pidtext for $duration seconds..."
else
    echo "Tracing \"$funcs\"$pidtext... Ctrl-C to end."
fi
174
+
175
### check permissions
cd "$tracing" || die "ERROR: accessing tracing. Root user? Kernel has FTRACE?
debugfs mounted? (mount -t debugfs debugfs /sys/kernel/debug)"

### ftrace lock
# The lock file holds the PID of the process that created it.
[[ -e $flock ]] && die "ERROR: ftrace may be in use by PID $(cat "$flock") $flock"
echo $$ > "$flock" || die "ERROR: unable to write $flock."
wroteflock=1    # from here on end() removes the lock

### setup and commence tracing
sysctl -q kernel.ftrace_enabled=1    # doesn't set exit status
# refuse to proceed if some tracer other than nop is already selected
read -r mode < current_tracer
[[ "$mode" != "nop" ]] && edie "ERROR: ftrace active (current_tracer=$mode)"
if (( opt_max )); then
    if ! echo "$max" > max_graph_depth; then
        edie "ERROR: setting -m $max. Older kernel version? Exiting."
    fi
fi
if (( opt_pid )); then
    echo > set_ftrace_pid
    # ftrace expects kernel pids, which are thread ids
    # A separate loop variable keeps the -L value in $tid untouched, and
    # the quotes stop an unmatched glob ("*") from expanding in this
    # directory.
    for task in /proc/$pid/task/*; do
        if ! echo "${task##*/}" >> set_ftrace_pid; then
            edie "ERROR: setting -p $pid (PID exist?). Exiting."
        fi
    done
fi
if (( opt_tid )); then
    if ! echo "$tid" > set_ftrace_pid; then
        edie "ERROR: setting -L $tid (TID exist?). Exiting."
    fi
fi
if ! echo > set_ftrace_filter; then
    edie "ERROR: writing to set_ftrace_filter. Exiting."
fi
if ! echo "$funcs" > set_graph_function; then
    edie "ERROR: enabling \"$funcs\". Exiting."
fi
if ! echo function_graph > current_tracer; then
    edie "ERROR: setting current_tracer to \"function_graph\". Exiting."
fi
if (( opt_cpu )); then
    if ! echo nosleep-time > trace_options; then
        edie "ERROR: setting -C (nosleep-time). Exiting."
    fi
fi
# the following must be done after setting current_tracer
if (( opt_time )); then
    if ! echo funcgraph-abstime > trace_options; then
        edie "ERROR: setting -t (funcgraph-abstime). Exiting."
    fi
fi
if (( opt_proc )); then
    if ! echo funcgraph-proc > trace_options; then
        edie "ERROR: setting -P (funcgraph-proc). Exiting."
    fi
fi
if (( opt_tail )); then
    if ! echo funcgraph-tail > trace_options; then
        edie "ERROR: setting -T (funcgraph-tail). Old kernel? Exiting."
    fi
fi
if (( opt_nodur )); then
    if ! echo nofuncgraph-duration > trace_options; then
        edie "ERROR: setting -D (nofuncgraph-duration). Exiting."
    fi
fi
242
+
243
### print trace buffer
# Clear the buffer, then either stream events live from trace_pipe, or
# (with -d) sleep for the duration and dump the buffered trace, dropping
# the "#" header lines unless headers were requested.
warn "echo > trace"
if (( ! opt_duration )); then
    # trace_pipe lacks headers, so fetch them from trace
    if (( opt_headers )); then
        cat trace
    fi
    cat trace_pipe
elif (( opt_headers )); then
    sleep $duration
    cat trace
else
    sleep $duration
    grep -v '^#' trace
fi

### end tracing
end