RubyGems - fluent-plugin-perf-tools - Versions diffs - 0.1.0 - Mend

fluent-plugin-perf-tools 0.1.0

Files changed (98) hide show

checksums.yaml +7 -0
data/.gitignore +15 -0
data/.rubocop.yml +26 -0
data/.ruby-version +1 -0
data/CHANGELOG.md +5 -0
data/CODE_OF_CONDUCT.md +84 -0
data/Gemfile +5 -0
data/LICENSE.txt +21 -0
data/README.md +43 -0
data/Rakefile +17 -0
data/bin/console +15 -0
data/bin/setup +8 -0
data/fluent-plugin-perf-tools.gemspec +48 -0
data/lib/fluent/plugin/in_perf_tools.rb +42 -0
data/lib/fluent/plugin/perf_tools/cachestat.rb +65 -0
data/lib/fluent/plugin/perf_tools/command.rb +30 -0
data/lib/fluent/plugin/perf_tools/version.rb +9 -0
data/lib/fluent/plugin/perf_tools.rb +11 -0
data/perf-tools/LICENSE +339 -0
data/perf-tools/README.md +205 -0
data/perf-tools/bin/bitesize +1 -0
data/perf-tools/bin/cachestat +1 -0
data/perf-tools/bin/execsnoop +1 -0
data/perf-tools/bin/funccount +1 -0
data/perf-tools/bin/funcgraph +1 -0
data/perf-tools/bin/funcslower +1 -0
data/perf-tools/bin/functrace +1 -0
data/perf-tools/bin/iolatency +1 -0
data/perf-tools/bin/iosnoop +1 -0
data/perf-tools/bin/killsnoop +1 -0
data/perf-tools/bin/kprobe +1 -0
data/perf-tools/bin/opensnoop +1 -0
data/perf-tools/bin/perf-stat-hist +1 -0
data/perf-tools/bin/reset-ftrace +1 -0
data/perf-tools/bin/syscount +1 -0
data/perf-tools/bin/tcpretrans +1 -0
data/perf-tools/bin/tpoint +1 -0
data/perf-tools/bin/uprobe +1 -0
data/perf-tools/deprecated/README.md +1 -0
data/perf-tools/deprecated/execsnoop-proc +150 -0
data/perf-tools/deprecated/execsnoop-proc.8 +80 -0
data/perf-tools/deprecated/execsnoop-proc_example.txt +46 -0
data/perf-tools/disk/bitesize +175 -0
data/perf-tools/examples/bitesize_example.txt +63 -0
data/perf-tools/examples/cachestat_example.txt +58 -0
data/perf-tools/examples/execsnoop_example.txt +153 -0
data/perf-tools/examples/funccount_example.txt +126 -0
data/perf-tools/examples/funcgraph_example.txt +2178 -0
data/perf-tools/examples/funcslower_example.txt +110 -0
data/perf-tools/examples/functrace_example.txt +341 -0
data/perf-tools/examples/iolatency_example.txt +350 -0
data/perf-tools/examples/iosnoop_example.txt +302 -0
data/perf-tools/examples/killsnoop_example.txt +62 -0
data/perf-tools/examples/kprobe_example.txt +379 -0
data/perf-tools/examples/opensnoop_example.txt +47 -0
data/perf-tools/examples/perf-stat-hist_example.txt +149 -0
data/perf-tools/examples/reset-ftrace_example.txt +88 -0
data/perf-tools/examples/syscount_example.txt +297 -0
data/perf-tools/examples/tcpretrans_example.txt +93 -0
data/perf-tools/examples/tpoint_example.txt +210 -0
data/perf-tools/examples/uprobe_example.txt +321 -0
data/perf-tools/execsnoop +292 -0
data/perf-tools/fs/cachestat +167 -0
data/perf-tools/images/perf-tools_2016.png +0 -0
data/perf-tools/iolatency +296 -0
data/perf-tools/iosnoop +296 -0
data/perf-tools/kernel/funccount +146 -0
data/perf-tools/kernel/funcgraph +259 -0
data/perf-tools/kernel/funcslower +248 -0
data/perf-tools/kernel/functrace +192 -0
data/perf-tools/kernel/kprobe +270 -0
data/perf-tools/killsnoop +263 -0
data/perf-tools/man/man8/bitesize.8 +70 -0
data/perf-tools/man/man8/cachestat.8 +111 -0
data/perf-tools/man/man8/execsnoop.8 +104 -0
data/perf-tools/man/man8/funccount.8 +76 -0
data/perf-tools/man/man8/funcgraph.8 +166 -0
data/perf-tools/man/man8/funcslower.8 +129 -0
data/perf-tools/man/man8/functrace.8 +123 -0
data/perf-tools/man/man8/iolatency.8 +116 -0
data/perf-tools/man/man8/iosnoop.8 +169 -0
data/perf-tools/man/man8/killsnoop.8 +100 -0
data/perf-tools/man/man8/kprobe.8 +162 -0
data/perf-tools/man/man8/opensnoop.8 +113 -0
data/perf-tools/man/man8/perf-stat-hist.8 +111 -0
data/perf-tools/man/man8/reset-ftrace.8 +49 -0
data/perf-tools/man/man8/syscount.8 +96 -0
data/perf-tools/man/man8/tcpretrans.8 +93 -0
data/perf-tools/man/man8/tpoint.8 +140 -0
data/perf-tools/man/man8/uprobe.8 +168 -0
data/perf-tools/misc/perf-stat-hist +223 -0
data/perf-tools/net/tcpretrans +311 -0
data/perf-tools/opensnoop +280 -0
data/perf-tools/syscount +192 -0
data/perf-tools/system/tpoint +232 -0
data/perf-tools/tools/reset-ftrace +123 -0
data/perf-tools/user/uprobe +390 -0
metadata +349 -0

data/perf-tools/man/man8/tcpretrans.8 ADDED Viewed

@@ -0,0 +1,93 @@
+.TH tcpretrans 8  "2014-07-31" "USER COMMANDS"
+.SH NAME
+tcpretrans \- show TCP retransmits, with address and other details. Uses Linux ftrace.
+.SH SYNOPSIS
+.B tcpretrans
+[\-hls]
+.SH DESCRIPTION
+This traces TCP retransmits that are sent by the system tcpretrans is executed
+from, showing address, port, and TCP state information,
+and sometimes the PID (although usually not, since retransmits are usually
+sent by the kernel on timeout events). To keep overhead low, only
+tcp_retransmit_skb() kernel calls are traced (this does not trace every packet).
+This was written as a proof of concept for ftrace, for older Linux systems,
+and without kernel debuginfo. It uses dynamic tracing of tcp_retransmit_skb(),
+and reads /proc/net/tcp for socket details. Its use of dynamic tracing and
+CPU registers is an unstable platform-specific workaround, and may require
+modifications to work on different kernels and platforms. This would be better
+written using a tracer such as SystemTap, and will likely be rewritten in the
+future when certain tracing features are added to the Linux kernel.
+When \-l is used, this also uses dynamic tracing of tcp_send_loss_probe() and
+a register.
+Currently only IPv4 is supported, on x86_64. If you try this on a different
+architecture, you'll likely need to adjust the register locations (search
+for %di).
+Since this uses ftrace, only the root user can use this tool.
+.SH REQUIREMENTS
+FTRACE and KPROBE CONFIG, tcp_retransmit_skb() kernel function.
+You may already have these on recent kernels. Perl is also required.
+TCP tail loss probes were added in Linux 3.10.
+.SH OPTIONS
+.TP
+\-h
+Print usage message.
+.TP
+\-s
+Include kernel stack traces.
+.TP
+\-l
+Include TCP tail loss probes.
+.SH EXAMPLES
+.TP
+Trace TCP retransmits
+#
+.B tcpretrans
+.SH FIELDS
+.TP
+TIME
+Time of retransmit (may be rounded up to the nearest second).
+.TP
+PID
+Process ID that was on-CPU. This is less useful than it might sound, as it
+may usually be 0, for the kernel, for timer-based retransmits.
+.TP
+LADDR
+Local address.
+.TP
+LPORT
+Local port.
+.TP
+\-\-
+Packet type: "R>" for retransmit, and "L>" for tail loss probe.
+.TP
+RADDR
+Remote address.
+.TP
+RPORT
+Remote port.
+.TP
+STATE
+TCP session state.
+.SH OVERHEAD
+The CPU overhead is relative to the rate of TCP retransmits, and is
+designed to be low as this does not examine every packet. Once per second the
+/proc/net/tcp file is read, and a buffer of retransmit trace events is
+retrieved from the kernel and processed.
+.SH SOURCE
+This is from the perf-tools collection.
+.IP
+https://github.com/brendangregg/perf-tools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+tcpdump(1)

data/perf-tools/man/man8/tpoint.8 ADDED Viewed

@@ -0,0 +1,140 @@
+.TH tpoint 8  "2014-07-20" "USER COMMANDS"
+.SH NAME
+tpoint \- trace a given tracepoint. Static tracing. Uses Linux ftrace.
+.SH SYNOPSIS
+.B tpoint
+[\-hHsv] [\-d secs] [\-p PID] [\-L TID] tracepoint [filter]
+.B tpoint
+\-l
+.SH DESCRIPTION
+This will enable a given tracepoint, print events, then disable the tracepoint
+when the program ends. This is like a simple version of the "perf" command for
+printing live tracepoint events only. Wildcards are currently not supported.
+If for any reason tpoint(8) is insufficient, use the more powerful perf
+command for tracing tracepoints instead.
+Beware of feedback loops: tracing tcp functions over an ssh session,
+or writing ext4 events to an ext4 file system. For the former, tcp
+trace data could be redirected to a file (as in the usage message). For
+the latter, trace to the screen or a different file system.
+Since this uses ftrace, only the root user can use this tool.
+.SH REQUIREMENTS
+FTRACE CONFIG and tracepoints, which you may already have enabled and available
+on recent kernels.
+.SH OPTIONS
+.TP
+\-d seconds
+Set the duration of tracing, in seconds. Trace output will be buffered and
+printed at the end. This also reduces overheads by buffering in-kernel,
+instead of printing events as they occur.
+The ftrace buffer has a fixed size per-CPU (see
+/sys/kernel/debug/tracing/buffer_size_kb). If you think events are missing,
+try increasing that size.
+.TP
+\-h
+Print usage message.
+.TP
+\-H
+Print column headers.
+.TP
+\-l
+List tracepoints only.
+.TP
+\-s
+Print kernel stack traces after each event.
+.TP
+\-v
+Show the tpoint format file only (do not trace), identifying possible variables
+for use in a custom filter.
+.TP
+\-p PID
+Only trace kernel functions when this process ID is on-CPU.
+.TP
+\-L TID
+Only trace kernel functions when this thread ID is on-CPU.
+.TP
+tracepoint
+A tracepoint name. Eg, block:block_rq_issue. See the EXAMPLES section.
+.TP
+filter
+An ftrace filter definition.
+.SH EXAMPLES
+.TP
+List tracepoints containing "open":
+#
+.B tpoint -l | grep open
+.TP
+Trace open() syscall entry:
+#
+.B tpoint syscalls:sys_enter_open
+.TP
+Trace open() syscall entry, showing column headers:
+#
+.B tpoint -H syscalls:sys_enter_open
+.TP
+Trace block I/O issue:
+#
+.B tpoint block:block_rq_issue
+.TP
+Trace block I/O issue with stack traces:
+#
+.B tpoint \-s block:block_rq_issue
+.SH FIELDS
+The output format depends on the kernel version, and headings can be printed
+using \-H. The format is the same as the ftrace function trace format, described
+in the kernel source under Documentation/trace/ftrace.txt.
+Typical fields are:
+.TP
+TASK-PID
+The process name (which could include dashes), a dash, and the process ID.
+.TP
+CPU#
+The CPU ID, in brackets.
+.TP
+||||
+Kernel state flags. For example, on Linux 3.16 these are for irqs-off,
+need-resched, hardirq/softirq, and preempt-depth.
+.TP
+TIMESTAMP
+Time of event, in seconds.
+.TP
+FUNCTION
+Kernel function name.
+.SH OVERHEAD
+This can generate a lot of trace data quickly, depending on the
+frequency of the traced events. Such data will cause performance overheads.
+This also works without buffering by default, printing function events
+as they happen (uses trace_pipe), context switching and consuming CPU to do
+so. If needed, you can try the "\-d secs" option, which buffers events
+instead, reducing overhead. If you think the buffer option is losing events,
+try increasing the buffer size (buffer_size_kb).
+Before using tpoint(8), you can use perf_events to count the rate of events
+for the tracepoint of interest, to gauge overhead. For example:
+.B perf stat \-e block:block_rq_issue \-a sleep 5
+That counts the occurrences of the block:block_rq_issue tracepoint for
+5 seconds.
+Also consider using perf_events, which manages buffers differently and more
+efficiently, for higher frequency applications.
+.SH SOURCE
+This is from the perf-tools collection:
+.IP
+https://github.com/brendangregg/perf-tools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+functrace(8), funccount(8), perf(1)

data/perf-tools/man/man8/uprobe.8 ADDED Viewed

@@ -0,0 +1,168 @@
+.TH uprobe 8  "2014-07-20" "USER COMMANDS"
+.SH NAME
+uprobe \- trace a given uprobe definition. User-level dynamic tracing. Uses Linux ftrace. EXPERIMENTAL.
+.SH SYNOPSIS
+.B uprobe
+[\-FhHsv] [\-d secs] [\-p PID] [\-L TID] {\-l target | uprobe_definition [filter]}
+.SH DESCRIPTION
+This will create, trace, then destroy a given uprobe definition. See
+Documentation/trace/uprobetracer.txt in the Linux kernel source for the
+syntax of a uprobe definition, and "uprobe -h" for examples. With this tool,
+the probe alias is optional (it will default to something meaningful).
+WARNING: This uses dynamic tracing of user-level functions, using some
+relatively new kernel code. I have seen this cause target processes to fail,
+either entering endless spin loops or crashing on illegal instructions. I
+believe newer kernels (post 4.0) are relatively safer, but use caution. Test
+in a lab environment, and know what you are doing, before use. Also consider
+other (more developed) user-level tracers (perf_events, LTTng, etc.).
+Use extreme caution with the raw address mode: eg, "p:libc:0xbf130". uprobe
+does not check for instruction alignment, so tracing the wrong address (eg,
+mid-way through a multi-byte instruction) will corrupt the target's memory.
+Other tracers (eg, perf_events with debuginfo) check alignment.
+Also beware of widespread tracing that interferes with the operation of the
+system, eg, tracing libc:malloc, which by default will trace _all_ processes.
+I wrote this because I kept testing different custom uprobes at the command
+line, and wanted a way to automate the steps. For generic user-level
+tracing, use perf_events directly.
+Since this uses ftrace, only the root user can use this tool.
+.SH REQUIREMENTS
+FTRACE and UPROBE CONFIG, which you may already have on recent
+kernel versions, file(1), ldconfig(8), objdump(1), and some version of awk.
+Also, currently only executes on Linux 4.0+ (see WARNING) unless -F is used.
+.SH OPTIONS
+.TP
+\-F
+Force. Trace despite kernel version warnings. Use on older kernels may expose
+you to (since fixed) bugs, which can lock up or crash target processes, which
+could also lock up the entire system. Test in a lab environment before use,
+and consider other more developed user-level tracers (perf_events, LTTng,
+etc.).
+.TP
+\-d seconds
+Set the duration of tracing, in seconds. Trace output will be buffered and
+printed at the end. This also reduces overheads by buffering in-kernel,
+instead of printing events as they occur.
+The ftrace buffer has a fixed size per-CPU (see
+/sys/kernel/debug/tracing/buffer_size_kb). If you think events are missing,
+try increasing that size.
+.TP
+\-h
+Print usage message.
+.TP
+\-H
+Print column headers.
+.TP
+\-s
+Print user-level stack traces after each event. These are currently printed
+in hex, and need post-processing to see user-level symbols (eg, addr2line;
+I should automate that).
+.TP
+\-v
+Show the uprobe format file only (do not trace), identifying possible variables
+for use in a custom filter.
+.TP
+\-p PID
+Only trace user-level functions when this process ID is on-CPU.
+.TP
+\-L TID
+Only trace user-level functions when this thread ID is on-CPU.
+.TP
+uprobe_definition
+A full uprobe definition, as documented by Documentation/trace/uprobetracer.txt
+in the Linux kernel source. Note that the probe alias name is optional with
+uprobe(8), and if not specified, it will default to something meaningful.
+See the EXAMPLES section.
+.TP
+filter
+An ftrace filter definition.
+.SH EXAMPLES
+These examples may need modification to match your target software function
+names and platform's register usage. If using platform specific registers
+becomes too painful in practice, consider a debuginfo-based tracer,
+which can trace variable names instead (eg, perf_events).
+.TP
+trace readline() calls in all running "bash" executables:
+#
+.B uprobe p:bash:readline
+.TP
+trace readline() with explicit executable path:
+#
+.B uprobe p:/bin/bash:readline
+.TP
+trace the return of readline() with return value as a string:
+#
+.B uprobe 'r:bash:readline +0($retval):string'
+.TP
+trace sleep() calls in all running libc shared libraries:
+#
+.B uprobe p:libc:sleep
+.TP
+trace sleep() with register %di (x86):
+#
+.B uprobe 'p:libc:sleep %di'
+.TP
+trace this address (use caution: must be instruction aligned):
+#
+.B uprobe p:libc:0xbf130
+.TP
+trace gettimeofday() for PID 1182 only:
+#
+.B uprobe -p 1182 p:libc:gettimeofday
+.TP
+trace the return of fopen() only when it returns NULL:
+#
+.B uprobe 'r:libc:fopen file=$retval' 'file == 0'
+.SH FIELDS
+The output format depends on the kernel version, and headings can be printed
+using \-H. The format is the same as the ftrace function trace format, described
+in the kernel source under Documentation/trace/ftrace.txt.
+Typical fields are:
+.TP
+TASK-PID
+The process name (which could include dashes), a dash, and the process ID.
+.TP
+CPU#
+The CPU ID, in brackets.
+.TP
+||||
+Kernel state flags. For example, on Linux 3.16 these are for irqs-off,
+need-resched, hardirq/softirq, and preempt-depth.
+.TP
+TIMESTAMP
+Time of event, in seconds.
+.TP
+FUNCTION
+User-level function name.
+.SH OVERHEAD
+This can generate a lot of trace data quickly, depending on the
+frequency of the traced events. Such data will cause performance overheads.
+This also works without buffering by default, printing function events
+as they happen (uses trace_pipe), context switching and consuming CPU to do
+so. If needed, you can try the "\-d secs" option, which buffers events
+instead, reducing overhead. If you think the buffer option is losing events,
+try increasing the buffer size (buffer_size_kb).
+If you find a use for uprobe(8) where the overhead is prohibitive, consider
+enabling the same probe using perf_events, where overhead should be reduced.
+.SH SOURCE
+This is from the perf-tools collection:
+.IP
+https://github.com/brendangregg/perf-tools
+.PP
+Also look under the examples directory for a text file containing example
+usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+kprobe(8)

data/perf-tools/misc/perf-stat-hist ADDED Viewed

@@ -0,0 +1,223 @@
+#!/bin/bash
+#
+# perf-stat-hist - perf_events stat histogram hack.
+#                  Written using Linux perf_events (aka "perf").
+#
+# This is a proof-of-concept showing in-kernel histogram summaries of a
+# tracepoint variable.
+#
+# USAGE: perf-stat-hist [-h] [-b buckets|-P power] [-m max] tracepoint
+#        variable [seconds]
+#
+# Run "perf-stat-hist -h" for full usage.
+#
+# This uses multiple counting tracepoints with different filters, one for each
+# histogram bucket. While this is summarized in-kernel, the use of multiple
+# tracepoints does add additional overhead, which is more evident if you change
+# the power-of size from 4 to 2 (which creates more buckets). Hopefully, in the
+# future this functionality will be provided in an efficient way from
+# perf_events itself, at which point this tool can be rewritten.
+#
+# From perf-tools: https://github.com/brendangregg/perf-tools
+#
+# COPYRIGHT: Copyright (c) 2014 Brendan Gregg.
+#
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License
+#  as published by the Free Software Foundation; either version 2
+#  of the License, or (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software Foundation,
+#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+#  (http://www.gnu.org/copyleft/gpl.html)
+#
+# 30-Jun-2014	Brendan Gregg	Created this.
+opt_buckets=0; buckets=; opt_power=0; power=4; opt_max=0; max=$((1024 * 1024))
+opt_filter=0; filter=; duration=0; debug=0
+trap ':' INT QUIT TERM PIPE HUP
+function usage {
+	cat <<-END >&2
+	USAGE: perf-stat-hist [-h] [-b buckets|-P power] [-m max] [-f filter]
+	                      tracepoint variable [seconds]
+	                 -b buckets      # specify histogram bucket points
+	                 -P power        # power-of (default is 4)
+	                 -m max          # max value for power-of
+	                 -f filter       # specify a filter
+	                 -h              # this usage message
+	   eg,
+	       perf-stat-hist syscalls:sys_enter_read count 5
+	                 # read() request histogram, 5 seconds
+	       perf-stat-hist syscalls:sys_exit_read ret 5
+	                 # read() return histogram, 5 seconds
+	       perf-stat-hist -P 10 syscalls:sys_exit_read ret 5
+	                 # ... use power-of-10
+	       perf-stat-hist -P 2 -m 1024 syscalls:sys_exit_read ret 5
+	                 # ... use power-of-2, max 1024
+	       perf-stat-hist -b "10 50 100 500" syscalls:sys_exit_read ret 5
+	                 # ... histogram based on these bucket ranges
+	       perf-stat-hist -b 10 syscalls:sys_exit_read ret 5
+	                 # ... bifurcate by the value 10 (lowest overhead)
+	       perf-stat-hist -f 'rwbs == "WS"' block:block_rq_complete nr_sector 5
+	                 # ... synchronous writes histogram, 5 seconds
+	See the man page and example file for more info.
+END
+	exit
+}
+function die {
+	echo >&2 "$@"
+	exit 1
+}
+### process options
+# -b: the argument is word-split into the buckets array (eg, "10 50 100").
+# -f: the filter is stored with a trailing " && " so that it can be prefixed
+#     directly onto each per-bucket filter expression.
+# -h or an unrecognized option: print usage and exit.
+while getopts b:hm:P:f: opt
+do
+	case $opt in
+	b)	opt_buckets=1; buckets=($OPTARG) ;;
+	P)	opt_power=1; power=$OPTARG ;;
+	m)	opt_max=1; max=$OPTARG ;;
+	f)	opt_filter=1; filter="$OPTARG && " ;;
+	h|?)	usage ;;
+	esac
+done
+shift $(( $OPTIND - 1 ))
+# at least a tracepoint and a variable must remain after the options
+(( $# < 2 )) && usage
+tpoint=$1			# tracepoint
+var=$2				# variable for histogram
+# optional third argument; when empty or 0 the trace runs until interrupted
+duration=${3}
+### option logic
+# -b and -P are mutually exclusive; a power below 2 is rejected.
+(( opt_buckets && opt_power )) && die "ERROR: use either -b or -P"
+(( opt_power && power < 2 )) && die "ERROR: -P power must be 2 or higher"
+### check that tracepoint exists
+# The name must match a whole line of ftrace's available_events file. When
+# the lookup fails for a non-root user, an extra hint is printed.
+if ! grep "^$tpoint\$" /sys/kernel/debug/tracing/available_events > /dev/null
+then
+	echo >&2 "ERROR: tracepoint \"$tpoint\" not found. Exiting..."
+	[[ "$USER" != "root" ]] && echo >&2 "Not root user?"
+	exit 1
+fi
+### auto build power-of buckets
+# Without -b, the bucket points are 0 followed by successive powers of
+# $power (1, power, power^2, ...) for as long as the value is <= $max.
+if (( !opt_buckets )); then
+	b=0
+	s=1
+	while (( s <= max )); do
+		b="$b $s"
+		(( s *= power ))
+	done
+	buckets=($b)
+fi
+### build list of tracepoints and filters for each histogram bucket
+# tpoints accumulates one "-e tracepoint --filter ..." pair per histogram row
+# for perf stat. awkarray accumulates awk assignments that recreate the
+# bucket points inside the awk report script.
+max=${buckets[${#buckets[@]} - 1]}	# last element
+((max_i = ${#buckets[*]} - 1))
+# first row: everything below the lowest bucket point
+tpoints="-e $tpoint --filter \"$filter $var < ${buckets[0]}\""
+awkarray=
+i=0
+while (( i < max_i )); do
+	# bucket points must be strictly increasing
+	if (( i && ${buckets[$i]} <= ${buckets[$i - 1]} )); then
+		die "ERROR: bucket list must increase in size."
+	fi
+	# middle rows: buckets[i] <= var < buckets[i + 1]
+	tpoints="$tpoints -e $tpoint --filter \"$filter $var >= ${buckets[$i]} && "
+	tpoints="$tpoints $var < ${buckets[$i + 1]}\""
+	awkarray="$awkarray buckets[$i]=${buckets[$i]};"
+	(( i++ ))
+done
+# last row: everything at or above the highest bucket point
+awkarray="$awkarray buckets[$max_i]=${buckets[$max_i]};"
+tpoints="$tpoints -e $tpoint --filter \"$filter $var >= ${buckets[$max_i]}\""
+if (( debug )); then
+	echo buckets: ${buckets[*]}
+	echo tracepoints: $tpoints
+	echo awkarray: ${awkarray[*]}
+fi
+### prepare to run
+# perf stat is given a sleep command to bound the measurement: the requested
+# number of seconds, or a very long sleep that is expected to be interrupted.
+if (( duration )); then
+	etext="for $duration seconds"
+	cmd="sleep $duration"
+else
+	etext="until Ctrl-C"
+	cmd="sleep 999999"
+fi
+p_tpoint=$tpoint
+if [ -n "$filter" ]; then
+	# drop the trailing " && " (4 characters) that -f appended, for display
+	p_tpoint="$tpoint (Filter: ${filter%????})"
+fi
+if (( opt_buckets )); then
+	echo "Tracing $p_tpoint, specified buckets, $etext..."
+else
+	echo "Tracing $p_tpoint, power-of-$power, max $max, $etext..."
+fi
+### run perf
+# perf stat counts every per-bucket tracepoint/filter pair system-wide (-a)
+# while $cmd (a sleep) runs, and its report is captured in $stat.
+# eval is needed because $tpoints carries its own embedded double quotes
+# around each filter expression.
+# NOTE(review): the tracepoint, variable, filter and duration arguments are
+# interpolated into this eval unescaped; only pass trusted values.
+out="-o /dev/stdout"	# a workaround needed in linux 3.2; not by 3.4.15
+stat=$(eval perf stat $tpoints -a $out $cmd 2>&1)
+if (( $? != 0 )); then
+	echo >&2 "ERROR running perf:"
+	echo >&2 "$stat"
+	# exit non-zero: a bare "exit" here would return the status of the
+	# echo above (0) and report success to the caller
+	exit 1
+fi
+# with debug set, dump the captured perf stat output before parsing it
+if (( debug )); then
+	echo raw output:
+	echo "$stat"
+	echo
+fi
+### find max value for ASCII histogram
+# most = the largest count on any perf stat line whose second field is the
+# tracepoint name; commas are removed from the line before comparing.
+most=$(echo "$stat" | awk -v tpoint=$tpoint '
+	$2 == tpoint { gsub(/,/, ""); if ($1 > m) { m = $1 } }
+	END { print m }'
+)
+### process output
+echo
+echo "$stat" | awk -v tpoint=$tpoint -v max_i=$max_i -v most=$most '
+	function star(sval, smax, swidth) {
+		stars = ""
+		if (smax == 0) return ""
+		for (si = 0; si < (swidth * sval / smax); si++) {
+			stars = stars "#"
+		}
+		return stars
+	}
+	BEGIN {
+		'"$awkarray"'
+		printf("            %-15s: %-8s %s\n", "Range", "Count",
+		    "Distribution")
+	}
+	/Performance counter stats/ { i = -1 }
+	# reverse order of rule set is important
+	{ ok = 0 }
+	$2 == tpoint { num = $1; gsub(/,/, "", num); ok = 1 }
+	ok && i >= max_i {
+		printf("   %10d -> %-10s: %-8s |%-38s|\n", buckets[i],
+		    "", num, star(num, most, 38))
+		next
+	}
+	ok && i >= 0 && i < max_i {
+		printf("   %10d -> %-10d: %-8s |%-38s|\n", buckets[i],
+		    buckets[i+1] - 1, num, star(num, most, 38))
+		i++
+		next
+	}
+	ok && i == -1 {
+		printf("   %10s -> %-10d: %-8s |%-38s|\n", "",
+		    buckets[0] - 1, num, star(num, most, 38))
+		i++
+	}
+'