jruby-async-profiler 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/Gemfile +4 -0
  5. data/README.md +35 -0
  6. data/Rakefile +2 -0
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/ext/Rakefile +6 -0
  10. data/ext/async-profiler/.gitattributes +1 -0
  11. data/ext/async-profiler/.gitignore +6 -0
  12. data/ext/async-profiler/.travis.yml +11 -0
  13. data/ext/async-profiler/CHANGELOG.md +107 -0
  14. data/ext/async-profiler/JavaHome.class +0 -0
  15. data/ext/async-profiler/LICENSE +201 -0
  16. data/ext/async-profiler/Makefile +66 -0
  17. data/ext/async-profiler/README.md +487 -0
  18. data/ext/async-profiler/demo/SwingSet2.svg +2247 -0
  19. data/ext/async-profiler/docs/cddl1.txt +358 -0
  20. data/ext/async-profiler/profiler.sh +240 -0
  21. data/ext/async-profiler/src/allocTracer.cpp +155 -0
  22. data/ext/async-profiler/src/allocTracer.h +74 -0
  23. data/ext/async-profiler/src/arch.h +69 -0
  24. data/ext/async-profiler/src/arguments.cpp +265 -0
  25. data/ext/async-profiler/src/arguments.h +152 -0
  26. data/ext/async-profiler/src/codeCache.cpp +128 -0
  27. data/ext/async-profiler/src/codeCache.h +99 -0
  28. data/ext/async-profiler/src/engine.cpp +50 -0
  29. data/ext/async-profiler/src/engine.h +38 -0
  30. data/ext/async-profiler/src/flameGraph.cpp +770 -0
  31. data/ext/async-profiler/src/flameGraph.h +118 -0
  32. data/ext/async-profiler/src/flightRecorder.cpp +727 -0
  33. data/ext/async-profiler/src/flightRecorder.h +39 -0
  34. data/ext/async-profiler/src/frameName.cpp +189 -0
  35. data/ext/async-profiler/src/frameName.h +56 -0
  36. data/ext/async-profiler/src/itimer.cpp +49 -0
  37. data/ext/async-profiler/src/itimer.h +43 -0
  38. data/ext/async-profiler/src/jattach/jattach.c +437 -0
  39. data/ext/async-profiler/src/java/one/profiler/AsyncProfiler.java +160 -0
  40. data/ext/async-profiler/src/java/one/profiler/AsyncProfilerMXBean.java +43 -0
  41. data/ext/async-profiler/src/java/one/profiler/Counter.java +25 -0
  42. data/ext/async-profiler/src/java/one/profiler/Events.java +28 -0
  43. data/ext/async-profiler/src/javaApi.cpp +124 -0
  44. data/ext/async-profiler/src/lockTracer.cpp +161 -0
  45. data/ext/async-profiler/src/lockTracer.h +55 -0
  46. data/ext/async-profiler/src/mutex.cpp +33 -0
  47. data/ext/async-profiler/src/mutex.h +49 -0
  48. data/ext/async-profiler/src/os.h +45 -0
  49. data/ext/async-profiler/src/os_linux.cpp +129 -0
  50. data/ext/async-profiler/src/os_macos.cpp +115 -0
  51. data/ext/async-profiler/src/perfEvents.h +60 -0
  52. data/ext/async-profiler/src/perfEvents_linux.cpp +550 -0
  53. data/ext/async-profiler/src/perfEvents_macos.cpp +64 -0
  54. data/ext/async-profiler/src/profiler.cpp +952 -0
  55. data/ext/async-profiler/src/profiler.h +238 -0
  56. data/ext/async-profiler/src/spinLock.h +66 -0
  57. data/ext/async-profiler/src/stackFrame.h +57 -0
  58. data/ext/async-profiler/src/stackFrame_aarch64.cpp +75 -0
  59. data/ext/async-profiler/src/stackFrame_arm.cpp +58 -0
  60. data/ext/async-profiler/src/stackFrame_i386.cpp +82 -0
  61. data/ext/async-profiler/src/stackFrame_x64.cpp +113 -0
  62. data/ext/async-profiler/src/symbols.h +37 -0
  63. data/ext/async-profiler/src/symbols_linux.cpp +354 -0
  64. data/ext/async-profiler/src/symbols_macos.cpp +156 -0
  65. data/ext/async-profiler/src/vmEntry.cpp +173 -0
  66. data/ext/async-profiler/src/vmEntry.h +105 -0
  67. data/ext/async-profiler/src/vmStructs.cpp +104 -0
  68. data/ext/async-profiler/src/vmStructs.h +112 -0
  69. data/ext/async-profiler/src/wallClock.cpp +96 -0
  70. data/ext/async-profiler/src/wallClock.h +56 -0
  71. data/ext/async-profiler/test/AllocatingTarget.java +26 -0
  72. data/ext/async-profiler/test/LoadLibraryTest.java +21 -0
  73. data/ext/async-profiler/test/Target.java +31 -0
  74. data/ext/async-profiler/test/ThreadsTarget.java +35 -0
  75. data/ext/async-profiler/test/alloc-smoke-test.sh +36 -0
  76. data/ext/async-profiler/test/load-library-test.sh +35 -0
  77. data/ext/async-profiler/test/smoke-test.sh +37 -0
  78. data/ext/async-profiler/test/thread-smoke-test.sh +32 -0
  79. data/jruby-async-profiler.gemspec +32 -0
  80. data/lib/jruby/async/profiler.rb +10 -0
  81. data/lib/jruby/async/profiler/version.rb +7 -0
  82. metadata +155 -0
@@ -0,0 +1,487 @@
1
+ # async-profiler
2
+
3
+ This project is a low overhead sampling profiler for Java
4
+ that does not suffer from [Safepoint bias problem](http://psy-lob-saw.blogspot.ru/2016/02/why-most-sampling-java-profilers-are.html).
5
+ It features HotSpot-specific APIs to collect stack traces
6
+ and to track memory allocations. The profiler works with
7
+ OpenJDK, Oracle JDK and other Java runtimes based on HotSpot JVM.
8
+
9
+ async-profiler can trace the following kinds of events:
10
+ - CPU cycles
11
+ - Hardware and Software performance counters like cache misses, branch misses, page faults, context switches etc.
12
+ - Allocations in Java Heap
13
+ - Contended lock attempts, including both Java object monitors and ReentrantLocks
14
+
15
+ ## Download
16
+
17
+ Latest release (1.6):
18
+
19
+ - Linux x64 (glibc): [async-profiler-1.6-linux-x64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.6/async-profiler-1.6-linux-x64.tar.gz)
20
+ - Linux x64 (musl): [async-profiler-1.6-linux-x64-musl.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.6/async-profiler-1.6-linux-x64-musl.tar.gz)
21
+ - Linux ARM: [async-profiler-1.6-linux-arm.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.6/async-profiler-1.6-linux-arm.tar.gz)
22
+ - macOS x64: [async-profiler-1.6-macos-x64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.6/async-profiler-1.6-macos-x64.tar.gz)
23
+
24
+ [Previous releases](https://github.com/jvm-profiling-tools/async-profiler/releases)
25
+
26
+ ## Supported platforms
27
+
28
+ - **Linux** / x64 / x86 / ARM / AArch64
29
+ - **macOS** / x64
30
+
31
+ Note: macOS profiling is limited to user space code only.
32
+
33
+ ## CPU profiling
34
+
35
+ In this mode profiler collects stack trace samples that include **Java** methods,
36
+ **native** calls, **JVM** code and **kernel** functions.
37
+
38
+ The general approach is receiving call stacks generated by `perf_events`
39
+ and matching them up with call stacks generated by `AsyncGetCallTrace`,
40
+ in order to produce an accurate profile of both Java and native code.
41
+ Additionally, async-profiler provides a workaround to recover stack traces
42
+ in some [corner cases](https://bugs.openjdk.java.net/browse/JDK-8178287)
43
+ where `AsyncGetCallTrace` fails.
44
+
45
+ This approach has the following advantages compared to using `perf_events`
46
+ directly with a Java agent that translates addresses to Java method names:
47
+
48
+ * Works on older Java versions because it doesn't require
49
+ `-XX:+PreserveFramePointer`, which is only available in JDK 8u60 and later.
50
+
51
+ * Does not introduce the performance overhead from `-XX:+PreserveFramePointer`,
52
+ which can in rare cases be as high as 10%.
53
+
54
+ * Does not require generating a map file to map Java code addresses to method
55
+ names.
56
+
57
+ * Works with interpreter frames.
58
+
59
+ * Does not require writing out a perf.data file for further processing in
60
+ user space scripts.
61
+
62
+ ## ALLOCATION profiling
63
+
64
+ Instead of detecting CPU-consuming code, the profiler can be configured
65
+ to collect call sites where the largest amount of heap memory is allocated.
66
+
67
+ async-profiler does not use intrusive techniques like bytecode instrumentation
68
+ or expensive DTrace probes which have significant performance impact.
69
+ It also does not affect Escape Analysis or prevent JIT optimizations
70
+ like allocation elimination. Only actual heap allocations are measured.
71
+
72
+ The profiler features TLAB-driven sampling. It relies on HotSpot-specific
73
+ callbacks to receive two kinds of notifications:
74
+ - when an object is allocated in a newly created TLAB;
75
+ - when an object is allocated on a slow path outside TLAB.
76
+
77
+ This means not each allocation is counted, but only allocations every _N_ kB,
78
+ where _N_ is the average size of TLAB. This makes heap sampling very cheap
79
+ and suitable for production. On the other hand, the collected data
80
+ may be incomplete, though in practice it will often reflect the top allocation
81
+ sources.
82
+
83
+ Sampling interval can be adjusted with `-i` option.
84
+ For example, `-i 500k` will take one sample after 500 KB of allocated
85
+ space on average. However, intervals less than TLAB size will not take effect.
86
+
87
+ Unlike Java Mission Control which uses a similar approach, async-profiler
88
+ does not require Java Flight Recorder or any other JDK commercial feature.
89
+ It is completely based on open source technologies and it works with OpenJDK.
90
+
91
+ The minimum supported JDK version is 7u40 where the TLAB callbacks appeared.
92
+
93
+ Heap profiler requires HotSpot debug symbols. Oracle JDK already has them
94
+ embedded in `libjvm.so`, but in OpenJDK builds they are typically shipped
95
+ in a separate package. For example, to install OpenJDK debug symbols on
96
+ Debian / Ubuntu, run:
97
+ ```
98
+ # apt install openjdk-8-dbg
99
+ ```
100
+ or for OpenJDK 11:
101
+ ```
102
+ # apt install openjdk-11-dbg
103
+ ```
104
+
105
+ On Gentoo the `icedtea` OpenJDK package can be built with the per-package setting
106
+ `FEATURES="nostrip"` to retain symbols.
107
+
108
+ ### Wall-clock profiling
109
+
110
+ `-e wall` option tells async-profiler to sample all threads equally every given
111
+ period of time regardless of thread status: Running, Sleeping or Blocked.
112
+ For instance, this can be helpful when profiling application start-up time.
113
+
114
+ Wall-clock profiler is most useful in per-thread mode: `-t`.
115
+
116
+ Example: `./profiler.sh -e wall -t -i 5ms -f result.svg 8983`
117
+
118
+ ## Building
119
+
120
+ Build status: [![Build Status](https://travis-ci.org/jvm-profiling-tools/async-profiler.svg?branch=master)](https://travis-ci.org/jvm-profiling-tools/async-profiler)
121
+
122
+ Make sure the `JAVA_HOME` environment variable points to your JDK installation,
123
+ and then run `make`. GCC is required. After building, the profiler agent binary
124
+ will be in the `build` subdirectory. Additionally, a small application `jattach`
125
+ that can load the agent into the target process will also be compiled to the
126
+ `build` subdirectory.
127
+
128
+ ## Basic Usage
129
+
130
+ As of Linux 4.6, capturing kernel call stacks using `perf_events` from a non-
131
+ root process requires setting two runtime variables. You can set them using
132
+ sysctl or as follows:
133
+
134
+ ```
135
+ # echo 1 > /proc/sys/kernel/perf_event_paranoid
136
+ # echo 0 > /proc/sys/kernel/kptr_restrict
137
+ ```
138
+
139
+ To run the agent and pass commands to it, the helper script `profiler.sh`
140
+ is provided. A typical workflow would be to launch your Java application,
141
+ attach the agent and start profiling, exercise your performance scenario, and
142
+ then stop profiling. The agent's output, including the profiling results, will
143
+ be displayed in the Java application's standard output.
144
+
145
+ Example:
146
+
147
+ ```
148
+ $ jps
149
+ 9234 Jps
150
+ 8983 Computey
151
+ $ ./profiler.sh start 8983
152
+ $ ./profiler.sh stop 8983
153
+ ```
154
+
155
+ Alternatively, you may specify `-d` (duration) argument to profile
156
+ the application for a fixed period of time with a single command.
157
+
158
+ ```
159
+ $ ./profiler.sh -d 30 8983
160
+ ```
161
+
162
+ By default, the profiling frequency is 100Hz (every 10ms of CPU time).
163
+ Here is a sample of the output printed to the Java application's terminal:
164
+
165
+ ```
166
+ --- Execution profile ---
167
+ Total samples: 687
168
+ Unknown (native): 1 (0.15%)
169
+
170
+ --- 6790000000 (98.84%) ns, 679 samples
171
+ [ 0] Primes.isPrime
172
+ [ 1] Primes.primesThread
173
+ [ 2] Primes.access$000
174
+ [ 3] Primes$1.run
175
+ [ 4] java.lang.Thread.run
176
+
177
+ ... a lot of output omitted for brevity ...
178
+
179
+ ns percent samples top
180
+ ---------- ------- ------- ---
181
+ 6790000000 98.84% 679 Primes.isPrime
182
+ 40000000 0.58% 4 __do_softirq
183
+
184
+ ... more output omitted ...
185
+ ```
186
+
187
+ This indicates that the hottest method was `Primes.isPrime`, and the hottest
188
+ call stack leading to it comes from `Primes.primesThread`.
189
+
190
+ ## Launching as an Agent
191
+
192
+ If you need to profile some code as soon as the JVM starts up, instead of using the `profiler.sh` script,
193
+ it is possible to attach async-profiler as an agent on the command line. For example:
194
+
195
+ ```
196
+ $ java -agentpath:/path/to/libasyncProfiler.so=start,file=profile.svg ...
197
+ ```
198
+
199
+ Agent library is configured through the JVMTI argument interface.
200
+ The format of the arguments string is described
201
+ [in the source code](https://github.com/jvm-profiling-tools/async-profiler/blob/b7e9e6b955210784d5dc1d1839bb0febab1b712b/src/arguments.cpp#L34).
202
+ The `profiler.sh` script actually converts command line arguments to that format.
203
+
204
+ For instance, `-e alloc` is converted to `event=alloc`, `-f profile.svg`
205
+ is converted to `file=profile.svg` and so on. But some arguments are processed
206
+ directly by `profiler.sh` script. E.g. `-d 5` results in 3 actions:
207
+ attaching profiler agent with start command, sleeping for 5 seconds,
208
+ and then attaching the agent again with stop command.
209
+
210
+ ## Flame Graph visualization
211
+
212
+ async-profiler provides out-of-the-box [Flame Graph](https://github.com/BrendanGregg/FlameGraph) support.
213
+ Specify `-o svg` argument to dump profiling results as an interactive SVG
214
+ immediately viewable in all mainstream browsers.
215
+ Also, SVG output format will be chosen automatically if the target
216
+ filename ends with `.svg`.
217
+
218
+ ```
219
+ $ jps
220
+ 9234 Jps
221
+ 8983 Computey
222
+ $ ./profiler.sh -d 30 -f /tmp/flamegraph.svg 8983
223
+ ```
224
+
225
+ ![Example](https://github.com/jvm-profiling-tools/async-profiler/blob/master/demo/SwingSet2.svg)
226
+
227
+ ## Profiler Options
228
+
229
+ The following is a complete list of the command-line options accepted by
230
+ `profiler.sh` script.
231
+
232
+ * `start` - starts profiling in semi-automatic mode, i.e. profiler will run
233
+ until `stop` command is explicitly called.
234
+
235
+ * `resume` - starts or resumes earlier profiling session that has been stopped.
236
+ All the collected data remains valid. The profiling options are not preserved
237
+ between sessions, and should be specified again.
238
+
239
+ * `stop` - stops profiling and prints the report.
240
+
241
+ * `status` - prints profiling status: whether profiler is active and
242
+ for how long.
243
+
244
+ * `list` - shows the list of available profiling events. This option still
245
+ requires PID, since supported events may differ depending on JVM version.
246
+
247
+ * `-d N` - the profiling duration, in seconds. If no `start`, `resume`, `stop`
248
+ or `status` option is given, the profiler will run for the specified period
249
+ of time and then automatically stop.
250
+ Example: `./profiler.sh -d 30 8983`
251
+
252
+ * `-e event` - the profiling event: `cpu`, `alloc`, `lock`, `cache-misses` etc.
253
+ Use `list` to see the complete list of available events.
254
+
255
+ In allocation profiling mode the top frame of every call trace is the class
256
+ of the allocated object, and the counter is the heap pressure (the total size
257
+ of allocated TLABs or objects outside TLAB).
258
+
259
+ In lock profiling mode the top frame is the class of lock/monitor, and
260
+ the counter is the number of nanoseconds it took to enter this lock/monitor.
261
+
262
+ Two special event types are supported on Linux: hardware breakpoints
263
+ and kernel tracepoints:
264
+ - `-e mem:<func>[:rwx]` sets read/write/exec breakpoint at function
265
+ `<func>`. The format of `mem` event is the same as in `perf-record`.
266
+ Execution breakpoints can be also specified by the function name,
267
+ e.g. `-e malloc` will trace all calls of native `malloc` function.
268
+ - `-e trace:<id>` sets a kernel tracepoint. It is possible to specify
269
+ tracepoint symbolic name, e.g. `-e syscalls:sys_enter_open` will trace
270
+ all `open` syscalls.
271
+
272
+ * `-i N` - sets the profiling interval in nanoseconds or in other units,
273
+ if N is followed by `ms` (for milliseconds), `us` (for microseconds)
274
+ or `s` (for seconds). Only CPU active time is counted. No samples
275
+ are collected while CPU is idle. The default is 10000000 (10ms).
276
+ Example: `./profiler.sh -i 500us 8983`
277
+
278
+ * `-j N` - sets the Java stack profiling depth. This option will be ignored if N is greater
279
+ than the default 2048.
280
+ Example: `./profiler.sh -j 30 8983`
281
+
282
+ * `-b N` - sets the frame buffer size, in the number of Java
283
+ method ids that should fit in the buffer. If you receive messages about an
284
+ insufficient frame buffer size, increase this value from the default.
285
+ Example: `./profiler.sh -b 5000000 8983`
286
+
287
+ * `-t` - profile threads separately. Each stack trace will end with a frame
288
+ that denotes a single thread.
289
+ Example: `./profiler.sh -t 8983`
290
+
291
+ * `-s` - print simple class names instead of FQN.
292
+
293
+ * `-g` - print method signatures.
294
+
295
+ * `-a` - annotate Java method names by adding `_[j]` suffix.
296
+
297
+ * `-o fmt` - specifies what information to dump when profiling ends.
298
+ `fmt` can be one of the following options:
299
+ - `summary` - dump basic profiling statistics;
300
+ - `traces[=N]` - dump call traces (at most N samples);
301
+ - `flat[=N]` - dump flat profile (top N hot methods);
302
+ - `jfr` - dump events in Java Flight Recorder format readable by Java Mission Control.
303
+ This *does not* require JDK commercial features to be enabled.
304
+ - `collapsed[=C]` - dump collapsed call traces in the format used by
305
+ [FlameGraph](https://github.com/brendangregg/FlameGraph) script. This is
306
+ a collection of call stacks, where each line is a semicolon separated list
307
+ of frames followed by a counter.
308
+ - `svg[=C]` - produce Flame Graph in SVG format.
309
+ - `tree[=C]` - produce call tree in HTML format.
310
+ The `--reverse` option will generate a backtrace view.
311
+
312
+ `C` is a counter type:
313
+ - `samples` - the counter is a number of samples for the given trace;
314
+ - `total` - the counter is a total value of collected metric, e.g. total allocation size.
315
+
316
+ `summary`, `traces` and `flat` can be combined together.
317
+ The default format is `summary,traces=200,flat=200`.
318
+
319
+ * `--title TITLE`, `--width PX`, `--height PX`, `--minwidth PX`, `--reverse` - FlameGraph parameters.
320
+ Example: `./profiler.sh -f profile.svg --title "Sample CPU profile" --minwidth 0.5 8983`
321
+
322
+ * `-f FILENAME` - the file name to dump the profile information to.
323
+ `%p` in the file name is expanded to the PID of the target JVM;
324
+ `%t` - to the timestamp at the time of command invocation.
325
+ Example: `./profiler.sh -o collapsed -f /tmp/traces-%t.txt 8983`
326
+
327
+ * `--all-user` - include only user-mode events. This option is helpful when kernel profiling
328
+ is restricted by `perf_event_paranoid` settings.
329
+ `--all-kernel` is its counterpart option for including only kernel-mode events.
330
+
331
+ * `--sync-walk` - prefer synchronous JVMTI stack walker instead of `AsyncGetCallTrace`.
332
+ This option may improve accuracy of Java stack traces when profiling JVM runtime
333
+ functions, e.g. `VMThread::execute`, `G1CollectedHeap::humongous_obj_allocate` etc.
334
+ Do not use unless you are absolutely sure! When used incorrectly, this mode will crash JVM!
335
+
336
+ * `-v`, `--version` - prints the version of profiler library. If PID is specified,
337
+ gets the version of the library loaded into the given process.
338
+
339
+ ## Profiling Java in a container
340
+
341
+ It is possible to profile Java processes running in a Docker or LXC container
342
+ both from within a container and from the host system.
343
+
344
+ When profiling from the host, `pid` should be the Java process ID in the host
345
+ namespace. Use `ps aux | grep java` or `docker top <container>` to find
346
+ the process ID.
347
+
348
+ async-profiler should be run from the host by a privileged user - it will
349
+ automatically switch to the proper pid/mount namespace and change
350
+ user credentials to match the target process. Also make sure that
351
+ the target container can access `libasyncProfiler.so` by the same
352
+ absolute path as on the host.
353
+
354
+ By default, Docker container restricts the access to `perf_event_open`
355
+ syscall. So, in order to allow profiling inside a container, you'll need
356
+ to modify [seccomp profile](https://docs.docker.com/engine/security/seccomp/)
357
+ or disable it altogether with `--security-opt=seccomp:unconfined` option.
358
+
359
+ Alternatively, if changing Docker configuration is not possible,
360
+ you may fall back to `-e itimer` profiling mode, see [Troubleshooting](#troubleshooting).
361
+
362
+ ## Restrictions/Limitations
363
+
364
+ * On most Linux systems, `perf_events` captures call stacks with a maximum depth
365
+ of 127 frames. On recent Linux kernels, this can be configured using
366
+ `sysctl kernel.perf_event_max_stack` or by writing to the
367
+ `/proc/sys/kernel/perf_event_max_stack` file.
368
+
369
+ * Profiler allocates 8kB perf_event buffer for each thread of the target process.
370
+ Make sure `/proc/sys/kernel/perf_event_mlock_kb` value is large enough
371
+ (more than `8 * threads`) when running under unprivileged user.
372
+ Otherwise the message _"perf_event mmap failed: Operation not permitted"_
373
+ will be printed, and no native stack traces will be collected.
374
+
375
+ * There is no bullet-proof guarantee that the `perf_events` overflow signal
376
+ is delivered to the Java thread in a way that guarantees no other code has run,
377
+ which means that in some rare cases, the captured Java stack might not match
378
+ the captured native (user+kernel) stack.
379
+
380
+ * You will not see the non-Java frames _preceding_ the Java frames on the
381
+ stack. For example, if `start_thread` called `JavaMain` and then your Java
382
+ code started running, you will not see the first two frames in the resulting
383
+ stack. On the other hand, you _will_ see non-Java frames (user and kernel)
384
+ invoked by your Java code.
385
+
386
+ * No Java stacks will be collected if `-XX:MaxJavaStackTraceDepth` is zero
387
+ or negative.
388
+
389
+ * Too short profiling interval may cause continuous interruption of heavy
390
+ system calls like `clone()`, so that it will never complete;
391
+ see [#97](https://github.com/jvm-profiling-tools/async-profiler/issues/97).
392
+ The workaround is simply to increase the interval.
393
+
394
+ * When agent is not loaded at JVM startup (by using -agentpath option) it is
395
+ highly recommended to use `-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints` JVM flags.
396
+ Without those flags the profiler will still work correctly but results might be
397
+ less accurate e.g. without `-XX:+DebugNonSafepoints` there is a high chance that simple inlined methods will not appear in the profile. When agent is attached at runtime `CompiledMethodLoad` JVMTI event
398
+ enables debug info, but only for methods compiled after the event is turned on.
399
+
400
+ ## Troubleshooting
401
+
402
+ ```
403
+ Failed to change credentials to match the target process: Operation not permitted
404
+ ```
405
+ Due to limitation of HotSpot Dynamic Attach mechanism, the profiler must be run
406
+ by exactly the same user (and group) as the owner of the target JVM process.
407
+ If profiler is run by a different user, it will try to automatically change
408
+ current user and group. This will likely succeed for `root`, but not for
409
+ other users, resulting in the above error.
410
+
411
+ ```
412
+ Could not start attach mechanism: No such file or directory
413
+ ```
414
+ The profiler cannot establish communication with the target JVM through UNIX domain socket.
415
+
416
+ Usually this happens in one of the following cases:
417
+ 1. Attach socket `/tmp/.java_pidNNN` has been deleted. It is a common
418
+ practice to clean `/tmp` automatically with some scheduled script.
419
+ Configure the cleanup software to exclude `.java_pid*` files from deletion.
420
+ How to check: run `lsof -p PID | grep java_pid`
421
+ If it lists a socket file, but the file does not exist, then this is exactly
422
+ the described problem.
423
+ 2. JVM is started with `-XX:+DisableAttachMechanism` option.
424
+ 3. `/tmp` directory of Java process is not physically the same directory
425
+ as `/tmp` of your shell, because Java is running in a container or in
426
+ `chroot` environment. `jattach` attempts to solve this automatically,
427
+ but it might lack the required permissions to do so.
428
+ Check `strace build/jattach PID properties`
429
+ 4. JVM is busy and cannot reach a safepoint. For instance,
430
+ JVM is in the middle of long-running garbage collection.
431
+ How to check: run `kill -3 PID`. Healthy JVM process should print
432
+ a thread dump and heap info in its console.
433
+
434
+ ```
435
+ Failed to inject profiler into <pid>
436
+ ```
437
+ The connection with the target JVM has been established, but JVM is unable to load profiler shared library.
438
+ Make sure the user of JVM process has permissions to access `libasyncProfiler.so` by exactly the same absolute path.
439
+ For more information see [#78](https://github.com/jvm-profiling-tools/async-profiler/issues/78).
440
+
441
+ ```
442
+ Perf events unavailble. See stderr of the target process.
443
+ ```
444
+ `perf_event_open()` syscall has failed. The error message is printed to the error stream
445
+ of the target JVM.
446
+
447
+ Typical reasons include:
448
+ 1. `/proc/sys/kernel/perf_event_paranoid` is set to restricted mode (>=2).
449
+ 2. seccomp disables perf_event_open API in a container.
450
+ 3. OS runs under a hypervisor that does not virtualize performance counters.
451
+ 4. perf_event_open API is not supported on this system, e.g. WSL.
452
+
453
+ If changing the configuration is not possible, you may fall back to
454
+ `-e itimer` profiling mode. It is similar to `cpu` mode, but does not
455
+ require perf_events support. As a drawback, there will be no kernel
456
+ stack traces.
457
+
458
+ ```
459
+ No AllocTracer symbols found. Are JDK debug symbols installed?
460
+ ```
461
+ It might be needed to install the package with OpenJDK debug symbols.
462
+ See [Allocation profiling](#allocation-profiling) for details.
463
+
464
+ Note that allocation profiling is not supported on JVMs other than HotSpot, e.g. Zing.
465
+
466
+ ```
467
+ VMStructs unavailable. Unsupported JVM?
468
+ ```
469
+ JVM shared library does not export `gHotSpotVMStructs*` symbols -
470
+ apparently this is not a HotSpot JVM. Sometimes the same message
471
+ can be also caused by an incorrectly built JDK
472
+ (see [#218](https://github.com/jvm-profiling-tools/async-profiler/issues/218)).
473
+ In these cases installing JDK debug symbols may solve the problem.
474
+
475
+ ```
476
+ Could not parse symbols due to the OS bug
477
+ ```
478
+ Async-profiler was unable to parse non-Java function names because of
479
+ the corrupted contents in `/proc/[pid]/maps`. The problem is known to
480
+ occur in a container when running Ubuntu with Linux kernel 5.x.
481
+ This is an OS bug; see https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1843018.
482
+
483
+ ```
484
+ [frame_buffer_overflow]
485
+ ```
486
+ This message in the output means there was not enough space to store all call traces.
487
+ Consider increasing frame buffer size with `-b` option.