jruby-async-profiler 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/Gemfile +4 -0
  5. data/README.md +35 -0
  6. data/Rakefile +2 -0
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/ext/Rakefile +6 -0
  10. data/ext/async-profiler/.gitattributes +1 -0
  11. data/ext/async-profiler/.gitignore +6 -0
  12. data/ext/async-profiler/.travis.yml +11 -0
  13. data/ext/async-profiler/CHANGELOG.md +107 -0
  14. data/ext/async-profiler/JavaHome.class +0 -0
  15. data/ext/async-profiler/LICENSE +201 -0
  16. data/ext/async-profiler/Makefile +66 -0
  17. data/ext/async-profiler/README.md +487 -0
  18. data/ext/async-profiler/demo/SwingSet2.svg +2247 -0
  19. data/ext/async-profiler/docs/cddl1.txt +358 -0
  20. data/ext/async-profiler/profiler.sh +240 -0
  21. data/ext/async-profiler/src/allocTracer.cpp +155 -0
  22. data/ext/async-profiler/src/allocTracer.h +74 -0
  23. data/ext/async-profiler/src/arch.h +69 -0
  24. data/ext/async-profiler/src/arguments.cpp +265 -0
  25. data/ext/async-profiler/src/arguments.h +152 -0
  26. data/ext/async-profiler/src/codeCache.cpp +128 -0
  27. data/ext/async-profiler/src/codeCache.h +99 -0
  28. data/ext/async-profiler/src/engine.cpp +50 -0
  29. data/ext/async-profiler/src/engine.h +38 -0
  30. data/ext/async-profiler/src/flameGraph.cpp +770 -0
  31. data/ext/async-profiler/src/flameGraph.h +118 -0
  32. data/ext/async-profiler/src/flightRecorder.cpp +727 -0
  33. data/ext/async-profiler/src/flightRecorder.h +39 -0
  34. data/ext/async-profiler/src/frameName.cpp +189 -0
  35. data/ext/async-profiler/src/frameName.h +56 -0
  36. data/ext/async-profiler/src/itimer.cpp +49 -0
  37. data/ext/async-profiler/src/itimer.h +43 -0
  38. data/ext/async-profiler/src/jattach/jattach.c +437 -0
  39. data/ext/async-profiler/src/java/one/profiler/AsyncProfiler.java +160 -0
  40. data/ext/async-profiler/src/java/one/profiler/AsyncProfilerMXBean.java +43 -0
  41. data/ext/async-profiler/src/java/one/profiler/Counter.java +25 -0
  42. data/ext/async-profiler/src/java/one/profiler/Events.java +28 -0
  43. data/ext/async-profiler/src/javaApi.cpp +124 -0
  44. data/ext/async-profiler/src/lockTracer.cpp +161 -0
  45. data/ext/async-profiler/src/lockTracer.h +55 -0
  46. data/ext/async-profiler/src/mutex.cpp +33 -0
  47. data/ext/async-profiler/src/mutex.h +49 -0
  48. data/ext/async-profiler/src/os.h +45 -0
  49. data/ext/async-profiler/src/os_linux.cpp +129 -0
  50. data/ext/async-profiler/src/os_macos.cpp +115 -0
  51. data/ext/async-profiler/src/perfEvents.h +60 -0
  52. data/ext/async-profiler/src/perfEvents_linux.cpp +550 -0
  53. data/ext/async-profiler/src/perfEvents_macos.cpp +64 -0
  54. data/ext/async-profiler/src/profiler.cpp +952 -0
  55. data/ext/async-profiler/src/profiler.h +238 -0
  56. data/ext/async-profiler/src/spinLock.h +66 -0
  57. data/ext/async-profiler/src/stackFrame.h +57 -0
  58. data/ext/async-profiler/src/stackFrame_aarch64.cpp +75 -0
  59. data/ext/async-profiler/src/stackFrame_arm.cpp +58 -0
  60. data/ext/async-profiler/src/stackFrame_i386.cpp +82 -0
  61. data/ext/async-profiler/src/stackFrame_x64.cpp +113 -0
  62. data/ext/async-profiler/src/symbols.h +37 -0
  63. data/ext/async-profiler/src/symbols_linux.cpp +354 -0
  64. data/ext/async-profiler/src/symbols_macos.cpp +156 -0
  65. data/ext/async-profiler/src/vmEntry.cpp +173 -0
  66. data/ext/async-profiler/src/vmEntry.h +105 -0
  67. data/ext/async-profiler/src/vmStructs.cpp +104 -0
  68. data/ext/async-profiler/src/vmStructs.h +112 -0
  69. data/ext/async-profiler/src/wallClock.cpp +96 -0
  70. data/ext/async-profiler/src/wallClock.h +56 -0
  71. data/ext/async-profiler/test/AllocatingTarget.java +26 -0
  72. data/ext/async-profiler/test/LoadLibraryTest.java +21 -0
  73. data/ext/async-profiler/test/Target.java +31 -0
  74. data/ext/async-profiler/test/ThreadsTarget.java +35 -0
  75. data/ext/async-profiler/test/alloc-smoke-test.sh +36 -0
  76. data/ext/async-profiler/test/load-library-test.sh +35 -0
  77. data/ext/async-profiler/test/smoke-test.sh +37 -0
  78. data/ext/async-profiler/test/thread-smoke-test.sh +32 -0
  79. data/jruby-async-profiler.gemspec +32 -0
  80. data/lib/jruby/async/profiler.rb +10 -0
  81. data/lib/jruby/async/profiler/version.rb +7 -0
  82. metadata +155 -0
@@ -0,0 +1,487 @@
1
+ # async-profiler
2
+
3
+ This project is a low overhead sampling profiler for Java
4
+ that does not suffer from [Safepoint bias problem](http://psy-lob-saw.blogspot.ru/2016/02/why-most-sampling-java-profilers-are.html).
5
+ It features HotSpot-specific APIs to collect stack traces
6
+ and to track memory allocations. The profiler works with
7
+ OpenJDK, Oracle JDK and other Java runtimes based on HotSpot JVM.
8
+
9
+ async-profiler can trace the following kinds of events:
10
+ - CPU cycles
11
+ - Hardware and Software performance counters like cache misses, branch misses, page faults, context switches etc.
12
+ - Allocations in Java Heap
13
+ - Contended lock attempts, including both Java object monitors and ReentrantLocks
14
+
15
+ ## Download
16
+
17
+ Latest release (1.6):
18
+
19
+ - Linux x64 (glibc): [async-profiler-1.6-linux-x64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.6/async-profiler-1.6-linux-x64.tar.gz)
20
+ - Linux x64 (musl): [async-profiler-1.6-linux-x64-musl.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.6/async-profiler-1.6-linux-x64-musl.tar.gz)
21
+ - Linux ARM: [async-profiler-1.6-linux-arm.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.6/async-profiler-1.6-linux-arm.tar.gz)
22
+ - macOS x64: [async-profiler-1.6-macos-x64.tar.gz](https://github.com/jvm-profiling-tools/async-profiler/releases/download/v1.6/async-profiler-1.6-macos-x64.tar.gz)
23
+
24
+ [Previous releases](https://github.com/jvm-profiling-tools/async-profiler/releases)
25
+
26
+ ## Supported platforms
27
+
28
+ - **Linux** / x64 / x86 / ARM / AArch64
29
+ - **macOS** / x64
30
+
31
+ Note: macOS profiling is limited to user space code only.
32
+
33
+ ## CPU profiling
34
+
35
+ In this mode profiler collects stack trace samples that include **Java** methods,
36
+ **native** calls, **JVM** code and **kernel** functions.
37
+
38
+ The general approach is receiving call stacks generated by `perf_events`
39
+ and matching them up with call stacks generated by `AsyncGetCallTrace`,
40
+ in order to produce an accurate profile of both Java and native code.
41
+ Additionally, async-profiler provides a workaround to recover stack traces
42
+ in some [corner cases](https://bugs.openjdk.java.net/browse/JDK-8178287)
43
+ where `AsyncGetCallTrace` fails.
44
+
45
+ This approach has the following advantages compared to using `perf_events`
46
+ directly with a Java agent that translates addresses to Java method names:
47
+
48
+ * Works on older Java versions because it doesn't require
49
+ `-XX:+PreserveFramePointer`, which is only available in JDK 8u60 and later.
50
+
51
+ * Does not introduce the performance overhead from `-XX:+PreserveFramePointer`,
52
+ which can in rare cases be as high as 10%.
53
+
54
+ * Does not require generating a map file to map Java code addresses to method
55
+ names.
56
+
57
+ * Works with interpreter frames.
58
+
59
+ * Does not require writing out a perf.data file for further processing in
60
+ user space scripts.
61
+
62
+ ## ALLOCATION profiling
63
+
64
+ Instead of detecting CPU-consuming code, the profiler can be configured
65
+ to collect call sites where the largest amount of heap memory is allocated.
66
+
67
+ async-profiler does not use intrusive techniques like bytecode instrumentation
68
+ or expensive DTrace probes which have significant performance impact.
69
+ It also does not affect Escape Analysis or prevent JIT optimizations
70
+ like allocation elimination. Only actual heap allocations are measured.
71
+
72
+ The profiler features TLAB-driven sampling. It relies on HotSpot-specific
73
+ callbacks to receive two kinds of notifications:
74
+ - when an object is allocated in a newly created TLAB;
75
+ - when an object is allocated on a slow path outside TLAB.
76
+
77
+ This means not each allocation is counted, but only allocations every _N_ kB,
78
+ where _N_ is the average size of TLAB. This makes heap sampling very cheap
79
+ and suitable for production. On the other hand, the collected data
80
+ may be incomplete, though in practice it will often reflect the top allocation
81
+ sources.
82
+
83
+ Sampling interval can be adjusted with `-i` option.
84
+ For example, `-i 500k` will take one sample after 500 KB of allocated
85
+ space on average. However, intervals less than TLAB size will not take effect.
86
+
87
+ Unlike Java Mission Control which uses a similar approach, async-profiler
88
+ does not require Java Flight Recorder or any other JDK commercial feature.
89
+ It is completely based on open source technologies and it works with OpenJDK.
90
+
91
+ The minimum supported JDK version is 7u40 where the TLAB callbacks appeared.
92
+
93
+ Heap profiler requires HotSpot debug symbols. Oracle JDK already has them
94
+ embedded in `libjvm.so`, but in OpenJDK builds they are typically shipped
95
+ in a separate package. For example, to install OpenJDK debug symbols on
96
+ Debian / Ubuntu, run:
97
+ ```
98
+ # apt install openjdk-8-dbg
99
+ ```
100
+ or for OpenJDK 11:
101
+ ```
102
+ # apt install openjdk-11-dbg
103
+ ```
104
+
105
+ On Gentoo the `icedtea` OpenJDK package can be built with the per-package setting
106
+ `FEATURES="nostrip"` to retain symbols.
107
+
108
+ ### Wall-clock profiling
109
+
110
+ `-e wall` option tells async-profiler to sample all threads equally every given
111
+ period of time regardless of thread status: Running, Sleeping or Blocked.
112
+ For instance, this can be helpful when profiling application start-up time.
113
+
114
+ Wall-clock profiler is most useful in per-thread mode: `-t`.
115
+
116
+ Example: `./profiler.sh -e wall -t -i 5ms -f result.svg 8983`
117
+
118
+ ## Building
119
+
120
+ Build status: [![Build Status](https://travis-ci.org/jvm-profiling-tools/async-profiler.svg?branch=master)](https://travis-ci.org/jvm-profiling-tools/async-profiler)
121
+
122
+ Make sure the `JAVA_HOME` environment variable points to your JDK installation,
123
+ and then run `make`. GCC is required. After building, the profiler agent binary
124
+ will be in the `build` subdirectory. Additionally, a small application `jattach`
125
+ that can load the agent into the target process will also be compiled to the
126
+ `build` subdirectory.
127
+
128
+ ## Basic Usage
129
+
130
+ As of Linux 4.6, capturing kernel call stacks using `perf_events` from a
131
+ non-root process requires setting two runtime variables. You can set them using
132
+ sysctl or as follows:
133
+
134
+ ```
135
+ # echo 1 > /proc/sys/kernel/perf_event_paranoid
136
+ # echo 0 > /proc/sys/kernel/kptr_restrict
137
+ ```
138
+
139
+ To run the agent and pass commands to it, the helper script `profiler.sh`
140
+ is provided. A typical workflow would be to launch your Java application,
141
+ attach the agent and start profiling, exercise your performance scenario, and
142
+ then stop profiling. The agent's output, including the profiling results, will
143
+ be displayed in the Java application's standard output.
144
+
145
+ Example:
146
+
147
+ ```
148
+ $ jps
149
+ 9234 Jps
150
+ 8983 Computey
151
+ $ ./profiler.sh start 8983
152
+ $ ./profiler.sh stop 8983
153
+ ```
154
+
155
+ Alternatively, you may specify `-d` (duration) argument to profile
156
+ the application for a fixed period of time with a single command.
157
+
158
+ ```
159
+ $ ./profiler.sh -d 30 8983
160
+ ```
161
+
162
+ By default, the profiling frequency is 100Hz (every 10ms of CPU time).
163
+ Here is a sample of the output printed to the Java application's terminal:
164
+
165
+ ```
166
+ --- Execution profile ---
167
+ Total samples: 687
168
+ Unknown (native): 1 (0.15%)
169
+
170
+ --- 6790000000 (98.84%) ns, 679 samples
171
+ [ 0] Primes.isPrime
172
+ [ 1] Primes.primesThread
173
+ [ 2] Primes.access$000
174
+ [ 3] Primes$1.run
175
+ [ 4] java.lang.Thread.run
176
+
177
+ ... a lot of output omitted for brevity ...
178
+
179
+ ns percent samples top
180
+ ---------- ------- ------- ---
181
+ 6790000000 98.84% 679 Primes.isPrime
182
+ 40000000 0.58% 4 __do_softirq
183
+
184
+ ... more output omitted ...
185
+ ```
186
+
187
+ This indicates that the hottest method was `Primes.isPrime`, and the hottest
188
+ call stack leading to it comes from `Primes.primesThread`.
189
+
190
+ ## Launching as an Agent
191
+
192
+ If you need to profile some code as soon as the JVM starts up, instead of using the `profiler.sh` script,
193
+ it is possible to attach async-profiler as an agent on the command line. For example:
194
+
195
+ ```
196
+ $ java -agentpath:/path/to/libasyncProfiler.so=start,file=profile.svg ...
197
+ ```
198
+
199
+ Agent library is configured through the JVMTI argument interface.
200
+ The format of the arguments string is described
201
+ [in the source code](https://github.com/jvm-profiling-tools/async-profiler/blob/b7e9e6b955210784d5dc1d1839bb0febab1b712b/src/arguments.cpp#L34).
202
+ The `profiler.sh` script actually converts command line arguments to that format.
203
+
204
+ For instance, `-e alloc` is converted to `event=alloc`, `-f profile.svg`
205
+ is converted to `file=profile.svg` and so on. But some arguments are processed
206
+ directly by `profiler.sh` script. E.g. `-d 5` results in 3 actions:
207
+ attaching profiler agent with start command, sleeping for 5 seconds,
208
+ and then attaching the agent again with stop command.
209
+
210
+ ## Flame Graph visualization
211
+
212
+ async-profiler provides out-of-the-box [Flame Graph](https://github.com/BrendanGregg/FlameGraph) support.
213
+ Specify `-o svg` argument to dump profiling results as an interactive SVG
214
+ immediately viewable in all mainstream browsers.
215
+ Also, SVG output format will be chosen automatically if the target
216
+ filename ends with `.svg`.
217
+
218
+ ```
219
+ $ jps
220
+ 9234 Jps
221
+ 8983 Computey
222
+ $ ./profiler.sh -d 30 -f /tmp/flamegraph.svg 8983
223
+ ```
224
+
225
+ ![Example](https://github.com/jvm-profiling-tools/async-profiler/blob/master/demo/SwingSet2.svg)
226
+
227
+ ## Profiler Options
228
+
229
+ The following is a complete list of the command-line options accepted by
230
+ `profiler.sh` script.
231
+
232
+ * `start` - starts profiling in semi-automatic mode, i.e. profiler will run
233
+ until `stop` command is explicitly called.
234
+
235
+ * `resume` - starts or resumes earlier profiling session that has been stopped.
236
+ All the collected data remains valid. The profiling options are not preserved
237
+ between sessions, and should be specified again.
238
+
239
+ * `stop` - stops profiling and prints the report.
240
+
241
+ * `status` - prints profiling status: whether profiler is active and
242
+ for how long.
243
+
244
+ * `list` - show the list of available profiling events. This option still
245
+ requires PID, since supported events may differ depending on JVM version.
246
+
247
+ * `-d N` - the profiling duration, in seconds. If no `start`, `resume`, `stop`
248
+ or `status` option is given, the profiler will run for the specified period
249
+ of time and then automatically stop.
250
+ Example: `./profiler.sh -d 30 8983`
251
+
252
+ * `-e event` - the profiling event: `cpu`, `alloc`, `lock`, `cache-misses` etc.
253
+ Use `list` to see the complete list of available events.
254
+
255
+ In allocation profiling mode the top frame of every call trace is the class
256
+ of the allocated object, and the counter is the heap pressure (the total size
257
+ of allocated TLABs or objects outside TLAB).
258
+
259
+ In lock profiling mode the top frame is the class of lock/monitor, and
260
+ the counter is number of nanoseconds it took to enter this lock/monitor.
261
+
262
+ Two special event types are supported on Linux: hardware breakpoints
263
+ and kernel tracepoints:
264
+ - `-e mem:<func>[:rwx]` sets read/write/exec breakpoint at function
265
+ `<func>`. The format of `mem` event is the same as in `perf-record`.
266
+ Execution breakpoints can be also specified by the function name,
267
+ e.g. `-e malloc` will trace all calls of native `malloc` function.
268
+ - `-e trace:<id>` sets a kernel tracepoint. It is possible to specify
269
+ tracepoint symbolic name, e.g. `-e syscalls:sys_enter_open` will trace
270
+ all `open` syscalls.
271
+
272
+ * `-i N` - sets the profiling interval in nanoseconds or in other units,
273
+ if N is followed by `ms` (for milliseconds), `us` (for microseconds)
274
+ or `s` (for seconds). Only CPU active time is counted. No samples
275
+ are collected while CPU is idle. The default is 10000000 (10ms).
276
+ Example: `./profiler.sh -i 500us 8983`
277
+
278
+ * `-j N` - sets the Java stack profiling depth. This option will be ignored if N is greater
278
+ than the default of 2048.
280
+ Example: `./profiler.sh -j 30 8983`
281
+
282
+ * `-b N` - sets the frame buffer size, in the number of Java
283
+ method ids that should fit in the buffer. If you receive messages about an
284
+ insufficient frame buffer size, increase this value from the default.
285
+ Example: `./profiler.sh -b 5000000 8983`
286
+
287
+ * `-t` - profile threads separately. Each stack trace will end with a frame
288
+ that denotes a single thread.
289
+ Example: `./profiler.sh -t 8983`
290
+
291
+ * `-s` - print simple class names instead of FQN.
292
+
293
+ * `-g` - print method signatures.
294
+
295
+ * `-a` - annotate Java method names by adding `_[j]` suffix.
296
+
297
+ * `-o fmt` - specifies what information to dump when profiling ends.
298
+ `fmt` can be one of the following options:
299
+ - `summary` - dump basic profiling statistics;
300
+ - `traces[=N]` - dump call traces (at most N samples);
301
+ - `flat[=N]` - dump flat profile (top N hot methods);
302
+ - `jfr` - dump events in Java Flight Recorder format readable by Java Mission Control.
303
+ This *does not* require JDK commercial features to be enabled.
304
+ - `collapsed[=C]` - dump collapsed call traces in the format used by
305
+ [FlameGraph](https://github.com/brendangregg/FlameGraph) script. This is
306
+ a collection of call stacks, where each line is a semicolon separated list
307
+ of frames followed by a counter.
308
+ - `svg[=C]` - produce Flame Graph in SVG format.
309
+ - `tree[=C]` - produce call tree in HTML format.
310
+ --reverse option will generate backtrace view.
311
+
312
+ `C` is a counter type:
313
+ - `samples` - the counter is a number of samples for the given trace;
314
+ - `total` - the counter is a total value of collected metric, e.g. total allocation size.
315
+
316
+ `summary`, `traces` and `flat` can be combined together.
317
+ The default format is `summary,traces=200,flat=200`.
318
+
319
+ * `--title TITLE`, `--width PX`, `--height PX`, `--minwidth PX`, `--reverse` - FlameGraph parameters.
320
+ Example: `./profiler.sh -f profile.svg --title "Sample CPU profile" --minwidth 0.5 8983`
321
+
322
+ * `-f FILENAME` - the file name to dump the profile information to.
323
+ `%p` in the file name is expanded to the PID of the target JVM;
324
+ `%t` - to the timestamp at the time of command invocation.
325
+ Example: `./profiler.sh -o collapsed -f /tmp/traces-%t.txt 8983`
326
+
327
+ * `--all-user` - include only user-mode events. This option is helpful when kernel profiling
328
+ is restricted by `perf_event_paranoid` settings.
329
+ `--all-kernel` is its counterpart option for including only kernel-mode events.
330
+
331
+ * `--sync-walk` - prefer synchronous JVMTI stack walker instead of `AsyncGetCallTrace`.
332
+ This option may improve accuracy of Java stack traces when profiling JVM runtime
333
+ functions, e.g. `VMThread::execute`, `G1CollectedHeap::humongous_obj_allocate` etc.
334
+ Do not use unless you are absolutely sure! When used incorrectly, this mode will crash JVM!
335
+
336
+ * `-v`, `--version` - prints the version of profiler library. If PID is specified,
337
+ gets the version of the library loaded into the given process.
338
+
339
+ ## Profiling Java in a container
340
+
341
+ It is possible to profile Java processes running in a Docker or LXC container
342
+ both from within a container and from the host system.
343
+
344
+ When profiling from the host, `pid` should be the Java process ID in the host
345
+ namespace. Use `ps aux | grep java` or `docker top <container>` to find
346
+ the process ID.
347
+
348
+ async-profiler should be run from the host by a privileged user - it will
349
+ automatically switch to the proper pid/mount namespace and change
350
+ user credentials to match the target process. Also make sure that
351
+ the target container can access `libasyncProfiler.so` by the same
352
+ absolute path as on the host.
353
+
354
+ By default, Docker container restricts the access to `perf_event_open`
355
+ syscall. So, in order to allow profiling inside a container, you'll need
356
+ to modify [seccomp profile](https://docs.docker.com/engine/security/seccomp/)
357
+ or disable it altogether with `--security-opt=seccomp:unconfined` option.
358
+
359
+ Alternatively, if changing Docker configuration is not possible,
360
+ you may fall back to `-e itimer` profiling mode, see [Troubleshooting](#troubleshooting).
361
+
362
+ ## Restrictions/Limitations
363
+
364
+ * On most Linux systems, `perf_events` captures call stacks with a maximum depth
365
+ of 127 frames. On recent Linux kernels, this can be configured using
366
+ `sysctl kernel.perf_event_max_stack` or by writing to the
367
+ `/proc/sys/kernel/perf_event_max_stack` file.
368
+
369
+ * Profiler allocates 8kB perf_event buffer for each thread of the target process.
370
+ Make sure `/proc/sys/kernel/perf_event_mlock_kb` value is large enough
371
+ (more than `8 * threads`) when running under unprivileged user.
372
+ Otherwise the message _"perf_event mmap failed: Operation not permitted"_
373
+ will be printed, and no native stack traces will be collected.
374
+
375
+ * There is no bullet-proof guarantee that the `perf_events` overflow signal
376
+ is delivered to the Java thread in a way that guarantees no other code has run,
377
+ which means that in some rare cases, the captured Java stack might not match
378
+ the captured native (user+kernel) stack.
379
+
380
+ * You will not see the non-Java frames _preceding_ the Java frames on the
381
+ stack. For example, if `start_thread` called `JavaMain` and then your Java
382
+ code started running, you will not see the first two frames in the resulting
383
+ stack. On the other hand, you _will_ see non-Java frames (user and kernel)
384
+ invoked by your Java code.
385
+
386
+ * No Java stacks will be collected if `-XX:MaxJavaStackTraceDepth` is zero
387
+ or negative.
388
+
389
+ * Too short profiling interval may cause continuous interruption of heavy
390
+ system calls like `clone()`, so that it will never complete;
391
+ see [#97](https://github.com/jvm-profiling-tools/async-profiler/issues/97).
392
+ The workaround is simply to increase the interval.
393
+
394
+ * When agent is not loaded at JVM startup (by using -agentpath option) it is
395
+ highly recommended to use `-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints` JVM flags.
396
+ Without those flags the profiler will still work correctly but results might be
397
+ less accurate e.g. without `-XX:+DebugNonSafepoints` there is a high chance that simple inlined methods will not appear in the profile. When agent is attached at runtime `CompiledMethodLoad` JVMTI event
398
+ enables debug info, but only for methods compiled after the event is turned on.
399
+
400
+ ## Troubleshooting
401
+
402
+ ```
403
+ Failed to change credentials to match the target process: Operation not permitted
404
+ ```
405
+ Due to limitation of HotSpot Dynamic Attach mechanism, the profiler must be run
406
+ by exactly the same user (and group) as the owner of target JVM process.
407
+ If profiler is run by a different user, it will try to automatically change
408
+ current user and group. This will likely succeed for `root`, but not for
409
+ other users, resulting in the above error.
410
+
411
+ ```
412
+ Could not start attach mechanism: No such file or directory
413
+ ```
414
+ The profiler cannot establish communication with the target JVM through UNIX domain socket.
415
+
416
+ Usually this happens in one of the following cases:
417
+ 1. Attach socket `/tmp/.java_pidNNN` has been deleted. It is a common
418
+ practice to clean `/tmp` automatically with some scheduled script.
419
+ Configure the cleanup software to exclude `.java_pid*` files from deletion.
420
+ How to check: run `lsof -p PID | grep java_pid`
421
+ If it lists a socket file, but the file does not exist, then this is exactly
422
+ the described problem.
423
+ 2. JVM is started with `-XX:+DisableAttachMechanism` option.
424
+ 3. `/tmp` directory of Java process is not physically the same directory
425
+ as `/tmp` of your shell, because Java is running in a container or in
426
+ `chroot` environment. `jattach` attempts to solve this automatically,
427
+ but it might lack the required permissions to do so.
428
+ Check `strace build/jattach PID properties`
429
+ 4. JVM is busy and cannot reach a safepoint. For instance,
430
+ JVM is in the middle of long-running garbage collection.
431
+ How to check: run `kill -3 PID`. Healthy JVM process should print
432
+ a thread dump and heap info in its console.
433
+
434
+ ```
435
+ Failed to inject profiler into <pid>
436
+ ```
437
+ The connection with the target JVM has been established, but JVM is unable to load profiler shared library.
438
+ Make sure the user of JVM process has permissions to access `libasyncProfiler.so` by exactly the same absolute path.
439
+ For more information see [#78](https://github.com/jvm-profiling-tools/async-profiler/issues/78).
440
+
441
+ ```
442
+ Perf events unavailble. See stderr of the target process.
443
+ ```
444
+ `perf_event_open()` syscall has failed. The error message is printed to the error stream
445
+ of the target JVM.
446
+
447
+ Typical reasons include:
448
+ 1. `/proc/sys/kernel/perf_event_paranoid` is set to restricted mode (>=2).
449
+ 2. seccomp disables perf_event_open API in a container.
450
+ 3. OS runs under a hypervisor that does not virtualize performance counters.
451
+ 4. perf_event_open API is not supported on this system, e.g. WSL.
452
+
453
+ If changing the configuration is not possible, you may fall back to
454
+ `-e itimer` profiling mode. It is similar to `cpu` mode, but does not
455
+ require perf_events support. As a drawback, there will be no kernel
456
+ stack traces.
457
+
458
+ ```
459
+ No AllocTracer symbols found. Are JDK debug symbols installed?
460
+ ```
461
+ It might be needed to install the package with OpenJDK debug symbols.
462
+ See [Allocation profiling](#allocation-profiling) for details.
463
+
464
+ Note that allocation profiling is not supported on JVMs other than HotSpot, e.g. Zing.
465
+
466
+ ```
467
+ VMStructs unavailable. Unsupported JVM?
468
+ ```
469
+ JVM shared library does not export `gHotSpotVMStructs*` symbols -
470
+ apparently this is not a HotSpot JVM. Sometimes the same message
471
+ can be also caused by an incorrectly built JDK
472
+ (see [#218](https://github.com/jvm-profiling-tools/async-profiler/issues/218)).
473
+ In these cases installing JDK debug symbols may solve the problem.
474
+
475
+ ```
476
+ Could not parse symbols due to the OS bug
477
+ ```
478
+ Async-profiler was unable to parse non-Java function names because of
479
+ the corrupted contents in `/proc/[pid]/maps`. The problem is known to
480
+ occur in a container when running Ubuntu with Linux kernel 5.x.
481
+ This is the OS bug, see https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1843018.
482
+
483
+ ```
484
+ [frame_buffer_overflow]
485
+ ```
486
+ This message in the output means there was not enough space to store all call traces.
487
+ Consider increasing frame buffer size with `-b` option.