jruby-async-profiler 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/Gemfile +4 -0
  5. data/README.md +35 -0
  6. data/Rakefile +2 -0
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/ext/Rakefile +6 -0
  10. data/ext/async-profiler/.gitattributes +1 -0
  11. data/ext/async-profiler/.gitignore +6 -0
  12. data/ext/async-profiler/.travis.yml +11 -0
  13. data/ext/async-profiler/CHANGELOG.md +107 -0
  14. data/ext/async-profiler/JavaHome.class +0 -0
  15. data/ext/async-profiler/LICENSE +201 -0
  16. data/ext/async-profiler/Makefile +66 -0
  17. data/ext/async-profiler/README.md +487 -0
  18. data/ext/async-profiler/demo/SwingSet2.svg +2247 -0
  19. data/ext/async-profiler/docs/cddl1.txt +358 -0
  20. data/ext/async-profiler/profiler.sh +240 -0
  21. data/ext/async-profiler/src/allocTracer.cpp +155 -0
  22. data/ext/async-profiler/src/allocTracer.h +74 -0
  23. data/ext/async-profiler/src/arch.h +69 -0
  24. data/ext/async-profiler/src/arguments.cpp +265 -0
  25. data/ext/async-profiler/src/arguments.h +152 -0
  26. data/ext/async-profiler/src/codeCache.cpp +128 -0
  27. data/ext/async-profiler/src/codeCache.h +99 -0
  28. data/ext/async-profiler/src/engine.cpp +50 -0
  29. data/ext/async-profiler/src/engine.h +38 -0
  30. data/ext/async-profiler/src/flameGraph.cpp +770 -0
  31. data/ext/async-profiler/src/flameGraph.h +118 -0
  32. data/ext/async-profiler/src/flightRecorder.cpp +727 -0
  33. data/ext/async-profiler/src/flightRecorder.h +39 -0
  34. data/ext/async-profiler/src/frameName.cpp +189 -0
  35. data/ext/async-profiler/src/frameName.h +56 -0
  36. data/ext/async-profiler/src/itimer.cpp +49 -0
  37. data/ext/async-profiler/src/itimer.h +43 -0
  38. data/ext/async-profiler/src/jattach/jattach.c +437 -0
  39. data/ext/async-profiler/src/java/one/profiler/AsyncProfiler.java +160 -0
  40. data/ext/async-profiler/src/java/one/profiler/AsyncProfilerMXBean.java +43 -0
  41. data/ext/async-profiler/src/java/one/profiler/Counter.java +25 -0
  42. data/ext/async-profiler/src/java/one/profiler/Events.java +28 -0
  43. data/ext/async-profiler/src/javaApi.cpp +124 -0
  44. data/ext/async-profiler/src/lockTracer.cpp +161 -0
  45. data/ext/async-profiler/src/lockTracer.h +55 -0
  46. data/ext/async-profiler/src/mutex.cpp +33 -0
  47. data/ext/async-profiler/src/mutex.h +49 -0
  48. data/ext/async-profiler/src/os.h +45 -0
  49. data/ext/async-profiler/src/os_linux.cpp +129 -0
  50. data/ext/async-profiler/src/os_macos.cpp +115 -0
  51. data/ext/async-profiler/src/perfEvents.h +60 -0
  52. data/ext/async-profiler/src/perfEvents_linux.cpp +550 -0
  53. data/ext/async-profiler/src/perfEvents_macos.cpp +64 -0
  54. data/ext/async-profiler/src/profiler.cpp +952 -0
  55. data/ext/async-profiler/src/profiler.h +238 -0
  56. data/ext/async-profiler/src/spinLock.h +66 -0
  57. data/ext/async-profiler/src/stackFrame.h +57 -0
  58. data/ext/async-profiler/src/stackFrame_aarch64.cpp +75 -0
  59. data/ext/async-profiler/src/stackFrame_arm.cpp +58 -0
  60. data/ext/async-profiler/src/stackFrame_i386.cpp +82 -0
  61. data/ext/async-profiler/src/stackFrame_x64.cpp +113 -0
  62. data/ext/async-profiler/src/symbols.h +37 -0
  63. data/ext/async-profiler/src/symbols_linux.cpp +354 -0
  64. data/ext/async-profiler/src/symbols_macos.cpp +156 -0
  65. data/ext/async-profiler/src/vmEntry.cpp +173 -0
  66. data/ext/async-profiler/src/vmEntry.h +105 -0
  67. data/ext/async-profiler/src/vmStructs.cpp +104 -0
  68. data/ext/async-profiler/src/vmStructs.h +112 -0
  69. data/ext/async-profiler/src/wallClock.cpp +96 -0
  70. data/ext/async-profiler/src/wallClock.h +56 -0
  71. data/ext/async-profiler/test/AllocatingTarget.java +26 -0
  72. data/ext/async-profiler/test/LoadLibraryTest.java +21 -0
  73. data/ext/async-profiler/test/Target.java +31 -0
  74. data/ext/async-profiler/test/ThreadsTarget.java +35 -0
  75. data/ext/async-profiler/test/alloc-smoke-test.sh +36 -0
  76. data/ext/async-profiler/test/load-library-test.sh +35 -0
  77. data/ext/async-profiler/test/smoke-test.sh +37 -0
  78. data/ext/async-profiler/test/thread-smoke-test.sh +32 -0
  79. data/jruby-async-profiler.gemspec +32 -0
  80. data/lib/jruby/async/profiler.rb +10 -0
  81. data/lib/jruby/async/profiler/version.rb +7 -0
  82. metadata +155 -0
data/ext/async-profiler/src/perfEvents_macos.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2017 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __APPLE__
+
+#include "perfEvents.h"
+
+
+int PerfEvents::_max_events;
+PerfEvent* PerfEvents::_events;
+PerfEventType* PerfEvents::_event_type;
+long PerfEvents::_interval;
+Ring PerfEvents::_ring;
+bool PerfEvents::_print_extended_warning;
+
+
+bool PerfEvents::createForThread(int tid) { return false; }
+void PerfEvents::destroyForThread(int tid) {}
+void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {}
+
+const char* PerfEvents::units() {
+    return "ns";
+}
+
+Error PerfEvents::start(Arguments& args) {
+    return Error("PerfEvents are unsupported on macOS");
+}
+
+void PerfEvents::stop() {
+}
+
+void PerfEvents::onThreadStart() {
+}
+
+void PerfEvents::onThreadEnd() {
+}
+
+int PerfEvents::getNativeTrace(void* ucontext, int tid, const void** callchain, int max_depth,
+                               const void* jit_min_address, const void* jit_max_address) {
+    return 0;
+}
+
+bool PerfEvents::supported() {
+    return false;
+}
+
+const char* PerfEvents::getEventName(int event_id) {
+    return NULL;
+}
+
+#endif // __APPLE__
data/ext/async-profiler/src/profiler.cpp
@@ -0,0 +1,952 @@
+/*
+ * Copyright 2016 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <dlfcn.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+#include "profiler.h"
+#include "perfEvents.h"
+#include "allocTracer.h"
+#include "lockTracer.h"
+#include "wallClock.h"
+#include "itimer.h"
+#include "flameGraph.h"
+#include "flightRecorder.h"
+#include "frameName.h"
+#include "os.h"
+#include "stackFrame.h"
+#include "symbols.h"
+#include "vmStructs.h"
+
+
+Profiler Profiler::_instance;
+
+static PerfEvents perf_events;
+static AllocTracer alloc_tracer;
+static LockTracer lock_tracer;
+static WallClock wall_clock;
+static ITimer itimer;
+
+
+u64 Profiler::hashCallTrace(int num_frames, ASGCT_CallFrame* frames) {
+    const u64 M = 0xc6a4a7935bd1e995ULL;
+    const int R = 47;
+
+    u64 h = num_frames * M;
+
+    for (int i = 0; i < num_frames; i++) {
+        u64 k = (u64)frames[i].method_id;
+        k *= M;
+        k ^= k >> R;
+        k *= M;
+        h ^= k;
+        h *= M;
+    }
+
+    h ^= h >> R;
+    h *= M;
+    h ^= h >> R;
+
+    return h;
+}
+
+int Profiler::storeCallTrace(int num_frames, ASGCT_CallFrame* frames, u64 counter) {
+    u64 hash = hashCallTrace(num_frames, frames);
+    int bucket = (int)(hash % MAX_CALLTRACES);
+    int i = bucket;
+
+    while (_hashes[i] != hash) {
+        if (_hashes[i] == 0) {
+            if (__sync_bool_compare_and_swap(&_hashes[i], 0, hash)) {
+                copyToFrameBuffer(num_frames, frames, &_traces[i]);
+                break;
+            }
+            continue;
+        }
+
+        if (++i == MAX_CALLTRACES) i = 0;  // move to next slot
+        if (i == bucket) return 0;         // the table is full
+    }
+
+    // CallTrace hash found => atomically increment counter
+    atomicInc(_traces[i]._samples);
+    atomicInc(_traces[i]._counter, counter);
+    return i;
+}
+
+void Profiler::copyToFrameBuffer(int num_frames, ASGCT_CallFrame* frames, CallTraceSample* trace) {
+    // Atomically reserve space in frame buffer
+    int start_frame;
+    do {
+        start_frame = _frame_buffer_index;
+        if (start_frame + num_frames > _frame_buffer_size) {
+            _frame_buffer_overflow = true;  // not enough space to store full trace
+            return;
+        }
+    } while (!__sync_bool_compare_and_swap(&_frame_buffer_index, start_frame, start_frame + num_frames));
+
+    trace->_start_frame = start_frame;
+    trace->_num_frames = num_frames;
+
+    for (int i = 0; i < num_frames; i++) {
+        _frame_buffer[start_frame++] = frames[i];
+    }
+}
+
+u64 Profiler::hashMethod(jmethodID method) {
+    const u64 M = 0xc6a4a7935bd1e995ULL;
+    const int R = 17;
+
+    u64 h = (u64)method;
+
+    h ^= h >> R;
+    h *= M;
+    h ^= h >> R;
+
+    return h;
+}
+
+void Profiler::storeMethod(jmethodID method, jint bci, u64 counter) {
+    u64 hash = hashMethod(method);
+    int bucket = (int)(hash % MAX_CALLTRACES);
+    int i = bucket;
+
+    while (_methods[i]._method.method_id != method) {
+        if (_methods[i]._method.method_id == NULL) {
+            if (__sync_bool_compare_and_swap(&_methods[i]._method.method_id, NULL, method)) {
+                _methods[i]._method.bci = bci;
+                break;
+            }
+            continue;
+        }
+
+        if (++i == MAX_CALLTRACES) i = 0;  // move to next slot
+        if (i == bucket) return;           // the table is full
+    }
+
+    // Method found => atomically increment counter
+    atomicInc(_methods[i]._samples);
+    atomicInc(_methods[i]._counter, counter);
+}
+
+void Profiler::addJavaMethod(const void* address, int length, jmethodID method) {
+    _jit_lock.lock();
+    _java_methods.add(address, length, method);
+    updateJitRange(address, (const char*)address + length);
+    _jit_lock.unlock();
+}
+
+void Profiler::removeJavaMethod(const void* address, jmethodID method) {
+    _jit_lock.lock();
+    _java_methods.remove(address, method);
+    _jit_lock.unlock();
+}
+
+void Profiler::addRuntimeStub(const void* address, int length, const char* name) {
+    _jit_lock.lock();
+    _runtime_stubs.add(address, length, name);
+    updateJitRange(address, (const char*)address + length);
+    _jit_lock.unlock();
+}
+
+void Profiler::updateJitRange(const void* min_address, const void* max_address) {
+    if (min_address < _jit_min_address) _jit_min_address = min_address;
+    if (max_address > _jit_max_address) _jit_max_address = max_address;
+}
+
+const char* Profiler::asgctError(int code) {
+    switch (code) {
+        case ticks_no_Java_frame:
+        case ticks_unknown_not_Java:
+        case ticks_not_walkable_not_Java:
+            // Not in Java context at all; this is not an error
+            return NULL;
+        case ticks_GC_active:
+            return "GC_active";
+        case ticks_unknown_Java:
+            return "unknown_Java";
+        case ticks_not_walkable_Java:
+            return "not_walkable_Java";
+        case ticks_thread_exit:
+            return "thread_exit";
+        case ticks_deopt:
+            return "deoptimization";
+        case ticks_safepoint:
+            return "safepoint";
+        case ticks_skipped:
+            return "skipped";
+        default:
+            // Should not happen
+            return "unexpected_state";
+    }
+}
+
+NativeCodeCache* Profiler::jvmLibrary() {
+    const void* asyncGetCallTraceAddr = (const void*)VM::_asyncGetCallTrace;
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        if (_native_libs[i]->contains(asyncGetCallTraceAddr)) {
+            return _native_libs[i];
+        }
+    }
+    return NULL;
+}
+
+const void* Profiler::findSymbol(const char* name) {
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        const void* address = _native_libs[i]->findSymbol(name);
+        if (address != NULL) {
+            return address;
+        }
+    }
+    return NULL;
+}
+
+const char* Profiler::findNativeMethod(const void* address) {
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        if (_native_libs[i]->contains(address)) {
+            return _native_libs[i]->binarySearch(address);
+        }
+    }
+    return NULL;
+}
+
+int Profiler::getNativeTrace(void* ucontext, ASGCT_CallFrame* frames, int tid, bool* stopped_at_java_frame) {
+    const void* native_callchain[MAX_NATIVE_FRAMES];
+    int native_frames = _engine->getNativeTrace(ucontext, tid, native_callchain, MAX_NATIVE_FRAMES,
+                                                _jit_min_address, _jit_max_address);
+
+    *stopped_at_java_frame = false;
+    if (native_frames > 0) {
+        const void* last_pc = native_callchain[native_frames - 1];
+        if (last_pc >= _jit_min_address && last_pc < _jit_max_address) {
+            *stopped_at_java_frame = true;
+            native_frames--;
+        }
+    }
+
+    for (int i = 0; i < native_frames; i++) {
+        frames[i].bci = BCI_NATIVE_FRAME;
+        frames[i].method_id = (jmethodID)findNativeMethod(native_callchain[i]);
+    }
+
+    return native_frames;
+}
+
+int Profiler::getJavaTraceAsync(void* ucontext, ASGCT_CallFrame* frames, int max_depth) {
+    JNIEnv* jni = VM::jni();
+    if (jni == NULL) {
+        // Not a Java thread
+        return 0;
+    }
+
+    ASGCT_CallTrace trace = {jni, 0, frames};
+    VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
+
+#ifndef SAFE_MODE
+    if (trace.num_frames == ticks_unknown_Java) {
+        // If current Java stack is not walkable (e.g. the top frame is not fully constructed),
+        // try to manually pop the top frame off, hoping that the previous frame is walkable.
+        // This is a temporary workaround for AsyncGetCallTrace issues,
+        // see https://bugs.openjdk.java.net/browse/JDK-8178287
+        StackFrame top_frame(ucontext);
+        uintptr_t pc = top_frame.pc(),
+                  sp = top_frame.sp(),
+                  fp = top_frame.fp();
+
+        // Guess top method by PC and insert it manually into the call trace
+        bool is_entry_frame = false;
+        if (fillTopFrame((const void*)pc, trace.frames)) {
+            is_entry_frame = trace.frames->bci == BCI_NATIVE_FRAME &&
+                             strcmp((const char*)trace.frames->method_id, "call_stub") == 0;
+            trace.frames++;
+            max_depth--;
+        }
+
+        if (top_frame.pop(is_entry_frame)) {
+            // Retry with the fixed context, but only if PC looks reasonable,
+            // otherwise AsyncGetCallTrace may crash
+            if (addressInCode((const void*)top_frame.pc())) {
+                VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
+            }
+            top_frame.restore(pc, sp, fp);
+
+            if (trace.num_frames > 0) {
+                return trace.num_frames + (trace.frames - frames);
+            }
+
+            // Restore previous context
+            trace.num_frames = ticks_unknown_Java;
+        }
+    } else if (trace.num_frames == ticks_GC_active && VM::is_hotspot() && _JvmtiEnv_GetStackTrace != NULL) {
+        // While GC is running Java threads are known to be at safepoint
+        return getJavaTraceJvmti((jvmtiFrameInfo*)frames, frames, max_depth);
+    }
+#endif // SAFE_MODE
+
+    if (trace.num_frames > 0) {
+        return trace.num_frames;
+    }
+
+    const char* err_string = asgctError(trace.num_frames);
+    if (err_string == NULL) {
+        // No Java stack, because thread is not in Java context
+        return 0;
+    }
+
+    atomicInc(_failures[-trace.num_frames]);
+    frames[0].bci = BCI_ERROR;
+    frames[0].method_id = (jmethodID)err_string;
+    return 1;
+}
+
+int Profiler::getJavaTraceJvmti(jvmtiFrameInfo* jvmti_frames, ASGCT_CallFrame* frames, int max_depth) {
+    // We cannot call pure JVM TI here, because it assumes _thread_in_native state,
+    // but allocation events happen in _thread_in_vm state,
+    // see https://github.com/jvm-profiling-tools/async-profiler/issues/64
+    void* thread = _ThreadLocalStorage_thread();
+    int num_frames;
+    if (_JvmtiEnv_GetStackTrace(NULL, thread, 0, max_depth, jvmti_frames, &num_frames) == 0 && num_frames > 0) {
+        // Profiler expects stack trace in AsyncGetCallTrace format; convert it now
+        for (int i = 0; i < num_frames; i++) {
+            frames[i].method_id = jvmti_frames[i].method;
+            frames[i].bci = 0;
+        }
+        return num_frames;
+    }
+
+    return 0;
+}
+
+int Profiler::makeEventFrame(ASGCT_CallFrame* frames, jint event_type, jmethodID event) {
+    frames[0].bci = event_type;
+    frames[0].method_id = event;
+    return 1;
+}
+
+bool Profiler::fillTopFrame(const void* pc, ASGCT_CallFrame* frame) {
+    jmethodID method = NULL;
+    _jit_lock.lockShared();
+
+    // Check if PC lies within JVM's compiled code cache
+    if (pc >= _jit_min_address && pc < _jit_max_address) {
+        if ((method = _java_methods.find(pc)) != NULL) {
+            // PC belong to a JIT compiled method
+            frame->bci = 0;
+            frame->method_id = method;
+        } else if ((method = _runtime_stubs.find(pc)) != NULL) {
+            // PC belongs to a VM runtime stub
+            frame->bci = BCI_NATIVE_FRAME;
+            frame->method_id = method;
+        }
+    }
+
+    _jit_lock.unlockShared();
+    return method != NULL;
+}
+
+bool Profiler::addressInCode(const void* pc) {
+    // 1. Check if PC lies within JVM's compiled code cache
+    // Address in CodeCache is executable if it belongs to a Java method or a runtime stub
+    if (pc >= _jit_min_address && pc < _jit_max_address) {
+        _jit_lock.lockShared();
+        bool valid = _java_methods.find(pc) != NULL || _runtime_stubs.find(pc) != NULL;
+        _jit_lock.unlockShared();
+        return valid;
+    }
+
+    // 2. Check if PC belongs to executable code of shared libraries
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        if (_native_libs[i]->contains(pc)) {
+            return true;
+        }
+    }
+
+    // This can be some other dynamically generated code, but we don't know it. Better stay safe.
+    return false;
+}
+
+void Profiler::recordSample(void* ucontext, u64 counter, jint event_type, jmethodID event) {
+    int tid = OS::threadId();
+
+    u64 lock_index = atomicInc(_total_samples) % CONCURRENCY_LEVEL;
+    if (!_locks[lock_index].tryLock()) {
+        // Too many concurrent signals already
+        atomicInc(_failures[-ticks_skipped]);
+
+        if (event_type == 0) {
+            // Need to reset PerfEvents ring buffer, even though we discard the collected trace
+            _engine->getNativeTrace(ucontext, tid, NULL, 0, _jit_min_address, _jit_max_address);
+        }
+        return;
+    }
+
+    atomicInc(_total_counter, counter);
+
+    ASGCT_CallFrame* frames = _calltrace_buffer[lock_index]->_asgct_frames;
+    bool need_java_trace = true;
+
+    int num_frames = 0;
+    if (event_type == 0) {
+        num_frames = getNativeTrace(ucontext, frames, tid, &need_java_trace);
+    } else if (event != NULL) {
+        num_frames = makeEventFrame(frames, event_type, event);
+    }
+
+    if ((_sync_walk || event_type != 0) && _JvmtiEnv_GetStackTrace != NULL) {
+        // Events like object allocation happen at known places where it is safe to call JVM TI
+        jvmtiFrameInfo* jvmti_frames = _calltrace_buffer[lock_index]->_jvmti_frames;
+        num_frames += getJavaTraceJvmti(jvmti_frames + num_frames, frames + num_frames, _max_stack_depth);
+    } else if (OS::isSignalSafeTLS() || need_java_trace) {
+        num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth);
+    }
+
+    if (num_frames == 0 || (num_frames == 1 && event != NULL)) {
+        num_frames += makeEventFrame(frames + num_frames, BCI_ERROR, (jmethodID)"not_walkable");
+    }
+
+    if (_threads) {
+        num_frames += makeEventFrame(frames + num_frames, BCI_THREAD_ID, (jmethodID)(uintptr_t)tid);
+    }
+
+    storeMethod(frames[0].method_id, frames[0].bci, counter);
+    int call_trace_id = storeCallTrace(num_frames, frames, counter);
+    _jfr.recordExecutionSample(lock_index, tid, call_trace_id);
+
+    _locks[lock_index].unlock();
+}
+
+jboolean JNICALL Profiler::NativeLibraryLoadTrap(JNIEnv* env, jobject self, jstring name, jboolean builtin) {
+    jboolean result = _instance._original_NativeLibrary_load(env, self, name, builtin);
+    Symbols::parseLibraries(_instance._native_libs, _instance._native_lib_count, MAX_NATIVE_LIBS);
+    return result;
+}
+
+void Profiler::bindNativeLibraryLoad(NativeLoadLibraryFunc entry) {
+    JNIEnv* env = VM::jni();
+    jclass NativeLibrary = env->FindClass("java/lang/ClassLoader$NativeLibrary");
+
+    if (NativeLibrary != NULL) {
+        // Find JNI entry for NativeLibrary.load() method
+        if (_original_NativeLibrary_load == NULL) {
+            if (env->GetMethodID(NativeLibrary, "load0", "(Ljava/lang/String;Z)Z") != NULL) {
+                // JDK 9+
+                _load_method.name = (char*)"load0";
+                _load_method.signature = (char*)"(Ljava/lang/String;Z)Z";
+            } else if (env->GetMethodID(NativeLibrary, "load", "(Ljava/lang/String;Z)V") != NULL) {
+                // JDK 8
+                _load_method.name = (char*)"load";
+                _load_method.signature = (char*)"(Ljava/lang/String;Z)V";
+            } else {
+                // JDK 7
+                _load_method.name = (char*)"load";
+                _load_method.signature = (char*)"(Ljava/lang/String;)V";
+            }
+
+            char jni_name[64];
+            strcpy(jni_name, "Java_java_lang_ClassLoader_00024NativeLibrary_");
+            strcat(jni_name, _load_method.name);
+            _original_NativeLibrary_load = (NativeLoadLibraryFunc)dlsym(VM::_libjava, jni_name);
+        }
+
+        // Change function pointer for the native method
+        if (_original_NativeLibrary_load != NULL) {
+            _load_method.fnPtr = (void*)entry;
+            env->RegisterNatives(NativeLibrary, &_load_method, 1);
+        }
+    }
+
+    env->ExceptionClear();
+}
+
+void Profiler::initJvmtiFunctions(NativeCodeCache* libjvm) {
+    if (_JvmtiEnv_GetStackTrace == NULL) {
+        // Find ThreadLocalStorage::thread() if exists
+        if (_ThreadLocalStorage_thread == NULL) {
+            _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN18ThreadLocalStorage6threadEv");
+        }
+        // Fallback to ThreadLocalStorage::get_thread_slow()
+        if (_ThreadLocalStorage_thread == NULL) {
+            _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN18ThreadLocalStorage15get_thread_slowEv");
+        }
+        // Fallback to Thread::current(), e.g. on Zing
+        if (_ThreadLocalStorage_thread == NULL) {
+            _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN6Thread7currentEv");
+        }
+        // JvmtiEnv::GetStackTrace(JavaThread* java_thread, jint start_depth, jint max_frame_count, jvmtiFrameInfo* frame_buffer, jint* count_ptr)
+        if (_ThreadLocalStorage_thread != NULL) {
+            _JvmtiEnv_GetStackTrace = (jvmtiError (*)(void*, void*, jint, jint, jvmtiFrameInfo*, jint*))
+                libjvm->findSymbol("_ZN8JvmtiEnv13GetStackTraceEP10JavaThreadiiP15_jvmtiFrameInfoPi");
+        }
+
+        if (_JvmtiEnv_GetStackTrace == NULL) {
+            fprintf(stderr, "WARNING: Install JVM debug symbols to improve profile accuracy\n");
+        }
+    }
+}
+
+void Profiler::setThreadName(int tid, const char* name) {
+    MutexLocker ml(_thread_names_lock);
+    _thread_names[tid] = name;
+}
+
+void Profiler::updateThreadName(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) {
+    if (_threads && VMThread::available()) {
+        VMThread* vm_thread = VMThread::fromJavaThread(jni, thread);
+        jvmtiThreadInfo thread_info;
+        if (vm_thread != NULL && jvmti->GetThreadInfo(thread, &thread_info) == 0) {
+            setThreadName(vm_thread->osThreadId(), thread_info.name);
+            jvmti->Deallocate((unsigned char*)thread_info.name);
+        }
+    }
+}
+
+void Profiler::updateAllThreadNames() {
+    if (_threads && VMThread::available()) {
+        jvmtiEnv* jvmti = VM::jvmti();
+        jint thread_count;
+        jthread* thread_objects;
+        if (jvmti->GetAllThreads(&thread_count, &thread_objects) != 0) {
+            return;
+        }
+
+        JNIEnv* jni = VM::jni();
+        for (int i = 0; i < thread_count; i++) {
+            updateThreadName(jvmti, jni, thread_objects[i]);
+        }
+
+        jvmti->Deallocate((unsigned char*)thread_objects);
+    }
+}
+
+Engine* Profiler::selectEngine(const char* event_name) {
+    if (strcmp(event_name, EVENT_CPU) == 0) {
+        return PerfEvents::supported() ? (Engine*)&perf_events : (Engine*)&wall_clock;
+    } else if (strcmp(event_name, EVENT_ALLOC) == 0) {
+        return &alloc_tracer;
+    } else if (strcmp(event_name, EVENT_LOCK) == 0) {
+        return &lock_tracer;
+    } else if (strcmp(event_name, EVENT_WALL) == 0) {
+        return &wall_clock;
+    } else if (strcmp(event_name, EVENT_ITIMER) == 0) {
+        return &itimer;
+    } else {
+        return &perf_events;
+    }
+}
+
+Error Profiler::start(Arguments& args, bool reset) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE) {
+        return Error("Profiler already started");
+    }
+
+    if (VM::_asyncGetCallTrace == NULL) {
+        return Error("Could not find AsyncGetCallTrace function");
+    }
+
+    bool first_time = _native_lib_count == 0;
+    if (first_time || reset) {
+        // Reset counters
+        _total_samples = 0;
+        _total_counter = 0;
+        memset(_failures, 0, sizeof(_failures));
+        memset(_hashes, 0, sizeof(_hashes));
+        memset(_traces, 0, sizeof(_traces));
+        memset(_methods, 0, sizeof(_methods));
+
+        // Index 0 denotes special call trace with no frames
+        _hashes[0] = (u64)-1;
+
+        // Reset frame buffer
+        _frame_buffer_index = 0;
+        _frame_buffer_overflow = false;
+
+        // Reset thread names
+        {
+            MutexLocker ml(_thread_names_lock);
+            _thread_names.clear();
+        }
+    }
+
+    // (Re-)allocate frames
+    if (_frame_buffer_size != args._framebuf) {
+        _frame_buffer_size = args._framebuf;
+        _frame_buffer = (ASGCT_CallFrame*)realloc(_frame_buffer, _frame_buffer_size * sizeof(ASGCT_CallFrame));
+        if (_frame_buffer == NULL) {
+            _frame_buffer_size = 0;
+            return Error("Not enough memory to allocate frame buffer (try smaller framebuf)");
+        }
+    }
+
+    // (Re-)allocate calltrace buffers
+    if (_max_stack_depth != args._jstackdepth) {
+        _max_stack_depth = args._jstackdepth;
+        size_t buffer_size = (_max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES) * sizeof(CallTraceBuffer);
+
+        for (int i = 0; i < CONCURRENCY_LEVEL; i++) {
+            free(_calltrace_buffer[i]);
+            _calltrace_buffer[i] = (CallTraceBuffer*)malloc(buffer_size);
+            if (_calltrace_buffer[i] == NULL) {
+                _max_stack_depth = 0;
+                return Error("Not enough memory to allocate stack trace buffers (try smaller jstackdepth)");
+            }
+        }
+    }
+
+    _threads = args._threads && args._output != OUTPUT_JFR;
+    _sync_walk = args._sync_walk;
+
+    Symbols::parseLibraries(_native_libs, _native_lib_count, MAX_NATIVE_LIBS);
+    NativeCodeCache* libjvm = jvmLibrary();
+    if (libjvm == NULL) {
+        return Error("libjvm not found among loaded libraries");
+    }
+    VMStructs::init(libjvm);
+    initJvmtiFunctions(libjvm);
+
+    if (args._output == OUTPUT_JFR) {
+        Error error = _jfr.start(args._file);
+        if (error) {
+            return error;
+        }
+    }
+
+    _engine = selectEngine(args._event);
+    Error error = _engine->start(args);
+    if (error) {
+        _jfr.stop();
+        return error;
+    }
+
+    if (_threads) {
+        // Thread events might be already enabled by PerfEvents::start
+        switchThreadEvents(JVMTI_ENABLE);
+    }
+
+    bindNativeLibraryLoad(NativeLibraryLoadTrap);
+
+    _state = RUNNING;
+    _start_time = time(NULL);
+    return Error::OK;
+}
+
+Error Profiler::stop() {
+    MutexLocker ml(_state_lock);
+    if (_state != RUNNING) {
+        return Error("Profiler is not active");
+    }
+
+    _engine->stop();
+
+    // Acquire all spinlocks to avoid race with remaining signals
+    for (int i = 0; i < CONCURRENCY_LEVEL; i++) _locks[i].lock();
+    _jfr.stop();
+    for (int i = 0; i < CONCURRENCY_LEVEL; i++) _locks[i].unlock();
+
+    bindNativeLibraryLoad(_original_NativeLibrary_load);
+
+    switchThreadEvents(JVMTI_DISABLE);
+    updateAllThreadNames();
+
+    _state = IDLE;
+    return Error::OK;
+}
+
+void Profiler::switchThreadEvents(jvmtiEventMode mode) {
+    if (_thread_events_state != mode) {
+        jvmtiEnv* jvmti = VM::jvmti();
+        jvmti->SetEventNotificationMode(mode, JVMTI_EVENT_THREAD_START, NULL);
+        jvmti->SetEventNotificationMode(mode, JVMTI_EVENT_THREAD_END, NULL);
+        _thread_events_state = mode;
+    }
+}
+
+void Profiler::dumpSummary(std::ostream& out) {
+    char buf[256];
+    snprintf(buf, sizeof(buf),
+             "--- Execution profile ---\n"
+             "Total samples : %lld\n",
+             _total_samples);
+    out << buf;
+
+    double percent = 100.0 / _total_samples;
+    for (int i = 1; i < ASGCT_FAILURE_TYPES; i++) {
+        const char* err_string = asgctError(-i);
+        if (err_string != NULL && _failures[i] > 0) {
+            snprintf(buf, sizeof(buf), "%-20s: %lld (%.2f%%)\n", err_string, _failures[i], _failures[i] * percent);
+            out << buf;
+        }
+    }
+    out << std::endl;
+
+    if (_frame_buffer_overflow) {
+        out << "Frame buffer overflowed! Consider increasing its size." << std::endl;
+    } else {
+        double usage = 100.0 * _frame_buffer_index / _frame_buffer_size;
+        out << "Frame buffer usage : " << usage << "%" << std::endl;
+    }
+    out << std::endl;
+}
+
+/*
+ * Dump stacks in FlameGraph input format:
+ *
+ * <frame>;<frame>;...;<topmost frame> <count>
+ */
+void Profiler::dumpCollapsed(std::ostream& out, Arguments& args) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FrameName fn(args._style, _thread_names_lock, _thread_names);
+    u64 unknown = 0;
+
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        CallTraceSample& trace = _traces[i];
+        if (trace._samples == 0) continue;
+
+        if (trace._num_frames == 0) {
+            unknown += (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter);
+            continue;
+        }
+
+        for (int j = trace._num_frames - 1; j >= 0; j--) {
+            const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+            out << frame_name << (j == 0 ? ' ' : ';');
+        }
+        out << (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter) << "\n";
+    }
+
+    if (unknown != 0) {
+        out << "[frame_buffer_overflow] " << unknown << "\n";
+    }
+}
+
+void Profiler::dumpFlameGraph(std::ostream& out, Arguments& args, bool tree) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FlameGraph flamegraph(args._title, args._counter, args._width, args._height, args._minwidth, args._reverse);
+    FrameName fn(args._style, _thread_names_lock, _thread_names);
+
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        CallTraceSample& trace = _traces[i];
+        if (trace._samples == 0) continue;
+
+        u64 samples = (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter);
+
+        Trie* f = flamegraph.root();
+        if (trace._num_frames == 0) {
+            f = f->addChild("[frame_buffer_overflow]", samples);
+        } else if (args._reverse) {
+            for (int j = 0; j < trace._num_frames; j++) {
+                const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+                f = f->addChild(frame_name, samples);
+            }
+        } else {
+            for (int j = trace._num_frames - 1; j >= 0; j--) {
+                const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+                f = f->addChild(frame_name, samples);
+            }
+        }
+        f->addLeaf(samples);
+    }
+
+    flamegraph.dump(out, tree);
+}
+
+void Profiler::dumpTraces(std::ostream& out, Arguments& args) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FrameName fn(args._style | STYLE_DOTTED, _thread_names_lock, _thread_names);
+    double percent = 100.0 / _total_counter;
+    char buf[1024];
+
+    CallTraceSample** traces = new CallTraceSample*[MAX_CALLTRACES];
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        traces[i] = &_traces[i];
+    }
+    qsort(traces, MAX_CALLTRACES, sizeof(CallTraceSample*), CallTraceSample::comparator);
+
+    int max_traces = args._dump_traces < MAX_CALLTRACES ? args._dump_traces : MAX_CALLTRACES;
+    for (int i = 0; i < max_traces; i++) {
+        CallTraceSample* trace = traces[i];
+        if (trace->_samples == 0) break;
+
+        snprintf(buf, sizeof(buf), "--- %lld %s (%.2f%%), %lld sample%s\n",
+                 trace->_counter, _engine->units(), trace->_counter * percent,
+                 trace->_samples, trace->_samples == 1 ? "" : "s");
+        out << buf;
+
+        if (trace->_num_frames == 0) {
+            out << " [ 0] [frame_buffer_overflow]\n";
+        }
+
+        for (int j = 0; j < trace->_num_frames; j++) {
+            const char* frame_name = fn.name(_frame_buffer[trace->_start_frame + j]);
+            snprintf(buf, sizeof(buf), " [%2d] %s\n", j, frame_name);
+            out << buf;
+        }
+        out << "\n";
+    }
+
+    delete[] traces;
+}
+
+void Profiler::dumpFlat(std::ostream& out, Arguments& args) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FrameName fn(args._style | STYLE_DOTTED, _thread_names_lock, _thread_names);
+    double percent = 100.0 / _total_counter;
+    char buf[1024];
+
+    MethodSample** methods = new MethodSample*[MAX_CALLTRACES];
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        methods[i] = &_methods[i];
+    }
+    qsort(methods, MAX_CALLTRACES, sizeof(MethodSample*), MethodSample::comparator);
+
+    snprintf(buf, sizeof(buf), "%12s percent samples top\n"
+             " ---------- ------- ------- ---\n", _engine->units());
+    out << buf;
+
+    int max_methods = args._dump_flat < MAX_CALLTRACES ? args._dump_flat : MAX_CALLTRACES;
+    for (int i = 0; i < max_methods; i++) {
+        MethodSample* method = methods[i];
+        if (method->_samples == 0) break;
+
+        const char* frame_name = fn.name(method->_method);
+        snprintf(buf, sizeof(buf), "%12lld %6.2f%% %7lld %s\n",
+                 method->_counter, method->_counter * percent, method->_samples, frame_name);
+        out << buf;
+    }
+
+    delete[] methods;
+}
+
+void Profiler::runInternal(Arguments& args, std::ostream& out) {
+    switch (args._action) {
+        case ACTION_START:
+        case ACTION_RESUME: {
+            Error error = start(args, args._action == ACTION_START);
+            if (error) {
+                out << error.message() << std::endl;
+            } else {
+                out << "Started [" << args._event << "] profiling" << std::endl;
+            }
+            break;
+        }
+        case ACTION_STOP: {
+            Error error = stop();
+            if (error) {
+                out << error.message() << std::endl;
+            } else {
+                out << "Stopped profiling after " << uptime() << " seconds. No dump options specified" << std::endl;
+            }
+            break;
+        }
+        case ACTION_STATUS: {
+            MutexLocker ml(_state_lock);
+            if (_state == RUNNING) {
+                out << "[" << _engine->name() << "] profiling is running for " << uptime() << " seconds" << std::endl;
+            } else {
+                out << "Profiler is not active" << std::endl;
+            }
+            break;
+        }
+        case ACTION_LIST: {
+            out << "Basic events:" << std::endl;
+            out << " " << EVENT_CPU << std::endl;
+            out << " " << EVENT_ALLOC << std::endl;
+            out << " " << EVENT_LOCK << std::endl;
+            out << " " << EVENT_WALL << std::endl;
+            out << " " << EVENT_ITIMER << std::endl;
+
+            if (PerfEvents::supported()) {
+                out << "Perf events:" << std::endl;
+                // The first perf event is "cpu" which is already printed
+                for (int event_id = 1; ; event_id++) {
+                    const char* event_name = PerfEvents::getEventName(event_id);
+                    if (event_name == NULL) break;
+                    out << " " << event_name << std::endl;
+                }
+            }
+            break;
+        }
+        case ACTION_VERSION:
+            out << FULL_VERSION_STRING;
+            break;
+        case ACTION_DUMP:
+            stop();
+            switch (args._output) {
+                case OUTPUT_COLLAPSED:
+                    dumpCollapsed(out, args);
+                    break;
+                case OUTPUT_FLAMEGRAPH:
+                    dumpFlameGraph(out, args, false);
+                    break;
+                case OUTPUT_TREE:
+                    dumpFlameGraph(out, args, true);
+                    break;
+                case OUTPUT_TEXT:
+                    dumpSummary(out);
+                    if (args._dump_traces > 0) dumpTraces(out, args);
+                    if (args._dump_flat > 0) dumpFlat(out, args);
+                    break;
+                default:
+                    break;
+            }
+            break;
+        default:
+            break;
+    }
+}
+
+void Profiler::run(Arguments& args) {
+    if (args._file == NULL || args._output == OUTPUT_JFR) {
+        runInternal(args, std::cout);
+    } else {
+        std::ofstream out(args._file, std::ios::out | std::ios::trunc);
+        if (out.is_open()) {
+            runInternal(args, out);
+            out.close();
+        } else {
+            std::cerr << "Could not open " << args._file << std::endl;
+        }
+    }
+}
+
+void Profiler::shutdown(Arguments& args) {
+    MutexLocker ml(_state_lock);
+
+    // The last chance to dump profile before VM terminates
+    if (_state == RUNNING && args._output != OUTPUT_NONE) {
+        args._action = ACTION_DUMP;
+        run(args);
+    }
+
+    _state = TERMINATED;
+}