jruby-async-profiler 0.1.0

Files changed (82)
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/Gemfile +4 -0
  5. data/README.md +35 -0
  6. data/Rakefile +2 -0
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/ext/Rakefile +6 -0
  10. data/ext/async-profiler/.gitattributes +1 -0
  11. data/ext/async-profiler/.gitignore +6 -0
  12. data/ext/async-profiler/.travis.yml +11 -0
  13. data/ext/async-profiler/CHANGELOG.md +107 -0
  14. data/ext/async-profiler/JavaHome.class +0 -0
  15. data/ext/async-profiler/LICENSE +201 -0
  16. data/ext/async-profiler/Makefile +66 -0
  17. data/ext/async-profiler/README.md +487 -0
  18. data/ext/async-profiler/demo/SwingSet2.svg +2247 -0
  19. data/ext/async-profiler/docs/cddl1.txt +358 -0
  20. data/ext/async-profiler/profiler.sh +240 -0
  21. data/ext/async-profiler/src/allocTracer.cpp +155 -0
  22. data/ext/async-profiler/src/allocTracer.h +74 -0
  23. data/ext/async-profiler/src/arch.h +69 -0
  24. data/ext/async-profiler/src/arguments.cpp +265 -0
  25. data/ext/async-profiler/src/arguments.h +152 -0
  26. data/ext/async-profiler/src/codeCache.cpp +128 -0
  27. data/ext/async-profiler/src/codeCache.h +99 -0
  28. data/ext/async-profiler/src/engine.cpp +50 -0
  29. data/ext/async-profiler/src/engine.h +38 -0
  30. data/ext/async-profiler/src/flameGraph.cpp +770 -0
  31. data/ext/async-profiler/src/flameGraph.h +118 -0
  32. data/ext/async-profiler/src/flightRecorder.cpp +727 -0
  33. data/ext/async-profiler/src/flightRecorder.h +39 -0
  34. data/ext/async-profiler/src/frameName.cpp +189 -0
  35. data/ext/async-profiler/src/frameName.h +56 -0
  36. data/ext/async-profiler/src/itimer.cpp +49 -0
  37. data/ext/async-profiler/src/itimer.h +43 -0
  38. data/ext/async-profiler/src/jattach/jattach.c +437 -0
  39. data/ext/async-profiler/src/java/one/profiler/AsyncProfiler.java +160 -0
  40. data/ext/async-profiler/src/java/one/profiler/AsyncProfilerMXBean.java +43 -0
  41. data/ext/async-profiler/src/java/one/profiler/Counter.java +25 -0
  42. data/ext/async-profiler/src/java/one/profiler/Events.java +28 -0
  43. data/ext/async-profiler/src/javaApi.cpp +124 -0
  44. data/ext/async-profiler/src/lockTracer.cpp +161 -0
  45. data/ext/async-profiler/src/lockTracer.h +55 -0
  46. data/ext/async-profiler/src/mutex.cpp +33 -0
  47. data/ext/async-profiler/src/mutex.h +49 -0
  48. data/ext/async-profiler/src/os.h +45 -0
  49. data/ext/async-profiler/src/os_linux.cpp +129 -0
  50. data/ext/async-profiler/src/os_macos.cpp +115 -0
  51. data/ext/async-profiler/src/perfEvents.h +60 -0
  52. data/ext/async-profiler/src/perfEvents_linux.cpp +550 -0
  53. data/ext/async-profiler/src/perfEvents_macos.cpp +64 -0
  54. data/ext/async-profiler/src/profiler.cpp +952 -0
  55. data/ext/async-profiler/src/profiler.h +238 -0
  56. data/ext/async-profiler/src/spinLock.h +66 -0
  57. data/ext/async-profiler/src/stackFrame.h +57 -0
  58. data/ext/async-profiler/src/stackFrame_aarch64.cpp +75 -0
  59. data/ext/async-profiler/src/stackFrame_arm.cpp +58 -0
  60. data/ext/async-profiler/src/stackFrame_i386.cpp +82 -0
  61. data/ext/async-profiler/src/stackFrame_x64.cpp +113 -0
  62. data/ext/async-profiler/src/symbols.h +37 -0
  63. data/ext/async-profiler/src/symbols_linux.cpp +354 -0
  64. data/ext/async-profiler/src/symbols_macos.cpp +156 -0
  65. data/ext/async-profiler/src/vmEntry.cpp +173 -0
  66. data/ext/async-profiler/src/vmEntry.h +105 -0
  67. data/ext/async-profiler/src/vmStructs.cpp +104 -0
  68. data/ext/async-profiler/src/vmStructs.h +112 -0
  69. data/ext/async-profiler/src/wallClock.cpp +96 -0
  70. data/ext/async-profiler/src/wallClock.h +56 -0
  71. data/ext/async-profiler/test/AllocatingTarget.java +26 -0
  72. data/ext/async-profiler/test/LoadLibraryTest.java +21 -0
  73. data/ext/async-profiler/test/Target.java +31 -0
  74. data/ext/async-profiler/test/ThreadsTarget.java +35 -0
  75. data/ext/async-profiler/test/alloc-smoke-test.sh +36 -0
  76. data/ext/async-profiler/test/load-library-test.sh +35 -0
  77. data/ext/async-profiler/test/smoke-test.sh +37 -0
  78. data/ext/async-profiler/test/thread-smoke-test.sh +32 -0
  79. data/jruby-async-profiler.gemspec +32 -0
  80. data/lib/jruby/async/profiler.rb +10 -0
  81. data/lib/jruby/async/profiler/version.rb +7 -0
  82. metadata +155 -0
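
The gem itself is a thin wrapper: it vendors the upstream async-profiler sources (including the one.profiler Java API listed above) and builds them as a native extension. As a rough sketch of how the bundled API is typically driven once the native library has been loaded into the JVM (for example via -agentpath), the class and method names below come from the vendored AsyncProfiler.java, Events.java, and Counter.java; the surrounding workload is illustrative only:

import one.profiler.AsyncProfiler;
import one.profiler.Counter;
import one.profiler.Events;

public class ProfileDemo {
    public static void main(String[] args) throws Exception {
        // Assumes libasyncProfiler has already been loaded into this JVM;
        // getInstance() then binds to the loaded native library.
        AsyncProfiler profiler = AsyncProfiler.getInstance();

        // Sample CPU stacks roughly every millisecond (interval is in nanoseconds).
        profiler.start(Events.CPU, 1_000_000);

        doSomeWork();

        profiler.stop();

        // Collapsed stacks in FlameGraph input format, one line per unique stack.
        System.out.print(profiler.dumpCollapsed(Counter.SAMPLES));
    }

    private static void doSomeWork() {
        long sum = 0;
        for (int i = 0; i < 50_000_000; i++) {
            sum += i * 31L;
        }
        System.out.println("checksum: " + sum);
    }
}
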
data/ext/async-profiler/src/perfEvents_macos.cpp
@@ -0,0 +1,64 @@
+ /*
+  * Copyright 2017 Andrei Pangin
+  *
+  * Licensed under the Apache License, Version 2.0 (the "License");
+  * you may not use this file except in compliance with the License.
+  * You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ #ifdef __APPLE__
+
+ #include "perfEvents.h"
+
+
+ int PerfEvents::_max_events;
+ PerfEvent* PerfEvents::_events;
+ PerfEventType* PerfEvents::_event_type;
+ long PerfEvents::_interval;
+ Ring PerfEvents::_ring;
+ bool PerfEvents::_print_extended_warning;
+
+
+ bool PerfEvents::createForThread(int tid) { return false; }
+ void PerfEvents::destroyForThread(int tid) {}
+ void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {}
+
+ const char* PerfEvents::units() {
+     return "ns";
+ }
+
+ Error PerfEvents::start(Arguments& args) {
+     return Error("PerfEvents are unsupported on macOS");
+ }
+
+ void PerfEvents::stop() {
+ }
+
+ void PerfEvents::onThreadStart() {
+ }
+
+ void PerfEvents::onThreadEnd() {
+ }
+
+ int PerfEvents::getNativeTrace(void* ucontext, int tid, const void** callchain, int max_depth,
+                                const void* jit_min_address, const void* jit_max_address) {
+     return 0;
+ }
+
+ bool PerfEvents::supported() {
+     return false;
+ }
+
+ const char* PerfEvents::getEventName(int event_id) {
+     return NULL;
+ }
+
+ #endif // __APPLE__
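
This stub makes PerfEvents::supported() return false on macOS, so Profiler::selectEngine() in profiler.cpp below silently serves the default cpu event with the wall-clock engine instead of perf_events. A caller can make that fallback explicit by requesting the wall (or itimer) event by name; a minimal sketch against the bundled Java API, assuming the agent library is already loaded:

import one.profiler.AsyncProfiler;
import one.profiler.Events;

public class MacSamplingDemo {
    public static void main(String[] args) throws Exception {
        AsyncProfiler profiler = AsyncProfiler.getInstance();

        // On macOS perf_events is unavailable, so "cpu" falls back to wall-clock
        // sampling anyway; requesting WALL (or ITIMER) makes that explicit.
        profiler.start(Events.WALL, 10_000_000);   // ~10 ms sampling interval
        Thread.sleep(5_000);
        profiler.stop();

        System.out.println("Collected " + profiler.getSamples() + " samples");
    }
}
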
data/ext/async-profiler/src/profiler.cpp
@@ -0,0 +1,952 @@
+ /*
+  * Copyright 2016 Andrei Pangin
+  *
+  * Licensed under the Apache License, Version 2.0 (the "License");
+  * you may not use this file except in compliance with the License.
+  * You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ #include <fstream>
+ #include <dlfcn.h>
+ #include <unistd.h>
+ #include <stdint.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <sys/param.h>
+ #include "profiler.h"
+ #include "perfEvents.h"
+ #include "allocTracer.h"
+ #include "lockTracer.h"
+ #include "wallClock.h"
+ #include "itimer.h"
+ #include "flameGraph.h"
+ #include "flightRecorder.h"
+ #include "frameName.h"
+ #include "os.h"
+ #include "stackFrame.h"
+ #include "symbols.h"
+ #include "vmStructs.h"
+
+
+ Profiler Profiler::_instance;
+
+ static PerfEvents perf_events;
+ static AllocTracer alloc_tracer;
+ static LockTracer lock_tracer;
+ static WallClock wall_clock;
+ static ITimer itimer;
+
+
+ u64 Profiler::hashCallTrace(int num_frames, ASGCT_CallFrame* frames) {
+     const u64 M = 0xc6a4a7935bd1e995ULL;
+     const int R = 47;
+
+     u64 h = num_frames * M;
+
+     for (int i = 0; i < num_frames; i++) {
+         u64 k = (u64)frames[i].method_id;
+         k *= M;
+         k ^= k >> R;
+         k *= M;
+         h ^= k;
+         h *= M;
+     }
+
+     h ^= h >> R;
+     h *= M;
+     h ^= h >> R;
+
+     return h;
+ }
+
+ int Profiler::storeCallTrace(int num_frames, ASGCT_CallFrame* frames, u64 counter) {
+     u64 hash = hashCallTrace(num_frames, frames);
+     int bucket = (int)(hash % MAX_CALLTRACES);
+     int i = bucket;
+
+     while (_hashes[i] != hash) {
+         if (_hashes[i] == 0) {
+             if (__sync_bool_compare_and_swap(&_hashes[i], 0, hash)) {
+                 copyToFrameBuffer(num_frames, frames, &_traces[i]);
+                 break;
+             }
+             continue;
+         }
+
+         if (++i == MAX_CALLTRACES) i = 0;  // move to next slot
+         if (i == bucket) return 0;         // the table is full
+     }
+
+     // CallTrace hash found => atomically increment counter
+     atomicInc(_traces[i]._samples);
+     atomicInc(_traces[i]._counter, counter);
+     return i;
+ }
+
+ void Profiler::copyToFrameBuffer(int num_frames, ASGCT_CallFrame* frames, CallTraceSample* trace) {
+     // Atomically reserve space in frame buffer
+     int start_frame;
+     do {
+         start_frame = _frame_buffer_index;
+         if (start_frame + num_frames > _frame_buffer_size) {
+             _frame_buffer_overflow = true;  // not enough space to store full trace
+             return;
+         }
+     } while (!__sync_bool_compare_and_swap(&_frame_buffer_index, start_frame, start_frame + num_frames));
+
+     trace->_start_frame = start_frame;
+     trace->_num_frames = num_frames;
+
+     for (int i = 0; i < num_frames; i++) {
+         _frame_buffer[start_frame++] = frames[i];
+     }
+ }
+
+ u64 Profiler::hashMethod(jmethodID method) {
+     const u64 M = 0xc6a4a7935bd1e995ULL;
+     const int R = 17;
+
+     u64 h = (u64)method;
+
+     h ^= h >> R;
+     h *= M;
+     h ^= h >> R;
+
+     return h;
+ }
+
+ void Profiler::storeMethod(jmethodID method, jint bci, u64 counter) {
+     u64 hash = hashMethod(method);
+     int bucket = (int)(hash % MAX_CALLTRACES);
+     int i = bucket;
+
+     while (_methods[i]._method.method_id != method) {
+         if (_methods[i]._method.method_id == NULL) {
+             if (__sync_bool_compare_and_swap(&_methods[i]._method.method_id, NULL, method)) {
+                 _methods[i]._method.bci = bci;
+                 break;
+             }
+             continue;
+         }
+
+         if (++i == MAX_CALLTRACES) i = 0;  // move to next slot
+         if (i == bucket) return;           // the table is full
+     }
+
+     // Method found => atomically increment counter
+     atomicInc(_methods[i]._samples);
+     atomicInc(_methods[i]._counter, counter);
+ }
+
+ void Profiler::addJavaMethod(const void* address, int length, jmethodID method) {
+     _jit_lock.lock();
+     _java_methods.add(address, length, method);
+     updateJitRange(address, (const char*)address + length);
+     _jit_lock.unlock();
+ }
+
+ void Profiler::removeJavaMethod(const void* address, jmethodID method) {
+     _jit_lock.lock();
+     _java_methods.remove(address, method);
+     _jit_lock.unlock();
+ }
+
+ void Profiler::addRuntimeStub(const void* address, int length, const char* name) {
+     _jit_lock.lock();
+     _runtime_stubs.add(address, length, name);
+     updateJitRange(address, (const char*)address + length);
+     _jit_lock.unlock();
+ }
+
+ void Profiler::updateJitRange(const void* min_address, const void* max_address) {
+     if (min_address < _jit_min_address) _jit_min_address = min_address;
+     if (max_address > _jit_max_address) _jit_max_address = max_address;
+ }
+
+ const char* Profiler::asgctError(int code) {
+     switch (code) {
+         case ticks_no_Java_frame:
+         case ticks_unknown_not_Java:
+         case ticks_not_walkable_not_Java:
+             // Not in Java context at all; this is not an error
+             return NULL;
+         case ticks_GC_active:
+             return "GC_active";
+         case ticks_unknown_Java:
+             return "unknown_Java";
+         case ticks_not_walkable_Java:
+             return "not_walkable_Java";
+         case ticks_thread_exit:
+             return "thread_exit";
+         case ticks_deopt:
+             return "deoptimization";
+         case ticks_safepoint:
+             return "safepoint";
+         case ticks_skipped:
+             return "skipped";
+         default:
+             // Should not happen
+             return "unexpected_state";
+     }
+ }
+
+ NativeCodeCache* Profiler::jvmLibrary() {
+     const void* asyncGetCallTraceAddr = (const void*)VM::_asyncGetCallTrace;
+     const int native_lib_count = _native_lib_count;
+     for (int i = 0; i < native_lib_count; i++) {
+         if (_native_libs[i]->contains(asyncGetCallTraceAddr)) {
+             return _native_libs[i];
+         }
+     }
+     return NULL;
+ }
+
+ const void* Profiler::findSymbol(const char* name) {
+     const int native_lib_count = _native_lib_count;
+     for (int i = 0; i < native_lib_count; i++) {
+         const void* address = _native_libs[i]->findSymbol(name);
+         if (address != NULL) {
+             return address;
+         }
+     }
+     return NULL;
+ }
+
+ const char* Profiler::findNativeMethod(const void* address) {
+     const int native_lib_count = _native_lib_count;
+     for (int i = 0; i < native_lib_count; i++) {
+         if (_native_libs[i]->contains(address)) {
+             return _native_libs[i]->binarySearch(address);
+         }
+     }
+     return NULL;
+ }
+
+ int Profiler::getNativeTrace(void* ucontext, ASGCT_CallFrame* frames, int tid, bool* stopped_at_java_frame) {
+     const void* native_callchain[MAX_NATIVE_FRAMES];
+     int native_frames = _engine->getNativeTrace(ucontext, tid, native_callchain, MAX_NATIVE_FRAMES,
+                                                 _jit_min_address, _jit_max_address);
+
+     *stopped_at_java_frame = false;
+     if (native_frames > 0) {
+         const void* last_pc = native_callchain[native_frames - 1];
+         if (last_pc >= _jit_min_address && last_pc < _jit_max_address) {
+             *stopped_at_java_frame = true;
+             native_frames--;
+         }
+     }
+
+     for (int i = 0; i < native_frames; i++) {
+         frames[i].bci = BCI_NATIVE_FRAME;
+         frames[i].method_id = (jmethodID)findNativeMethod(native_callchain[i]);
+     }
+
+     return native_frames;
+ }
+
+ int Profiler::getJavaTraceAsync(void* ucontext, ASGCT_CallFrame* frames, int max_depth) {
+     JNIEnv* jni = VM::jni();
+     if (jni == NULL) {
+         // Not a Java thread
+         return 0;
+     }
+
+     ASGCT_CallTrace trace = {jni, 0, frames};
+     VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
+
+ #ifndef SAFE_MODE
+     if (trace.num_frames == ticks_unknown_Java) {
+         // If current Java stack is not walkable (e.g. the top frame is not fully constructed),
+         // try to manually pop the top frame off, hoping that the previous frame is walkable.
+         // This is a temporary workaround for AsyncGetCallTrace issues,
+         // see https://bugs.openjdk.java.net/browse/JDK-8178287
+         StackFrame top_frame(ucontext);
+         uintptr_t pc = top_frame.pc(),
+                   sp = top_frame.sp(),
+                   fp = top_frame.fp();
+
+         // Guess top method by PC and insert it manually into the call trace
+         bool is_entry_frame = false;
+         if (fillTopFrame((const void*)pc, trace.frames)) {
+             is_entry_frame = trace.frames->bci == BCI_NATIVE_FRAME &&
+                              strcmp((const char*)trace.frames->method_id, "call_stub") == 0;
+             trace.frames++;
+             max_depth--;
+         }
+
+         if (top_frame.pop(is_entry_frame)) {
+             // Retry with the fixed context, but only if PC looks reasonable,
+             // otherwise AsyncGetCallTrace may crash
+             if (addressInCode((const void*)top_frame.pc())) {
+                 VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
+             }
+             top_frame.restore(pc, sp, fp);
+
+             if (trace.num_frames > 0) {
+                 return trace.num_frames + (trace.frames - frames);
+             }
+
+             // Restore previous context
+             trace.num_frames = ticks_unknown_Java;
+         }
+     } else if (trace.num_frames == ticks_GC_active && VM::is_hotspot() && _JvmtiEnv_GetStackTrace != NULL) {
+         // While GC is running Java threads are known to be at safepoint
+         return getJavaTraceJvmti((jvmtiFrameInfo*)frames, frames, max_depth);
+     }
+ #endif // SAFE_MODE
+
+     if (trace.num_frames > 0) {
+         return trace.num_frames;
+     }
+
+     const char* err_string = asgctError(trace.num_frames);
+     if (err_string == NULL) {
+         // No Java stack, because thread is not in Java context
+         return 0;
+     }
+
+     atomicInc(_failures[-trace.num_frames]);
+     frames[0].bci = BCI_ERROR;
+     frames[0].method_id = (jmethodID)err_string;
+     return 1;
+ }
+
+ int Profiler::getJavaTraceJvmti(jvmtiFrameInfo* jvmti_frames, ASGCT_CallFrame* frames, int max_depth) {
+     // We cannot call pure JVM TI here, because it assumes _thread_in_native state,
+     // but allocation events happen in _thread_in_vm state,
+     // see https://github.com/jvm-profiling-tools/async-profiler/issues/64
+     void* thread = _ThreadLocalStorage_thread();
+     int num_frames;
+     if (_JvmtiEnv_GetStackTrace(NULL, thread, 0, max_depth, jvmti_frames, &num_frames) == 0 && num_frames > 0) {
+         // Profiler expects stack trace in AsyncGetCallTrace format; convert it now
+         for (int i = 0; i < num_frames; i++) {
+             frames[i].method_id = jvmti_frames[i].method;
+             frames[i].bci = 0;
+         }
+         return num_frames;
+     }
+
+     return 0;
+ }
+
+ int Profiler::makeEventFrame(ASGCT_CallFrame* frames, jint event_type, jmethodID event) {
+     frames[0].bci = event_type;
+     frames[0].method_id = event;
+     return 1;
+ }
+
+ bool Profiler::fillTopFrame(const void* pc, ASGCT_CallFrame* frame) {
+     jmethodID method = NULL;
+     _jit_lock.lockShared();
+
+     // Check if PC lies within JVM's compiled code cache
+     if (pc >= _jit_min_address && pc < _jit_max_address) {
+         if ((method = _java_methods.find(pc)) != NULL) {
+             // PC belong to a JIT compiled method
+             frame->bci = 0;
+             frame->method_id = method;
+         } else if ((method = _runtime_stubs.find(pc)) != NULL) {
+             // PC belongs to a VM runtime stub
+             frame->bci = BCI_NATIVE_FRAME;
+             frame->method_id = method;
+         }
+     }
+
+     _jit_lock.unlockShared();
+     return method != NULL;
+ }
+
+ bool Profiler::addressInCode(const void* pc) {
+     // 1. Check if PC lies within JVM's compiled code cache
+     // Address in CodeCache is executable if it belongs to a Java method or a runtime stub
+     if (pc >= _jit_min_address && pc < _jit_max_address) {
+         _jit_lock.lockShared();
+         bool valid = _java_methods.find(pc) != NULL || _runtime_stubs.find(pc) != NULL;
+         _jit_lock.unlockShared();
+         return valid;
+     }
+
+     // 2. Check if PC belongs to executable code of shared libraries
+     const int native_lib_count = _native_lib_count;
+     for (int i = 0; i < native_lib_count; i++) {
+         if (_native_libs[i]->contains(pc)) {
+             return true;
+         }
+     }
+
+     // This can be some other dynamically generated code, but we don't know it. Better stay safe.
+     return false;
+ }
+
+ void Profiler::recordSample(void* ucontext, u64 counter, jint event_type, jmethodID event) {
+     int tid = OS::threadId();
+
+     u64 lock_index = atomicInc(_total_samples) % CONCURRENCY_LEVEL;
+     if (!_locks[lock_index].tryLock()) {
+         // Too many concurrent signals already
+         atomicInc(_failures[-ticks_skipped]);
+
+         if (event_type == 0) {
+             // Need to reset PerfEvents ring buffer, even though we discard the collected trace
+             _engine->getNativeTrace(ucontext, tid, NULL, 0, _jit_min_address, _jit_max_address);
+         }
+         return;
+     }
+
+     atomicInc(_total_counter, counter);
+
+     ASGCT_CallFrame* frames = _calltrace_buffer[lock_index]->_asgct_frames;
+     bool need_java_trace = true;
+
+     int num_frames = 0;
+     if (event_type == 0) {
+         num_frames = getNativeTrace(ucontext, frames, tid, &need_java_trace);
+     } else if (event != NULL) {
+         num_frames = makeEventFrame(frames, event_type, event);
+     }
+
+     if ((_sync_walk || event_type != 0) && _JvmtiEnv_GetStackTrace != NULL) {
+         // Events like object allocation happen at known places where it is safe to call JVM TI
+         jvmtiFrameInfo* jvmti_frames = _calltrace_buffer[lock_index]->_jvmti_frames;
+         num_frames += getJavaTraceJvmti(jvmti_frames + num_frames, frames + num_frames, _max_stack_depth);
+     } else if (OS::isSignalSafeTLS() || need_java_trace) {
+         num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth);
+     }
+
+     if (num_frames == 0 || (num_frames == 1 && event != NULL)) {
+         num_frames += makeEventFrame(frames + num_frames, BCI_ERROR, (jmethodID)"not_walkable");
+     }
+
+     if (_threads) {
+         num_frames += makeEventFrame(frames + num_frames, BCI_THREAD_ID, (jmethodID)(uintptr_t)tid);
+     }
+
+     storeMethod(frames[0].method_id, frames[0].bci, counter);
+     int call_trace_id = storeCallTrace(num_frames, frames, counter);
+     _jfr.recordExecutionSample(lock_index, tid, call_trace_id);
+
+     _locks[lock_index].unlock();
+ }
+
+ jboolean JNICALL Profiler::NativeLibraryLoadTrap(JNIEnv* env, jobject self, jstring name, jboolean builtin) {
+     jboolean result = _instance._original_NativeLibrary_load(env, self, name, builtin);
+     Symbols::parseLibraries(_instance._native_libs, _instance._native_lib_count, MAX_NATIVE_LIBS);
+     return result;
+ }
+
+ void Profiler::bindNativeLibraryLoad(NativeLoadLibraryFunc entry) {
+     JNIEnv* env = VM::jni();
+     jclass NativeLibrary = env->FindClass("java/lang/ClassLoader$NativeLibrary");
+
+     if (NativeLibrary != NULL) {
+         // Find JNI entry for NativeLibrary.load() method
+         if (_original_NativeLibrary_load == NULL) {
+             if (env->GetMethodID(NativeLibrary, "load0", "(Ljava/lang/String;Z)Z") != NULL) {
+                 // JDK 9+
+                 _load_method.name = (char*)"load0";
+                 _load_method.signature = (char*)"(Ljava/lang/String;Z)Z";
+             } else if (env->GetMethodID(NativeLibrary, "load", "(Ljava/lang/String;Z)V") != NULL) {
+                 // JDK 8
+                 _load_method.name = (char*)"load";
+                 _load_method.signature = (char*)"(Ljava/lang/String;Z)V";
+             } else {
+                 // JDK 7
+                 _load_method.name = (char*)"load";
+                 _load_method.signature = (char*)"(Ljava/lang/String;)V";
+             }
+
+             char jni_name[64];
+             strcpy(jni_name, "Java_java_lang_ClassLoader_00024NativeLibrary_");
+             strcat(jni_name, _load_method.name);
+             _original_NativeLibrary_load = (NativeLoadLibraryFunc)dlsym(VM::_libjava, jni_name);
+         }
+
+         // Change function pointer for the native method
+         if (_original_NativeLibrary_load != NULL) {
+             _load_method.fnPtr = (void*)entry;
+             env->RegisterNatives(NativeLibrary, &_load_method, 1);
+         }
+     }
+
+     env->ExceptionClear();
+ }
+
+ void Profiler::initJvmtiFunctions(NativeCodeCache* libjvm) {
+     if (_JvmtiEnv_GetStackTrace == NULL) {
+         // Find ThreadLocalStorage::thread() if exists
+         if (_ThreadLocalStorage_thread == NULL) {
+             _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN18ThreadLocalStorage6threadEv");
+         }
+         // Fallback to ThreadLocalStorage::get_thread_slow()
+         if (_ThreadLocalStorage_thread == NULL) {
+             _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN18ThreadLocalStorage15get_thread_slowEv");
+         }
+         // Fallback to Thread::current(), e.g. on Zing
+         if (_ThreadLocalStorage_thread == NULL) {
+             _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN6Thread7currentEv");
+         }
+         // JvmtiEnv::GetStackTrace(JavaThread* java_thread, jint start_depth, jint max_frame_count, jvmtiFrameInfo* frame_buffer, jint* count_ptr)
+         if (_ThreadLocalStorage_thread != NULL) {
+             _JvmtiEnv_GetStackTrace = (jvmtiError (*)(void*, void*, jint, jint, jvmtiFrameInfo*, jint*))
+                 libjvm->findSymbol("_ZN8JvmtiEnv13GetStackTraceEP10JavaThreadiiP15_jvmtiFrameInfoPi");
+         }
+
+         if (_JvmtiEnv_GetStackTrace == NULL) {
+             fprintf(stderr, "WARNING: Install JVM debug symbols to improve profile accuracy\n");
+         }
+     }
+ }
+
+ void Profiler::setThreadName(int tid, const char* name) {
+     MutexLocker ml(_thread_names_lock);
+     _thread_names[tid] = name;
+ }
+
+ void Profiler::updateThreadName(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) {
+     if (_threads && VMThread::available()) {
+         VMThread* vm_thread = VMThread::fromJavaThread(jni, thread);
+         jvmtiThreadInfo thread_info;
+         if (vm_thread != NULL && jvmti->GetThreadInfo(thread, &thread_info) == 0) {
+             setThreadName(vm_thread->osThreadId(), thread_info.name);
+             jvmti->Deallocate((unsigned char*)thread_info.name);
+         }
+     }
+ }
+
+ void Profiler::updateAllThreadNames() {
+     if (_threads && VMThread::available()) {
+         jvmtiEnv* jvmti = VM::jvmti();
+         jint thread_count;
+         jthread* thread_objects;
+         if (jvmti->GetAllThreads(&thread_count, &thread_objects) != 0) {
+             return;
+         }
+
+         JNIEnv* jni = VM::jni();
+         for (int i = 0; i < thread_count; i++) {
+             updateThreadName(jvmti, jni, thread_objects[i]);
+         }
+
+         jvmti->Deallocate((unsigned char*)thread_objects);
+     }
+ }
+
+ Engine* Profiler::selectEngine(const char* event_name) {
+     if (strcmp(event_name, EVENT_CPU) == 0) {
+         return PerfEvents::supported() ? (Engine*)&perf_events : (Engine*)&wall_clock;
+     } else if (strcmp(event_name, EVENT_ALLOC) == 0) {
+         return &alloc_tracer;
+     } else if (strcmp(event_name, EVENT_LOCK) == 0) {
+         return &lock_tracer;
+     } else if (strcmp(event_name, EVENT_WALL) == 0) {
+         return &wall_clock;
+     } else if (strcmp(event_name, EVENT_ITIMER) == 0) {
+         return &itimer;
+     } else {
+         return &perf_events;
+     }
+ }
+
+ Error Profiler::start(Arguments& args, bool reset) {
+     MutexLocker ml(_state_lock);
+     if (_state != IDLE) {
+         return Error("Profiler already started");
+     }
+
+     if (VM::_asyncGetCallTrace == NULL) {
+         return Error("Could not find AsyncGetCallTrace function");
+     }
+
+     bool first_time = _native_lib_count == 0;
+     if (first_time || reset) {
+         // Reset counters
+         _total_samples = 0;
+         _total_counter = 0;
+         memset(_failures, 0, sizeof(_failures));
+         memset(_hashes, 0, sizeof(_hashes));
+         memset(_traces, 0, sizeof(_traces));
+         memset(_methods, 0, sizeof(_methods));
+
+         // Index 0 denotes special call trace with no frames
+         _hashes[0] = (u64)-1;
+
+         // Reset frame buffer
+         _frame_buffer_index = 0;
+         _frame_buffer_overflow = false;
+
+         // Reset thread names
+         {
+             MutexLocker ml(_thread_names_lock);
+             _thread_names.clear();
+         }
+     }
+
+     // (Re-)allocate frames
+     if (_frame_buffer_size != args._framebuf) {
+         _frame_buffer_size = args._framebuf;
+         _frame_buffer = (ASGCT_CallFrame*)realloc(_frame_buffer, _frame_buffer_size * sizeof(ASGCT_CallFrame));
+         if (_frame_buffer == NULL) {
+             _frame_buffer_size = 0;
+             return Error("Not enough memory to allocate frame buffer (try smaller framebuf)");
+         }
+     }
+
+     // (Re-)allocate calltrace buffers
+     if (_max_stack_depth != args._jstackdepth) {
+         _max_stack_depth = args._jstackdepth;
+         size_t buffer_size = (_max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES) * sizeof(CallTraceBuffer);
+
+         for (int i = 0; i < CONCURRENCY_LEVEL; i++) {
+             free(_calltrace_buffer[i]);
+             _calltrace_buffer[i] = (CallTraceBuffer*)malloc(buffer_size);
+             if (_calltrace_buffer[i] == NULL) {
+                 _max_stack_depth = 0;
+                 return Error("Not enough memory to allocate stack trace buffers (try smaller jstackdepth)");
+             }
+         }
+     }
+
+     _threads = args._threads && args._output != OUTPUT_JFR;
+     _sync_walk = args._sync_walk;
+
+     Symbols::parseLibraries(_native_libs, _native_lib_count, MAX_NATIVE_LIBS);
+     NativeCodeCache* libjvm = jvmLibrary();
+     if (libjvm == NULL) {
+         return Error("libjvm not found among loaded libraries");
+     }
+     VMStructs::init(libjvm);
+     initJvmtiFunctions(libjvm);
+
+     if (args._output == OUTPUT_JFR) {
+         Error error = _jfr.start(args._file);
+         if (error) {
+             return error;
+         }
+     }
+
+     _engine = selectEngine(args._event);
+     Error error = _engine->start(args);
+     if (error) {
+         _jfr.stop();
+         return error;
+     }
+
+     if (_threads) {
+         // Thread events might be already enabled by PerfEvents::start
+         switchThreadEvents(JVMTI_ENABLE);
+     }
+
+     bindNativeLibraryLoad(NativeLibraryLoadTrap);
+
+     _state = RUNNING;
+     _start_time = time(NULL);
+     return Error::OK;
+ }
+
+ Error Profiler::stop() {
+     MutexLocker ml(_state_lock);
+     if (_state != RUNNING) {
+         return Error("Profiler is not active");
+     }
+
+     _engine->stop();
+
+     // Acquire all spinlocks to avoid race with remaining signals
+     for (int i = 0; i < CONCURRENCY_LEVEL; i++) _locks[i].lock();
+     _jfr.stop();
+     for (int i = 0; i < CONCURRENCY_LEVEL; i++) _locks[i].unlock();
+
+     bindNativeLibraryLoad(_original_NativeLibrary_load);
+
+     switchThreadEvents(JVMTI_DISABLE);
+     updateAllThreadNames();
+
+     _state = IDLE;
+     return Error::OK;
+ }
+
+ void Profiler::switchThreadEvents(jvmtiEventMode mode) {
+     if (_thread_events_state != mode) {
+         jvmtiEnv* jvmti = VM::jvmti();
+         jvmti->SetEventNotificationMode(mode, JVMTI_EVENT_THREAD_START, NULL);
+         jvmti->SetEventNotificationMode(mode, JVMTI_EVENT_THREAD_END, NULL);
+         _thread_events_state = mode;
+     }
+ }
+
+ void Profiler::dumpSummary(std::ostream& out) {
+     char buf[256];
+     snprintf(buf, sizeof(buf),
+              "--- Execution profile ---\n"
+              "Total samples : %lld\n",
+              _total_samples);
+     out << buf;
+
+     double percent = 100.0 / _total_samples;
+     for (int i = 1; i < ASGCT_FAILURE_TYPES; i++) {
+         const char* err_string = asgctError(-i);
+         if (err_string != NULL && _failures[i] > 0) {
+             snprintf(buf, sizeof(buf), "%-20s: %lld (%.2f%%)\n", err_string, _failures[i], _failures[i] * percent);
+             out << buf;
+         }
+     }
+     out << std::endl;
+
+     if (_frame_buffer_overflow) {
+         out << "Frame buffer overflowed! Consider increasing its size." << std::endl;
+     } else {
+         double usage = 100.0 * _frame_buffer_index / _frame_buffer_size;
+         out << "Frame buffer usage : " << usage << "%" << std::endl;
+     }
+     out << std::endl;
+ }
+
+ /*
+  * Dump stacks in FlameGraph input format:
+  *
+  *   <frame>;<frame>;...;<topmost frame> <count>
+  */
+ void Profiler::dumpCollapsed(std::ostream& out, Arguments& args) {
+     MutexLocker ml(_state_lock);
+     if (_state != IDLE || _engine == NULL) return;
+
+     FrameName fn(args._style, _thread_names_lock, _thread_names);
+     u64 unknown = 0;
+
+     for (int i = 0; i < MAX_CALLTRACES; i++) {
+         CallTraceSample& trace = _traces[i];
+         if (trace._samples == 0) continue;
+
+         if (trace._num_frames == 0) {
+             unknown += (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter);
+             continue;
+         }
+
+         for (int j = trace._num_frames - 1; j >= 0; j--) {
+             const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+             out << frame_name << (j == 0 ? ' ' : ';');
+         }
+         out << (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter) << "\n";
+     }
+
+     if (unknown != 0) {
+         out << "[frame_buffer_overflow] " << unknown << "\n";
+     }
+ }
+
+ void Profiler::dumpFlameGraph(std::ostream& out, Arguments& args, bool tree) {
+     MutexLocker ml(_state_lock);
+     if (_state != IDLE || _engine == NULL) return;
+
+     FlameGraph flamegraph(args._title, args._counter, args._width, args._height, args._minwidth, args._reverse);
+     FrameName fn(args._style, _thread_names_lock, _thread_names);
+
+     for (int i = 0; i < MAX_CALLTRACES; i++) {
+         CallTraceSample& trace = _traces[i];
+         if (trace._samples == 0) continue;
+
+         u64 samples = (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter);
+
+         Trie* f = flamegraph.root();
+         if (trace._num_frames == 0) {
+             f = f->addChild("[frame_buffer_overflow]", samples);
+         } else if (args._reverse) {
+             for (int j = 0; j < trace._num_frames; j++) {
+                 const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+                 f = f->addChild(frame_name, samples);
+             }
+         } else {
+             for (int j = trace._num_frames - 1; j >= 0; j--) {
+                 const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+                 f = f->addChild(frame_name, samples);
+             }
+         }
+         f->addLeaf(samples);
+     }
+
+     flamegraph.dump(out, tree);
+ }
+
+ void Profiler::dumpTraces(std::ostream& out, Arguments& args) {
+     MutexLocker ml(_state_lock);
+     if (_state != IDLE || _engine == NULL) return;
+
+     FrameName fn(args._style | STYLE_DOTTED, _thread_names_lock, _thread_names);
+     double percent = 100.0 / _total_counter;
+     char buf[1024];
+
+     CallTraceSample** traces = new CallTraceSample*[MAX_CALLTRACES];
+     for (int i = 0; i < MAX_CALLTRACES; i++) {
+         traces[i] = &_traces[i];
+     }
+     qsort(traces, MAX_CALLTRACES, sizeof(CallTraceSample*), CallTraceSample::comparator);
+
+     int max_traces = args._dump_traces < MAX_CALLTRACES ? args._dump_traces : MAX_CALLTRACES;
+     for (int i = 0; i < max_traces; i++) {
+         CallTraceSample* trace = traces[i];
+         if (trace->_samples == 0) break;
+
+         snprintf(buf, sizeof(buf), "--- %lld %s (%.2f%%), %lld sample%s\n",
+                  trace->_counter, _engine->units(), trace->_counter * percent,
+                  trace->_samples, trace->_samples == 1 ? "" : "s");
+         out << buf;
+
+         if (trace->_num_frames == 0) {
+             out << " [ 0] [frame_buffer_overflow]\n";
+         }
+
+         for (int j = 0; j < trace->_num_frames; j++) {
+             const char* frame_name = fn.name(_frame_buffer[trace->_start_frame + j]);
+             snprintf(buf, sizeof(buf), " [%2d] %s\n", j, frame_name);
+             out << buf;
+         }
+         out << "\n";
+     }
+
+     delete[] traces;
+ }
+
+ void Profiler::dumpFlat(std::ostream& out, Arguments& args) {
+     MutexLocker ml(_state_lock);
+     if (_state != IDLE || _engine == NULL) return;
+
+     FrameName fn(args._style | STYLE_DOTTED, _thread_names_lock, _thread_names);
+     double percent = 100.0 / _total_counter;
+     char buf[1024];
+
+     MethodSample** methods = new MethodSample*[MAX_CALLTRACES];
+     for (int i = 0; i < MAX_CALLTRACES; i++) {
+         methods[i] = &_methods[i];
+     }
+     qsort(methods, MAX_CALLTRACES, sizeof(MethodSample*), MethodSample::comparator);
+
+     snprintf(buf, sizeof(buf), "%12s percent samples top\n"
+              " ---------- ------- ------- ---\n", _engine->units());
+     out << buf;
+
+     int max_methods = args._dump_flat < MAX_CALLTRACES ? args._dump_flat : MAX_CALLTRACES;
+     for (int i = 0; i < max_methods; i++) {
+         MethodSample* method = methods[i];
+         if (method->_samples == 0) break;
+
+         const char* frame_name = fn.name(method->_method);
+         snprintf(buf, sizeof(buf), "%12lld %6.2f%% %7lld %s\n",
+                  method->_counter, method->_counter * percent, method->_samples, frame_name);
+         out << buf;
+     }
+
+     delete[] methods;
+ }
+
+ void Profiler::runInternal(Arguments& args, std::ostream& out) {
+     switch (args._action) {
+         case ACTION_START:
+         case ACTION_RESUME: {
+             Error error = start(args, args._action == ACTION_START);
+             if (error) {
+                 out << error.message() << std::endl;
+             } else {
+                 out << "Started [" << args._event << "] profiling" << std::endl;
+             }
+             break;
+         }
+         case ACTION_STOP: {
+             Error error = stop();
+             if (error) {
+                 out << error.message() << std::endl;
+             } else {
+                 out << "Stopped profiling after " << uptime() << " seconds. No dump options specified" << std::endl;
+             }
+             break;
+         }
+         case ACTION_STATUS: {
+             MutexLocker ml(_state_lock);
+             if (_state == RUNNING) {
+                 out << "[" << _engine->name() << "] profiling is running for " << uptime() << " seconds" << std::endl;
+             } else {
+                 out << "Profiler is not active" << std::endl;
+             }
+             break;
+         }
+         case ACTION_LIST: {
+             out << "Basic events:" << std::endl;
+             out << " " << EVENT_CPU << std::endl;
+             out << " " << EVENT_ALLOC << std::endl;
+             out << " " << EVENT_LOCK << std::endl;
+             out << " " << EVENT_WALL << std::endl;
+             out << " " << EVENT_ITIMER << std::endl;
+
+             if (PerfEvents::supported()) {
+                 out << "Perf events:" << std::endl;
+                 // The first perf event is "cpu" which is already printed
+                 for (int event_id = 1; ; event_id++) {
+                     const char* event_name = PerfEvents::getEventName(event_id);
+                     if (event_name == NULL) break;
+                     out << " " << event_name << std::endl;
+                 }
+             }
+             break;
+         }
+         case ACTION_VERSION:
+             out << FULL_VERSION_STRING;
+             break;
+         case ACTION_DUMP:
+             stop();
+             switch (args._output) {
+                 case OUTPUT_COLLAPSED:
+                     dumpCollapsed(out, args);
+                     break;
+                 case OUTPUT_FLAMEGRAPH:
+                     dumpFlameGraph(out, args, false);
+                     break;
+                 case OUTPUT_TREE:
+                     dumpFlameGraph(out, args, true);
+                     break;
+                 case OUTPUT_TEXT:
+                     dumpSummary(out);
+                     if (args._dump_traces > 0) dumpTraces(out, args);
+                     if (args._dump_flat > 0) dumpFlat(out, args);
+                     break;
+                 default:
+                     break;
+             }
+             break;
+         default:
+             break;
+     }
+ }
+
+ void Profiler::run(Arguments& args) {
+     if (args._file == NULL || args._output == OUTPUT_JFR) {
+         runInternal(args, std::cout);
+     } else {
+         std::ofstream out(args._file, std::ios::out | std::ios::trunc);
+         if (out.is_open()) {
+             runInternal(args, out);
+             out.close();
+         } else {
+             std::cerr << "Could not open " << args._file << std::endl;
+         }
+     }
+ }
+
+ void Profiler::shutdown(Arguments& args) {
+     MutexLocker ml(_state_lock);
+
+     // The last chance to dump profile before VM terminates
+     if (_state == RUNNING && args._output != OUTPUT_NONE) {
+         args._action = ACTION_DUMP;
+         run(args);
+     }
+
+     _state = TERMINATED;
+ }