jruby-async-profiler 0.1.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.gitmodules +3 -0
- data/Gemfile +4 -0
- data/README.md +35 -0
- data/Rakefile +2 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/Rakefile +6 -0
- data/ext/async-profiler/.gitattributes +1 -0
- data/ext/async-profiler/.gitignore +6 -0
- data/ext/async-profiler/.travis.yml +11 -0
- data/ext/async-profiler/CHANGELOG.md +107 -0
- data/ext/async-profiler/JavaHome.class +0 -0
- data/ext/async-profiler/LICENSE +201 -0
- data/ext/async-profiler/Makefile +66 -0
- data/ext/async-profiler/README.md +487 -0
- data/ext/async-profiler/demo/SwingSet2.svg +2247 -0
- data/ext/async-profiler/docs/cddl1.txt +358 -0
- data/ext/async-profiler/profiler.sh +240 -0
- data/ext/async-profiler/src/allocTracer.cpp +155 -0
- data/ext/async-profiler/src/allocTracer.h +74 -0
- data/ext/async-profiler/src/arch.h +69 -0
- data/ext/async-profiler/src/arguments.cpp +265 -0
- data/ext/async-profiler/src/arguments.h +152 -0
- data/ext/async-profiler/src/codeCache.cpp +128 -0
- data/ext/async-profiler/src/codeCache.h +99 -0
- data/ext/async-profiler/src/engine.cpp +50 -0
- data/ext/async-profiler/src/engine.h +38 -0
- data/ext/async-profiler/src/flameGraph.cpp +770 -0
- data/ext/async-profiler/src/flameGraph.h +118 -0
- data/ext/async-profiler/src/flightRecorder.cpp +727 -0
- data/ext/async-profiler/src/flightRecorder.h +39 -0
- data/ext/async-profiler/src/frameName.cpp +189 -0
- data/ext/async-profiler/src/frameName.h +56 -0
- data/ext/async-profiler/src/itimer.cpp +49 -0
- data/ext/async-profiler/src/itimer.h +43 -0
- data/ext/async-profiler/src/jattach/jattach.c +437 -0
- data/ext/async-profiler/src/java/one/profiler/AsyncProfiler.java +160 -0
- data/ext/async-profiler/src/java/one/profiler/AsyncProfilerMXBean.java +43 -0
- data/ext/async-profiler/src/java/one/profiler/Counter.java +25 -0
- data/ext/async-profiler/src/java/one/profiler/Events.java +28 -0
- data/ext/async-profiler/src/javaApi.cpp +124 -0
- data/ext/async-profiler/src/lockTracer.cpp +161 -0
- data/ext/async-profiler/src/lockTracer.h +55 -0
- data/ext/async-profiler/src/mutex.cpp +33 -0
- data/ext/async-profiler/src/mutex.h +49 -0
- data/ext/async-profiler/src/os.h +45 -0
- data/ext/async-profiler/src/os_linux.cpp +129 -0
- data/ext/async-profiler/src/os_macos.cpp +115 -0
- data/ext/async-profiler/src/perfEvents.h +60 -0
- data/ext/async-profiler/src/perfEvents_linux.cpp +550 -0
- data/ext/async-profiler/src/perfEvents_macos.cpp +64 -0
- data/ext/async-profiler/src/profiler.cpp +952 -0
- data/ext/async-profiler/src/profiler.h +238 -0
- data/ext/async-profiler/src/spinLock.h +66 -0
- data/ext/async-profiler/src/stackFrame.h +57 -0
- data/ext/async-profiler/src/stackFrame_aarch64.cpp +75 -0
- data/ext/async-profiler/src/stackFrame_arm.cpp +58 -0
- data/ext/async-profiler/src/stackFrame_i386.cpp +82 -0
- data/ext/async-profiler/src/stackFrame_x64.cpp +113 -0
- data/ext/async-profiler/src/symbols.h +37 -0
- data/ext/async-profiler/src/symbols_linux.cpp +354 -0
- data/ext/async-profiler/src/symbols_macos.cpp +156 -0
- data/ext/async-profiler/src/vmEntry.cpp +173 -0
- data/ext/async-profiler/src/vmEntry.h +105 -0
- data/ext/async-profiler/src/vmStructs.cpp +104 -0
- data/ext/async-profiler/src/vmStructs.h +112 -0
- data/ext/async-profiler/src/wallClock.cpp +96 -0
- data/ext/async-profiler/src/wallClock.h +56 -0
- data/ext/async-profiler/test/AllocatingTarget.java +26 -0
- data/ext/async-profiler/test/LoadLibraryTest.java +21 -0
- data/ext/async-profiler/test/Target.java +31 -0
- data/ext/async-profiler/test/ThreadsTarget.java +35 -0
- data/ext/async-profiler/test/alloc-smoke-test.sh +36 -0
- data/ext/async-profiler/test/load-library-test.sh +35 -0
- data/ext/async-profiler/test/smoke-test.sh +37 -0
- data/ext/async-profiler/test/thread-smoke-test.sh +32 -0
- data/jruby-async-profiler.gemspec +32 -0
- data/lib/jruby/async/profiler.rb +10 -0
- data/lib/jruby/async/profiler/version.rb +7 -0
- metadata +155 -0
data/ext/async-profiler/src/perfEvents_macos.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2017 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __APPLE__
+
+#include "perfEvents.h"
+
+
+int PerfEvents::_max_events;
+PerfEvent* PerfEvents::_events;
+PerfEventType* PerfEvents::_event_type;
+long PerfEvents::_interval;
+Ring PerfEvents::_ring;
+bool PerfEvents::_print_extended_warning;
+
+
+bool PerfEvents::createForThread(int tid) { return false; }
+void PerfEvents::destroyForThread(int tid) {}
+void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {}
+
+const char* PerfEvents::units() {
+    return "ns";
+}
+
+Error PerfEvents::start(Arguments& args) {
+    return Error("PerfEvents are unsupported on macOS");
+}
+
+void PerfEvents::stop() {
+}
+
+void PerfEvents::onThreadStart() {
+}
+
+void PerfEvents::onThreadEnd() {
+}
+
+int Profiler::getNativeTrace(void* ucontext, int tid, const void** callchain, int max_depth,
+                             const void* jit_min_address, const void* jit_max_address) {
+    return 0;
+}
+
+bool PerfEvents::supported() {
+    return false;
+}
+
+const char* PerfEvents::getEventName(int event_id) {
+    return NULL;
+}
+
+#endif // __APPLE__
data/ext/async-profiler/src/profiler.cpp
@@ -0,0 +1,952 @@
+/*
+ * Copyright 2016 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <dlfcn.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+#include "profiler.h"
+#include "perfEvents.h"
+#include "allocTracer.h"
+#include "lockTracer.h"
+#include "wallClock.h"
+#include "itimer.h"
+#include "flameGraph.h"
+#include "flightRecorder.h"
+#include "frameName.h"
+#include "os.h"
+#include "stackFrame.h"
+#include "symbols.h"
+#include "vmStructs.h"
+
+
+Profiler Profiler::_instance;
+
+static PerfEvents perf_events;
+static AllocTracer alloc_tracer;
+static LockTracer lock_tracer;
+static WallClock wall_clock;
+static ITimer itimer;
+
+
+u64 Profiler::hashCallTrace(int num_frames, ASGCT_CallFrame* frames) {
+    const u64 M = 0xc6a4a7935bd1e995ULL;
+    const int R = 47;
+
+    u64 h = num_frames * M;
+
+    for (int i = 0; i < num_frames; i++) {
+        u64 k = (u64)frames[i].method_id;
+        k *= M;
+        k ^= k >> R;
+        k *= M;
+        h ^= k;
+        h *= M;
+    }
+
+    h ^= h >> R;
+    h *= M;
+    h ^= h >> R;
+
+    return h;
+}
+
+int Profiler::storeCallTrace(int num_frames, ASGCT_CallFrame* frames, u64 counter) {
+    u64 hash = hashCallTrace(num_frames, frames);
+    int bucket = (int)(hash % MAX_CALLTRACES);
+    int i = bucket;
+
+    while (_hashes[i] != hash) {
+        if (_hashes[i] == 0) {
+            if (__sync_bool_compare_and_swap(&_hashes[i], 0, hash)) {
+                copyToFrameBuffer(num_frames, frames, &_traces[i]);
+                break;
+            }
+            continue;
+        }
+
+        if (++i == MAX_CALLTRACES) i = 0;  // move to next slot
+        if (i == bucket) return 0;         // the table is full
+    }
+
+    // CallTrace hash found => atomically increment counter
+    atomicInc(_traces[i]._samples);
+    atomicInc(_traces[i]._counter, counter);
+    return i;
+}
+
+void Profiler::copyToFrameBuffer(int num_frames, ASGCT_CallFrame* frames, CallTraceSample* trace) {
+    // Atomically reserve space in frame buffer
+    int start_frame;
+    do {
+        start_frame = _frame_buffer_index;
+        if (start_frame + num_frames > _frame_buffer_size) {
+            _frame_buffer_overflow = true;  // not enough space to store full trace
+            return;
+        }
+    } while (!__sync_bool_compare_and_swap(&_frame_buffer_index, start_frame, start_frame + num_frames));
+
+    trace->_start_frame = start_frame;
+    trace->_num_frames = num_frames;
+
+    for (int i = 0; i < num_frames; i++) {
+        _frame_buffer[start_frame++] = frames[i];
+    }
+}
+
+u64 Profiler::hashMethod(jmethodID method) {
+    const u64 M = 0xc6a4a7935bd1e995ULL;
+    const int R = 17;
+
+    u64 h = (u64)method;
+
+    h ^= h >> R;
+    h *= M;
+    h ^= h >> R;
+
+    return h;
+}
+
+void Profiler::storeMethod(jmethodID method, jint bci, u64 counter) {
+    u64 hash = hashMethod(method);
+    int bucket = (int)(hash % MAX_CALLTRACES);
+    int i = bucket;
+
+    while (_methods[i]._method.method_id != method) {
+        if (_methods[i]._method.method_id == NULL) {
+            if (__sync_bool_compare_and_swap(&_methods[i]._method.method_id, NULL, method)) {
+                _methods[i]._method.bci = bci;
+                break;
+            }
+            continue;
+        }
+
+        if (++i == MAX_CALLTRACES) i = 0;  // move to next slot
+        if (i == bucket) return;           // the table is full
+    }
+
+    // Method found => atomically increment counter
+    atomicInc(_methods[i]._samples);
+    atomicInc(_methods[i]._counter, counter);
+}
+
+void Profiler::addJavaMethod(const void* address, int length, jmethodID method) {
+    _jit_lock.lock();
+    _java_methods.add(address, length, method);
+    updateJitRange(address, (const char*)address + length);
+    _jit_lock.unlock();
+}
+
+void Profiler::removeJavaMethod(const void* address, jmethodID method) {
+    _jit_lock.lock();
+    _java_methods.remove(address, method);
+    _jit_lock.unlock();
+}
+
+void Profiler::addRuntimeStub(const void* address, int length, const char* name) {
+    _jit_lock.lock();
+    _runtime_stubs.add(address, length, name);
+    updateJitRange(address, (const char*)address + length);
+    _jit_lock.unlock();
+}
+
+void Profiler::updateJitRange(const void* min_address, const void* max_address) {
+    if (min_address < _jit_min_address) _jit_min_address = min_address;
+    if (max_address > _jit_max_address) _jit_max_address = max_address;
+}
+
+const char* Profiler::asgctError(int code) {
+    switch (code) {
+        case ticks_no_Java_frame:
+        case ticks_unknown_not_Java:
+        case ticks_not_walkable_not_Java:
+            // Not in Java context at all; this is not an error
+            return NULL;
+        case ticks_GC_active:
+            return "GC_active";
+        case ticks_unknown_Java:
+            return "unknown_Java";
+        case ticks_not_walkable_Java:
+            return "not_walkable_Java";
+        case ticks_thread_exit:
+            return "thread_exit";
+        case ticks_deopt:
+            return "deoptimization";
+        case ticks_safepoint:
+            return "safepoint";
+        case ticks_skipped:
+            return "skipped";
+        default:
+            // Should not happen
+            return "unexpected_state";
+    }
+}
+
+NativeCodeCache* Profiler::jvmLibrary() {
+    const void* asyncGetCallTraceAddr = (const void*)VM::_asyncGetCallTrace;
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        if (_native_libs[i]->contains(asyncGetCallTraceAddr)) {
+            return _native_libs[i];
+        }
+    }
+    return NULL;
+}
+
+const void* Profiler::findSymbol(const char* name) {
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        const void* address = _native_libs[i]->findSymbol(name);
+        if (address != NULL) {
+            return address;
+        }
+    }
+    return NULL;
+}
+
+const char* Profiler::findNativeMethod(const void* address) {
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        if (_native_libs[i]->contains(address)) {
+            return _native_libs[i]->binarySearch(address);
+        }
+    }
+    return NULL;
+}
+
+int Profiler::getNativeTrace(void* ucontext, ASGCT_CallFrame* frames, int tid, bool* stopped_at_java_frame) {
+    const void* native_callchain[MAX_NATIVE_FRAMES];
+    int native_frames = _engine->getNativeTrace(ucontext, tid, native_callchain, MAX_NATIVE_FRAMES,
+                                                _jit_min_address, _jit_max_address);
+
+    *stopped_at_java_frame = false;
+    if (native_frames > 0) {
+        const void* last_pc = native_callchain[native_frames - 1];
+        if (last_pc >= _jit_min_address && last_pc < _jit_max_address) {
+            *stopped_at_java_frame = true;
+            native_frames--;
+        }
+    }
+
+    for (int i = 0; i < native_frames; i++) {
+        frames[i].bci = BCI_NATIVE_FRAME;
+        frames[i].method_id = (jmethodID)findNativeMethod(native_callchain[i]);
+    }
+
+    return native_frames;
+}
+
+int Profiler::getJavaTraceAsync(void* ucontext, ASGCT_CallFrame* frames, int max_depth) {
+    JNIEnv* jni = VM::jni();
+    if (jni == NULL) {
+        // Not a Java thread
+        return 0;
+    }
+
+    ASGCT_CallTrace trace = {jni, 0, frames};
+    VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
+
+#ifndef SAFE_MODE
+    if (trace.num_frames == ticks_unknown_Java) {
+        // If current Java stack is not walkable (e.g. the top frame is not fully constructed),
+        // try to manually pop the top frame off, hoping that the previous frame is walkable.
+        // This is a temporary workaround for AsyncGetCallTrace issues,
+        // see https://bugs.openjdk.java.net/browse/JDK-8178287
+        StackFrame top_frame(ucontext);
+        uintptr_t pc = top_frame.pc(),
+                  sp = top_frame.sp(),
+                  fp = top_frame.fp();
+
+        // Guess top method by PC and insert it manually into the call trace
+        bool is_entry_frame = false;
+        if (fillTopFrame((const void*)pc, trace.frames)) {
+            is_entry_frame = trace.frames->bci == BCI_NATIVE_FRAME &&
+                             strcmp((const char*)trace.frames->method_id, "call_stub") == 0;
+            trace.frames++;
+            max_depth--;
+        }
+
+        if (top_frame.pop(is_entry_frame)) {
+            // Retry with the fixed context, but only if PC looks reasonable,
+            // otherwise AsyncGetCallTrace may crash
+            if (addressInCode((const void*)top_frame.pc())) {
+                VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
+            }
+            top_frame.restore(pc, sp, fp);
+
+            if (trace.num_frames > 0) {
+                return trace.num_frames + (trace.frames - frames);
+            }
+
+            // Restore previous context
+            trace.num_frames = ticks_unknown_Java;
+        }
+    } else if (trace.num_frames == ticks_GC_active && VM::is_hotspot() && _JvmtiEnv_GetStackTrace != NULL) {
+        // While GC is running Java threads are known to be at safepoint
+        return getJavaTraceJvmti((jvmtiFrameInfo*)frames, frames, max_depth);
+    }
+#endif // SAFE_MODE
+
+    if (trace.num_frames > 0) {
+        return trace.num_frames;
+    }
+
+    const char* err_string = asgctError(trace.num_frames);
+    if (err_string == NULL) {
+        // No Java stack, because thread is not in Java context
+        return 0;
+    }
+
+    atomicInc(_failures[-trace.num_frames]);
+    frames[0].bci = BCI_ERROR;
+    frames[0].method_id = (jmethodID)err_string;
+    return 1;
+}
+
+int Profiler::getJavaTraceJvmti(jvmtiFrameInfo* jvmti_frames, ASGCT_CallFrame* frames, int max_depth) {
+    // We cannot call pure JVM TI here, because it assumes _thread_in_native state,
+    // but allocation events happen in _thread_in_vm state,
+    // see https://github.com/jvm-profiling-tools/async-profiler/issues/64
+    void* thread = _ThreadLocalStorage_thread();
+    int num_frames;
+    if (_JvmtiEnv_GetStackTrace(NULL, thread, 0, max_depth, jvmti_frames, &num_frames) == 0 && num_frames > 0) {
+        // Profiler expects stack trace in AsyncGetCallTrace format; convert it now
+        for (int i = 0; i < num_frames; i++) {
+            frames[i].method_id = jvmti_frames[i].method;
+            frames[i].bci = 0;
+        }
+        return num_frames;
+    }
+
+    return 0;
+}
+
+int Profiler::makeEventFrame(ASGCT_CallFrame* frames, jint event_type, jmethodID event) {
+    frames[0].bci = event_type;
+    frames[0].method_id = event;
+    return 1;
+}
+
+bool Profiler::fillTopFrame(const void* pc, ASGCT_CallFrame* frame) {
+    jmethodID method = NULL;
+    _jit_lock.lockShared();
+
+    // Check if PC lies within JVM's compiled code cache
+    if (pc >= _jit_min_address && pc < _jit_max_address) {
+        if ((method = _java_methods.find(pc)) != NULL) {
+            // PC belong to a JIT compiled method
+            frame->bci = 0;
+            frame->method_id = method;
+        } else if ((method = _runtime_stubs.find(pc)) != NULL) {
+            // PC belongs to a VM runtime stub
+            frame->bci = BCI_NATIVE_FRAME;
+            frame->method_id = method;
+        }
+    }
+
+    _jit_lock.unlockShared();
+    return method != NULL;
+}
+
+bool Profiler::addressInCode(const void* pc) {
+    // 1. Check if PC lies within JVM's compiled code cache
+    // Address in CodeCache is executable if it belongs to a Java method or a runtime stub
+    if (pc >= _jit_min_address && pc < _jit_max_address) {
+        _jit_lock.lockShared();
+        bool valid = _java_methods.find(pc) != NULL || _runtime_stubs.find(pc) != NULL;
+        _jit_lock.unlockShared();
+        return valid;
+    }
+
+    // 2. Check if PC belongs to executable code of shared libraries
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        if (_native_libs[i]->contains(pc)) {
+            return true;
+        }
+    }
+
+    // This can be some other dynamically generated code, but we don't know it. Better stay safe.
+    return false;
+}
+
+void Profiler::recordSample(void* ucontext, u64 counter, jint event_type, jmethodID event) {
+    int tid = OS::threadId();
+
+    u64 lock_index = atomicInc(_total_samples) % CONCURRENCY_LEVEL;
+    if (!_locks[lock_index].tryLock()) {
+        // Too many concurrent signals already
+        atomicInc(_failures[-ticks_skipped]);
+
+        if (event_type == 0) {
+            // Need to reset PerfEvents ring buffer, even though we discard the collected trace
+            _engine->getNativeTrace(ucontext, tid, NULL, 0, _jit_min_address, _jit_max_address);
+        }
+        return;
+    }
+
+    atomicInc(_total_counter, counter);
+
+    ASGCT_CallFrame* frames = _calltrace_buffer[lock_index]->_asgct_frames;
+    bool need_java_trace = true;
+
+    int num_frames = 0;
+    if (event_type == 0) {
+        num_frames = getNativeTrace(ucontext, frames, tid, &need_java_trace);
+    } else if (event != NULL) {
+        num_frames = makeEventFrame(frames, event_type, event);
+    }
+
+    if ((_sync_walk || event_type != 0) && _JvmtiEnv_GetStackTrace != NULL) {
+        // Events like object allocation happen at known places where it is safe to call JVM TI
+        jvmtiFrameInfo* jvmti_frames = _calltrace_buffer[lock_index]->_jvmti_frames;
+        num_frames += getJavaTraceJvmti(jvmti_frames + num_frames, frames + num_frames, _max_stack_depth);
+    } else if (OS::isSignalSafeTLS() || need_java_trace) {
+        num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth);
+    }
+
+    if (num_frames == 0 || (num_frames == 1 && event != NULL)) {
+        num_frames += makeEventFrame(frames + num_frames, BCI_ERROR, (jmethodID)"not_walkable");
+    }
+
+    if (_threads) {
+        num_frames += makeEventFrame(frames + num_frames, BCI_THREAD_ID, (jmethodID)(uintptr_t)tid);
+    }
+
+    storeMethod(frames[0].method_id, frames[0].bci, counter);
+    int call_trace_id = storeCallTrace(num_frames, frames, counter);
+    _jfr.recordExecutionSample(lock_index, tid, call_trace_id);
+
+    _locks[lock_index].unlock();
+}
+
+jboolean JNICALL Profiler::NativeLibraryLoadTrap(JNIEnv* env, jobject self, jstring name, jboolean builtin) {
+    jboolean result = _instance._original_NativeLibrary_load(env, self, name, builtin);
+    Symbols::parseLibraries(_instance._native_libs, _instance._native_lib_count, MAX_NATIVE_LIBS);
+    return result;
+}
+
+void Profiler::bindNativeLibraryLoad(NativeLoadLibraryFunc entry) {
+    JNIEnv* env = VM::jni();
+    jclass NativeLibrary = env->FindClass("java/lang/ClassLoader$NativeLibrary");
+
+    if (NativeLibrary != NULL) {
+        // Find JNI entry for NativeLibrary.load() method
+        if (_original_NativeLibrary_load == NULL) {
+            if (env->GetMethodID(NativeLibrary, "load0", "(Ljava/lang/String;Z)Z") != NULL) {
+                // JDK 9+
+                _load_method.name = (char*)"load0";
+                _load_method.signature = (char*)"(Ljava/lang/String;Z)Z";
+            } else if (env->GetMethodID(NativeLibrary, "load", "(Ljava/lang/String;Z)V") != NULL) {
+                // JDK 8
+                _load_method.name = (char*)"load";
+                _load_method.signature = (char*)"(Ljava/lang/String;Z)V";
+            } else {
+                // JDK 7
+                _load_method.name = (char*)"load";
+                _load_method.signature = (char*)"(Ljava/lang/String;)V";
+            }
+
+            char jni_name[64];
+            strcpy(jni_name, "Java_java_lang_ClassLoader_00024NativeLibrary_");
+            strcat(jni_name, _load_method.name);
+            _original_NativeLibrary_load = (NativeLoadLibraryFunc)dlsym(VM::_libjava, jni_name);
+        }
+
+        // Change function pointer for the native method
+        if (_original_NativeLibrary_load != NULL) {
+            _load_method.fnPtr = (void*)entry;
+            env->RegisterNatives(NativeLibrary, &_load_method, 1);
+        }
+    }
+
+    env->ExceptionClear();
+}
+
+void Profiler::initJvmtiFunctions(NativeCodeCache* libjvm) {
+    if (_JvmtiEnv_GetStackTrace == NULL) {
+        // Find ThreadLocalStorage::thread() if exists
+        if (_ThreadLocalStorage_thread == NULL) {
+            _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN18ThreadLocalStorage6threadEv");
+        }
+        // Fallback to ThreadLocalStorage::get_thread_slow()
+        if (_ThreadLocalStorage_thread == NULL) {
+            _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN18ThreadLocalStorage15get_thread_slowEv");
+        }
+        // Fallback to Thread::current(), e.g. on Zing
+        if (_ThreadLocalStorage_thread == NULL) {
+            _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN6Thread7currentEv");
+        }
+        // JvmtiEnv::GetStackTrace(JavaThread* java_thread, jint start_depth, jint max_frame_count, jvmtiFrameInfo* frame_buffer, jint* count_ptr)
+        if (_ThreadLocalStorage_thread != NULL) {
+            _JvmtiEnv_GetStackTrace = (jvmtiError (*)(void*, void*, jint, jint, jvmtiFrameInfo*, jint*))
+                libjvm->findSymbol("_ZN8JvmtiEnv13GetStackTraceEP10JavaThreadiiP15_jvmtiFrameInfoPi");
+        }
+
+        if (_JvmtiEnv_GetStackTrace == NULL) {
+            fprintf(stderr, "WARNING: Install JVM debug symbols to improve profile accuracy\n");
+        }
+    }
+}
+
+void Profiler::setThreadName(int tid, const char* name) {
+    MutexLocker ml(_thread_names_lock);
+    _thread_names[tid] = name;
+}
+
+void Profiler::updateThreadName(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) {
+    if (_threads && VMThread::available()) {
+        VMThread* vm_thread = VMThread::fromJavaThread(jni, thread);
+        jvmtiThreadInfo thread_info;
+        if (vm_thread != NULL && jvmti->GetThreadInfo(thread, &thread_info) == 0) {
+            setThreadName(vm_thread->osThreadId(), thread_info.name);
+            jvmti->Deallocate((unsigned char*)thread_info.name);
+        }
+    }
+}
+
+void Profiler::updateAllThreadNames() {
+    if (_threads && VMThread::available()) {
+        jvmtiEnv* jvmti = VM::jvmti();
+        jint thread_count;
+        jthread* thread_objects;
+        if (jvmti->GetAllThreads(&thread_count, &thread_objects) != 0) {
+            return;
+        }
+
+        JNIEnv* jni = VM::jni();
+        for (int i = 0; i < thread_count; i++) {
+            updateThreadName(jvmti, jni, thread_objects[i]);
+        }
+
+        jvmti->Deallocate((unsigned char*)thread_objects);
+    }
+}
+
+Engine* Profiler::selectEngine(const char* event_name) {
+    if (strcmp(event_name, EVENT_CPU) == 0) {
+        return PerfEvents::supported() ? (Engine*)&perf_events : (Engine*)&wall_clock;
+    } else if (strcmp(event_name, EVENT_ALLOC) == 0) {
+        return &alloc_tracer;
+    } else if (strcmp(event_name, EVENT_LOCK) == 0) {
+        return &lock_tracer;
+    } else if (strcmp(event_name, EVENT_WALL) == 0) {
+        return &wall_clock;
+    } else if (strcmp(event_name, EVENT_ITIMER) == 0) {
+        return &itimer;
+    } else {
+        return &perf_events;
+    }
+}
+
+Error Profiler::start(Arguments& args, bool reset) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE) {
+        return Error("Profiler already started");
+    }
+
+    if (VM::_asyncGetCallTrace == NULL) {
+        return Error("Could not find AsyncGetCallTrace function");
+    }
+
+    bool first_time = _native_lib_count == 0;
+    if (first_time || reset) {
+        // Reset counters
+        _total_samples = 0;
+        _total_counter = 0;
+        memset(_failures, 0, sizeof(_failures));
+        memset(_hashes, 0, sizeof(_hashes));
+        memset(_traces, 0, sizeof(_traces));
+        memset(_methods, 0, sizeof(_methods));
+
+        // Index 0 denotes special call trace with no frames
+        _hashes[0] = (u64)-1;
+
+        // Reset frame buffer
+        _frame_buffer_index = 0;
+        _frame_buffer_overflow = false;
+
+        // Reset thread names
+        {
+            MutexLocker ml(_thread_names_lock);
+            _thread_names.clear();
+        }
+    }
+
+    // (Re-)allocate frames
+    if (_frame_buffer_size != args._framebuf) {
+        _frame_buffer_size = args._framebuf;
+        _frame_buffer = (ASGCT_CallFrame*)realloc(_frame_buffer, _frame_buffer_size * sizeof(ASGCT_CallFrame));
+        if (_frame_buffer == NULL) {
+            _frame_buffer_size = 0;
+            return Error("Not enough memory to allocate frame buffer (try smaller framebuf)");
+        }
+    }
+
+    // (Re-)allocate calltrace buffers
+    if (_max_stack_depth != args._jstackdepth) {
+        _max_stack_depth = args._jstackdepth;
+        size_t buffer_size = (_max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES) * sizeof(CallTraceBuffer);
+
+        for (int i = 0; i < CONCURRENCY_LEVEL; i++) {
+            free(_calltrace_buffer[i]);
+            _calltrace_buffer[i] = (CallTraceBuffer*)malloc(buffer_size);
+            if (_calltrace_buffer[i] == NULL) {
+                _max_stack_depth = 0;
+                return Error("Not enough memory to allocate stack trace buffers (try smaller jstackdepth)");
+            }
+        }
+    }
+
+    _threads = args._threads && args._output != OUTPUT_JFR;
+    _sync_walk = args._sync_walk;
+
+    Symbols::parseLibraries(_native_libs, _native_lib_count, MAX_NATIVE_LIBS);
+    NativeCodeCache* libjvm = jvmLibrary();
+    if (libjvm == NULL) {
+        return Error("libjvm not found among loaded libraries");
+    }
+    VMStructs::init(libjvm);
+    initJvmtiFunctions(libjvm);
+
+    if (args._output == OUTPUT_JFR) {
+        Error error = _jfr.start(args._file);
+        if (error) {
+            return error;
+        }
+    }
+
+    _engine = selectEngine(args._event);
+    Error error = _engine->start(args);
+    if (error) {
+        _jfr.stop();
+        return error;
+    }
+
+    if (_threads) {
+        // Thread events might be already enabled by PerfEvents::start
+        switchThreadEvents(JVMTI_ENABLE);
+    }
+
+    bindNativeLibraryLoad(NativeLibraryLoadTrap);
+
+    _state = RUNNING;
+    _start_time = time(NULL);
+    return Error::OK;
+}
+
+Error Profiler::stop() {
+    MutexLocker ml(_state_lock);
+    if (_state != RUNNING) {
+        return Error("Profiler is not active");
+    }
+
+    _engine->stop();
+
+    // Acquire all spinlocks to avoid race with remaining signals
+    for (int i = 0; i < CONCURRENCY_LEVEL; i++) _locks[i].lock();
+    _jfr.stop();
+    for (int i = 0; i < CONCURRENCY_LEVEL; i++) _locks[i].unlock();
+
+    bindNativeLibraryLoad(_original_NativeLibrary_load);
+
+    switchThreadEvents(JVMTI_DISABLE);
+    updateAllThreadNames();
+
+    _state = IDLE;
+    return Error::OK;
+}
+
+void Profiler::switchThreadEvents(jvmtiEventMode mode) {
+    if (_thread_events_state != mode) {
+        jvmtiEnv* jvmti = VM::jvmti();
+        jvmti->SetEventNotificationMode(mode, JVMTI_EVENT_THREAD_START, NULL);
+        jvmti->SetEventNotificationMode(mode, JVMTI_EVENT_THREAD_END, NULL);
+        _thread_events_state = mode;
+    }
+}
+
+void Profiler::dumpSummary(std::ostream& out) {
+    char buf[256];
+    snprintf(buf, sizeof(buf),
+             "--- Execution profile ---\n"
+             "Total samples : %lld\n",
+             _total_samples);
+    out << buf;
+
+    double percent = 100.0 / _total_samples;
+    for (int i = 1; i < ASGCT_FAILURE_TYPES; i++) {
+        const char* err_string = asgctError(-i);
+        if (err_string != NULL && _failures[i] > 0) {
+            snprintf(buf, sizeof(buf), "%-20s: %lld (%.2f%%)\n", err_string, _failures[i], _failures[i] * percent);
+            out << buf;
+        }
+    }
+    out << std::endl;
+
+    if (_frame_buffer_overflow) {
+        out << "Frame buffer overflowed! Consider increasing its size." << std::endl;
+    } else {
+        double usage = 100.0 * _frame_buffer_index / _frame_buffer_size;
+        out << "Frame buffer usage : " << usage << "%" << std::endl;
+    }
+    out << std::endl;
+}
+
+/*
+ * Dump stacks in FlameGraph input format:
+ *
+ * <frame>;<frame>;...;<topmost frame> <count>
+ */
+void Profiler::dumpCollapsed(std::ostream& out, Arguments& args) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FrameName fn(args._style, _thread_names_lock, _thread_names);
+    u64 unknown = 0;
+
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        CallTraceSample& trace = _traces[i];
+        if (trace._samples == 0) continue;
+
+        if (trace._num_frames == 0) {
+            unknown += (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter);
+            continue;
+        }
+
+        for (int j = trace._num_frames - 1; j >= 0; j--) {
+            const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+            out << frame_name << (j == 0 ? ' ' : ';');
+        }
+        out << (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter) << "\n";
+    }
+
+    if (unknown != 0) {
+        out << "[frame_buffer_overflow] " << unknown << "\n";
+    }
+}
+
+void Profiler::dumpFlameGraph(std::ostream& out, Arguments& args, bool tree) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FlameGraph flamegraph(args._title, args._counter, args._width, args._height, args._minwidth, args._reverse);
+    FrameName fn(args._style, _thread_names_lock, _thread_names);
+
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        CallTraceSample& trace = _traces[i];
+        if (trace._samples == 0) continue;
+
+        u64 samples = (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter);
+
+        Trie* f = flamegraph.root();
+        if (trace._num_frames == 0) {
+            f = f->addChild("[frame_buffer_overflow]", samples);
+        } else if (args._reverse) {
+            for (int j = 0; j < trace._num_frames; j++) {
+                const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+                f = f->addChild(frame_name, samples);
+            }
+        } else {
+            for (int j = trace._num_frames - 1; j >= 0; j--) {
+                const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+                f = f->addChild(frame_name, samples);
+            }
+        }
+        f->addLeaf(samples);
+    }
+
+    flamegraph.dump(out, tree);
+}
+
+void Profiler::dumpTraces(std::ostream& out, Arguments& args) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FrameName fn(args._style | STYLE_DOTTED, _thread_names_lock, _thread_names);
+    double percent = 100.0 / _total_counter;
+    char buf[1024];
+
+    CallTraceSample** traces = new CallTraceSample*[MAX_CALLTRACES];
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        traces[i] = &_traces[i];
+    }
+    qsort(traces, MAX_CALLTRACES, sizeof(CallTraceSample*), CallTraceSample::comparator);
+
+    int max_traces = args._dump_traces < MAX_CALLTRACES ? args._dump_traces : MAX_CALLTRACES;
+    for (int i = 0; i < max_traces; i++) {
+        CallTraceSample* trace = traces[i];
+        if (trace->_samples == 0) break;
+
+        snprintf(buf, sizeof(buf), "--- %lld %s (%.2f%%), %lld sample%s\n",
+                 trace->_counter, _engine->units(), trace->_counter * percent,
+                 trace->_samples, trace->_samples == 1 ? "" : "s");
+        out << buf;
+
+        if (trace->_num_frames == 0) {
+            out << " [ 0] [frame_buffer_overflow]\n";
+        }
+
+        for (int j = 0; j < trace->_num_frames; j++) {
+            const char* frame_name = fn.name(_frame_buffer[trace->_start_frame + j]);
+            snprintf(buf, sizeof(buf), " [%2d] %s\n", j, frame_name);
+            out << buf;
+        }
+        out << "\n";
+    }
+
+    delete[] traces;
+}
+
+void Profiler::dumpFlat(std::ostream& out, Arguments& args) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FrameName fn(args._style | STYLE_DOTTED, _thread_names_lock, _thread_names);
+    double percent = 100.0 / _total_counter;
+    char buf[1024];
+
+    MethodSample** methods = new MethodSample*[MAX_CALLTRACES];
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        methods[i] = &_methods[i];
+    }
+    qsort(methods, MAX_CALLTRACES, sizeof(MethodSample*), MethodSample::comparator);
+
+    snprintf(buf, sizeof(buf), "%12s percent samples top\n"
+             " ---------- ------- ------- ---\n", _engine->units());
+    out << buf;
+
+    int max_methods = args._dump_flat < MAX_CALLTRACES ? args._dump_flat : MAX_CALLTRACES;
+    for (int i = 0; i < max_methods; i++) {
+        MethodSample* method = methods[i];
+        if (method->_samples == 0) break;
+
+        const char* frame_name = fn.name(method->_method);
+        snprintf(buf, sizeof(buf), "%12lld %6.2f%% %7lld %s\n",
+                 method->_counter, method->_counter * percent, method->_samples, frame_name);
+        out << buf;
+    }
+
+    delete[] methods;
+}
+
+void Profiler::runInternal(Arguments& args, std::ostream& out) {
+    switch (args._action) {
+        case ACTION_START:
+        case ACTION_RESUME: {
+            Error error = start(args, args._action == ACTION_START);
+            if (error) {
+                out << error.message() << std::endl;
+            } else {
+                out << "Started [" << args._event << "] profiling" << std::endl;
+            }
+            break;
+        }
+        case ACTION_STOP: {
+            Error error = stop();
+            if (error) {
+                out << error.message() << std::endl;
+            } else {
+                out << "Stopped profiling after " << uptime() << " seconds. No dump options specified" << std::endl;
+            }
+            break;
+        }
+        case ACTION_STATUS: {
+            MutexLocker ml(_state_lock);
+            if (_state == RUNNING) {
+                out << "[" << _engine->name() << "] profiling is running for " << uptime() << " seconds" << std::endl;
+            } else {
+                out << "Profiler is not active" << std::endl;
+            }
+            break;
+        }
+        case ACTION_LIST: {
+            out << "Basic events:" << std::endl;
+            out << " " << EVENT_CPU << std::endl;
+            out << " " << EVENT_ALLOC << std::endl;
+            out << " " << EVENT_LOCK << std::endl;
+            out << " " << EVENT_WALL << std::endl;
+            out << " " << EVENT_ITIMER << std::endl;
+
+            if (PerfEvents::supported()) {
+                out << "Perf events:" << std::endl;
+                // The first perf event is "cpu" which is already printed
+                for (int event_id = 1; ; event_id++) {
+                    const char* event_name = PerfEvents::getEventName(event_id);
+                    if (event_name == NULL) break;
+                    out << " " << event_name << std::endl;
+                }
+            }
+            break;
+        }
+        case ACTION_VERSION:
+            out << FULL_VERSION_STRING;
+            break;
+        case ACTION_DUMP:
+            stop();
+            switch (args._output) {
+                case OUTPUT_COLLAPSED:
+                    dumpCollapsed(out, args);
+                    break;
+                case OUTPUT_FLAMEGRAPH:
+                    dumpFlameGraph(out, args, false);
+                    break;
+                case OUTPUT_TREE:
+                    dumpFlameGraph(out, args, true);
+                    break;
+                case OUTPUT_TEXT:
+                    dumpSummary(out);
+                    if (args._dump_traces > 0) dumpTraces(out, args);
+                    if (args._dump_flat > 0) dumpFlat(out, args);
+                    break;
+                default:
+                    break;
+            }
+            break;
+        default:
+            break;
+    }
+}
+
+void Profiler::run(Arguments& args) {
+    if (args._file == NULL || args._output == OUTPUT_JFR) {
+        runInternal(args, std::cout);
+    } else {
+        std::ofstream out(args._file, std::ios::out | std::ios::trunc);
+        if (out.is_open()) {
+            runInternal(args, out);
+            out.close();
+        } else {
+            std::cerr << "Could not open " << args._file << std::endl;
+        }
+    }
+}
+
+void Profiler::shutdown(Arguments& args) {
+    MutexLocker ml(_state_lock);
+
+    // The last chance to dump profile before VM terminates
+    if (_state == RUNNING && args._output != OUTPUT_NONE) {
+        args._action = ACTION_DUMP;
+        run(args);
+    }
+
+    _state = TERMINATED;
+}