jruby-async-profiler 0.1.0
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.gitmodules +3 -0
- data/Gemfile +4 -0
- data/README.md +35 -0
- data/Rakefile +2 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/Rakefile +6 -0
- data/ext/async-profiler/.gitattributes +1 -0
- data/ext/async-profiler/.gitignore +6 -0
- data/ext/async-profiler/.travis.yml +11 -0
- data/ext/async-profiler/CHANGELOG.md +107 -0
- data/ext/async-profiler/JavaHome.class +0 -0
- data/ext/async-profiler/LICENSE +201 -0
- data/ext/async-profiler/Makefile +66 -0
- data/ext/async-profiler/README.md +487 -0
- data/ext/async-profiler/demo/SwingSet2.svg +2247 -0
- data/ext/async-profiler/docs/cddl1.txt +358 -0
- data/ext/async-profiler/profiler.sh +240 -0
- data/ext/async-profiler/src/allocTracer.cpp +155 -0
- data/ext/async-profiler/src/allocTracer.h +74 -0
- data/ext/async-profiler/src/arch.h +69 -0
- data/ext/async-profiler/src/arguments.cpp +265 -0
- data/ext/async-profiler/src/arguments.h +152 -0
- data/ext/async-profiler/src/codeCache.cpp +128 -0
- data/ext/async-profiler/src/codeCache.h +99 -0
- data/ext/async-profiler/src/engine.cpp +50 -0
- data/ext/async-profiler/src/engine.h +38 -0
- data/ext/async-profiler/src/flameGraph.cpp +770 -0
- data/ext/async-profiler/src/flameGraph.h +118 -0
- data/ext/async-profiler/src/flightRecorder.cpp +727 -0
- data/ext/async-profiler/src/flightRecorder.h +39 -0
- data/ext/async-profiler/src/frameName.cpp +189 -0
- data/ext/async-profiler/src/frameName.h +56 -0
- data/ext/async-profiler/src/itimer.cpp +49 -0
- data/ext/async-profiler/src/itimer.h +43 -0
- data/ext/async-profiler/src/jattach/jattach.c +437 -0
- data/ext/async-profiler/src/java/one/profiler/AsyncProfiler.java +160 -0
- data/ext/async-profiler/src/java/one/profiler/AsyncProfilerMXBean.java +43 -0
- data/ext/async-profiler/src/java/one/profiler/Counter.java +25 -0
- data/ext/async-profiler/src/java/one/profiler/Events.java +28 -0
- data/ext/async-profiler/src/javaApi.cpp +124 -0
- data/ext/async-profiler/src/lockTracer.cpp +161 -0
- data/ext/async-profiler/src/lockTracer.h +55 -0
- data/ext/async-profiler/src/mutex.cpp +33 -0
- data/ext/async-profiler/src/mutex.h +49 -0
- data/ext/async-profiler/src/os.h +45 -0
- data/ext/async-profiler/src/os_linux.cpp +129 -0
- data/ext/async-profiler/src/os_macos.cpp +115 -0
- data/ext/async-profiler/src/perfEvents.h +60 -0
- data/ext/async-profiler/src/perfEvents_linux.cpp +550 -0
- data/ext/async-profiler/src/perfEvents_macos.cpp +64 -0
- data/ext/async-profiler/src/profiler.cpp +952 -0
- data/ext/async-profiler/src/profiler.h +238 -0
- data/ext/async-profiler/src/spinLock.h +66 -0
- data/ext/async-profiler/src/stackFrame.h +57 -0
- data/ext/async-profiler/src/stackFrame_aarch64.cpp +75 -0
- data/ext/async-profiler/src/stackFrame_arm.cpp +58 -0
- data/ext/async-profiler/src/stackFrame_i386.cpp +82 -0
- data/ext/async-profiler/src/stackFrame_x64.cpp +113 -0
- data/ext/async-profiler/src/symbols.h +37 -0
- data/ext/async-profiler/src/symbols_linux.cpp +354 -0
- data/ext/async-profiler/src/symbols_macos.cpp +156 -0
- data/ext/async-profiler/src/vmEntry.cpp +173 -0
- data/ext/async-profiler/src/vmEntry.h +105 -0
- data/ext/async-profiler/src/vmStructs.cpp +104 -0
- data/ext/async-profiler/src/vmStructs.h +112 -0
- data/ext/async-profiler/src/wallClock.cpp +96 -0
- data/ext/async-profiler/src/wallClock.h +56 -0
- data/ext/async-profiler/test/AllocatingTarget.java +26 -0
- data/ext/async-profiler/test/LoadLibraryTest.java +21 -0
- data/ext/async-profiler/test/Target.java +31 -0
- data/ext/async-profiler/test/ThreadsTarget.java +35 -0
- data/ext/async-profiler/test/alloc-smoke-test.sh +36 -0
- data/ext/async-profiler/test/load-library-test.sh +35 -0
- data/ext/async-profiler/test/smoke-test.sh +37 -0
- data/ext/async-profiler/test/thread-smoke-test.sh +32 -0
- data/jruby-async-profiler.gemspec +32 -0
- data/lib/jruby/async/profiler.rb +10 -0
- data/lib/jruby/async/profiler/version.rb +7 -0
- metadata +155 -0
data/ext/async-profiler/src/perfEvents_macos.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2017 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __APPLE__
+
+#include "perfEvents.h"
+
+
+int PerfEvents::_max_events;
+PerfEvent* PerfEvents::_events;
+PerfEventType* PerfEvents::_event_type;
+long PerfEvents::_interval;
+Ring PerfEvents::_ring;
+bool PerfEvents::_print_extended_warning;
+
+
+bool PerfEvents::createForThread(int tid) { return false; }
+void PerfEvents::destroyForThread(int tid) {}
+void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {}
+
+const char* PerfEvents::units() {
+    return "ns";
+}
+
+Error PerfEvents::start(Arguments& args) {
+    return Error("PerfEvents are unsupported on macOS");
+}
+
+void PerfEvents::stop() {
+}
+
+void PerfEvents::onThreadStart() {
+}
+
+void PerfEvents::onThreadEnd() {
+}
+
+int PerfEvents::getNativeTrace(void* ucontext, int tid, const void** callchain, int max_depth,
+                               const void* jit_min_address, const void* jit_max_address) {
+    return 0;
+}
+
+bool PerfEvents::supported() {
+    return false;
+}
+
+const char* PerfEvents::getEventName(int event_id) {
+    return NULL;
+}
+
+#endif // __APPLE__
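A note on the file above: perfEvents_macos.cpp is the macOS stand-in for the Linux perf_events engine. Every entry point compiles, but supported() returns false and start() returns an error, so Profiler::selectEngine() in profiler.cpp below falls back to wall-clock sampling for the cpu event. The snippet that follows is a minimal, self-contained sketch of the error-as-value convention these engines use; the Error class here is a simplified stand-in for the real one declared elsewhere in the bundled sources, and startPerfEvents() is a hypothetical helper used only for illustration.

    // Minimal sketch of the Error-by-value convention (assumed simplification,
    // not the real async-profiler classes).
    #include <cstdio>

    class Error {
        const char* _message;
      public:
        explicit Error(const char* message) : _message(message) {}
        const char* message() const { return _message; }
        operator bool() const { return _message != NULL; }
        static const Error OK;
    };

    const Error Error::OK(NULL);

    // Mirrors what PerfEvents::start() does on macOS: compile fine, refuse to run.
    static Error startPerfEvents() {
        return Error("PerfEvents are unsupported on macOS");
    }

    int main() {
        Error error = startPerfEvents();
        if (error) {
            // Profiler::start() would print error.message() and pick another engine
            std::printf("engine unavailable: %s\n", error.message());
            return 1;
        }
        return 0;
    }

Since a sampling profiler does much of its work inside signal handlers, engines cannot rely on C++ exceptions; failure travels back as a value that the caller checks with if (error), which is the pattern Profiler::start() applies to _engine->start(args) below.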
data/ext/async-profiler/src/profiler.cpp
@@ -0,0 +1,952 @@
+/*
+ * Copyright 2016 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <dlfcn.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+#include "profiler.h"
+#include "perfEvents.h"
+#include "allocTracer.h"
+#include "lockTracer.h"
+#include "wallClock.h"
+#include "itimer.h"
+#include "flameGraph.h"
+#include "flightRecorder.h"
+#include "frameName.h"
+#include "os.h"
+#include "stackFrame.h"
+#include "symbols.h"
+#include "vmStructs.h"
+
+
+Profiler Profiler::_instance;
+
+static PerfEvents perf_events;
+static AllocTracer alloc_tracer;
+static LockTracer lock_tracer;
+static WallClock wall_clock;
+static ITimer itimer;
+
+
+u64 Profiler::hashCallTrace(int num_frames, ASGCT_CallFrame* frames) {
+    const u64 M = 0xc6a4a7935bd1e995ULL;
+    const int R = 47;
+
+    u64 h = num_frames * M;
+
+    for (int i = 0; i < num_frames; i++) {
+        u64 k = (u64)frames[i].method_id;
+        k *= M;
+        k ^= k >> R;
+        k *= M;
+        h ^= k;
+        h *= M;
+    }
+
+    h ^= h >> R;
+    h *= M;
+    h ^= h >> R;
+
+    return h;
+}
+
+int Profiler::storeCallTrace(int num_frames, ASGCT_CallFrame* frames, u64 counter) {
+    u64 hash = hashCallTrace(num_frames, frames);
+    int bucket = (int)(hash % MAX_CALLTRACES);
+    int i = bucket;
+
+    while (_hashes[i] != hash) {
+        if (_hashes[i] == 0) {
+            if (__sync_bool_compare_and_swap(&_hashes[i], 0, hash)) {
+                copyToFrameBuffer(num_frames, frames, &_traces[i]);
+                break;
+            }
+            continue;
+        }
+
+        if (++i == MAX_CALLTRACES) i = 0;  // move to next slot
+        if (i == bucket) return 0;         // the table is full
+    }
+
+    // CallTrace hash found => atomically increment counter
+    atomicInc(_traces[i]._samples);
+    atomicInc(_traces[i]._counter, counter);
+    return i;
+}
+
+void Profiler::copyToFrameBuffer(int num_frames, ASGCT_CallFrame* frames, CallTraceSample* trace) {
+    // Atomically reserve space in frame buffer
+    int start_frame;
+    do {
+        start_frame = _frame_buffer_index;
+        if (start_frame + num_frames > _frame_buffer_size) {
+            _frame_buffer_overflow = true;  // not enough space to store full trace
+            return;
+        }
+    } while (!__sync_bool_compare_and_swap(&_frame_buffer_index, start_frame, start_frame + num_frames));
+
+    trace->_start_frame = start_frame;
+    trace->_num_frames = num_frames;
+
+    for (int i = 0; i < num_frames; i++) {
+        _frame_buffer[start_frame++] = frames[i];
+    }
+}
+
+u64 Profiler::hashMethod(jmethodID method) {
+    const u64 M = 0xc6a4a7935bd1e995ULL;
+    const int R = 17;
+
+    u64 h = (u64)method;
+
+    h ^= h >> R;
+    h *= M;
+    h ^= h >> R;
+
+    return h;
+}
+
+void Profiler::storeMethod(jmethodID method, jint bci, u64 counter) {
+    u64 hash = hashMethod(method);
+    int bucket = (int)(hash % MAX_CALLTRACES);
+    int i = bucket;
+
+    while (_methods[i]._method.method_id != method) {
+        if (_methods[i]._method.method_id == NULL) {
+            if (__sync_bool_compare_and_swap(&_methods[i]._method.method_id, NULL, method)) {
+                _methods[i]._method.bci = bci;
+                break;
+            }
+            continue;
+        }
+
+        if (++i == MAX_CALLTRACES) i = 0;  // move to next slot
+        if (i == bucket) return;           // the table is full
+    }
+
+    // Method found => atomically increment counter
+    atomicInc(_methods[i]._samples);
+    atomicInc(_methods[i]._counter, counter);
+}
+
+void Profiler::addJavaMethod(const void* address, int length, jmethodID method) {
+    _jit_lock.lock();
+    _java_methods.add(address, length, method);
+    updateJitRange(address, (const char*)address + length);
+    _jit_lock.unlock();
+}
+
+void Profiler::removeJavaMethod(const void* address, jmethodID method) {
+    _jit_lock.lock();
+    _java_methods.remove(address, method);
+    _jit_lock.unlock();
+}
+
+void Profiler::addRuntimeStub(const void* address, int length, const char* name) {
+    _jit_lock.lock();
+    _runtime_stubs.add(address, length, name);
+    updateJitRange(address, (const char*)address + length);
+    _jit_lock.unlock();
+}
+
+void Profiler::updateJitRange(const void* min_address, const void* max_address) {
+    if (min_address < _jit_min_address) _jit_min_address = min_address;
+    if (max_address > _jit_max_address) _jit_max_address = max_address;
+}
+
+const char* Profiler::asgctError(int code) {
+    switch (code) {
+        case ticks_no_Java_frame:
+        case ticks_unknown_not_Java:
+        case ticks_not_walkable_not_Java:
+            // Not in Java context at all; this is not an error
+            return NULL;
+        case ticks_GC_active:
+            return "GC_active";
+        case ticks_unknown_Java:
+            return "unknown_Java";
+        case ticks_not_walkable_Java:
+            return "not_walkable_Java";
+        case ticks_thread_exit:
+            return "thread_exit";
+        case ticks_deopt:
+            return "deoptimization";
+        case ticks_safepoint:
+            return "safepoint";
+        case ticks_skipped:
+            return "skipped";
+        default:
+            // Should not happen
+            return "unexpected_state";
+    }
+}
+
+NativeCodeCache* Profiler::jvmLibrary() {
+    const void* asyncGetCallTraceAddr = (const void*)VM::_asyncGetCallTrace;
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        if (_native_libs[i]->contains(asyncGetCallTraceAddr)) {
+            return _native_libs[i];
+        }
+    }
+    return NULL;
+}
+
+const void* Profiler::findSymbol(const char* name) {
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        const void* address = _native_libs[i]->findSymbol(name);
+        if (address != NULL) {
+            return address;
+        }
+    }
+    return NULL;
+}
+
+const char* Profiler::findNativeMethod(const void* address) {
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        if (_native_libs[i]->contains(address)) {
+            return _native_libs[i]->binarySearch(address);
+        }
+    }
+    return NULL;
+}
+
+int Profiler::getNativeTrace(void* ucontext, ASGCT_CallFrame* frames, int tid, bool* stopped_at_java_frame) {
+    const void* native_callchain[MAX_NATIVE_FRAMES];
+    int native_frames = _engine->getNativeTrace(ucontext, tid, native_callchain, MAX_NATIVE_FRAMES,
+                                                _jit_min_address, _jit_max_address);
+
+    *stopped_at_java_frame = false;
+    if (native_frames > 0) {
+        const void* last_pc = native_callchain[native_frames - 1];
+        if (last_pc >= _jit_min_address && last_pc < _jit_max_address) {
+            *stopped_at_java_frame = true;
+            native_frames--;
+        }
+    }
+
+    for (int i = 0; i < native_frames; i++) {
+        frames[i].bci = BCI_NATIVE_FRAME;
+        frames[i].method_id = (jmethodID)findNativeMethod(native_callchain[i]);
+    }
+
+    return native_frames;
+}
+
+int Profiler::getJavaTraceAsync(void* ucontext, ASGCT_CallFrame* frames, int max_depth) {
+    JNIEnv* jni = VM::jni();
+    if (jni == NULL) {
+        // Not a Java thread
+        return 0;
+    }
+
+    ASGCT_CallTrace trace = {jni, 0, frames};
+    VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
+
+#ifndef SAFE_MODE
+    if (trace.num_frames == ticks_unknown_Java) {
+        // If current Java stack is not walkable (e.g. the top frame is not fully constructed),
+        // try to manually pop the top frame off, hoping that the previous frame is walkable.
+        // This is a temporary workaround for AsyncGetCallTrace issues,
+        // see https://bugs.openjdk.java.net/browse/JDK-8178287
+        StackFrame top_frame(ucontext);
+        uintptr_t pc = top_frame.pc(),
+                  sp = top_frame.sp(),
+                  fp = top_frame.fp();
+
+        // Guess top method by PC and insert it manually into the call trace
+        bool is_entry_frame = false;
+        if (fillTopFrame((const void*)pc, trace.frames)) {
+            is_entry_frame = trace.frames->bci == BCI_NATIVE_FRAME &&
+                             strcmp((const char*)trace.frames->method_id, "call_stub") == 0;
+            trace.frames++;
+            max_depth--;
+        }
+
+        if (top_frame.pop(is_entry_frame)) {
+            // Retry with the fixed context, but only if PC looks reasonable,
+            // otherwise AsyncGetCallTrace may crash
+            if (addressInCode((const void*)top_frame.pc())) {
+                VM::_asyncGetCallTrace(&trace, max_depth, ucontext);
+            }
+            top_frame.restore(pc, sp, fp);
+
+            if (trace.num_frames > 0) {
+                return trace.num_frames + (trace.frames - frames);
+            }
+
+            // Restore previous context
+            trace.num_frames = ticks_unknown_Java;
+        }
+    } else if (trace.num_frames == ticks_GC_active && VM::is_hotspot() && _JvmtiEnv_GetStackTrace != NULL) {
+        // While GC is running Java threads are known to be at safepoint
+        return getJavaTraceJvmti((jvmtiFrameInfo*)frames, frames, max_depth);
+    }
+#endif // SAFE_MODE
+
+    if (trace.num_frames > 0) {
+        return trace.num_frames;
+    }
+
+    const char* err_string = asgctError(trace.num_frames);
+    if (err_string == NULL) {
+        // No Java stack, because thread is not in Java context
+        return 0;
+    }
+
+    atomicInc(_failures[-trace.num_frames]);
+    frames[0].bci = BCI_ERROR;
+    frames[0].method_id = (jmethodID)err_string;
+    return 1;
+}
+
+int Profiler::getJavaTraceJvmti(jvmtiFrameInfo* jvmti_frames, ASGCT_CallFrame* frames, int max_depth) {
+    // We cannot call pure JVM TI here, because it assumes _thread_in_native state,
+    // but allocation events happen in _thread_in_vm state,
+    // see https://github.com/jvm-profiling-tools/async-profiler/issues/64
+    void* thread = _ThreadLocalStorage_thread();
+    int num_frames;
+    if (_JvmtiEnv_GetStackTrace(NULL, thread, 0, max_depth, jvmti_frames, &num_frames) == 0 && num_frames > 0) {
+        // Profiler expects stack trace in AsyncGetCallTrace format; convert it now
+        for (int i = 0; i < num_frames; i++) {
+            frames[i].method_id = jvmti_frames[i].method;
+            frames[i].bci = 0;
+        }
+        return num_frames;
+    }
+
+    return 0;
+}
+
+int Profiler::makeEventFrame(ASGCT_CallFrame* frames, jint event_type, jmethodID event) {
+    frames[0].bci = event_type;
+    frames[0].method_id = event;
+    return 1;
+}
+
+bool Profiler::fillTopFrame(const void* pc, ASGCT_CallFrame* frame) {
+    jmethodID method = NULL;
+    _jit_lock.lockShared();
+
+    // Check if PC lies within JVM's compiled code cache
+    if (pc >= _jit_min_address && pc < _jit_max_address) {
+        if ((method = _java_methods.find(pc)) != NULL) {
+            // PC belong to a JIT compiled method
+            frame->bci = 0;
+            frame->method_id = method;
+        } else if ((method = _runtime_stubs.find(pc)) != NULL) {
+            // PC belongs to a VM runtime stub
+            frame->bci = BCI_NATIVE_FRAME;
+            frame->method_id = method;
+        }
+    }
+
+    _jit_lock.unlockShared();
+    return method != NULL;
+}
+
+bool Profiler::addressInCode(const void* pc) {
+    // 1. Check if PC lies within JVM's compiled code cache
+    // Address in CodeCache is executable if it belongs to a Java method or a runtime stub
+    if (pc >= _jit_min_address && pc < _jit_max_address) {
+        _jit_lock.lockShared();
+        bool valid = _java_methods.find(pc) != NULL || _runtime_stubs.find(pc) != NULL;
+        _jit_lock.unlockShared();
+        return valid;
+    }
+
+    // 2. Check if PC belongs to executable code of shared libraries
+    const int native_lib_count = _native_lib_count;
+    for (int i = 0; i < native_lib_count; i++) {
+        if (_native_libs[i]->contains(pc)) {
+            return true;
+        }
+    }
+
+    // This can be some other dynamically generated code, but we don't know it. Better stay safe.
+    return false;
+}
+
+void Profiler::recordSample(void* ucontext, u64 counter, jint event_type, jmethodID event) {
+    int tid = OS::threadId();
+
+    u64 lock_index = atomicInc(_total_samples) % CONCURRENCY_LEVEL;
+    if (!_locks[lock_index].tryLock()) {
+        // Too many concurrent signals already
+        atomicInc(_failures[-ticks_skipped]);
+
+        if (event_type == 0) {
+            // Need to reset PerfEvents ring buffer, even though we discard the collected trace
+            _engine->getNativeTrace(ucontext, tid, NULL, 0, _jit_min_address, _jit_max_address);
+        }
+        return;
+    }
+
+    atomicInc(_total_counter, counter);
+
+    ASGCT_CallFrame* frames = _calltrace_buffer[lock_index]->_asgct_frames;
+    bool need_java_trace = true;
+
+    int num_frames = 0;
+    if (event_type == 0) {
+        num_frames = getNativeTrace(ucontext, frames, tid, &need_java_trace);
+    } else if (event != NULL) {
+        num_frames = makeEventFrame(frames, event_type, event);
+    }
+
+    if ((_sync_walk || event_type != 0) && _JvmtiEnv_GetStackTrace != NULL) {
+        // Events like object allocation happen at known places where it is safe to call JVM TI
+        jvmtiFrameInfo* jvmti_frames = _calltrace_buffer[lock_index]->_jvmti_frames;
+        num_frames += getJavaTraceJvmti(jvmti_frames + num_frames, frames + num_frames, _max_stack_depth);
+    } else if (OS::isSignalSafeTLS() || need_java_trace) {
+        num_frames += getJavaTraceAsync(ucontext, frames + num_frames, _max_stack_depth);
+    }
+
+    if (num_frames == 0 || (num_frames == 1 && event != NULL)) {
+        num_frames += makeEventFrame(frames + num_frames, BCI_ERROR, (jmethodID)"not_walkable");
+    }
+
+    if (_threads) {
+        num_frames += makeEventFrame(frames + num_frames, BCI_THREAD_ID, (jmethodID)(uintptr_t)tid);
+    }
+
+    storeMethod(frames[0].method_id, frames[0].bci, counter);
+    int call_trace_id = storeCallTrace(num_frames, frames, counter);
+    _jfr.recordExecutionSample(lock_index, tid, call_trace_id);
+
+    _locks[lock_index].unlock();
+}
+
+jboolean JNICALL Profiler::NativeLibraryLoadTrap(JNIEnv* env, jobject self, jstring name, jboolean builtin) {
+    jboolean result = _instance._original_NativeLibrary_load(env, self, name, builtin);
+    Symbols::parseLibraries(_instance._native_libs, _instance._native_lib_count, MAX_NATIVE_LIBS);
+    return result;
+}
+
+void Profiler::bindNativeLibraryLoad(NativeLoadLibraryFunc entry) {
+    JNIEnv* env = VM::jni();
+    jclass NativeLibrary = env->FindClass("java/lang/ClassLoader$NativeLibrary");
+
+    if (NativeLibrary != NULL) {
+        // Find JNI entry for NativeLibrary.load() method
+        if (_original_NativeLibrary_load == NULL) {
+            if (env->GetMethodID(NativeLibrary, "load0", "(Ljava/lang/String;Z)Z") != NULL) {
+                // JDK 9+
+                _load_method.name = (char*)"load0";
+                _load_method.signature = (char*)"(Ljava/lang/String;Z)Z";
+            } else if (env->GetMethodID(NativeLibrary, "load", "(Ljava/lang/String;Z)V") != NULL) {
+                // JDK 8
+                _load_method.name = (char*)"load";
+                _load_method.signature = (char*)"(Ljava/lang/String;Z)V";
+            } else {
+                // JDK 7
+                _load_method.name = (char*)"load";
+                _load_method.signature = (char*)"(Ljava/lang/String;)V";
+            }
+
+            char jni_name[64];
+            strcpy(jni_name, "Java_java_lang_ClassLoader_00024NativeLibrary_");
+            strcat(jni_name, _load_method.name);
+            _original_NativeLibrary_load = (NativeLoadLibraryFunc)dlsym(VM::_libjava, jni_name);
+        }
+
+        // Change function pointer for the native method
+        if (_original_NativeLibrary_load != NULL) {
+            _load_method.fnPtr = (void*)entry;
+            env->RegisterNatives(NativeLibrary, &_load_method, 1);
+        }
+    }
+
+    env->ExceptionClear();
+}
+
+void Profiler::initJvmtiFunctions(NativeCodeCache* libjvm) {
+    if (_JvmtiEnv_GetStackTrace == NULL) {
+        // Find ThreadLocalStorage::thread() if exists
+        if (_ThreadLocalStorage_thread == NULL) {
+            _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN18ThreadLocalStorage6threadEv");
+        }
+        // Fallback to ThreadLocalStorage::get_thread_slow()
+        if (_ThreadLocalStorage_thread == NULL) {
+            _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN18ThreadLocalStorage15get_thread_slowEv");
+        }
+        // Fallback to Thread::current(), e.g. on Zing
+        if (_ThreadLocalStorage_thread == NULL) {
+            _ThreadLocalStorage_thread = (void* (*)()) libjvm->findSymbol("_ZN6Thread7currentEv");
+        }
+        // JvmtiEnv::GetStackTrace(JavaThread* java_thread, jint start_depth, jint max_frame_count, jvmtiFrameInfo* frame_buffer, jint* count_ptr)
+        if (_ThreadLocalStorage_thread != NULL) {
+            _JvmtiEnv_GetStackTrace = (jvmtiError (*)(void*, void*, jint, jint, jvmtiFrameInfo*, jint*))
+                libjvm->findSymbol("_ZN8JvmtiEnv13GetStackTraceEP10JavaThreadiiP15_jvmtiFrameInfoPi");
+        }
+
+        if (_JvmtiEnv_GetStackTrace == NULL) {
+            fprintf(stderr, "WARNING: Install JVM debug symbols to improve profile accuracy\n");
+        }
+    }
+}
+
+void Profiler::setThreadName(int tid, const char* name) {
+    MutexLocker ml(_thread_names_lock);
+    _thread_names[tid] = name;
+}
+
+void Profiler::updateThreadName(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread) {
+    if (_threads && VMThread::available()) {
+        VMThread* vm_thread = VMThread::fromJavaThread(jni, thread);
+        jvmtiThreadInfo thread_info;
+        if (vm_thread != NULL && jvmti->GetThreadInfo(thread, &thread_info) == 0) {
+            setThreadName(vm_thread->osThreadId(), thread_info.name);
+            jvmti->Deallocate((unsigned char*)thread_info.name);
+        }
+    }
+}
+
+void Profiler::updateAllThreadNames() {
+    if (_threads && VMThread::available()) {
+        jvmtiEnv* jvmti = VM::jvmti();
+        jint thread_count;
+        jthread* thread_objects;
+        if (jvmti->GetAllThreads(&thread_count, &thread_objects) != 0) {
+            return;
+        }
+
+        JNIEnv* jni = VM::jni();
+        for (int i = 0; i < thread_count; i++) {
+            updateThreadName(jvmti, jni, thread_objects[i]);
+        }
+
+        jvmti->Deallocate((unsigned char*)thread_objects);
+    }
+}
+
+Engine* Profiler::selectEngine(const char* event_name) {
+    if (strcmp(event_name, EVENT_CPU) == 0) {
+        return PerfEvents::supported() ? (Engine*)&perf_events : (Engine*)&wall_clock;
+    } else if (strcmp(event_name, EVENT_ALLOC) == 0) {
+        return &alloc_tracer;
+    } else if (strcmp(event_name, EVENT_LOCK) == 0) {
+        return &lock_tracer;
+    } else if (strcmp(event_name, EVENT_WALL) == 0) {
+        return &wall_clock;
+    } else if (strcmp(event_name, EVENT_ITIMER) == 0) {
+        return &itimer;
+    } else {
+        return &perf_events;
+    }
+}
+
+Error Profiler::start(Arguments& args, bool reset) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE) {
+        return Error("Profiler already started");
+    }
+
+    if (VM::_asyncGetCallTrace == NULL) {
+        return Error("Could not find AsyncGetCallTrace function");
+    }
+
+    bool first_time = _native_lib_count == 0;
+    if (first_time || reset) {
+        // Reset counters
+        _total_samples = 0;
+        _total_counter = 0;
+        memset(_failures, 0, sizeof(_failures));
+        memset(_hashes, 0, sizeof(_hashes));
+        memset(_traces, 0, sizeof(_traces));
+        memset(_methods, 0, sizeof(_methods));
+
+        // Index 0 denotes special call trace with no frames
+        _hashes[0] = (u64)-1;
+
+        // Reset frame buffer
+        _frame_buffer_index = 0;
+        _frame_buffer_overflow = false;
+
+        // Reset thread names
+        {
+            MutexLocker ml(_thread_names_lock);
+            _thread_names.clear();
+        }
+    }
+
+    // (Re-)allocate frames
+    if (_frame_buffer_size != args._framebuf) {
+        _frame_buffer_size = args._framebuf;
+        _frame_buffer = (ASGCT_CallFrame*)realloc(_frame_buffer, _frame_buffer_size * sizeof(ASGCT_CallFrame));
+        if (_frame_buffer == NULL) {
+            _frame_buffer_size = 0;
+            return Error("Not enough memory to allocate frame buffer (try smaller framebuf)");
+        }
+    }
+
+    // (Re-)allocate calltrace buffers
+    if (_max_stack_depth != args._jstackdepth) {
+        _max_stack_depth = args._jstackdepth;
+        size_t buffer_size = (_max_stack_depth + MAX_NATIVE_FRAMES + RESERVED_FRAMES) * sizeof(CallTraceBuffer);
+
+        for (int i = 0; i < CONCURRENCY_LEVEL; i++) {
+            free(_calltrace_buffer[i]);
+            _calltrace_buffer[i] = (CallTraceBuffer*)malloc(buffer_size);
+            if (_calltrace_buffer[i] == NULL) {
+                _max_stack_depth = 0;
+                return Error("Not enough memory to allocate stack trace buffers (try smaller jstackdepth)");
+            }
+        }
+    }
+
+    _threads = args._threads && args._output != OUTPUT_JFR;
+    _sync_walk = args._sync_walk;
+
+    Symbols::parseLibraries(_native_libs, _native_lib_count, MAX_NATIVE_LIBS);
+    NativeCodeCache* libjvm = jvmLibrary();
+    if (libjvm == NULL) {
+        return Error("libjvm not found among loaded libraries");
+    }
+    VMStructs::init(libjvm);
+    initJvmtiFunctions(libjvm);
+
+    if (args._output == OUTPUT_JFR) {
+        Error error = _jfr.start(args._file);
+        if (error) {
+            return error;
+        }
+    }
+
+    _engine = selectEngine(args._event);
+    Error error = _engine->start(args);
+    if (error) {
+        _jfr.stop();
+        return error;
+    }
+
+    if (_threads) {
+        // Thread events might be already enabled by PerfEvents::start
+        switchThreadEvents(JVMTI_ENABLE);
+    }
+
+    bindNativeLibraryLoad(NativeLibraryLoadTrap);
+
+    _state = RUNNING;
+    _start_time = time(NULL);
+    return Error::OK;
+}
+
+Error Profiler::stop() {
+    MutexLocker ml(_state_lock);
+    if (_state != RUNNING) {
+        return Error("Profiler is not active");
+    }
+
+    _engine->stop();
+
+    // Acquire all spinlocks to avoid race with remaining signals
+    for (int i = 0; i < CONCURRENCY_LEVEL; i++) _locks[i].lock();
+    _jfr.stop();
+    for (int i = 0; i < CONCURRENCY_LEVEL; i++) _locks[i].unlock();
+
+    bindNativeLibraryLoad(_original_NativeLibrary_load);
+
+    switchThreadEvents(JVMTI_DISABLE);
+    updateAllThreadNames();
+
+    _state = IDLE;
+    return Error::OK;
+}
+
+void Profiler::switchThreadEvents(jvmtiEventMode mode) {
+    if (_thread_events_state != mode) {
+        jvmtiEnv* jvmti = VM::jvmti();
+        jvmti->SetEventNotificationMode(mode, JVMTI_EVENT_THREAD_START, NULL);
+        jvmti->SetEventNotificationMode(mode, JVMTI_EVENT_THREAD_END, NULL);
+        _thread_events_state = mode;
+    }
+}
+
+void Profiler::dumpSummary(std::ostream& out) {
+    char buf[256];
+    snprintf(buf, sizeof(buf),
+             "--- Execution profile ---\n"
+             "Total samples : %lld\n",
+             _total_samples);
+    out << buf;
+
+    double percent = 100.0 / _total_samples;
+    for (int i = 1; i < ASGCT_FAILURE_TYPES; i++) {
+        const char* err_string = asgctError(-i);
+        if (err_string != NULL && _failures[i] > 0) {
+            snprintf(buf, sizeof(buf), "%-20s: %lld (%.2f%%)\n", err_string, _failures[i], _failures[i] * percent);
+            out << buf;
+        }
+    }
+    out << std::endl;
+
+    if (_frame_buffer_overflow) {
+        out << "Frame buffer overflowed! Consider increasing its size." << std::endl;
+    } else {
+        double usage = 100.0 * _frame_buffer_index / _frame_buffer_size;
+        out << "Frame buffer usage : " << usage << "%" << std::endl;
+    }
+    out << std::endl;
+}
+
+/*
+ * Dump stacks in FlameGraph input format:
+ *
+ * <frame>;<frame>;...;<topmost frame> <count>
+ */
+void Profiler::dumpCollapsed(std::ostream& out, Arguments& args) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FrameName fn(args._style, _thread_names_lock, _thread_names);
+    u64 unknown = 0;
+
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        CallTraceSample& trace = _traces[i];
+        if (trace._samples == 0) continue;
+
+        if (trace._num_frames == 0) {
+            unknown += (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter);
+            continue;
+        }
+
+        for (int j = trace._num_frames - 1; j >= 0; j--) {
+            const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+            out << frame_name << (j == 0 ? ' ' : ';');
+        }
+        out << (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter) << "\n";
+    }
+
+    if (unknown != 0) {
+        out << "[frame_buffer_overflow] " << unknown << "\n";
+    }
+}
+
+void Profiler::dumpFlameGraph(std::ostream& out, Arguments& args, bool tree) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FlameGraph flamegraph(args._title, args._counter, args._width, args._height, args._minwidth, args._reverse);
+    FrameName fn(args._style, _thread_names_lock, _thread_names);
+
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        CallTraceSample& trace = _traces[i];
+        if (trace._samples == 0) continue;
+
+        u64 samples = (args._counter == COUNTER_SAMPLES ? trace._samples : trace._counter);
+
+        Trie* f = flamegraph.root();
+        if (trace._num_frames == 0) {
+            f = f->addChild("[frame_buffer_overflow]", samples);
+        } else if (args._reverse) {
+            for (int j = 0; j < trace._num_frames; j++) {
+                const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+                f = f->addChild(frame_name, samples);
+            }
+        } else {
+            for (int j = trace._num_frames - 1; j >= 0; j--) {
+                const char* frame_name = fn.name(_frame_buffer[trace._start_frame + j]);
+                f = f->addChild(frame_name, samples);
+            }
+        }
+        f->addLeaf(samples);
+    }
+
+    flamegraph.dump(out, tree);
+}
+
+void Profiler::dumpTraces(std::ostream& out, Arguments& args) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FrameName fn(args._style | STYLE_DOTTED, _thread_names_lock, _thread_names);
+    double percent = 100.0 / _total_counter;
+    char buf[1024];
+
+    CallTraceSample** traces = new CallTraceSample*[MAX_CALLTRACES];
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        traces[i] = &_traces[i];
+    }
+    qsort(traces, MAX_CALLTRACES, sizeof(CallTraceSample*), CallTraceSample::comparator);
+
+    int max_traces = args._dump_traces < MAX_CALLTRACES ? args._dump_traces : MAX_CALLTRACES;
+    for (int i = 0; i < max_traces; i++) {
+        CallTraceSample* trace = traces[i];
+        if (trace->_samples == 0) break;
+
+        snprintf(buf, sizeof(buf), "--- %lld %s (%.2f%%), %lld sample%s\n",
+                 trace->_counter, _engine->units(), trace->_counter * percent,
+                 trace->_samples, trace->_samples == 1 ? "" : "s");
+        out << buf;
+
+        if (trace->_num_frames == 0) {
+            out << " [ 0] [frame_buffer_overflow]\n";
+        }
+
+        for (int j = 0; j < trace->_num_frames; j++) {
+            const char* frame_name = fn.name(_frame_buffer[trace->_start_frame + j]);
+            snprintf(buf, sizeof(buf), " [%2d] %s\n", j, frame_name);
+            out << buf;
+        }
+        out << "\n";
+    }
+
+    delete[] traces;
+}
+
+void Profiler::dumpFlat(std::ostream& out, Arguments& args) {
+    MutexLocker ml(_state_lock);
+    if (_state != IDLE || _engine == NULL) return;
+
+    FrameName fn(args._style | STYLE_DOTTED, _thread_names_lock, _thread_names);
+    double percent = 100.0 / _total_counter;
+    char buf[1024];
+
+    MethodSample** methods = new MethodSample*[MAX_CALLTRACES];
+    for (int i = 0; i < MAX_CALLTRACES; i++) {
+        methods[i] = &_methods[i];
+    }
+    qsort(methods, MAX_CALLTRACES, sizeof(MethodSample*), MethodSample::comparator);
+
+    snprintf(buf, sizeof(buf), "%12s percent samples top\n"
+             " ---------- ------- ------- ---\n", _engine->units());
+    out << buf;
+
+    int max_methods = args._dump_flat < MAX_CALLTRACES ? args._dump_flat : MAX_CALLTRACES;
+    for (int i = 0; i < max_methods; i++) {
+        MethodSample* method = methods[i];
+        if (method->_samples == 0) break;
+
+        const char* frame_name = fn.name(method->_method);
+        snprintf(buf, sizeof(buf), "%12lld %6.2f%% %7lld %s\n",
+                 method->_counter, method->_counter * percent, method->_samples, frame_name);
+        out << buf;
+    }
+
+    delete[] methods;
+}
+
+void Profiler::runInternal(Arguments& args, std::ostream& out) {
+    switch (args._action) {
+        case ACTION_START:
+        case ACTION_RESUME: {
+            Error error = start(args, args._action == ACTION_START);
+            if (error) {
+                out << error.message() << std::endl;
+            } else {
+                out << "Started [" << args._event << "] profiling" << std::endl;
+            }
+            break;
+        }
+        case ACTION_STOP: {
+            Error error = stop();
+            if (error) {
+                out << error.message() << std::endl;
+            } else {
+                out << "Stopped profiling after " << uptime() << " seconds. No dump options specified" << std::endl;
+            }
+            break;
+        }
+        case ACTION_STATUS: {
+            MutexLocker ml(_state_lock);
+            if (_state == RUNNING) {
+                out << "[" << _engine->name() << "] profiling is running for " << uptime() << " seconds" << std::endl;
+            } else {
+                out << "Profiler is not active" << std::endl;
+            }
+            break;
+        }
+        case ACTION_LIST: {
+            out << "Basic events:" << std::endl;
+            out << "  " << EVENT_CPU << std::endl;
+            out << "  " << EVENT_ALLOC << std::endl;
+            out << "  " << EVENT_LOCK << std::endl;
+            out << "  " << EVENT_WALL << std::endl;
+            out << "  " << EVENT_ITIMER << std::endl;
+
+            if (PerfEvents::supported()) {
+                out << "Perf events:" << std::endl;
+                // The first perf event is "cpu" which is already printed
+                for (int event_id = 1; ; event_id++) {
+                    const char* event_name = PerfEvents::getEventName(event_id);
+                    if (event_name == NULL) break;
+                    out << "  " << event_name << std::endl;
+                }
+            }
+            break;
+        }
+        case ACTION_VERSION:
+            out << FULL_VERSION_STRING;
+            break;
+        case ACTION_DUMP:
+            stop();
+            switch (args._output) {
+                case OUTPUT_COLLAPSED:
+                    dumpCollapsed(out, args);
+                    break;
+                case OUTPUT_FLAMEGRAPH:
+                    dumpFlameGraph(out, args, false);
+                    break;
+                case OUTPUT_TREE:
+                    dumpFlameGraph(out, args, true);
+                    break;
+                case OUTPUT_TEXT:
+                    dumpSummary(out);
+                    if (args._dump_traces > 0) dumpTraces(out, args);
+                    if (args._dump_flat > 0) dumpFlat(out, args);
+                    break;
+                default:
+                    break;
+            }
+            break;
+        default:
+            break;
+    }
+}
+
+void Profiler::run(Arguments& args) {
+    if (args._file == NULL || args._output == OUTPUT_JFR) {
+        runInternal(args, std::cout);
+    } else {
+        std::ofstream out(args._file, std::ios::out | std::ios::trunc);
+        if (out.is_open()) {
+            runInternal(args, out);
+            out.close();
+        } else {
+            std::cerr << "Could not open " << args._file << std::endl;
+        }
+    }
+}
+
+void Profiler::shutdown(Arguments& args) {
+    MutexLocker ml(_state_lock);
+
+    // The last chance to dump profile before VM terminates
+    if (_state == RUNNING && args._output != OUTPUT_NONE) {
+        args._action = ACTION_DUMP;
+        run(args);
+    }
+
+    _state = TERMINATED;
+}
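One technique in profiler.cpp above deserves a closer look: samples are recorded from signal handlers, so hashCallTrace() and storeCallTrace() share call-trace slots between threads without taking any lock. The hash mixes each frame's method_id with MurmurHash64A-style constants, an empty slot is claimed with a single compare-and-swap, losers of the race re-check the same slot, and collisions fall through to linear probing. The sketch below is a stripped-down, self-contained illustration of that scheme; the table size, array names and the hashTrace()/storeTrace() helpers are simplifications invented for this example, not the real identifiers, while the mixing constants and the CAS loop follow the code above.

    #include <cstdint>
    #include <cstdio>

    typedef uint64_t u64;

    static const int MAX_SLOTS = 1024;          // stand-in for MAX_CALLTRACES
    static u64 slot_hash[MAX_SLOTS];            // 0 means "empty", like _hashes[]
    static u64 slot_samples[MAX_SLOTS];

    // Same mixing constants as Profiler::hashCallTrace (MurmurHash64A-style)
    static u64 hashTrace(int num_frames, const void* const* frames) {
        const u64 M = 0xc6a4a7935bd1e995ULL;
        const int R = 47;

        u64 h = num_frames * M;
        for (int i = 0; i < num_frames; i++) {
            u64 k = (u64)(uintptr_t)frames[i];
            k *= M;
            k ^= k >> R;
            k *= M;
            h ^= k;
            h *= M;
        }
        h ^= h >> R;
        h *= M;
        h ^= h >> R;
        return h;
    }

    // Open addressing + CAS, mirroring Profiler::storeCallTrace:
    // the first thread to claim an empty slot wins; everyone else either
    // finds the matching hash or probes the next slot.
    static int storeTrace(int num_frames, const void* const* frames) {
        u64 hash = hashTrace(num_frames, frames);
        int bucket = (int)(hash % MAX_SLOTS);
        int i = bucket;

        while (slot_hash[i] != hash) {
            if (slot_hash[i] == 0) {
                if (__sync_bool_compare_and_swap(&slot_hash[i], 0, hash)) {
                    break;                      // slot claimed; frames would be copied here
                }
                continue;                       // lost the race; re-check the same slot
            }
            if (++i == MAX_SLOTS) i = 0;        // linear probing
            if (i == bucket) return -1;         // table is full
        }

        __sync_fetch_and_add(&slot_samples[i], 1);
        return i;
    }

    int main() {
        const void* frames[3] = {(void*)0x1000, (void*)0x2000, (void*)0x3000};
        int id = storeTrace(3, frames);
        std::printf("trace stored in slot %d with %llu sample(s)\n",
                    id, (unsigned long long)slot_samples[id]);
        return 0;
    }

The only write that publishes a slot is the CAS on its hash word, so concurrent signal handlers never block each other; the trade-off, which the real code accepts as well, is that a completely full table simply drops the sample.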