jruby-async-profiler 0.1.0
File listing for the jruby-async-profiler 0.1.0 gem; the diff below shows data/ext/async-profiler/src/perfEvents_linux.cpp as added in this release.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.gitmodules +3 -0
- data/Gemfile +4 -0
- data/README.md +35 -0
- data/Rakefile +2 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/Rakefile +6 -0
- data/ext/async-profiler/.gitattributes +1 -0
- data/ext/async-profiler/.gitignore +6 -0
- data/ext/async-profiler/.travis.yml +11 -0
- data/ext/async-profiler/CHANGELOG.md +107 -0
- data/ext/async-profiler/JavaHome.class +0 -0
- data/ext/async-profiler/LICENSE +201 -0
- data/ext/async-profiler/Makefile +66 -0
- data/ext/async-profiler/README.md +487 -0
- data/ext/async-profiler/demo/SwingSet2.svg +2247 -0
- data/ext/async-profiler/docs/cddl1.txt +358 -0
- data/ext/async-profiler/profiler.sh +240 -0
- data/ext/async-profiler/src/allocTracer.cpp +155 -0
- data/ext/async-profiler/src/allocTracer.h +74 -0
- data/ext/async-profiler/src/arch.h +69 -0
- data/ext/async-profiler/src/arguments.cpp +265 -0
- data/ext/async-profiler/src/arguments.h +152 -0
- data/ext/async-profiler/src/codeCache.cpp +128 -0
- data/ext/async-profiler/src/codeCache.h +99 -0
- data/ext/async-profiler/src/engine.cpp +50 -0
- data/ext/async-profiler/src/engine.h +38 -0
- data/ext/async-profiler/src/flameGraph.cpp +770 -0
- data/ext/async-profiler/src/flameGraph.h +118 -0
- data/ext/async-profiler/src/flightRecorder.cpp +727 -0
- data/ext/async-profiler/src/flightRecorder.h +39 -0
- data/ext/async-profiler/src/frameName.cpp +189 -0
- data/ext/async-profiler/src/frameName.h +56 -0
- data/ext/async-profiler/src/itimer.cpp +49 -0
- data/ext/async-profiler/src/itimer.h +43 -0
- data/ext/async-profiler/src/jattach/jattach.c +437 -0
- data/ext/async-profiler/src/java/one/profiler/AsyncProfiler.java +160 -0
- data/ext/async-profiler/src/java/one/profiler/AsyncProfilerMXBean.java +43 -0
- data/ext/async-profiler/src/java/one/profiler/Counter.java +25 -0
- data/ext/async-profiler/src/java/one/profiler/Events.java +28 -0
- data/ext/async-profiler/src/javaApi.cpp +124 -0
- data/ext/async-profiler/src/lockTracer.cpp +161 -0
- data/ext/async-profiler/src/lockTracer.h +55 -0
- data/ext/async-profiler/src/mutex.cpp +33 -0
- data/ext/async-profiler/src/mutex.h +49 -0
- data/ext/async-profiler/src/os.h +45 -0
- data/ext/async-profiler/src/os_linux.cpp +129 -0
- data/ext/async-profiler/src/os_macos.cpp +115 -0
- data/ext/async-profiler/src/perfEvents.h +60 -0
- data/ext/async-profiler/src/perfEvents_linux.cpp +550 -0
- data/ext/async-profiler/src/perfEvents_macos.cpp +64 -0
- data/ext/async-profiler/src/profiler.cpp +952 -0
- data/ext/async-profiler/src/profiler.h +238 -0
- data/ext/async-profiler/src/spinLock.h +66 -0
- data/ext/async-profiler/src/stackFrame.h +57 -0
- data/ext/async-profiler/src/stackFrame_aarch64.cpp +75 -0
- data/ext/async-profiler/src/stackFrame_arm.cpp +58 -0
- data/ext/async-profiler/src/stackFrame_i386.cpp +82 -0
- data/ext/async-profiler/src/stackFrame_x64.cpp +113 -0
- data/ext/async-profiler/src/symbols.h +37 -0
- data/ext/async-profiler/src/symbols_linux.cpp +354 -0
- data/ext/async-profiler/src/symbols_macos.cpp +156 -0
- data/ext/async-profiler/src/vmEntry.cpp +173 -0
- data/ext/async-profiler/src/vmEntry.h +105 -0
- data/ext/async-profiler/src/vmStructs.cpp +104 -0
- data/ext/async-profiler/src/vmStructs.h +112 -0
- data/ext/async-profiler/src/wallClock.cpp +96 -0
- data/ext/async-profiler/src/wallClock.h +56 -0
- data/ext/async-profiler/test/AllocatingTarget.java +26 -0
- data/ext/async-profiler/test/LoadLibraryTest.java +21 -0
- data/ext/async-profiler/test/Target.java +31 -0
- data/ext/async-profiler/test/ThreadsTarget.java +35 -0
- data/ext/async-profiler/test/alloc-smoke-test.sh +36 -0
- data/ext/async-profiler/test/load-library-test.sh +35 -0
- data/ext/async-profiler/test/smoke-test.sh +37 -0
- data/ext/async-profiler/test/thread-smoke-test.sh +32 -0
- data/jruby-async-profiler.gemspec +32 -0
- data/lib/jruby/async/profiler.rb +10 -0
- data/lib/jruby/async/profiler/version.rb +7 -0
- metadata +155 -0
@@ -0,0 +1,550 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright 2017 Andrei Pangin
|
3
|
+
*
|
4
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
* you may not use this file except in compliance with the License.
|
6
|
+
* You may obtain a copy of the License at
|
7
|
+
*
|
8
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
*
|
10
|
+
* Unless required by applicable law or agreed to in writing, software
|
11
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
* See the License for the specific language governing permissions and
|
14
|
+
* limitations under the License.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#ifdef __linux__
|
18
|
+
|
19
|
+
#include <jvmti.h>
|
20
|
+
#include <string.h>
|
21
|
+
#include <stdint.h>
|
22
|
+
#include <stdlib.h>
|
23
|
+
#include <stdio.h>
|
24
|
+
#include <dlfcn.h>
|
25
|
+
#include <fcntl.h>
|
26
|
+
#include <unistd.h>
|
27
|
+
#include <errno.h>
|
28
|
+
#include <sys/ioctl.h>
|
29
|
+
#include <sys/mman.h>
|
30
|
+
#include <sys/syscall.h>
|
31
|
+
#include <linux/perf_event.h>
|
32
|
+
#include "arch.h"
|
33
|
+
#include "os.h"
|
34
|
+
#include "perfEvents.h"
|
35
|
+
#include "profiler.h"
|
36
|
+
#include "spinLock.h"
|
37
|
+
#include "stackFrame.h"
|
38
|
+
|
39
|
+
|
40
|
+
// Ancient fcntl.h does not define F_SETOWN_EX constants and structures
// (needed below to route overflow signals to a specific thread).
#ifndef F_SETOWN_EX
#define F_SETOWN_EX 15
#define F_OWNER_TID 0

struct f_owner_ex {
    int type;   // F_OWNER_TID: 'pid' is a thread id
    pid_t pid;
};
#endif // F_SETOWN_EX
|
50
|
+
|
51
|
+
|
52
|
+
// Hardware breakpoint access types, used as perf_event_attr.bp_type.
// NOTE(review): values mirror HW_BREAKPOINT_* from <linux/hw_breakpoint.h>,
// redeclared here — confirm against the target kernel headers.
enum {
    HW_BREAKPOINT_R = 1,
    HW_BREAKPOINT_W = 2,
    HW_BREAKPOINT_RW = 3,
    HW_BREAKPOINT_X = 4
};
|
58
|
+
|
59
|
+
|
60
|
+
// System page size; the perf ring buffer mapped below consists of one
// metadata page followed by one data page of this size.
static const unsigned long PERF_PAGE_SIZE = sysconf(_SC_PAGESIZE);
|
61
|
+
|
62
|
+
// Read the kernel's pid_max so the per-thread event table can be sized to
// cover every possible thread id. Falls back to 65536 when /proc is
// unavailable or unreadable.
static int getMaxPID() {
    char buf[16] = "65536";
    int fd = open("/proc/sys/kernel/pid_max", O_RDONLY);
    if (fd != -1) {
        ssize_t r = read(fd, buf, sizeof(buf) - 1);
        if (r > 0) {
            // Terminate exactly after the bytes just read; previously stale
            // initializer digits could follow a short read and confuse atoi()
            buf[r] = 0;
        }
        close(fd);
    }
    return atoi(buf);
}
|
72
|
+
|
73
|
+
// Get perf_event_attr.config numeric value of the given tracepoint name
// by reading /sys/kernel/debug/tracing/events/<name>/id file.
// Returns 0 when the name is not a resolvable "group:event" tracepoint.
static int findTracepointId(const char* name) {
    char buf[256];
    if ((size_t)snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/id", name) >= sizeof(buf)) {
        return 0;  // path truncated; cannot be a valid tracepoint
    }

    // Convert "group:event" into the "group/event" path component.
    // The original dereferenced strchr() unconditionally and would crash on a
    // name without ':'; treat such a name as "not a tracepoint" instead.
    char* colon = strchr(buf, ':');
    if (colon == NULL) {
        return 0;
    }
    *colon = '/';

    int fd = open(buf, O_RDONLY);
    if (fd == -1) {
        return 0;
    }

    char id[16] = "0";
    ssize_t r = read(fd, id, sizeof(id) - 1);
    if (r > 0) {
        id[r] = 0;  // NUL-terminate exactly what was read
    }
    close(fd);
    return atoi(id);
}
|
94
|
+
|
95
|
+
|
96
|
+
// Maps a well-known function name to the 1-based index of the argument that
// serves as the sample weight (e.g. malloc's first argument is the size).
struct FunctionWithCounter {
    const char* name;
    int counter_arg;
};
|
100
|
+
|
101
|
+
// Describes one profilable perf event: a predefined hardware/software event,
// a kernel tracepoint, or a hardware breakpoint. The breakpoint/tracepoint
// rows of AVAILABLE_EVENTS are patched in place when resolved by name.
struct PerfEventType {
    const char* name;
    long default_interval;   // sampling period used when the user gives none
    __u32 type;              // perf_event_attr.type
    __u64 config;            // perf_event_attr.config (or bp_addr for breakpoints)
    __u32 bp_type;           // breakpoint access type (HW_BREAKPOINT_*)
    __u32 bp_len;            // breakpoint length in bytes
    int counter_arg;         // which function argument is the sample weight, 0 = none

    static PerfEventType AVAILABLE_EVENTS[];
    static FunctionWithCounter KNOWN_FUNCTIONS[];

    // Find which argument of a known function serves as a profiling counter,
    // e.g. the first argument of malloc() is allocation size
    static int findCounterArg(const char* name) {
        for (FunctionWithCounter* func = KNOWN_FUNCTIONS; func->name != NULL; func++) {
            if (strcmp(name, func->name) == 0) {
                return func->counter_arg;
            }
        }
        return 0;
    }

    // Return the AVAILABLE_EVENTS entry with the given perf type.
    // No bounds check: assumes a matching entry exists (the breakpoint and
    // tracepoint rows are always present in the table).
    static PerfEventType* findByType(__u32 type) {
        for (PerfEventType* event = AVAILABLE_EVENTS; ; event++) {
            if (event->type == type) {
                return event;
            }
        }
    }

    // Breakpoint format: func[+offset][/len][:rwx]
    // Parses the spec right-to-left (:rwx, then /len, then +offset), resolves
    // the remaining symbol or 0x-address, and patches the shared breakpoint
    // table entry. Returns NULL if the symbol cannot be resolved.
    static PerfEventType* getBreakpoint(const char* name, __u32 bp_type, __u32 bp_len) {
        char buf[256];
        strncpy(buf, name, sizeof(buf) - 1);
        buf[sizeof(buf) - 1] = 0;

        // Parse access type [:rwx]
        char* c = strrchr(buf, ':');
        if (c != NULL) {
            *c++ = 0;
            if (strcmp(c, "r") == 0) {
                bp_type = HW_BREAKPOINT_R;
            } else if (strcmp(c, "w") == 0) {
                bp_type = HW_BREAKPOINT_W;
            } else if (strcmp(c, "x") == 0) {
                bp_type = HW_BREAKPOINT_X;
                bp_len = sizeof(long);
            } else {
                bp_type = HW_BREAKPOINT_RW;
            }
        }

        // Parse length [/8]
        c = strrchr(buf, '/');
        if (c != NULL) {
            *c++ = 0;
            bp_len = (__u32)strtol(c, NULL, 0);
        }

        // Parse offset [+0x1234]
        long long offset = 0;
        c = strrchr(buf, '+');
        if (c != NULL) {
            *c++ = 0;
            offset = strtoll(c, NULL, 0);
        }

        // Parse symbol or absolute address
        __u64 addr;
        if (strncmp(buf, "0x", 2) == 0) {
            addr = (__u64)strtoll(buf, NULL, 0);
        } else {
            // Try the dynamic linker first, then the profiler's own symbol table
            addr = (__u64)(uintptr_t)dlsym(RTLD_DEFAULT, buf);
            if (addr == 0) {
                addr = (__u64)(uintptr_t)Profiler::_instance.findSymbol(buf);
            }
            if (addr == 0) {
                return NULL;
            }
        }

        PerfEventType* breakpoint = findByType(PERF_TYPE_BREAKPOINT);
        breakpoint->config = addr + offset;
        breakpoint->bp_type = bp_type;
        breakpoint->bp_len = bp_len;
        // Only execution breakpoints on known functions carry an argument counter
        breakpoint->counter_arg = bp_type == HW_BREAKPOINT_X ? findCounterArg(buf) : 0;
        return breakpoint;
    }

    // Patch and return the shared tracepoint table entry for the given id.
    static PerfEventType* getTracepoint(int tracepoint_id) {
        PerfEventType* tracepoint = findByType(PERF_TYPE_TRACEPOINT);
        tracepoint->config = tracepoint_id;
        return tracepoint;
    }

    // Resolve a user-supplied event name to an event descriptor, or NULL.
    static PerfEventType* forName(const char* name) {
        // Hardware breakpoint
        if (strncmp(name, "mem:", 4) == 0) {
            return getBreakpoint(name + 4, HW_BREAKPOINT_RW, 1);
        }

        // Raw tracepoint ID
        if (strncmp(name, "trace:", 6) == 0) {
            int tracepoint_id = atoi(name + 6);
            return tracepoint_id > 0 ? getTracepoint(tracepoint_id) : NULL;
        }

        // Look through the table of predefined perf events
        for (PerfEventType* event = AVAILABLE_EVENTS; event->name != NULL; event++) {
            if (strcmp(name, event->name) == 0) {
                return event;
            }
        }

        // Kernel tracepoints defined in debugfs
        if (strchr(name, ':') != NULL) {
            int tracepoint_id = findTracepointId(name);
            if (tracepoint_id > 0) {
                return getTracepoint(tracepoint_id);
            }
        }

        // Finally, treat event as a function name and return an execution breakpoint
        return getBreakpoint(name, HW_BREAKPOINT_X, sizeof(long));
    }
};
|
228
|
+
|
229
|
+
// See perf_event_open(2)
#define LOAD_MISS(perf_hw_cache_id) \
    ((perf_hw_cache_id) | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)

// Table of selectable events, terminated by a NULL name. The last two rows
// are templates whose config/bp_* fields are patched by getBreakpoint() and
// getTracepoint(); fields omitted from an initializer are zero.
PerfEventType PerfEventType::AVAILABLE_EVENTS[] = {
    {"cpu", DEFAULT_INTERVAL, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK},
    {"page-faults", 1, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS},
    {"context-switches", 1, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES},

    {"cycles", 1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES},
    {"instructions", 1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS},
    {"cache-references", 1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES},
    {"cache-misses", 1000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES},
    {"branches", 1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
    {"branch-misses", 1000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES},
    {"bus-cycles", 1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES},

    {"L1-dcache-load-misses", 1000000, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_L1D)},
    {"LLC-load-misses", 1000, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_LL)},
    {"dTLB-load-misses", 1000, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_DTLB)},

    {"mem:breakpoint", 1, PERF_TYPE_BREAKPOINT, 0},
    {"trace:tracepoint", 1, PERF_TYPE_TRACEPOINT, 0},

    {NULL}
};
|
255
|
+
|
256
|
+
// Functions whose Nth argument (1-based) is used as the sample weight when
// an execution breakpoint is set on them; terminated by a NULL name.
FunctionWithCounter PerfEventType::KNOWN_FUNCTIONS[] = {
    {"malloc", 1},    // size
    {"mmap", 2},      // length
    {"read", 3},      // count
    {"write", 3},
    {"send", 3},
    {"recv", 3},
    {"sendto", 3},
    {"recvfrom", 3},
    {NULL}
};
|
267
|
+
|
268
|
+
|
269
|
+
class RingBuffer {
|
270
|
+
private:
|
271
|
+
const char* _start;
|
272
|
+
unsigned long _offset;
|
273
|
+
|
274
|
+
public:
|
275
|
+
RingBuffer(struct perf_event_mmap_page* page) {
|
276
|
+
_start = (const char*)page + PERF_PAGE_SIZE;
|
277
|
+
}
|
278
|
+
|
279
|
+
struct perf_event_header* seek(u64 offset) {
|
280
|
+
_offset = (unsigned long)offset & (PERF_PAGE_SIZE - 1);
|
281
|
+
return (struct perf_event_header*)(_start + _offset);
|
282
|
+
}
|
283
|
+
|
284
|
+
u64 next() {
|
285
|
+
_offset = (_offset + sizeof(u64)) & (PERF_PAGE_SIZE - 1);
|
286
|
+
return *(u64*)(_start + _offset);
|
287
|
+
}
|
288
|
+
};
|
289
|
+
|
290
|
+
|
291
|
+
// Per-thread perf_event state. Inherits SpinLock: destroyForThread() takes
// the lock around munmap() while getNativeTrace() tryLock()s before reading
// the ring buffer, so the buffer is never read while being unmapped.
class PerfEvent : public SpinLock {
  private:
    int _fd;                              // perf_event file descriptor; 0 = not created
    struct perf_event_mmap_page* _page;   // mmap'ed ring buffer; NULL = unavailable

    friend class PerfEvents;
};
|
298
|
+
|
299
|
+
|
300
|
+
// Static state for the (single) active profiling session.
int PerfEvents::_max_events = 0;                 // size of _events = kernel pid_max
PerfEvent* PerfEvents::_events = NULL;           // per-thread events, indexed by tid
PerfEventType* PerfEvents::_event_type = NULL;   // currently selected event type
long PerfEvents::_interval;                      // sampling period for this session
Ring PerfEvents::_ring;                          // user/kernel/any sampling domain
bool PerfEvents::_print_extended_warning;        // print the EACCES hint only once
|
306
|
+
|
307
|
+
// Create, mmap and arm a perf_event counter for the given thread.
// Returns true only when this call installed the event (a concurrent caller
// may win the race, in which case false is returned and nothing is leaked).
bool PerfEvents::createForThread(int tid) {
    // _events is indexed by tid; a tid beyond pid_max cannot be tracked
    if (tid >= _max_events) {
        fprintf(stderr, "WARNING: tid[%d] > pid_max[%d]. Restart profiler after changing pid_max\n", tid, _max_events);
        return false;
    }

    PerfEventType* event_type = _event_type;
    if (event_type == NULL) {
        return false;
    }

    struct perf_event_attr attr = {0};
    attr.size = sizeof(attr);
    attr.type = event_type->type;

    if (attr.type == PERF_TYPE_BREAKPOINT) {
        // For breakpoints, 'config' holds the resolved address (see getBreakpoint)
        attr.bp_addr = event_type->config;
        attr.bp_type = event_type->bp_type;
        attr.bp_len = event_type->bp_len;
    } else {
        attr.config = event_type->config;
    }

    // Hardware events may not always support zero skid
    if (attr.type == PERF_TYPE_SOFTWARE) {
        attr.precise_ip = 2;
    }

    attr.sample_period = _interval;
    attr.sample_type = PERF_SAMPLE_CALLCHAIN;
    attr.disabled = 1;           // armed explicitly via IOC_REFRESH below
    attr.wakeup_events = 1;

    if (_ring == RING_USER) {
        attr.exclude_kernel = 1;
    } else if (_ring == RING_KERNEL) {
        attr.exclude_user = 1;
    }

    int fd = syscall(__NR_perf_event_open, &attr, tid, -1, -1, 0);
    if (fd == -1) {
        int err = errno;
        perror("perf_event_open failed");
        if (err == EACCES && _print_extended_warning) {
            fprintf(stderr, "Due to permission restrictions, you cannot collect kernel events.\n");
            fprintf(stderr, "Try with --all-user option, or 'echo 1 > /proc/sys/kernel/perf_event_paranoid'\n");
            _print_extended_warning = false;  // print the hint only once per session
        }
        return false;
    }

    if (!__sync_bool_compare_and_swap(&_events[tid]._fd, 0, fd)) {
        // Lost race. The event is created either from start() or from onThreadStart()
        close(fd);
        return false;
    }

    // One metadata page plus one data page for the sample ring buffer
    void* page = mmap(NULL, 2 * PERF_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (page == MAP_FAILED) {
        perror("perf_event mmap failed");
        page = NULL;  // sampling still works; getNativeTrace() just returns 0 frames
    }

    _events[tid].reset();
    _events[tid]._page = (struct perf_event_mmap_page*)page;

    // Deliver overflow signals (SIGPROF) directly to the profiled thread
    struct f_owner_ex ex;
    ex.type = F_OWNER_TID;
    ex.pid = tid;

    fcntl(fd, F_SETFL, O_ASYNC);
    fcntl(fd, F_SETSIG, SIGPROF);
    fcntl(fd, F_SETOWN_EX, &ex);

    ioctl(fd, PERF_EVENT_IOC_RESET, 0);
    ioctl(fd, PERF_EVENT_IOC_REFRESH, 1);  // arm for the next single overflow

    return true;
}
|
386
|
+
|
387
|
+
// Disable and release the perf_event owned by the given thread, if any.
// Safe to call concurrently and for threads that never had an event.
void PerfEvents::destroyForThread(int tid) {
    if (tid >= _max_events) {
        return;
    }

    PerfEvent* event = &_events[tid];
    int fd = event->_fd;
    // CAS ensures only one concurrent caller closes the descriptor
    if (fd != 0 && __sync_bool_compare_and_swap(&event->_fd, fd, 0)) {
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        close(fd);
    }
    if (event->_page != NULL) {
        // Hold the spin lock so the signal handler is not walking the ring
        // buffer (getNativeTrace) while it is being unmapped
        event->lock();
        munmap(event->_page, 2 * PERF_PAGE_SIZE);
        event->_page = NULL;
        event->unlock();
    }
}
|
405
|
+
|
406
|
+
// SIGPROF handler invoked when a perf_event counter overflows.
// Records one sample and re-arms the event for the next overflow.
void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
    if (siginfo->si_code <= 0) {
        // Looks like an external signal; don't treat as a profiling event
        return;
    }

    // For execution breakpoints on known functions, the sample weight is the
    // chosen function argument (e.g. malloc's size); otherwise read the raw
    // counter value from the event descriptor.
    u64 counter;
    switch (_event_type->counter_arg) {
        case 1: counter = StackFrame(ucontext).arg0(); break;
        case 2: counter = StackFrame(ucontext).arg1(); break;
        case 3: counter = StackFrame(ucontext).arg2(); break;
        case 4: counter = StackFrame(ucontext).arg3(); break;
        default:
            if (read(siginfo->si_fd, &counter, sizeof(counter)) != sizeof(counter)) {
                counter = 1;  // fall back to counting one event per sample
            }
    }

    Profiler::_instance.recordSample(ucontext, counter, 0, NULL);

    // Re-arm: reset the period and allow one more overflow
    ioctl(siginfo->si_fd, PERF_EVENT_IOC_RESET, 0);
    ioctl(siginfo->si_fd, PERF_EVENT_IOC_REFRESH, 1);
}
|
428
|
+
|
429
|
+
const char* PerfEvents::units() {
|
430
|
+
if (_event_type == NULL || _event_type->name == EVENT_CPU) {
|
431
|
+
return "ns";
|
432
|
+
} else if (_event_type->type == PERF_TYPE_BREAKPOINT || _event_type->type == PERF_TYPE_TRACEPOINT) {
|
433
|
+
return "events";
|
434
|
+
}
|
435
|
+
|
436
|
+
const char* dash = strrchr(_event_type->name, '-');
|
437
|
+
return dash != NULL ? dash + 1 : _event_type->name;
|
438
|
+
}
|
439
|
+
|
440
|
+
// Begin a profiling session: resolve the event, size the per-thread table,
// install the SIGPROF handler, and create perf_events for all live threads.
Error PerfEvents::start(Arguments& args) {
    _event_type = PerfEventType::forName(args._event);
    if (_event_type == NULL) {
        return Error("Unsupported event type");
    }

    if (args._interval < 0) {
        return Error("interval must be positive");
    }
    // 0 means "use the event's default period"
    _interval = args._interval ? args._interval : _event_type->default_interval;

    _ring = args._ring;
    _print_extended_warning = _ring != RING_USER;

    // (Re)allocate the per-thread event table sized by the kernel's pid_max
    int max_events = getMaxPID();
    if (max_events != _max_events) {
        free(_events);
        _events = (PerfEvent*)calloc(max_events, sizeof(PerfEvent));
        if (_events == NULL) {
            // Previously the calloc result was used unchecked; fail fast instead
            _max_events = 0;
            return Error("Failed to allocate perf_event table");
        }
        _max_events = max_events;
    }

    OS::installSignalHandler(SIGPROF, signalHandler);

    // Enable thread events before traversing currently running threads
    Profiler::_instance.switchThreadEvents(JVMTI_ENABLE);

    // Create perf_events for all existing threads
    bool created = false;
    ThreadList* thread_list = OS::listThreads();
    for (int tid; (tid = thread_list->next()) != -1; ) {
        created |= createForThread(tid);
    }
    delete thread_list;

    if (!created) {
        Profiler::_instance.switchThreadEvents(JVMTI_DISABLE);
        return Error("Perf events unavailable. See stderr of the target process.");
    }
    return Error::OK;
}
|
480
|
+
|
481
|
+
// End the session: tear down the perf_event for every possible thread id.
void PerfEvents::stop() {
    for (int i = 0; i < _max_events; i++) {
        destroyForThread(i);
    }
}
|
486
|
+
|
487
|
+
// Thread-start callback: begin profiling the newly started thread.
void PerfEvents::onThreadStart() {
    createForThread(OS::threadId());
}
|
490
|
+
|
491
|
+
// Thread-end callback: release the ending thread's perf_event.
void PerfEvents::onThreadEnd() {
    destroyForThread(OS::threadId());
}
|
494
|
+
|
495
|
+
// Extract the kernel-collected call chain for the given thread from the
// perf_event ring buffer. Fills 'callchain' (up to max_depth frames) and
// returns the number of frames stored. Called from the signal handler.
int PerfEvents::getNativeTrace(void* ucontext, int tid, const void** callchain, int max_depth,
                               const void* jit_min_address, const void* jit_max_address) {
    PerfEvent* event = &_events[tid];
    if (!event->tryLock()) {
        return 0;  // the event is being destroyed
    }

    int depth = 0;

    struct perf_event_mmap_page* page = event->_page;
    if (page != NULL) {
        u64 tail = page->data_tail;
        u64 head = page->data_head;
        rmb();  // read barrier: 'head' must be read before the sample data

        RingBuffer ring(page);

        // Scan records until the first PERF_RECORD_SAMPLE
        while (tail < head) {
            struct perf_event_header* hdr = ring.seek(tail);
            if (hdr->type == PERF_RECORD_SAMPLE) {
                // PERF_SAMPLE_CALLCHAIN layout: u64 count, then that many IPs
                u64 nr = ring.next();
                while (nr-- > 0 && depth < max_depth) {
                    u64 ip = ring.next();
                    if (ip < PERF_CONTEXT_MAX) {  // skip kernel/user context markers
                        const void* iptr = (const void*)ip;
                        callchain[depth++] = iptr;
                        if (iptr >= jit_min_address && iptr < jit_max_address) {
                            // Stop at the first Java frame
                            break;
                        }
                    }
                }
                break;
            }
            tail += hdr->size;
        }

        // Mark all pending records consumed, even those we skipped
        page->data_tail = head;
    }

    event->unlock();
    return depth;
}
|
538
|
+
|
539
|
+
// perf_events are always available in this build: this translation unit is
// compiled only under __linux__ (see the guard at the top of the file).
bool PerfEvents::supported() {
    return true;
}
|
542
|
+
|
543
|
+
const char* PerfEvents::getEventName(int event_id) {
|
544
|
+
if (event_id >= 0 && (size_t)event_id < sizeof(PerfEventType::AVAILABLE_EVENTS) / sizeof(PerfEventType)) {
|
545
|
+
return PerfEventType::AVAILABLE_EVENTS[event_id].name;
|
546
|
+
}
|
547
|
+
return NULL;
|
548
|
+
}
|
549
|
+
|
550
|
+
#endif // __linux__
|