jruby-async-profiler 0.1.0

Files changed (82)
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.gitmodules +3 -0
  4. data/Gemfile +4 -0
  5. data/README.md +35 -0
  6. data/Rakefile +2 -0
  7. data/bin/console +14 -0
  8. data/bin/setup +8 -0
  9. data/ext/Rakefile +6 -0
  10. data/ext/async-profiler/.gitattributes +1 -0
  11. data/ext/async-profiler/.gitignore +6 -0
  12. data/ext/async-profiler/.travis.yml +11 -0
  13. data/ext/async-profiler/CHANGELOG.md +107 -0
  14. data/ext/async-profiler/JavaHome.class +0 -0
  15. data/ext/async-profiler/LICENSE +201 -0
  16. data/ext/async-profiler/Makefile +66 -0
  17. data/ext/async-profiler/README.md +487 -0
  18. data/ext/async-profiler/demo/SwingSet2.svg +2247 -0
  19. data/ext/async-profiler/docs/cddl1.txt +358 -0
  20. data/ext/async-profiler/profiler.sh +240 -0
  21. data/ext/async-profiler/src/allocTracer.cpp +155 -0
  22. data/ext/async-profiler/src/allocTracer.h +74 -0
  23. data/ext/async-profiler/src/arch.h +69 -0
  24. data/ext/async-profiler/src/arguments.cpp +265 -0
  25. data/ext/async-profiler/src/arguments.h +152 -0
  26. data/ext/async-profiler/src/codeCache.cpp +128 -0
  27. data/ext/async-profiler/src/codeCache.h +99 -0
  28. data/ext/async-profiler/src/engine.cpp +50 -0
  29. data/ext/async-profiler/src/engine.h +38 -0
  30. data/ext/async-profiler/src/flameGraph.cpp +770 -0
  31. data/ext/async-profiler/src/flameGraph.h +118 -0
  32. data/ext/async-profiler/src/flightRecorder.cpp +727 -0
  33. data/ext/async-profiler/src/flightRecorder.h +39 -0
  34. data/ext/async-profiler/src/frameName.cpp +189 -0
  35. data/ext/async-profiler/src/frameName.h +56 -0
  36. data/ext/async-profiler/src/itimer.cpp +49 -0
  37. data/ext/async-profiler/src/itimer.h +43 -0
  38. data/ext/async-profiler/src/jattach/jattach.c +437 -0
  39. data/ext/async-profiler/src/java/one/profiler/AsyncProfiler.java +160 -0
  40. data/ext/async-profiler/src/java/one/profiler/AsyncProfilerMXBean.java +43 -0
  41. data/ext/async-profiler/src/java/one/profiler/Counter.java +25 -0
  42. data/ext/async-profiler/src/java/one/profiler/Events.java +28 -0
  43. data/ext/async-profiler/src/javaApi.cpp +124 -0
  44. data/ext/async-profiler/src/lockTracer.cpp +161 -0
  45. data/ext/async-profiler/src/lockTracer.h +55 -0
  46. data/ext/async-profiler/src/mutex.cpp +33 -0
  47. data/ext/async-profiler/src/mutex.h +49 -0
  48. data/ext/async-profiler/src/os.h +45 -0
  49. data/ext/async-profiler/src/os_linux.cpp +129 -0
  50. data/ext/async-profiler/src/os_macos.cpp +115 -0
  51. data/ext/async-profiler/src/perfEvents.h +60 -0
  52. data/ext/async-profiler/src/perfEvents_linux.cpp +550 -0
  53. data/ext/async-profiler/src/perfEvents_macos.cpp +64 -0
  54. data/ext/async-profiler/src/profiler.cpp +952 -0
  55. data/ext/async-profiler/src/profiler.h +238 -0
  56. data/ext/async-profiler/src/spinLock.h +66 -0
  57. data/ext/async-profiler/src/stackFrame.h +57 -0
  58. data/ext/async-profiler/src/stackFrame_aarch64.cpp +75 -0
  59. data/ext/async-profiler/src/stackFrame_arm.cpp +58 -0
  60. data/ext/async-profiler/src/stackFrame_i386.cpp +82 -0
  61. data/ext/async-profiler/src/stackFrame_x64.cpp +113 -0
  62. data/ext/async-profiler/src/symbols.h +37 -0
  63. data/ext/async-profiler/src/symbols_linux.cpp +354 -0
  64. data/ext/async-profiler/src/symbols_macos.cpp +156 -0
  65. data/ext/async-profiler/src/vmEntry.cpp +173 -0
  66. data/ext/async-profiler/src/vmEntry.h +105 -0
  67. data/ext/async-profiler/src/vmStructs.cpp +104 -0
  68. data/ext/async-profiler/src/vmStructs.h +112 -0
  69. data/ext/async-profiler/src/wallClock.cpp +96 -0
  70. data/ext/async-profiler/src/wallClock.h +56 -0
  71. data/ext/async-profiler/test/AllocatingTarget.java +26 -0
  72. data/ext/async-profiler/test/LoadLibraryTest.java +21 -0
  73. data/ext/async-profiler/test/Target.java +31 -0
  74. data/ext/async-profiler/test/ThreadsTarget.java +35 -0
  75. data/ext/async-profiler/test/alloc-smoke-test.sh +36 -0
  76. data/ext/async-profiler/test/load-library-test.sh +35 -0
  77. data/ext/async-profiler/test/smoke-test.sh +37 -0
  78. data/ext/async-profiler/test/thread-smoke-test.sh +32 -0
  79. data/jruby-async-profiler.gemspec +32 -0
  80. data/lib/jruby/async/profiler.rb +10 -0
  81. data/lib/jruby/async/profiler/version.rb +7 -0
  82. metadata +155 -0
data/ext/async-profiler/src/perfEvents_linux.cpp
@@ -0,0 +1,550 @@
+/*
+ * Copyright 2017 Andrei Pangin
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __linux__
+
+#include <jvmti.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <dlfcn.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <linux/perf_event.h>
+#include "arch.h"
+#include "os.h"
+#include "perfEvents.h"
+#include "profiler.h"
+#include "spinLock.h"
+#include "stackFrame.h"
+
+
+// Ancient fcntl.h does not define F_SETOWN_EX constants and structures
+#ifndef F_SETOWN_EX
+#define F_SETOWN_EX  15
+#define F_OWNER_TID  0
+
+struct f_owner_ex {
+    int type;
+    pid_t pid;
+};
+#endif // F_SETOWN_EX
+
+
+enum {
+    HW_BREAKPOINT_R  = 1,
+    HW_BREAKPOINT_W  = 2,
+    HW_BREAKPOINT_RW = 3,
+    HW_BREAKPOINT_X  = 4
+};
+
+
+static const unsigned long PERF_PAGE_SIZE = sysconf(_SC_PAGESIZE);
+
+static int getMaxPID() {
+    char buf[16] = "65536";
+    int fd = open("/proc/sys/kernel/pid_max", O_RDONLY);
+    if (fd != -1) {
+        ssize_t r = read(fd, buf, sizeof(buf) - 1);
+        (void) r;
+        close(fd);
+    }
+    return atoi(buf);
+}
+
+// Get perf_event_attr.config numeric value of the given tracepoint name
+// by reading /sys/kernel/debug/tracing/events/<name>/id file
+static int findTracepointId(const char* name) {
+    char buf[256];
+    if ((size_t)snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/id", name) >= sizeof(buf)) {
+        return 0;
+    }
+
+    *strchr(buf, ':') = '/';  // make path from event name
+
+    int fd = open(buf, O_RDONLY);
+    if (fd == -1) {
+        return 0;
+    }
+
+    char id[16] = "0";
+    ssize_t r = read(fd, id, sizeof(id) - 1);
+    (void) r;
+    close(fd);
+    return atoi(id);
+}
+
+
+struct FunctionWithCounter {
+    const char* name;
+    int counter_arg;
+};
+
+struct PerfEventType {
+    const char* name;
+    long default_interval;
+    __u32 type;
+    __u64 config;
+    __u32 bp_type;
+    __u32 bp_len;
+    int counter_arg;
+
+    static PerfEventType AVAILABLE_EVENTS[];
+    static FunctionWithCounter KNOWN_FUNCTIONS[];
+
+    // Find which argument of a known function serves as a profiling counter,
+    // e.g. the first argument of malloc() is allocation size
+    static int findCounterArg(const char* name) {
+        for (FunctionWithCounter* func = KNOWN_FUNCTIONS; func->name != NULL; func++) {
+            if (strcmp(name, func->name) == 0) {
+                return func->counter_arg;
+            }
+        }
+        return 0;
+    }
+
+    static PerfEventType* findByType(__u32 type) {
+        for (PerfEventType* event = AVAILABLE_EVENTS; ; event++) {
+            if (event->type == type) {
+                return event;
+            }
+        }
+    }
+
+    // Breakpoint format: func[+offset][/len][:rwx]
+    static PerfEventType* getBreakpoint(const char* name, __u32 bp_type, __u32 bp_len) {
+        char buf[256];
+        strncpy(buf, name, sizeof(buf) - 1);
+        buf[sizeof(buf) - 1] = 0;
+
+        // Parse access type [:rwx]
+        char* c = strrchr(buf, ':');
+        if (c != NULL) {
+            *c++ = 0;
+            if (strcmp(c, "r") == 0) {
+                bp_type = HW_BREAKPOINT_R;
+            } else if (strcmp(c, "w") == 0) {
+                bp_type = HW_BREAKPOINT_W;
+            } else if (strcmp(c, "x") == 0) {
+                bp_type = HW_BREAKPOINT_X;
+                bp_len = sizeof(long);
+            } else {
+                bp_type = HW_BREAKPOINT_RW;
+            }
+        }
+
+        // Parse length [/8]
+        c = strrchr(buf, '/');
+        if (c != NULL) {
+            *c++ = 0;
+            bp_len = (__u32)strtol(c, NULL, 0);
+        }
+
+        // Parse offset [+0x1234]
+        long long offset = 0;
+        c = strrchr(buf, '+');
+        if (c != NULL) {
+            *c++ = 0;
+            offset = strtoll(c, NULL, 0);
+        }
+
+        // Parse symbol or absolute address
+        __u64 addr;
+        if (strncmp(buf, "0x", 2) == 0) {
+            addr = (__u64)strtoll(buf, NULL, 0);
+        } else {
+            addr = (__u64)(uintptr_t)dlsym(RTLD_DEFAULT, buf);
+            if (addr == 0) {
+                addr = (__u64)(uintptr_t)Profiler::_instance.findSymbol(buf);
+            }
+            if (addr == 0) {
+                return NULL;
+            }
+        }
+
+        PerfEventType* breakpoint = findByType(PERF_TYPE_BREAKPOINT);
+        breakpoint->config = addr + offset;
+        breakpoint->bp_type = bp_type;
+        breakpoint->bp_len = bp_len;
+        breakpoint->counter_arg = bp_type == HW_BREAKPOINT_X ? findCounterArg(buf) : 0;
+        return breakpoint;
+    }
+
+    static PerfEventType* getTracepoint(int tracepoint_id) {
+        PerfEventType* tracepoint = findByType(PERF_TYPE_TRACEPOINT);
+        tracepoint->config = tracepoint_id;
+        return tracepoint;
+    }
+
+    static PerfEventType* forName(const char* name) {
+        // Hardware breakpoint
+        if (strncmp(name, "mem:", 4) == 0) {
+            return getBreakpoint(name + 4, HW_BREAKPOINT_RW, 1);
+        }
+
+        // Raw tracepoint ID
+        if (strncmp(name, "trace:", 6) == 0) {
+            int tracepoint_id = atoi(name + 6);
+            return tracepoint_id > 0 ? getTracepoint(tracepoint_id) : NULL;
+        }
+
+        // Look through the table of predefined perf events
+        for (PerfEventType* event = AVAILABLE_EVENTS; event->name != NULL; event++) {
+            if (strcmp(name, event->name) == 0) {
+                return event;
+            }
+        }
+
+        // Kernel tracepoints defined in debugfs
+        if (strchr(name, ':') != NULL) {
+            int tracepoint_id = findTracepointId(name);
+            if (tracepoint_id > 0) {
+                return getTracepoint(tracepoint_id);
+            }
+        }
+
+        // Finally, treat event as a function name and return an execution breakpoint
+        return getBreakpoint(name, HW_BREAKPOINT_X, sizeof(long));
+    }
+};
+
+// See perf_event_open(2)
+#define LOAD_MISS(perf_hw_cache_id) \
+    ((perf_hw_cache_id) | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+
+PerfEventType PerfEventType::AVAILABLE_EVENTS[] = {
+    {"cpu",                DEFAULT_INTERVAL, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK},
+    {"page-faults",        1,                PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS},
+    {"context-switches",   1,                PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES},
+
+    {"cycles",             1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES},
+    {"instructions",       1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS},
+    {"cache-references",   1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES},
+    {"cache-misses",       1000,    PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES},
+    {"branches",           1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
+    {"branch-misses",      1000,    PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES},
+    {"bus-cycles",         1000000, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES},
+
+    {"L1-dcache-load-misses", 1000000, PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_L1D)},
+    {"LLC-load-misses",       1000,    PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_LL)},
+    {"dTLB-load-misses",      1000,    PERF_TYPE_HW_CACHE, LOAD_MISS(PERF_COUNT_HW_CACHE_DTLB)},
+
+    {"mem:breakpoint",     1, PERF_TYPE_BREAKPOINT, 0},
+    {"trace:tracepoint",   1, PERF_TYPE_TRACEPOINT, 0},
+
+    {NULL}
+};
+
+FunctionWithCounter PerfEventType::KNOWN_FUNCTIONS[] = {
+    {"malloc",   1},
+    {"mmap",     2},
+    {"read",     3},
+    {"write",    3},
+    {"send",     3},
+    {"recv",     3},
+    {"sendto",   3},
+    {"recvfrom", 3},
+    {NULL}
+};
+
+
+class RingBuffer {
+  private:
+    const char* _start;
+    unsigned long _offset;
+
+  public:
+    RingBuffer(struct perf_event_mmap_page* page) {
+        _start = (const char*)page + PERF_PAGE_SIZE;
+    }
+
+    struct perf_event_header* seek(u64 offset) {
+        _offset = (unsigned long)offset & (PERF_PAGE_SIZE - 1);
+        return (struct perf_event_header*)(_start + _offset);
+    }
+
+    u64 next() {
+        _offset = (_offset + sizeof(u64)) & (PERF_PAGE_SIZE - 1);
+        return *(u64*)(_start + _offset);
+    }
+};
+
+
+class PerfEvent : public SpinLock {
+  private:
+    int _fd;
+    struct perf_event_mmap_page* _page;
+
+    friend class PerfEvents;
+};
+
+
+int PerfEvents::_max_events = 0;
+PerfEvent* PerfEvents::_events = NULL;
+PerfEventType* PerfEvents::_event_type = NULL;
+long PerfEvents::_interval;
+Ring PerfEvents::_ring;
+bool PerfEvents::_print_extended_warning;
+
+bool PerfEvents::createForThread(int tid) {
+    if (tid >= _max_events) {
+        fprintf(stderr, "WARNING: tid[%d] > pid_max[%d]. Restart profiler after changing pid_max\n", tid, _max_events);
+        return false;
+    }
+
+    PerfEventType* event_type = _event_type;
+    if (event_type == NULL) {
+        return false;
+    }
+
+    struct perf_event_attr attr = {0};
+    attr.size = sizeof(attr);
+    attr.type = event_type->type;
+
+    if (attr.type == PERF_TYPE_BREAKPOINT) {
+        attr.bp_addr = event_type->config;
+        attr.bp_type = event_type->bp_type;
+        attr.bp_len = event_type->bp_len;
+    } else {
+        attr.config = event_type->config;
+    }
+
+    // Hardware events may not always support zero skid
+    if (attr.type == PERF_TYPE_SOFTWARE) {
+        attr.precise_ip = 2;
+    }
+
+    attr.sample_period = _interval;
+    attr.sample_type = PERF_SAMPLE_CALLCHAIN;
+    attr.disabled = 1;
+    attr.wakeup_events = 1;
+
+    if (_ring == RING_USER) {
+        attr.exclude_kernel = 1;
+    } else if (_ring == RING_KERNEL) {
+        attr.exclude_user = 1;
+    }
+
+    int fd = syscall(__NR_perf_event_open, &attr, tid, -1, -1, 0);
+    if (fd == -1) {
+        int err = errno;
+        perror("perf_event_open failed");
+        if (err == EACCES && _print_extended_warning) {
+            fprintf(stderr, "Due to permission restrictions, you cannot collect kernel events.\n");
+            fprintf(stderr, "Try with --all-user option, or 'echo 1 > /proc/sys/kernel/perf_event_paranoid'\n");
+            _print_extended_warning = false;
+        }
+        return false;
+    }
+
+    if (!__sync_bool_compare_and_swap(&_events[tid]._fd, 0, fd)) {
+        // Lost race. The event is created either from start() or from onThreadStart()
+        close(fd);
+        return false;
+    }
+
+    void* page = mmap(NULL, 2 * PERF_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if (page == MAP_FAILED) {
+        perror("perf_event mmap failed");
+        page = NULL;
+    }
+
+    _events[tid].reset();
+    _events[tid]._page = (struct perf_event_mmap_page*)page;
+
+    struct f_owner_ex ex;
+    ex.type = F_OWNER_TID;
+    ex.pid = tid;
+
+    fcntl(fd, F_SETFL, O_ASYNC);
+    fcntl(fd, F_SETSIG, SIGPROF);
+    fcntl(fd, F_SETOWN_EX, &ex);
+
+    ioctl(fd, PERF_EVENT_IOC_RESET, 0);
+    ioctl(fd, PERF_EVENT_IOC_REFRESH, 1);
+
+    return true;
+}
+
+void PerfEvents::destroyForThread(int tid) {
+    if (tid >= _max_events) {
+        return;
+    }
+
+    PerfEvent* event = &_events[tid];
+    int fd = event->_fd;
+    if (fd != 0 && __sync_bool_compare_and_swap(&event->_fd, fd, 0)) {
+        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
+        close(fd);
+    }
+    if (event->_page != NULL) {
+        event->lock();
+        munmap(event->_page, 2 * PERF_PAGE_SIZE);
+        event->_page = NULL;
+        event->unlock();
+    }
+}
+
+void PerfEvents::signalHandler(int signo, siginfo_t* siginfo, void* ucontext) {
+    if (siginfo->si_code <= 0) {
+        // Looks like an external signal; don't treat as a profiling event
+        return;
+    }
+
+    u64 counter;
+    switch (_event_type->counter_arg) {
+        case 1: counter = StackFrame(ucontext).arg0(); break;
+        case 2: counter = StackFrame(ucontext).arg1(); break;
+        case 3: counter = StackFrame(ucontext).arg2(); break;
+        case 4: counter = StackFrame(ucontext).arg3(); break;
+        default:
+            if (read(siginfo->si_fd, &counter, sizeof(counter)) != sizeof(counter)) {
+                counter = 1;
+            }
+    }
+
+    Profiler::_instance.recordSample(ucontext, counter, 0, NULL);
+    ioctl(siginfo->si_fd, PERF_EVENT_IOC_RESET, 0);
+    ioctl(siginfo->si_fd, PERF_EVENT_IOC_REFRESH, 1);
+}
+
+const char* PerfEvents::units() {
+    if (_event_type == NULL || _event_type->name == EVENT_CPU) {
+        return "ns";
+    } else if (_event_type->type == PERF_TYPE_BREAKPOINT || _event_type->type == PERF_TYPE_TRACEPOINT) {
+        return "events";
+    }
+
+    const char* dash = strrchr(_event_type->name, '-');
+    return dash != NULL ? dash + 1 : _event_type->name;
+}
+
+Error PerfEvents::start(Arguments& args) {
+    _event_type = PerfEventType::forName(args._event);
+    if (_event_type == NULL) {
+        return Error("Unsupported event type");
+    }
+
+    if (args._interval < 0) {
+        return Error("interval must be positive");
+    }
+    _interval = args._interval ? args._interval : _event_type->default_interval;
+
+    _ring = args._ring;
+    _print_extended_warning = _ring != RING_USER;
+
+    int max_events = getMaxPID();
+    if (max_events != _max_events) {
+        free(_events);
+        _events = (PerfEvent*)calloc(max_events, sizeof(PerfEvent));
+        _max_events = max_events;
+    }
+
+    OS::installSignalHandler(SIGPROF, signalHandler);
+
+    // Enable thread events before traversing currently running threads
+    Profiler::_instance.switchThreadEvents(JVMTI_ENABLE);
+
+    // Create perf_events for all existing threads
+    bool created = false;
+    ThreadList* thread_list = OS::listThreads();
+    for (int tid; (tid = thread_list->next()) != -1; ) {
+        created |= createForThread(tid);
+    }
+    delete thread_list;
+
+    if (!created) {
+        Profiler::_instance.switchThreadEvents(JVMTI_DISABLE);
+        return Error("Perf events unavailable. See stderr of the target process.");
+    }
+    return Error::OK;
+}
+
+void PerfEvents::stop() {
+    for (int i = 0; i < _max_events; i++) {
+        destroyForThread(i);
+    }
+}
+
+void PerfEvents::onThreadStart() {
+    createForThread(OS::threadId());
+}
+
+void PerfEvents::onThreadEnd() {
+    destroyForThread(OS::threadId());
+}
+
+int PerfEvents::getNativeTrace(void* ucontext, int tid, const void** callchain, int max_depth,
+                               const void* jit_min_address, const void* jit_max_address) {
+    PerfEvent* event = &_events[tid];
+    if (!event->tryLock()) {
+        return 0;  // the event is being destroyed
+    }
+
+    int depth = 0;
+
+    struct perf_event_mmap_page* page = event->_page;
+    if (page != NULL) {
+        u64 tail = page->data_tail;
+        u64 head = page->data_head;
+        rmb();
+
+        RingBuffer ring(page);
+
+        while (tail < head) {
+            struct perf_event_header* hdr = ring.seek(tail);
+            if (hdr->type == PERF_RECORD_SAMPLE) {
+                u64 nr = ring.next();
+                while (nr-- > 0 && depth < max_depth) {
+                    u64 ip = ring.next();
+                    if (ip < PERF_CONTEXT_MAX) {
+                        const void* iptr = (const void*)ip;
+                        callchain[depth++] = iptr;
+                        if (iptr >= jit_min_address && iptr < jit_max_address) {
+                            // Stop at the first Java frame
+                            break;
+                        }
+                    }
+                }
+                break;
+            }
+            tail += hdr->size;
+        }
+
+        page->data_tail = head;
+    }
+
+    event->unlock();
+    return depth;
+}
+
+bool PerfEvents::supported() {
+    return true;
+}
+
+const char* PerfEvents::getEventName(int event_id) {
+    if (event_id >= 0 && (size_t)event_id < sizeof(PerfEventType::AVAILABLE_EVENTS) / sizeof(PerfEventType)) {
+        return PerfEventType::AVAILABLE_EVENTS[event_id].name;
+    }
+    return NULL;
+}
+
+#endif // __linux__
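
Note (not part of the gem): the file above drives all sampling through the Linux perf_events kernel API — a per-thread perf_event_open(2), overflow notifications routed to SIGPROF via fcntl(O_ASYNC) + F_SETSIG, and re-arming with PERF_EVENT_IOC_REFRESH from the signal handler. The following self-contained C/C++ sketch shows that pattern in isolation for a single-threaded process; the names (on_sample, samples, the busy loop) are illustrative only and do not exist in async-profiler.

#ifndef _GNU_SOURCE
#define _GNU_SOURCE                               // F_SETSIG is a Linux-specific fcntl extension
#endif
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static volatile sig_atomic_t samples = 0;

// Invoked on each event overflow; si->si_fd identifies the perf event fd
static void on_sample(int signo, siginfo_t* si, void* ucontext) {
    (void)signo; (void)ucontext;
    samples = samples + 1;                        // a real profiler would record the call stack here
    ioctl(si->si_fd, PERF_EVENT_IOC_REFRESH, 1);  // re-arm for the next overflow
}

int main(void) {
    struct sigaction sa;
    memset(&sa, 0, sizeof(sa));
    sa.sa_sigaction = on_sample;
    sa.sa_flags = SA_SIGINFO | SA_RESTART;
    sigaction(SIGPROF, &sa, NULL);

    struct perf_event_attr attr;
    memset(&attr, 0, sizeof(attr));
    attr.size = sizeof(attr);
    attr.type = PERF_TYPE_SOFTWARE;               // same event family as the "cpu" entry above
    attr.config = PERF_COUNT_SW_CPU_CLOCK;
    attr.sample_period = 10000000;                // one sample every 10 ms of CPU time
    attr.disabled = 1;
    attr.wakeup_events = 1;
    attr.exclude_kernel = 1;                      // user space only; avoids perf_event_paranoid restrictions

    int fd = (int)syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);  // 0 = calling thread
    if (fd == -1) { perror("perf_event_open"); return 1; }

    fcntl(fd, F_SETFL, O_ASYNC);                  // deliver overflow notifications as a signal
    fcntl(fd, F_SETSIG, SIGPROF);                 // ... as SIGPROF instead of the default SIGIO
    fcntl(fd, F_SETOWN, getpid());                // single-threaded: plain F_SETOWN is enough here
    ioctl(fd, PERF_EVENT_IOC_REFRESH, 1);         // enable and arm for the first overflow

    for (volatile unsigned long i = 0; i < 400000000UL; i++) { }  // burn CPU so samples arrive

    close(fd);
    printf("collected %d samples\n", (int)samples);
    return 0;
}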