pf2 0.11.3 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/README.md +11 -15
- data/Rakefile +1 -0
- data/THIRD_PARTY_LICENSES.txt +59 -0
- data/ext/patches/libbacktrace/0001-Support-MACH_O_MH_BUNDLE.patch +32 -0
- data/ext/pf2/configuration.c +14 -0
- data/ext/pf2/configuration.h +3 -0
- data/ext/pf2/extconf.rb +37 -3
- data/ext/pf2/khashl.h +506 -0
- data/ext/pf2/sample.h +2 -2
- data/ext/pf2/serializer.c +115 -32
- data/ext/pf2/serializer.h +2 -0
- data/ext/pf2/session.c +247 -104
- data/ext/pf2/session.h +155 -3
- data/lib/pf2/cli.rb +1 -1
- data/lib/pf2/serve.rb +1 -2
- data/lib/pf2/version.rb +1 -1
- data/lib/pf2.rb +15 -2
- data/vendor/libbacktrace/atomic.c +1 -1
- data/vendor/libbacktrace/configure +12 -4
- data/vendor/libbacktrace/configure.ac +6 -1
- data/vendor/libbacktrace/elf.c +4 -4
- data/vendor/libbacktrace/fileline.c +35 -1
- data/vendor/libbacktrace/filetype.awk +1 -0
- metadata +4 -1
data/ext/pf2/session.c
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
#include <stdbool.h>
|
|
5
5
|
#include <stdio.h>
|
|
6
6
|
#include <stdlib.h>
|
|
7
|
+
#include <string.h>
|
|
7
8
|
#include <sys/time.h>
|
|
8
9
|
#include <time.h>
|
|
9
10
|
|
|
@@ -19,15 +20,17 @@
|
|
|
19
20
|
#include "session.h"
|
|
20
21
|
#include "serializer.h"
|
|
21
22
|
|
|
22
|
-
|
|
23
|
-
// Global session pointer for setitimer fallback
|
|
23
|
+
// Pointer to current active session, for access from signal handlers
|
|
24
24
|
static struct pf2_session *global_current_session = NULL;
|
|
25
|
-
#endif
|
|
26
25
|
|
|
27
26
|
static void *sample_collector_thread(void *arg);
|
|
27
|
+
static void drain_ringbuffer(struct pf2_session *session);
|
|
28
28
|
static void sigprof_handler(int sig, siginfo_t *info, void *ucontext);
|
|
29
|
-
bool ensure_sample_capacity(struct pf2_session *session);
|
|
30
29
|
static void pf2_session_stop(struct pf2_session *session);
|
|
30
|
+
static size_t intern_location(struct pf2_session *session, VALUE cme, int lineno);
|
|
31
|
+
static size_t intern_stack(struct pf2_session *session, const size_t *frames, size_t depth);
|
|
32
|
+
static size_t intern_native_stack(struct pf2_session *session, const uintptr_t *frames, size_t depth);
|
|
33
|
+
static bool insert_sample(struct pf2_session *session, const struct pf2_sample *sample);
|
|
31
34
|
|
|
32
35
|
VALUE
|
|
33
36
|
rb_pf2_session_initialize(int argc, VALUE *argv, VALUE self)
|
|
@@ -40,10 +43,11 @@ rb_pf2_session_initialize(int argc, VALUE *argv, VALUE self)
|
|
|
40
43
|
rb_scan_args(argc, argv, ":", &kwargs);
|
|
41
44
|
ID kwarg_labels[] = {
|
|
42
45
|
rb_intern("interval_ms"),
|
|
43
|
-
rb_intern("time_mode")
|
|
46
|
+
rb_intern("time_mode"),
|
|
47
|
+
rb_intern("_test_no_install_timer")
|
|
44
48
|
};
|
|
45
49
|
VALUE *kwarg_values = NULL;
|
|
46
|
-
rb_get_kwargs(kwargs, kwarg_labels, 0,
|
|
50
|
+
rb_get_kwargs(kwargs, kwarg_labels, 0, 3, kwarg_values);
|
|
47
51
|
|
|
48
52
|
session->configuration = pf2_configuration_new_from_options_hash(kwargs);
|
|
49
53
|
|
|
@@ -56,6 +60,9 @@ rb_pf2_session_start(VALUE self)
|
|
|
56
60
|
struct pf2_session *session;
|
|
57
61
|
TypedData_Get_Struct(self, struct pf2_session, &pf2_session_type, session);
|
|
58
62
|
|
|
63
|
+
// Store pointer to current session for access from signal handlers
|
|
64
|
+
global_current_session = session;
|
|
65
|
+
|
|
59
66
|
session->is_running = true;
|
|
60
67
|
|
|
61
68
|
// Record start time
|
|
@@ -87,58 +94,60 @@ rb_pf2_session_start(VALUE self)
|
|
|
87
94
|
}
|
|
88
95
|
#endif
|
|
89
96
|
|
|
90
|
-
#ifdef HAVE_TIMER_CREATE
|
|
91
|
-
// Configure a kernel timer to send SIGPROF periodically
|
|
92
|
-
struct sigevent sev;
|
|
93
|
-
sev.sigev_notify = SIGEV_SIGNAL;
|
|
94
|
-
sev.sigev_signo = SIGPROF;
|
|
95
|
-
sev.sigev_value.sival_ptr = session; // Passed as info->si_value.sival_ptr
|
|
96
|
-
if (timer_create(
|
|
97
|
-
session->configuration->time_mode == PF2_TIME_MODE_CPU_TIME
|
|
98
|
-
? CLOCK_PROCESS_CPUTIME_ID
|
|
99
|
-
: CLOCK_MONOTONIC,
|
|
100
|
-
&sev,
|
|
101
|
-
&session->timer
|
|
102
|
-
) == -1) {
|
|
103
|
-
rb_raise(rb_eRuntimeError, "Failed to create timer");
|
|
104
|
-
}
|
|
105
|
-
struct itimerspec its = {
|
|
106
|
-
.it_value = {
|
|
107
|
-
.tv_sec = 0,
|
|
108
|
-
.tv_nsec = session->configuration->interval_ms * 1000000,
|
|
109
|
-
},
|
|
110
|
-
.it_interval = {
|
|
111
|
-
.tv_sec = 0,
|
|
112
|
-
.tv_nsec = session->configuration->interval_ms * 1000000,
|
|
113
|
-
},
|
|
114
|
-
};
|
|
115
|
-
if (timer_settime(session->timer, 0, &its, NULL) == -1) {
|
|
116
|
-
rb_raise(rb_eRuntimeError, "Failed to start timer");
|
|
117
|
-
}
|
|
118
|
-
#else
|
|
119
|
-
// Use setitimer as fallback
|
|
120
|
-
// Some platforms (e.g. macOS) do not have timer_create(3).
|
|
121
|
-
// setitimer(3) can be used as a alternative, but has limited functionality.
|
|
122
97
|
global_current_session = session;
|
|
123
98
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
.
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
99
|
+
if (!session->configuration->_test_no_install_timer) {
|
|
100
|
+
#ifdef HAVE_TIMER_CREATE
|
|
101
|
+
// Configure a kernel timer to send SIGPROF periodically
|
|
102
|
+
struct sigevent sev;
|
|
103
|
+
sev.sigev_notify = SIGEV_SIGNAL;
|
|
104
|
+
sev.sigev_signo = SIGPROF;
|
|
105
|
+
if (timer_create(
|
|
106
|
+
session->configuration->time_mode == PF2_TIME_MODE_CPU_TIME
|
|
107
|
+
? CLOCK_PROCESS_CPUTIME_ID
|
|
108
|
+
: CLOCK_MONOTONIC,
|
|
109
|
+
&sev,
|
|
110
|
+
&session->timer
|
|
111
|
+
) == -1) {
|
|
112
|
+
rb_raise(rb_eRuntimeError, "Failed to create timer");
|
|
113
|
+
}
|
|
114
|
+
struct itimerspec its = {
|
|
115
|
+
.it_value = {
|
|
116
|
+
.tv_sec = 0,
|
|
117
|
+
.tv_nsec = session->configuration->interval_ms * 1000000,
|
|
118
|
+
},
|
|
119
|
+
.it_interval = {
|
|
120
|
+
.tv_sec = 0,
|
|
121
|
+
.tv_nsec = session->configuration->interval_ms * 1000000,
|
|
122
|
+
},
|
|
123
|
+
};
|
|
124
|
+
if (timer_settime(session->timer, 0, &its, NULL) == -1) {
|
|
125
|
+
rb_raise(rb_eRuntimeError, "Failed to start timer");
|
|
126
|
+
}
|
|
127
|
+
#else
|
|
128
|
+
// Use setitimer as fallback
|
|
129
|
+
// Some platforms (e.g. macOS) do not have timer_create(3).
|
|
130
|
+
// setitimer(3) can be used as an alternative, but has limited functionality.
|
|
131
|
+
|
|
132
|
+
struct itimerval itv = {
|
|
133
|
+
.it_value = {
|
|
134
|
+
.tv_sec = 0,
|
|
135
|
+
.tv_usec = session->configuration->interval_ms * 1000,
|
|
136
|
+
},
|
|
137
|
+
.it_interval = {
|
|
138
|
+
.tv_sec = 0,
|
|
139
|
+
.tv_usec = session->configuration->interval_ms * 1000,
|
|
140
|
+
},
|
|
141
|
+
};
|
|
142
|
+
int which_timer = session->configuration->time_mode == PF2_TIME_MODE_CPU_TIME
|
|
143
|
+
? ITIMER_PROF // CPU time (sends SIGPROF)
|
|
144
|
+
: ITIMER_REAL; // Wall time (sends SIGALRM)
|
|
145
|
+
|
|
146
|
+
if (setitimer(which_timer, &itv, NULL) == -1) {
|
|
147
|
+
rb_raise(rb_eRuntimeError, "Failed to start timer");
|
|
148
|
+
}
|
|
141
149
|
#endif
|
|
150
|
+
} // if !_test_no_install_timer
|
|
142
151
|
|
|
143
152
|
return Qtrue;
|
|
144
153
|
}
|
|
@@ -150,19 +159,9 @@ sample_collector_thread(void *arg)
|
|
|
150
159
|
|
|
151
160
|
while (session->is_running == true) {
|
|
152
161
|
// Take samples from the ring buffer
|
|
153
|
-
|
|
154
|
-
while (pf2_ringbuffer_pop(session->rbuf, &sample) == true) {
|
|
155
|
-
// Ensure we have capacity before adding a new sample
|
|
156
|
-
if (!ensure_sample_capacity(session)) {
|
|
157
|
-
// Failed to expand buffer
|
|
158
|
-
PF2_DEBUG_LOG("Failed to expand sample buffer. Dropping sample\n");
|
|
159
|
-
break;
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
session->samples[session->samples_index++] = sample;
|
|
163
|
-
}
|
|
162
|
+
drain_ringbuffer(session);
|
|
164
163
|
|
|
165
|
-
// Sleep for
|
|
164
|
+
// Sleep for 10 ms
|
|
166
165
|
// TODO: Replace with high watermark callback
|
|
167
166
|
struct timespec ts = { .tv_sec = 0, .tv_nsec = 10 * 1000000, }; // 10 ms
|
|
168
167
|
nanosleep(&ts, NULL);
|
|
@@ -171,6 +170,20 @@ sample_collector_thread(void *arg)
|
|
|
171
170
|
return NULL;
|
|
172
171
|
}
|
|
173
172
|
|
|
173
|
+
static void
|
|
174
|
+
drain_ringbuffer(struct pf2_session *session)
|
|
175
|
+
{
|
|
176
|
+
struct pf2_sample sample;
|
|
177
|
+
while (pf2_ringbuffer_pop(session->rbuf, &sample) == true) {
|
|
178
|
+
if (!insert_sample(session, &sample)) {
|
|
179
|
+
atomic_fetch_add_explicit(&session->dropped_sample_count, 1, memory_order_relaxed);
|
|
180
|
+
PF2_DEBUG_LOG("Failed to record sample. Dropping sample\n");
|
|
181
|
+
} else {
|
|
182
|
+
atomic_fetch_add_explicit(&session->collected_sample_count, 1, memory_order_relaxed);
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
|
|
174
187
|
// async-signal-safe
|
|
175
188
|
static void
|
|
176
189
|
sigprof_handler(int sig, siginfo_t *info, void *ucontext)
|
|
@@ -180,16 +193,12 @@ sigprof_handler(int sig, siginfo_t *info, void *ucontext)
|
|
|
180
193
|
clock_gettime(CLOCK_MONOTONIC, &sig_start_time);
|
|
181
194
|
#endif
|
|
182
195
|
|
|
183
|
-
struct pf2_session *session;
|
|
184
|
-
#ifdef HAVE_TIMER_CREATE
|
|
185
|
-
session = info->si_value.sival_ptr;
|
|
186
|
-
#else
|
|
187
|
-
session = global_current_session;
|
|
188
|
-
#endif
|
|
196
|
+
struct pf2_session *session = global_current_session;
|
|
189
197
|
|
|
190
198
|
// If garbage collection is in progress, don't collect samples.
|
|
191
199
|
if (atomic_load_explicit(&session->is_marking, memory_order_acquire)) {
|
|
192
200
|
PF2_DEBUG_LOG("Dropping sample: Garbage collection is in progress\n");
|
|
201
|
+
atomic_fetch_add_explicit(&session->dropped_sample_count, 1, memory_order_relaxed);
|
|
193
202
|
return;
|
|
194
203
|
}
|
|
195
204
|
|
|
@@ -197,6 +206,7 @@ sigprof_handler(int sig, siginfo_t *info, void *ucontext)
|
|
|
197
206
|
|
|
198
207
|
if (pf2_sample_capture(&sample) == false) {
|
|
199
208
|
PF2_DEBUG_LOG("Dropping sample: Failed to capture sample\n");
|
|
209
|
+
atomic_fetch_add_explicit(&session->dropped_sample_count, 1, memory_order_relaxed);
|
|
200
210
|
return;
|
|
201
211
|
}
|
|
202
212
|
|
|
@@ -204,6 +214,7 @@ sigprof_handler(int sig, siginfo_t *info, void *ucontext)
|
|
|
204
214
|
if (pf2_ringbuffer_push(session->rbuf, &sample) == false) {
|
|
205
215
|
// Copy failed. The sample buffer is full.
|
|
206
216
|
PF2_DEBUG_LOG("Dropping sample: Sample buffer is full\n");
|
|
217
|
+
atomic_fetch_add_explicit(&session->dropped_sample_count, 1, memory_order_relaxed);
|
|
207
218
|
return;
|
|
208
219
|
}
|
|
209
220
|
|
|
@@ -220,27 +231,117 @@ sigprof_handler(int sig, siginfo_t *info, void *ucontext)
|
|
|
220
231
|
#endif
|
|
221
232
|
}
|
|
222
233
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
234
|
+
static size_t
|
|
235
|
+
intern_location(struct pf2_session *session, VALUE cme, int lineno)
|
|
236
|
+
{
|
|
237
|
+
struct pf2_location_key key = { .cme = cme, .lineno = lineno };
|
|
238
|
+
int absent;
|
|
239
|
+
khint_t k = pf2_location_table_put(session->location_table, key, &absent);
|
|
240
|
+
if (k == kh_end(session->location_table)) { return (size_t)-1; }
|
|
241
|
+
if (absent) {
|
|
242
|
+
kh_val(session->location_table, k) = kh_size(session->location_table) - 1;
|
|
243
|
+
}
|
|
244
|
+
return kh_val(session->location_table, k);
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
static size_t
|
|
248
|
+
intern_stack(struct pf2_session *session, const size_t *frames, size_t depth)
|
|
227
249
|
{
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
250
|
+
struct pf2_stack_key skey = { .frames = frames, .depth = depth };
|
|
251
|
+
int absent;
|
|
252
|
+
khint_t k = pf2_stack_table_put(session->stack_table, skey, &absent);
|
|
253
|
+
if (k == kh_end(session->stack_table)) { return (size_t)-1; }
|
|
254
|
+
if (absent) {
|
|
255
|
+
size_t *copy = NULL;
|
|
256
|
+
if (depth > 0) {
|
|
257
|
+
copy = malloc(sizeof(size_t) * depth);
|
|
258
|
+
// TODO: if allocation fails, remove stack_table entry to avoid dangling stack-local key.
|
|
259
|
+
if (copy == NULL) return (size_t)-1;
|
|
260
|
+
memcpy(copy, frames, sizeof(size_t) * depth);
|
|
261
|
+
}
|
|
262
|
+
kh_key(session->stack_table, k).frames = copy;
|
|
263
|
+
kh_key(session->stack_table, k).depth = depth;
|
|
264
|
+
kh_val(session->stack_table, k) = kh_size(session->stack_table) - 1;
|
|
231
265
|
}
|
|
266
|
+
return kh_val(session->stack_table, k);
|
|
267
|
+
}
|
|
232
268
|
|
|
233
|
-
|
|
234
|
-
|
|
269
|
+
static size_t
|
|
270
|
+
intern_native_stack(struct pf2_session *session, const uintptr_t *frames, size_t depth)
|
|
271
|
+
{
|
|
272
|
+
struct pf2_native_stack_key skey = { .frames = frames, .depth = depth };
|
|
273
|
+
int absent;
|
|
274
|
+
khint_t k = pf2_native_stack_table_put(session->native_stack_table, skey, &absent);
|
|
275
|
+
if (k == kh_end(session->native_stack_table)) { return (size_t)-1; }
|
|
276
|
+
if (absent) {
|
|
277
|
+
uintptr_t *copy = NULL;
|
|
278
|
+
if (depth > 0) {
|
|
279
|
+
copy = malloc(sizeof(uintptr_t) * depth);
|
|
280
|
+
if (copy == NULL) return (size_t)-1;
|
|
281
|
+
memcpy(copy, frames, sizeof(uintptr_t) * depth);
|
|
282
|
+
}
|
|
283
|
+
kh_key(session->native_stack_table, k).frames = copy;
|
|
284
|
+
kh_key(session->native_stack_table, k).depth = depth;
|
|
285
|
+
kh_val(session->native_stack_table, k) = kh_size(session->native_stack_table) - 1;
|
|
286
|
+
}
|
|
287
|
+
return kh_val(session->native_stack_table, k);
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
static bool
|
|
291
|
+
insert_sample(struct pf2_session *session, const struct pf2_sample *sample)
|
|
292
|
+
{
|
|
293
|
+
size_t frame_ids[PF2_SAMPLE_MAX_RUBY_DEPTH];
|
|
235
294
|
|
|
236
|
-
//
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
return false;
|
|
295
|
+
// Convert each frame to a location
|
|
296
|
+
for (int i = 0; i < sample->depth; i++) {
|
|
297
|
+
frame_ids[i] = intern_location(session, sample->cmes[i], sample->linenos[i]);
|
|
298
|
+
if (frame_ids[i] == (size_t)-1) { return false; }
|
|
240
299
|
}
|
|
241
300
|
|
|
242
|
-
|
|
243
|
-
|
|
301
|
+
// Obtain stack_id for the array of locations
|
|
302
|
+
size_t stack_id = intern_stack(session, frame_ids, (size_t)sample->depth);
|
|
303
|
+
if (stack_id == (size_t)-1) { return false; }
|
|
304
|
+
|
|
305
|
+
size_t native_stack_id = intern_native_stack(session, sample->native_stack, sample->native_stack_depth);
|
|
306
|
+
if (native_stack_id == (size_t)-1) { return false; }
|
|
307
|
+
|
|
308
|
+
// Increment the observation count for this stack_id
|
|
309
|
+
int absent;
|
|
310
|
+
struct pf2_combined_stack_key ckey = {
|
|
311
|
+
.ruby_stack_id = stack_id,
|
|
312
|
+
.native_stack_id = native_stack_id
|
|
313
|
+
};
|
|
314
|
+
khint_t k = pf2_sample_table_put(session->sample_table, ckey, &absent);
|
|
315
|
+
if (k == kh_end(session->sample_table)) { return false; }
|
|
316
|
+
struct pf2_sample_stats *stats = &kh_val(session->sample_table, k);
|
|
317
|
+
if (absent) {
|
|
318
|
+
// This is the first time this stack was observed. Initialize stats.
|
|
319
|
+
stats->count = 0;
|
|
320
|
+
stats->timestamps = NULL;
|
|
321
|
+
stats->thread_ids = NULL;
|
|
322
|
+
stats->timestamps_count = 0;
|
|
323
|
+
stats->timestamps_capacity = 0;
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
// count
|
|
327
|
+
stats->count += 1;
|
|
328
|
+
// timestamps
|
|
329
|
+
if (stats->timestamps_count == stats->timestamps_capacity) {
|
|
330
|
+
size_t new_cap = stats->timestamps_capacity ? stats->timestamps_capacity * 2 : 16;
|
|
331
|
+
uint64_t *new_ts = realloc(stats->timestamps, sizeof(uint64_t) * new_cap);
|
|
332
|
+
uintptr_t *new_threads = realloc(stats->thread_ids, sizeof(uintptr_t) * new_cap);
|
|
333
|
+
if (new_ts == NULL || new_threads == NULL) {
|
|
334
|
+
free(new_ts);
|
|
335
|
+
free(new_threads);
|
|
336
|
+
return false;
|
|
337
|
+
}
|
|
338
|
+
stats->timestamps = new_ts;
|
|
339
|
+
stats->thread_ids = new_threads;
|
|
340
|
+
stats->timestamps_capacity = new_cap;
|
|
341
|
+
}
|
|
342
|
+
stats->timestamps[stats->timestamps_count] = sample->timestamp_ns;
|
|
343
|
+
stats->thread_ids[stats->timestamps_count] = (uintptr_t)sample->context_pthread;
|
|
344
|
+
stats->timestamps_count++;
|
|
244
345
|
|
|
245
346
|
return true;
|
|
246
347
|
}
|
|
@@ -274,8 +375,10 @@ pf2_session_stop(struct pf2_session *session)
|
|
|
274
375
|
|
|
275
376
|
// Disarm and delete the timer.
|
|
276
377
|
#ifdef HAVE_TIMER_CREATE
|
|
277
|
-
if (
|
|
278
|
-
|
|
378
|
+
if (!session->configuration->_test_no_install_timer) {
|
|
379
|
+
if (timer_delete(session->timer) == -1) {
|
|
380
|
+
rb_raise(rb_eRuntimeError, "Failed to delete timer");
|
|
381
|
+
}
|
|
279
382
|
}
|
|
280
383
|
#else
|
|
281
384
|
struct itimerval zero_timer = {{0, 0}, {0, 0}};
|
|
@@ -291,6 +394,7 @@ pf2_session_stop(struct pf2_session *session)
|
|
|
291
394
|
// Terminate the collector thread
|
|
292
395
|
session->is_running = false;
|
|
293
396
|
pthread_join(*session->collector_thread, NULL);
|
|
397
|
+
drain_ringbuffer(session);
|
|
294
398
|
}
|
|
295
399
|
|
|
296
400
|
VALUE
|
|
@@ -306,7 +410,7 @@ pf2_session_alloc(VALUE self)
|
|
|
306
410
|
{
|
|
307
411
|
// Initialize state for libbacktrace
|
|
308
412
|
if (global_backtrace_state == NULL) {
|
|
309
|
-
global_backtrace_state = backtrace_create_state(
|
|
413
|
+
global_backtrace_state = backtrace_create_state(NULL, 1, pf2_backtrace_print_error, NULL);
|
|
310
414
|
if (global_backtrace_state == NULL) {
|
|
311
415
|
rb_raise(rb_eRuntimeError, "Failed to initialize libbacktrace");
|
|
312
416
|
}
|
|
@@ -342,14 +446,28 @@ pf2_session_alloc(VALUE self)
|
|
|
342
446
|
rb_raise(rb_eNoMemError, "Failed to allocate memory");
|
|
343
447
|
}
|
|
344
448
|
|
|
345
|
-
//
|
|
346
|
-
session->
|
|
347
|
-
session->
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
449
|
+
// location_table, stack_table, native_stack_table, sample_table
|
|
450
|
+
session->location_table = pf2_location_table_init();
|
|
451
|
+
if (session->location_table == NULL) {
|
|
452
|
+
rb_raise(rb_eNoMemError, "Failed to allocate location table");
|
|
453
|
+
}
|
|
454
|
+
session->stack_table = pf2_stack_table_init();
|
|
455
|
+
if (session->stack_table == NULL) {
|
|
456
|
+
rb_raise(rb_eNoMemError, "Failed to allocate stack table");
|
|
457
|
+
}
|
|
458
|
+
session->native_stack_table = pf2_native_stack_table_init();
|
|
459
|
+
if (session->native_stack_table == NULL) {
|
|
460
|
+
rb_raise(rb_eNoMemError, "Failed to allocate native stack table");
|
|
461
|
+
}
|
|
462
|
+
session->sample_table = pf2_sample_table_init();
|
|
463
|
+
if (session->sample_table == NULL) {
|
|
464
|
+
rb_raise(rb_eNoMemError, "Failed to allocate stack sample table");
|
|
351
465
|
}
|
|
352
466
|
|
|
467
|
+
// collected_sample_count, dropped_sample_count
|
|
468
|
+
atomic_store_explicit(&session->collected_sample_count, 0, memory_order_relaxed);
|
|
469
|
+
atomic_store_explicit(&session->dropped_sample_count, 0, memory_order_relaxed);
|
|
470
|
+
|
|
353
471
|
// start_time_realtime, start_time
|
|
354
472
|
session->start_time_realtime = (struct timespec){0};
|
|
355
473
|
session->start_time = (struct timespec){0};
|
|
@@ -385,11 +503,11 @@ pf2_session_dmark(void *sess)
|
|
|
385
503
|
head = (head + 1) % rbuf->size;
|
|
386
504
|
}
|
|
387
505
|
|
|
388
|
-
//
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
rb_gc_mark(
|
|
506
|
+
// Mark Ruby VALUEs stored in location_table keys
|
|
507
|
+
if (session->location_table) {
|
|
508
|
+
khint_t k;
|
|
509
|
+
kh_foreach(session->location_table, k) {
|
|
510
|
+
rb_gc_mark(kh_key(session->location_table, k).cme);
|
|
393
511
|
}
|
|
394
512
|
}
|
|
395
513
|
|
|
@@ -411,7 +529,33 @@ pf2_session_dfree(void *sess)
|
|
|
411
529
|
|
|
412
530
|
pf2_configuration_free(session->configuration);
|
|
413
531
|
pf2_ringbuffer_free(session->rbuf);
|
|
414
|
-
|
|
532
|
+
|
|
533
|
+
if (session->sample_table) {
|
|
534
|
+
khint_t k;
|
|
535
|
+
kh_foreach(session->sample_table, k) {
|
|
536
|
+
free(kh_val(session->sample_table, k).timestamps);
|
|
537
|
+
free(kh_val(session->sample_table, k).thread_ids);
|
|
538
|
+
}
|
|
539
|
+
pf2_sample_table_destroy(session->sample_table);
|
|
540
|
+
}
|
|
541
|
+
if (session->stack_table) {
|
|
542
|
+
khint_t k;
|
|
543
|
+
kh_foreach(session->stack_table, k) {
|
|
544
|
+
free((void *)kh_key(session->stack_table, k).frames);
|
|
545
|
+
}
|
|
546
|
+
pf2_stack_table_destroy(session->stack_table);
|
|
547
|
+
}
|
|
548
|
+
if (session->native_stack_table) {
|
|
549
|
+
khint_t k;
|
|
550
|
+
kh_foreach(session->native_stack_table, k) {
|
|
551
|
+
free((void *)kh_key(session->native_stack_table, k).frames);
|
|
552
|
+
}
|
|
553
|
+
pf2_native_stack_table_destroy(session->native_stack_table);
|
|
554
|
+
}
|
|
555
|
+
if (session->location_table) {
|
|
556
|
+
pf2_location_table_destroy(session->location_table);
|
|
557
|
+
}
|
|
558
|
+
|
|
415
559
|
free(session->collector_thread);
|
|
416
560
|
free(session);
|
|
417
561
|
}
|
|
@@ -422,7 +566,6 @@ pf2_session_dsize(const void *sess)
|
|
|
422
566
|
const struct pf2_session *session = sess;
|
|
423
567
|
return (
|
|
424
568
|
sizeof(struct pf2_session)
|
|
425
|
-
+ sizeof(struct pf2_sample) * session->samples_capacity
|
|
426
569
|
+ sizeof(struct pf2_sample) * session->rbuf->size
|
|
427
570
|
);
|
|
428
571
|
}
|
data/ext/pf2/session.h
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
|
|
4
4
|
#include <pthread.h>
|
|
5
5
|
#include <stdatomic.h>
|
|
6
|
+
#include <stdint.h>
|
|
7
|
+
#include <limits.h>
|
|
6
8
|
#include <sys/time.h>
|
|
7
9
|
|
|
8
10
|
#include <ruby.h>
|
|
@@ -11,6 +13,152 @@
|
|
|
11
13
|
#include "ringbuffer.h"
|
|
12
14
|
#include "sample.h"
|
|
13
15
|
|
|
16
|
+
#include "khashl.h"
|
|
17
|
+
|
|
18
|
+
// Maps for sample storage
|
|
19
|
+
|
|
20
|
+
// BEGIN generic helpers
|
|
21
|
+
|
|
22
|
+
static inline khint_t hash_size_t(size_t v)
|
|
23
|
+
{
|
|
24
|
+
#if SIZE_MAX == UINT_MAX
|
|
25
|
+
return kh_hash_uint32((khint_t)v);
|
|
26
|
+
#else
|
|
27
|
+
return kh_hash_uint64((khint64_t)v);
|
|
28
|
+
#endif
|
|
29
|
+
}
|
|
30
|
+
static inline int eq_size_t(size_t a, size_t b) { return a == b; }
|
|
31
|
+
|
|
32
|
+
// END generic helpers
|
|
33
|
+
|
|
34
|
+
// BEGIN location_table
|
|
35
|
+
|
|
36
|
+
struct pf2_location_key {
|
|
37
|
+
VALUE cme;
|
|
38
|
+
int lineno;
|
|
39
|
+
};
|
|
40
|
+
static inline khint_t hash_location_key(struct pf2_location_key key)
|
|
41
|
+
{
|
|
42
|
+
khint_t h = hash_size_t((size_t)key.cme);
|
|
43
|
+
h ^= (khint_t)key.lineno + 0x9e3779b9U + (h << 6) + (h >> 2);
|
|
44
|
+
return h;
|
|
45
|
+
}
|
|
46
|
+
static inline int eq_location_key(struct pf2_location_key a, struct pf2_location_key b)
|
|
47
|
+
{
|
|
48
|
+
return a.cme == b.cme && a.lineno == b.lineno;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// END location_table
|
|
52
|
+
|
|
53
|
+
// BEGIN stack_table (Ruby stack)
|
|
54
|
+
|
|
55
|
+
struct pf2_stack_key {
|
|
56
|
+
const size_t *frames; // pointer to an immutable array of location_ids
|
|
57
|
+
size_t depth;
|
|
58
|
+
};
|
|
59
|
+
static inline khint_t hash_stack_key(struct pf2_stack_key key)
|
|
60
|
+
{
|
|
61
|
+
khint_t h = hash_size_t(key.depth);
|
|
62
|
+
for (size_t i = 0; i < key.depth; i++) {
|
|
63
|
+
h ^= hash_size_t(key.frames[i]) + 0x9e3779b9U + (h << 6) + (h >> 2);
|
|
64
|
+
}
|
|
65
|
+
return h;
|
|
66
|
+
}
|
|
67
|
+
static inline int eq_stack_key(struct pf2_stack_key a, struct pf2_stack_key b)
|
|
68
|
+
{
|
|
69
|
+
if (a.depth != b.depth) return 0;
|
|
70
|
+
for (size_t i = 0; i < a.depth; i++) {
|
|
71
|
+
if (a.frames[i] != b.frames[i]) return 0;
|
|
72
|
+
}
|
|
73
|
+
return 1;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// END stack_table
|
|
77
|
+
|
|
78
|
+
// BEGIN native_stack_table (raw PCs)
|
|
79
|
+
|
|
80
|
+
struct pf2_native_stack_key {
|
|
81
|
+
const uintptr_t *frames; // pointer to an immutable array of PCs
|
|
82
|
+
size_t depth;
|
|
83
|
+
};
|
|
84
|
+
static inline khint_t hash_native_stack_key(struct pf2_native_stack_key key)
|
|
85
|
+
{
|
|
86
|
+
khint_t h = hash_size_t(key.depth);
|
|
87
|
+
for (size_t i = 0; i < key.depth; i++) {
|
|
88
|
+
h ^= kh_hash_uint64((khint64_t)key.frames[i]) + 0x9e3779b9U + (h << 6) + (h >> 2);
|
|
89
|
+
}
|
|
90
|
+
return h;
|
|
91
|
+
}
|
|
92
|
+
static inline int eq_native_stack_key(struct pf2_native_stack_key a, struct pf2_native_stack_key b)
|
|
93
|
+
{
|
|
94
|
+
if (a.depth != b.depth) return 0;
|
|
95
|
+
for (size_t i = 0; i < a.depth; i++) {
|
|
96
|
+
if (a.frames[i] != b.frames[i]) return 0;
|
|
97
|
+
}
|
|
98
|
+
return 1;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// END native_stack_table
|
|
102
|
+
|
|
103
|
+
// BEGIN combined_sample_table
|
|
104
|
+
|
|
105
|
+
struct pf2_combined_stack_key {
|
|
106
|
+
size_t ruby_stack_id;
|
|
107
|
+
size_t native_stack_id;
|
|
108
|
+
};
|
|
109
|
+
static inline khint_t hash_combined_stack_key(struct pf2_combined_stack_key key)
|
|
110
|
+
{
|
|
111
|
+
khint_t h = hash_size_t(key.ruby_stack_id);
|
|
112
|
+
h ^= hash_size_t(key.native_stack_id) + 0x9e3779b9U + (h << 6) + (h >> 2);
|
|
113
|
+
return h;
|
|
114
|
+
}
|
|
115
|
+
static inline int eq_combined_stack_key(struct pf2_combined_stack_key a, struct pf2_combined_stack_key b)
|
|
116
|
+
{
|
|
117
|
+
return a.ruby_stack_id == b.ruby_stack_id && a.native_stack_id == b.native_stack_id;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// END combined_sample_table
|
|
121
|
+
|
|
122
|
+
struct pf2_sample_stats {
|
|
123
|
+
// The number of times this sample was observed.
|
|
124
|
+
size_t count;
|
|
125
|
+
// Timestamps at which this sample was observed. This array's length = # of samples.
|
|
126
|
+
// TODO: Make timestamp collection optional?
|
|
127
|
+
uint64_t *timestamps;
|
|
128
|
+
// Thread ids corresponding to each timestamp.
|
|
129
|
+
uintptr_t *thread_ids;
|
|
130
|
+
// timestamps.length
|
|
131
|
+
size_t timestamps_count;
|
|
132
|
+
size_t timestamps_capacity;
|
|
133
|
+
};
|
|
134
|
+
|
|
135
|
+
#pragma GCC diagnostic push
|
|
136
|
+
#pragma GCC diagnostic ignored "-Wunused-function"
|
|
137
|
+
// location table: key = (cme, lineno), val = location_id
|
|
138
|
+
KHASHL_MAP_INIT(static, pf2_location_table, pf2_location_table, struct pf2_location_key, size_t, hash_location_key, eq_location_key)
|
|
139
|
+
// stack table: key = array of location_ids, val = stack_id
|
|
140
|
+
KHASHL_MAP_INIT(static, pf2_stack_table, pf2_stack_table, struct pf2_stack_key, size_t, hash_stack_key, eq_stack_key)
|
|
141
|
+
// native stack table: key = array of PCs, val = native_stack_id
|
|
142
|
+
KHASHL_MAP_INIT(static, pf2_native_stack_table, pf2_native_stack_table, struct pf2_native_stack_key, size_t, hash_native_stack_key, eq_native_stack_key)
|
|
143
|
+
// sample table: key = (ruby_stack_id, native_stack_id), val = aggregated counts/timestamps
|
|
144
|
+
KHASHL_MAP_INIT(static, pf2_sample_table, pf2_sample_table, struct pf2_combined_stack_key, struct pf2_sample_stats, hash_combined_stack_key, eq_combined_stack_key)
|
|
145
|
+
#pragma GCC diagnostic pop
|
|
146
|
+
|
|
147
|
+
struct pf2_sess_sample {
|
|
148
|
+
size_t *stack; // array of location_indexes
|
|
149
|
+
size_t stack_count;
|
|
150
|
+
size_t *native_stack; // array of location_indexes
|
|
151
|
+
size_t native_stack_count;
|
|
152
|
+
uintptr_t ruby_thread_id;
|
|
153
|
+
uint64_t elapsed_ns;
|
|
154
|
+
};
|
|
155
|
+
|
|
156
|
+
struct pf2_sess_location {
|
|
157
|
+
size_t function_index;
|
|
158
|
+
int32_t lineno;
|
|
159
|
+
size_t address;
|
|
160
|
+
};
|
|
161
|
+
|
|
14
162
|
struct pf2_session {
|
|
15
163
|
bool is_running;
|
|
16
164
|
#ifdef HAVE_TIMER_CREATE
|
|
@@ -22,14 +170,18 @@ struct pf2_session {
|
|
|
22
170
|
atomic_bool is_marking; // Whether garbage collection is in progress
|
|
23
171
|
pthread_t *collector_thread;
|
|
24
172
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
173
|
+
pf2_location_table *location_table;
|
|
174
|
+
pf2_stack_table *stack_table;
|
|
175
|
+
pf2_native_stack_table *native_stack_table;
|
|
176
|
+
pf2_sample_table *sample_table;
|
|
28
177
|
|
|
29
178
|
struct timespec start_time_realtime;
|
|
30
179
|
struct timespec start_time; // When profiling started
|
|
31
180
|
uint64_t duration_ns; // Duration of profiling in nanoseconds
|
|
32
181
|
|
|
182
|
+
atomic_uint_fast64_t collected_sample_count; // Number of samples copied out of the ringbuffer
|
|
183
|
+
atomic_uint_fast64_t dropped_sample_count; // Number of samples dropped for any reason
|
|
184
|
+
|
|
33
185
|
struct pf2_configuration *configuration;
|
|
34
186
|
};
|
|
35
187
|
|