pf2 0.11.3 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/pf2/session.c CHANGED
@@ -4,6 +4,7 @@
4
4
  #include <stdbool.h>
5
5
  #include <stdio.h>
6
6
  #include <stdlib.h>
7
+ #include <string.h>
7
8
  #include <sys/time.h>
8
9
  #include <time.h>
9
10
 
@@ -19,15 +20,17 @@
19
20
  #include "session.h"
20
21
  #include "serializer.h"
21
22
 
22
- #ifndef HAVE_TIMER_CREATE
23
- // Global session pointer for setitimer fallback
23
+ // Pointer to current active session, for access from signal handlers
24
24
  static struct pf2_session *global_current_session = NULL;
25
- #endif
26
25
 
27
26
  static void *sample_collector_thread(void *arg);
27
+ static void drain_ringbuffer(struct pf2_session *session);
28
28
  static void sigprof_handler(int sig, siginfo_t *info, void *ucontext);
29
- bool ensure_sample_capacity(struct pf2_session *session);
30
29
  static void pf2_session_stop(struct pf2_session *session);
30
+ static size_t intern_location(struct pf2_session *session, VALUE cme, int lineno);
31
+ static size_t intern_stack(struct pf2_session *session, const size_t *frames, size_t depth);
32
+ static size_t intern_native_stack(struct pf2_session *session, const uintptr_t *frames, size_t depth);
33
+ static bool insert_sample(struct pf2_session *session, const struct pf2_sample *sample);
31
34
 
32
35
  VALUE
33
36
  rb_pf2_session_initialize(int argc, VALUE *argv, VALUE self)
@@ -40,10 +43,11 @@ rb_pf2_session_initialize(int argc, VALUE *argv, VALUE self)
40
43
  rb_scan_args(argc, argv, ":", &kwargs);
41
44
  ID kwarg_labels[] = {
42
45
  rb_intern("interval_ms"),
43
- rb_intern("time_mode")
46
+ rb_intern("time_mode"),
47
+ rb_intern("_test_no_install_timer")
44
48
  };
45
49
  VALUE *kwarg_values = NULL;
46
- rb_get_kwargs(kwargs, kwarg_labels, 0, 2, kwarg_values);
50
+ rb_get_kwargs(kwargs, kwarg_labels, 0, 3, kwarg_values);
47
51
 
48
52
  session->configuration = pf2_configuration_new_from_options_hash(kwargs);
49
53
 
@@ -56,6 +60,9 @@ rb_pf2_session_start(VALUE self)
56
60
  struct pf2_session *session;
57
61
  TypedData_Get_Struct(self, struct pf2_session, &pf2_session_type, session);
58
62
 
63
+ // Store pointer to current session for access from signal handlers
64
+ global_current_session = session;
65
+
59
66
  session->is_running = true;
60
67
 
61
68
  // Record start time
@@ -87,58 +94,60 @@ rb_pf2_session_start(VALUE self)
87
94
  }
88
95
  #endif
89
96
 
90
- #ifdef HAVE_TIMER_CREATE
91
- // Configure a kernel timer to send SIGPROF periodically
92
- struct sigevent sev;
93
- sev.sigev_notify = SIGEV_SIGNAL;
94
- sev.sigev_signo = SIGPROF;
95
- sev.sigev_value.sival_ptr = session; // Passed as info->si_value.sival_ptr
96
- if (timer_create(
97
- session->configuration->time_mode == PF2_TIME_MODE_CPU_TIME
98
- ? CLOCK_PROCESS_CPUTIME_ID
99
- : CLOCK_MONOTONIC,
100
- &sev,
101
- &session->timer
102
- ) == -1) {
103
- rb_raise(rb_eRuntimeError, "Failed to create timer");
104
- }
105
- struct itimerspec its = {
106
- .it_value = {
107
- .tv_sec = 0,
108
- .tv_nsec = session->configuration->interval_ms * 1000000,
109
- },
110
- .it_interval = {
111
- .tv_sec = 0,
112
- .tv_nsec = session->configuration->interval_ms * 1000000,
113
- },
114
- };
115
- if (timer_settime(session->timer, 0, &its, NULL) == -1) {
116
- rb_raise(rb_eRuntimeError, "Failed to start timer");
117
- }
118
- #else
119
- // Use setitimer as fallback
120
- // Some platforms (e.g. macOS) do not have timer_create(3).
121
- // setitimer(3) can be used as a alternative, but has limited functionality.
122
97
  global_current_session = session;
123
98
 
124
- struct itimerval itv = {
125
- .it_value = {
126
- .tv_sec = 0,
127
- .tv_usec = session->configuration->interval_ms * 1000,
128
- },
129
- .it_interval = {
130
- .tv_sec = 0,
131
- .tv_usec = session->configuration->interval_ms * 1000,
132
- },
133
- };
134
- int which_timer = session->configuration->time_mode == PF2_TIME_MODE_CPU_TIME
135
- ? ITIMER_PROF // CPU time (sends SIGPROF)
136
- : ITIMER_REAL; // Wall time (sends SIGALRM)
137
-
138
- if (setitimer(which_timer, &itv, NULL) == -1) {
139
- rb_raise(rb_eRuntimeError, "Failed to start timer");
140
- }
99
+ if (!session->configuration->_test_no_install_timer) {
100
+ #ifdef HAVE_TIMER_CREATE
101
+ // Configure a kernel timer to send SIGPROF periodically
102
+ struct sigevent sev;
103
+ sev.sigev_notify = SIGEV_SIGNAL;
104
+ sev.sigev_signo = SIGPROF;
105
+ if (timer_create(
106
+ session->configuration->time_mode == PF2_TIME_MODE_CPU_TIME
107
+ ? CLOCK_PROCESS_CPUTIME_ID
108
+ : CLOCK_MONOTONIC,
109
+ &sev,
110
+ &session->timer
111
+ ) == -1) {
112
+ rb_raise(rb_eRuntimeError, "Failed to create timer");
113
+ }
114
+ struct itimerspec its = {
115
+ .it_value = {
116
+ .tv_sec = 0,
117
+ .tv_nsec = session->configuration->interval_ms * 1000000,
118
+ },
119
+ .it_interval = {
120
+ .tv_sec = 0,
121
+ .tv_nsec = session->configuration->interval_ms * 1000000,
122
+ },
123
+ };
124
+ if (timer_settime(session->timer, 0, &its, NULL) == -1) {
125
+ rb_raise(rb_eRuntimeError, "Failed to start timer");
126
+ }
127
+ #else
128
+ // Use setitimer as fallback
129
+ // Some platforms (e.g. macOS) do not have timer_create(3).
130
+ // setitimer(3) can be used as an alternative, but has limited functionality.
131
+
132
+ struct itimerval itv = {
133
+ .it_value = {
134
+ .tv_sec = 0,
135
+ .tv_usec = session->configuration->interval_ms * 1000,
136
+ },
137
+ .it_interval = {
138
+ .tv_sec = 0,
139
+ .tv_usec = session->configuration->interval_ms * 1000,
140
+ },
141
+ };
142
+ int which_timer = session->configuration->time_mode == PF2_TIME_MODE_CPU_TIME
143
+ ? ITIMER_PROF // CPU time (sends SIGPROF)
144
+ : ITIMER_REAL; // Wall time (sends SIGALRM)
145
+
146
+ if (setitimer(which_timer, &itv, NULL) == -1) {
147
+ rb_raise(rb_eRuntimeError, "Failed to start timer");
148
+ }
141
149
  #endif
150
+ } // if !_test_no_install_timer
142
151
 
143
152
  return Qtrue;
144
153
  }
@@ -150,19 +159,9 @@ sample_collector_thread(void *arg)
150
159
 
151
160
  while (session->is_running == true) {
152
161
  // Take samples from the ring buffer
153
- struct pf2_sample sample;
154
- while (pf2_ringbuffer_pop(session->rbuf, &sample) == true) {
155
- // Ensure we have capacity before adding a new sample
156
- if (!ensure_sample_capacity(session)) {
157
- // Failed to expand buffer
158
- PF2_DEBUG_LOG("Failed to expand sample buffer. Dropping sample\n");
159
- break;
160
- }
161
-
162
- session->samples[session->samples_index++] = sample;
163
- }
162
+ drain_ringbuffer(session);
164
163
 
165
- // Sleep for 100 ms
164
+ // Sleep for 10 ms
166
165
  // TODO: Replace with high watermark callback
167
166
  struct timespec ts = { .tv_sec = 0, .tv_nsec = 10 * 1000000, }; // 10 ms
168
167
  nanosleep(&ts, NULL);
@@ -171,6 +170,20 @@ sample_collector_thread(void *arg)
171
170
  return NULL;
172
171
  }
173
172
 
173
+ static void
174
+ drain_ringbuffer(struct pf2_session *session)
175
+ {
176
+ struct pf2_sample sample;
177
+ while (pf2_ringbuffer_pop(session->rbuf, &sample) == true) {
178
+ if (!insert_sample(session, &sample)) {
179
+ atomic_fetch_add_explicit(&session->dropped_sample_count, 1, memory_order_relaxed);
180
+ PF2_DEBUG_LOG("Failed to record sample. Dropping sample\n");
181
+ } else {
182
+ atomic_fetch_add_explicit(&session->collected_sample_count, 1, memory_order_relaxed);
183
+ }
184
+ }
185
+ }
186
+
174
187
  // async-signal-safe
175
188
  static void
176
189
  sigprof_handler(int sig, siginfo_t *info, void *ucontext)
@@ -180,16 +193,12 @@ sigprof_handler(int sig, siginfo_t *info, void *ucontext)
180
193
  clock_gettime(CLOCK_MONOTONIC, &sig_start_time);
181
194
  #endif
182
195
 
183
- struct pf2_session *session;
184
- #ifdef HAVE_TIMER_CREATE
185
- session = info->si_value.sival_ptr;
186
- #else
187
- session = global_current_session;
188
- #endif
196
+ struct pf2_session *session = global_current_session;
189
197
 
190
198
  // If garbage collection is in progress, don't collect samples.
191
199
  if (atomic_load_explicit(&session->is_marking, memory_order_acquire)) {
192
200
  PF2_DEBUG_LOG("Dropping sample: Garbage collection is in progress\n");
201
+ atomic_fetch_add_explicit(&session->dropped_sample_count, 1, memory_order_relaxed);
193
202
  return;
194
203
  }
195
204
 
@@ -197,6 +206,7 @@ sigprof_handler(int sig, siginfo_t *info, void *ucontext)
197
206
 
198
207
  if (pf2_sample_capture(&sample) == false) {
199
208
  PF2_DEBUG_LOG("Dropping sample: Failed to capture sample\n");
209
+ atomic_fetch_add_explicit(&session->dropped_sample_count, 1, memory_order_relaxed);
200
210
  return;
201
211
  }
202
212
 
@@ -204,6 +214,7 @@ sigprof_handler(int sig, siginfo_t *info, void *ucontext)
204
214
  if (pf2_ringbuffer_push(session->rbuf, &sample) == false) {
205
215
  // Copy failed. The sample buffer is full.
206
216
  PF2_DEBUG_LOG("Dropping sample: Sample buffer is full\n");
217
+ atomic_fetch_add_explicit(&session->dropped_sample_count, 1, memory_order_relaxed);
207
218
  return;
208
219
  }
209
220
 
@@ -220,27 +231,117 @@ sigprof_handler(int sig, siginfo_t *info, void *ucontext)
220
231
  #endif
221
232
  }
222
233
 
223
- // Ensures that the session's sample array has capacity for at least one more sample
224
- // Returns true if successful, false if memory allocation failed
225
- bool
226
- ensure_sample_capacity(struct pf2_session *session)
234
+ static size_t
235
+ intern_location(struct pf2_session *session, VALUE cme, int lineno)
236
+ {
237
+ struct pf2_location_key key = { .cme = cme, .lineno = lineno };
238
+ int absent;
239
+ khint_t k = pf2_location_table_put(session->location_table, key, &absent);
240
+ if (k == kh_end(session->location_table)) { return (size_t)-1; }
241
+ if (absent) {
242
+ kh_val(session->location_table, k) = kh_size(session->location_table) - 1;
243
+ }
244
+ return kh_val(session->location_table, k);
245
+ }
246
+
247
+ static size_t
248
+ intern_stack(struct pf2_session *session, const size_t *frames, size_t depth)
227
249
  {
228
- // Check if we need to expand
229
- if (session->samples_index < session->samples_capacity) {
230
- return true;
250
+ struct pf2_stack_key skey = { .frames = frames, .depth = depth };
251
+ int absent;
252
+ khint_t k = pf2_stack_table_put(session->stack_table, skey, &absent);
253
+ if (k == kh_end(session->stack_table)) { return (size_t)-1; }
254
+ if (absent) {
255
+ size_t *copy = NULL;
256
+ if (depth > 0) {
257
+ copy = malloc(sizeof(size_t) * depth);
258
+ // TODO: if allocation fails, remove stack_table entry to avoid dangling stack-local key.
259
+ if (copy == NULL) return (size_t)-1;
260
+ memcpy(copy, frames, sizeof(size_t) * depth);
261
+ }
262
+ kh_key(session->stack_table, k).frames = copy;
263
+ kh_key(session->stack_table, k).depth = depth;
264
+ kh_val(session->stack_table, k) = kh_size(session->stack_table) - 1;
231
265
  }
266
+ return kh_val(session->stack_table, k);
267
+ }
232
268
 
233
- // Calculate new size (double the current size)
234
- size_t new_capacity = session->samples_capacity * 2;
269
+ static size_t
270
+ intern_native_stack(struct pf2_session *session, const uintptr_t *frames, size_t depth)
271
+ {
272
+ struct pf2_native_stack_key skey = { .frames = frames, .depth = depth };
273
+ int absent;
274
+ khint_t k = pf2_native_stack_table_put(session->native_stack_table, skey, &absent);
275
+ if (k == kh_end(session->native_stack_table)) { return (size_t)-1; }
276
+ if (absent) {
277
+ uintptr_t *copy = NULL;
278
+ if (depth > 0) {
279
+ copy = malloc(sizeof(uintptr_t) * depth);
280
+ if (copy == NULL) return (size_t)-1;
281
+ memcpy(copy, frames, sizeof(uintptr_t) * depth);
282
+ }
283
+ kh_key(session->native_stack_table, k).frames = copy;
284
+ kh_key(session->native_stack_table, k).depth = depth;
285
+ kh_val(session->native_stack_table, k) = kh_size(session->native_stack_table) - 1;
286
+ }
287
+ return kh_val(session->native_stack_table, k);
288
+ }
289
+
290
+ static bool
291
+ insert_sample(struct pf2_session *session, const struct pf2_sample *sample)
292
+ {
293
+ size_t frame_ids[PF2_SAMPLE_MAX_RUBY_DEPTH];
235
294
 
236
- // Reallocate the array
237
- struct pf2_sample *new_samples = realloc(session->samples, new_capacity * sizeof(struct pf2_sample));
238
- if (new_samples == NULL) {
239
- return false;
295
+ // Convert each frame to a location
296
+ for (int i = 0; i < sample->depth; i++) {
297
+ frame_ids[i] = intern_location(session, sample->cmes[i], sample->linenos[i]);
298
+ if (frame_ids[i] == (size_t)-1) { return false; }
240
299
  }
241
300
 
242
- session->samples = new_samples;
243
- session->samples_capacity = new_capacity;
301
+ // Obtain stack_id for the array of locations
302
+ size_t stack_id = intern_stack(session, frame_ids, (size_t)sample->depth);
303
+ if (stack_id == (size_t)-1) { return false; }
304
+
305
+ size_t native_stack_id = intern_native_stack(session, sample->native_stack, sample->native_stack_depth);
306
+ if (native_stack_id == (size_t)-1) { return false; }
307
+
308
+ // Increment the observation count for this stack_id
309
+ int absent;
310
+ struct pf2_combined_stack_key ckey = {
311
+ .ruby_stack_id = stack_id,
312
+ .native_stack_id = native_stack_id
313
+ };
314
+ khint_t k = pf2_sample_table_put(session->sample_table, ckey, &absent);
315
+ if (k == kh_end(session->sample_table)) { return false; }
316
+ struct pf2_sample_stats *stats = &kh_val(session->sample_table, k);
317
+ if (absent) {
318
+ // This is the first time this stack was observed. Initialize stats.
319
+ stats->count = 0;
320
+ stats->timestamps = NULL;
321
+ stats->thread_ids = NULL;
322
+ stats->timestamps_count = 0;
323
+ stats->timestamps_capacity = 0;
324
+ }
325
+
326
+ // count
327
+ stats->count += 1;
328
+ // timestamps
329
+ if (stats->timestamps_count == stats->timestamps_capacity) {
330
+ size_t new_cap = stats->timestamps_capacity ? stats->timestamps_capacity * 2 : 16;
331
+ uint64_t *new_ts = realloc(stats->timestamps, sizeof(uint64_t) * new_cap);
332
+ uintptr_t *new_threads = realloc(stats->thread_ids, sizeof(uintptr_t) * new_cap);
333
+ if (new_ts == NULL || new_threads == NULL) {
334
+ free(new_ts);
335
+ free(new_threads);
336
+ return false;
337
+ }
338
+ stats->timestamps = new_ts;
339
+ stats->thread_ids = new_threads;
340
+ stats->timestamps_capacity = new_cap;
341
+ }
342
+ stats->timestamps[stats->timestamps_count] = sample->timestamp_ns;
343
+ stats->thread_ids[stats->timestamps_count] = (uintptr_t)sample->context_pthread;
344
+ stats->timestamps_count++;
244
345
 
245
346
  return true;
246
347
  }
@@ -274,8 +375,10 @@ pf2_session_stop(struct pf2_session *session)
274
375
 
275
376
  // Disarm and delete the timer.
276
377
  #ifdef HAVE_TIMER_CREATE
277
- if (timer_delete(session->timer) == -1) {
278
- rb_raise(rb_eRuntimeError, "Failed to delete timer");
378
+ if (!session->configuration->_test_no_install_timer) {
379
+ if (timer_delete(session->timer) == -1) {
380
+ rb_raise(rb_eRuntimeError, "Failed to delete timer");
381
+ }
279
382
  }
280
383
  #else
281
384
  struct itimerval zero_timer = {{0, 0}, {0, 0}};
@@ -291,6 +394,7 @@ pf2_session_stop(struct pf2_session *session)
291
394
  // Terminate the collector thread
292
395
  session->is_running = false;
293
396
  pthread_join(*session->collector_thread, NULL);
397
+ drain_ringbuffer(session);
294
398
  }
295
399
 
296
400
  VALUE
@@ -306,7 +410,7 @@ pf2_session_alloc(VALUE self)
306
410
  {
307
411
  // Initialize state for libbacktrace
308
412
  if (global_backtrace_state == NULL) {
309
- global_backtrace_state = backtrace_create_state("pf2", 1, pf2_backtrace_print_error, NULL);
413
+ global_backtrace_state = backtrace_create_state(NULL, 1, pf2_backtrace_print_error, NULL);
310
414
  if (global_backtrace_state == NULL) {
311
415
  rb_raise(rb_eRuntimeError, "Failed to initialize libbacktrace");
312
416
  }
@@ -342,14 +446,28 @@ pf2_session_alloc(VALUE self)
342
446
  rb_raise(rb_eNoMemError, "Failed to allocate memory");
343
447
  }
344
448
 
345
- // samples, samples_index, samples_capacity
346
- session->samples_index = 0;
347
- session->samples_capacity = 500; // 10 seconds worth of samples at 50 Hz
348
- session->samples = malloc(sizeof(struct pf2_sample) * session->samples_capacity);
349
- if (session->samples == NULL) {
350
- rb_raise(rb_eNoMemError, "Failed to allocate memory");
449
+ // location_table, stack_table, native_stack_table, sample_table
450
+ session->location_table = pf2_location_table_init();
451
+ if (session->location_table == NULL) {
452
+ rb_raise(rb_eNoMemError, "Failed to allocate location table");
453
+ }
454
+ session->stack_table = pf2_stack_table_init();
455
+ if (session->stack_table == NULL) {
456
+ rb_raise(rb_eNoMemError, "Failed to allocate stack table");
457
+ }
458
+ session->native_stack_table = pf2_native_stack_table_init();
459
+ if (session->native_stack_table == NULL) {
460
+ rb_raise(rb_eNoMemError, "Failed to allocate native stack table");
461
+ }
462
+ session->sample_table = pf2_sample_table_init();
463
+ if (session->sample_table == NULL) {
464
+ rb_raise(rb_eNoMemError, "Failed to allocate stack sample table");
351
465
  }
352
466
 
467
+ // collected_sample_count, dropped_sample_count
468
+ atomic_store_explicit(&session->collected_sample_count, 0, memory_order_relaxed);
469
+ atomic_store_explicit(&session->dropped_sample_count, 0, memory_order_relaxed);
470
+
353
471
  // start_time_realtime, start_time
354
472
  session->start_time_realtime = (struct timespec){0};
355
473
  session->start_time = (struct timespec){0};
@@ -385,11 +503,11 @@ pf2_session_dmark(void *sess)
385
503
  head = (head + 1) % rbuf->size;
386
504
  }
387
505
 
388
- // Iterate over all samples in the samples array and mark them
389
- for (size_t i = 0; i < session->samples_index; i++) {
390
- sample = &session->samples[i];
391
- for (int i = 0; i < sample->depth; i++) {
392
- rb_gc_mark(sample->cmes[i]);
506
+ // Mark Ruby VALUEs stored in location_table keys
507
+ if (session->location_table) {
508
+ khint_t k;
509
+ kh_foreach(session->location_table, k) {
510
+ rb_gc_mark(kh_key(session->location_table, k).cme);
393
511
  }
394
512
  }
395
513
 
@@ -411,7 +529,33 @@ pf2_session_dfree(void *sess)
411
529
 
412
530
  pf2_configuration_free(session->configuration);
413
531
  pf2_ringbuffer_free(session->rbuf);
414
- free(session->samples);
532
+
533
+ if (session->sample_table) {
534
+ khint_t k;
535
+ kh_foreach(session->sample_table, k) {
536
+ free(kh_val(session->sample_table, k).timestamps);
537
+ free(kh_val(session->sample_table, k).thread_ids);
538
+ }
539
+ pf2_sample_table_destroy(session->sample_table);
540
+ }
541
+ if (session->stack_table) {
542
+ khint_t k;
543
+ kh_foreach(session->stack_table, k) {
544
+ free((void *)kh_key(session->stack_table, k).frames);
545
+ }
546
+ pf2_stack_table_destroy(session->stack_table);
547
+ }
548
+ if (session->native_stack_table) {
549
+ khint_t k;
550
+ kh_foreach(session->native_stack_table, k) {
551
+ free((void *)kh_key(session->native_stack_table, k).frames);
552
+ }
553
+ pf2_native_stack_table_destroy(session->native_stack_table);
554
+ }
555
+ if (session->location_table) {
556
+ pf2_location_table_destroy(session->location_table);
557
+ }
558
+
415
559
  free(session->collector_thread);
416
560
  free(session);
417
561
  }
@@ -422,7 +566,6 @@ pf2_session_dsize(const void *sess)
422
566
  const struct pf2_session *session = sess;
423
567
  return (
424
568
  sizeof(struct pf2_session)
425
- + sizeof(struct pf2_sample) * session->samples_capacity
426
569
  + sizeof(struct pf2_sample) * session->rbuf->size
427
570
  );
428
571
  }
data/ext/pf2/session.h CHANGED
@@ -3,6 +3,8 @@
3
3
 
4
4
  #include <pthread.h>
5
5
  #include <stdatomic.h>
6
+ #include <stdint.h>
7
+ #include <limits.h>
6
8
  #include <sys/time.h>
7
9
 
8
10
  #include <ruby.h>
@@ -11,6 +13,152 @@
11
13
  #include "ringbuffer.h"
12
14
  #include "sample.h"
13
15
 
16
+ #include "khashl.h"
17
+
18
+ // Maps for sample storage
19
+
20
+ // BEGIN generic helpers
21
+
22
+ static inline khint_t hash_size_t(size_t v)
23
+ {
24
+ #if SIZE_MAX == UINT_MAX
25
+ return kh_hash_uint32((khint_t)v);
26
+ #else
27
+ return kh_hash_uint64((khint64_t)v);
28
+ #endif
29
+ }
30
+ static inline int eq_size_t(size_t a, size_t b) { return a == b; }
31
+
32
+ // END generic helpers
33
+
34
+ // BEGIN location_table
35
+
36
+ struct pf2_location_key {
37
+ VALUE cme;
38
+ int lineno;
39
+ };
40
+ static inline khint_t hash_location_key(struct pf2_location_key key)
41
+ {
42
+ khint_t h = hash_size_t((size_t)key.cme);
43
+ h ^= (khint_t)key.lineno + 0x9e3779b9U + (h << 6) + (h >> 2);
44
+ return h;
45
+ }
46
+ static inline int eq_location_key(struct pf2_location_key a, struct pf2_location_key b)
47
+ {
48
+ return a.cme == b.cme && a.lineno == b.lineno;
49
+ }
50
+
51
+ // END location_table
52
+
53
+ // BEGIN stack_table (Ruby stack)
54
+
55
+ struct pf2_stack_key {
56
+ const size_t *frames; // pointer to an immutable array of location_ids
57
+ size_t depth;
58
+ };
59
+ static inline khint_t hash_stack_key(struct pf2_stack_key key)
60
+ {
61
+ khint_t h = hash_size_t(key.depth);
62
+ for (size_t i = 0; i < key.depth; i++) {
63
+ h ^= hash_size_t(key.frames[i]) + 0x9e3779b9U + (h << 6) + (h >> 2);
64
+ }
65
+ return h;
66
+ }
67
+ static inline int eq_stack_key(struct pf2_stack_key a, struct pf2_stack_key b)
68
+ {
69
+ if (a.depth != b.depth) return 0;
70
+ for (size_t i = 0; i < a.depth; i++) {
71
+ if (a.frames[i] != b.frames[i]) return 0;
72
+ }
73
+ return 1;
74
+ }
75
+
76
+ // END stack_table
77
+
78
+ // BEGIN native_stack_table (raw PCs)
79
+
80
+ struct pf2_native_stack_key {
81
+ const uintptr_t *frames; // pointer to an immutable array of PCs
82
+ size_t depth;
83
+ };
84
+ static inline khint_t hash_native_stack_key(struct pf2_native_stack_key key)
85
+ {
86
+ khint_t h = hash_size_t(key.depth);
87
+ for (size_t i = 0; i < key.depth; i++) {
88
+ h ^= kh_hash_uint64((khint64_t)key.frames[i]) + 0x9e3779b9U + (h << 6) + (h >> 2);
89
+ }
90
+ return h;
91
+ }
92
+ static inline int eq_native_stack_key(struct pf2_native_stack_key a, struct pf2_native_stack_key b)
93
+ {
94
+ if (a.depth != b.depth) return 0;
95
+ for (size_t i = 0; i < a.depth; i++) {
96
+ if (a.frames[i] != b.frames[i]) return 0;
97
+ }
98
+ return 1;
99
+ }
100
+
101
+ // END native_stack_table
102
+
103
+ // BEGIN combined_sample_table
104
+
105
+ struct pf2_combined_stack_key {
106
+ size_t ruby_stack_id;
107
+ size_t native_stack_id;
108
+ };
109
+ static inline khint_t hash_combined_stack_key(struct pf2_combined_stack_key key)
110
+ {
111
+ khint_t h = hash_size_t(key.ruby_stack_id);
112
+ h ^= hash_size_t(key.native_stack_id) + 0x9e3779b9U + (h << 6) + (h >> 2);
113
+ return h;
114
+ }
115
+ static inline int eq_combined_stack_key(struct pf2_combined_stack_key a, struct pf2_combined_stack_key b)
116
+ {
117
+ return a.ruby_stack_id == b.ruby_stack_id && a.native_stack_id == b.native_stack_id;
118
+ }
119
+
120
+ // END combined_sample_table
121
+
122
+ struct pf2_sample_stats {
123
+ // The number of times this sample was observed.
124
+ size_t count;
125
+ // Timestamps which this sample was observed. This array's length = # of samples.
126
+ // TODO: Make timestamp collection optional?
127
+ uint64_t *timestamps;
128
+ // Thread ids corresponding to each timestamp.
129
+ uintptr_t *thread_ids;
130
+ // timestamps.length
131
+ size_t timestamps_count;
132
+ size_t timestamps_capacity;
133
+ };
134
+
135
+ #pragma GCC diagnostic push
136
+ #pragma GCC diagnostic ignored "-Wunused-function"
137
+ // location table: key = (cme, lineno), val = location_id
138
+ KHASHL_MAP_INIT(static, pf2_location_table, pf2_location_table, struct pf2_location_key, size_t, hash_location_key, eq_location_key)
139
+ // stack table: key = array of location_ids, val = stack_id
140
+ KHASHL_MAP_INIT(static, pf2_stack_table, pf2_stack_table, struct pf2_stack_key, size_t, hash_stack_key, eq_stack_key)
141
+ // native stack table: key = array of PCs, val = native_stack_id
142
+ KHASHL_MAP_INIT(static, pf2_native_stack_table, pf2_native_stack_table, struct pf2_native_stack_key, size_t, hash_native_stack_key, eq_native_stack_key)
143
+ // sample table: key = (ruby_stack_id, native_stack_id), val = aggregated counts/timestamps
144
+ KHASHL_MAP_INIT(static, pf2_sample_table, pf2_sample_table, struct pf2_combined_stack_key, struct pf2_sample_stats, hash_combined_stack_key, eq_combined_stack_key)
145
+ #pragma GCC diagnostic pop
146
+
147
+ struct pf2_sess_sample {
148
+ size_t *stack; // array of location_indexes
149
+ size_t stack_count;
150
+ size_t *native_stack; // array of location_indexes
151
+ size_t native_stack_count;
152
+ uintptr_t ruby_thread_id;
153
+ uint64_t elapsed_ns;
154
+ };
155
+
156
+ struct pf2_sess_location {
157
+ size_t function_index;
158
+ int32_t lineno;
159
+ size_t address;
160
+ };
161
+
14
162
  struct pf2_session {
15
163
  bool is_running;
16
164
  #ifdef HAVE_TIMER_CREATE
@@ -22,14 +170,18 @@ struct pf2_session {
22
170
  atomic_bool is_marking; // Whether garbage collection is in progress
23
171
  pthread_t *collector_thread;
24
172
 
25
- struct pf2_sample *samples; // Dynamic array of samples
26
- size_t samples_index;
27
- size_t samples_capacity; // Current capacity of the samples array
173
+ pf2_location_table *location_table;
174
+ pf2_stack_table *stack_table;
175
+ pf2_native_stack_table *native_stack_table;
176
+ pf2_sample_table *sample_table;
28
177
 
29
178
  struct timespec start_time_realtime;
30
179
  struct timespec start_time; // When profiling started
31
180
  uint64_t duration_ns; // Duration of profiling in nanoseconds
32
181
 
182
+ atomic_uint_fast64_t collected_sample_count; // Number of samples copied out of the ringbuffer
183
+ atomic_uint_fast64_t dropped_sample_count; // Number of samples dropped for any reason
184
+
33
185
  struct pf2_configuration *configuration;
34
186
  };
35
187
 
data/lib/pf2/cli.rb CHANGED
@@ -26,7 +26,7 @@ module Pf2
26
26
  when 'version'
27
27
  puts VERSION
28
28
  return 0
29
- when '--help'
29
+ when nil, '--help'
30
30
  STDERR.puts <<~__EOS__
31
31
  Usage: #{program_name} COMMAND [options]
32
32