pf2 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/pf2/session.c CHANGED
@@ -4,6 +4,7 @@
4
4
  #include <stdbool.h>
5
5
  #include <stdio.h>
6
6
  #include <stdlib.h>
7
+ #include <string.h>
7
8
  #include <sys/time.h>
8
9
  #include <time.h>
9
10
 
@@ -25,8 +26,11 @@ static struct pf2_session *global_current_session = NULL;
25
26
  static void *sample_collector_thread(void *arg);
26
27
  static void drain_ringbuffer(struct pf2_session *session);
27
28
  static void sigprof_handler(int sig, siginfo_t *info, void *ucontext);
28
- bool ensure_sample_capacity(struct pf2_session *session);
29
29
  static void pf2_session_stop(struct pf2_session *session);
30
+ static size_t intern_location(struct pf2_session *session, VALUE cme, int lineno);
31
+ static size_t intern_stack(struct pf2_session *session, const size_t *frames, size_t depth);
32
+ static size_t intern_native_stack(struct pf2_session *session, const uintptr_t *frames, size_t depth);
33
+ static bool insert_sample(struct pf2_session *session, const struct pf2_sample *sample);
30
34
 
31
35
  VALUE
32
36
  rb_pf2_session_initialize(int argc, VALUE *argv, VALUE self)
@@ -157,7 +161,7 @@ sample_collector_thread(void *arg)
157
161
  // Take samples from the ring buffer
158
162
  drain_ringbuffer(session);
159
163
 
160
- // Sleep for 100 ms
164
+ // Sleep for 10 ms
161
165
  // TODO: Replace with high watermark callback
162
166
  struct timespec ts = { .tv_sec = 0, .tv_nsec = 10 * 1000000, }; // 10 ms
163
167
  nanosleep(&ts, NULL);
@@ -171,16 +175,12 @@ drain_ringbuffer(struct pf2_session *session)
171
175
  {
172
176
  struct pf2_sample sample;
173
177
  while (pf2_ringbuffer_pop(session->rbuf, &sample) == true) {
174
- // Ensure we have capacity before adding a new sample
175
- if (!ensure_sample_capacity(session)) {
176
- // Failed to expand buffer
178
+ if (!insert_sample(session, &sample)) {
177
179
  atomic_fetch_add_explicit(&session->dropped_sample_count, 1, memory_order_relaxed);
178
- PF2_DEBUG_LOG("Failed to expand sample buffer. Dropping sample\n");
179
- break;
180
+ PF2_DEBUG_LOG("Failed to record sample. Dropping sample\n");
181
+ } else {
182
+ atomic_fetch_add_explicit(&session->collected_sample_count, 1, memory_order_relaxed);
180
183
  }
181
-
182
- session->samples[session->samples_index++] = sample;
183
- atomic_fetch_add_explicit(&session->collected_sample_count, 1, memory_order_relaxed);
184
184
  }
185
185
  }
186
186
 
@@ -195,6 +195,12 @@ sigprof_handler(int sig, siginfo_t *info, void *ucontext)
195
195
 
196
196
  struct pf2_session *session = global_current_session;
197
197
 
198
+ // Pending signals may be delivered even after the session is stopped.
199
+ // Simply ignore those.
200
+ if (session == NULL || session->is_running == false) {
201
+ return;
202
+ }
203
+
198
204
  // If garbage collection is in progress, don't collect samples.
199
205
  if (atomic_load_explicit(&session->is_marking, memory_order_acquire)) {
200
206
  PF2_DEBUG_LOG("Dropping sample: Garbage collection is in progress\n");
@@ -231,27 +237,117 @@ sigprof_handler(int sig, siginfo_t *info, void *ucontext)
231
237
  #endif
232
238
  }
233
239
 
234
- // Ensures that the session's sample array has capacity for at least one more sample
235
- // Returns true if successful, false if memory allocation failed
236
- bool
237
- ensure_sample_capacity(struct pf2_session *session)
240
+ static size_t
241
+ intern_location(struct pf2_session *session, VALUE cme, int lineno)
242
+ {
243
+ struct pf2_location_key key = { .cme = cme, .lineno = lineno };
244
+ int absent;
245
+ khint_t k = pf2_location_table_put(session->location_table, key, &absent);
246
+ if (k == kh_end(session->location_table)) { return (size_t)-1; }
247
+ if (absent) {
248
+ kh_val(session->location_table, k) = kh_size(session->location_table) - 1;
249
+ }
250
+ return kh_val(session->location_table, k);
251
+ }
252
+
253
+ static size_t
254
+ intern_stack(struct pf2_session *session, const size_t *frames, size_t depth)
255
+ {
256
+ struct pf2_stack_key skey = { .frames = frames, .depth = depth };
257
+ int absent;
258
+ khint_t k = pf2_stack_table_put(session->stack_table, skey, &absent);
259
+ if (k == kh_end(session->stack_table)) { return (size_t)-1; }
260
+ if (absent) {
261
+ size_t *copy = NULL;
262
+ if (depth > 0) {
263
+ copy = malloc(sizeof(size_t) * depth);
264
+ // TODO: if allocation fails, remove stack_table entry to avoid dangling stack-local key.
265
+ if (copy == NULL) return (size_t)-1;
266
+ memcpy(copy, frames, sizeof(size_t) * depth);
267
+ }
268
+ kh_key(session->stack_table, k).frames = copy;
269
+ kh_key(session->stack_table, k).depth = depth;
270
+ kh_val(session->stack_table, k) = kh_size(session->stack_table) - 1;
271
+ }
272
+ return kh_val(session->stack_table, k);
273
+ }
274
+
275
+ static size_t
276
+ intern_native_stack(struct pf2_session *session, const uintptr_t *frames, size_t depth)
277
+ {
278
+ struct pf2_native_stack_key skey = { .frames = frames, .depth = depth };
279
+ int absent;
280
+ khint_t k = pf2_native_stack_table_put(session->native_stack_table, skey, &absent);
281
+ if (k == kh_end(session->native_stack_table)) { return (size_t)-1; }
282
+ if (absent) {
283
+ uintptr_t *copy = NULL;
284
+ if (depth > 0) {
285
+ copy = malloc(sizeof(uintptr_t) * depth);
286
+ if (copy == NULL) return (size_t)-1;
287
+ memcpy(copy, frames, sizeof(uintptr_t) * depth);
288
+ }
289
+ kh_key(session->native_stack_table, k).frames = copy;
290
+ kh_key(session->native_stack_table, k).depth = depth;
291
+ kh_val(session->native_stack_table, k) = kh_size(session->native_stack_table) - 1;
292
+ }
293
+ return kh_val(session->native_stack_table, k);
294
+ }
295
+
296
+ static bool
297
+ insert_sample(struct pf2_session *session, const struct pf2_sample *sample)
238
298
  {
239
- // Check if we need to expand
240
- if (session->samples_index < session->samples_capacity) {
241
- return true;
299
+ size_t frame_ids[PF2_SAMPLE_MAX_RUBY_DEPTH];
300
+
301
+ // Convert each frame to a location
302
+ for (int i = 0; i < sample->depth; i++) {
303
+ frame_ids[i] = intern_location(session, sample->cmes[i], sample->linenos[i]);
304
+ if (frame_ids[i] == (size_t)-1) { return false; }
242
305
  }
243
306
 
244
- // Calculate new size (double the current size)
245
- size_t new_capacity = session->samples_capacity * 2;
307
+ // Obtain stack_id for the array of locations
308
+ size_t stack_id = intern_stack(session, frame_ids, (size_t)sample->depth);
309
+ if (stack_id == (size_t)-1) { return false; }
310
+
311
+ size_t native_stack_id = intern_native_stack(session, sample->native_stack, sample->native_stack_depth);
312
+ if (native_stack_id == (size_t)-1) { return false; }
246
313
 
247
- // Reallocate the array
248
- struct pf2_sample *new_samples = realloc(session->samples, new_capacity * sizeof(struct pf2_sample));
249
- if (new_samples == NULL) {
250
- return false;
314
+ // Increment the observation count for this stack_id
315
+ int absent;
316
+ struct pf2_combined_stack_key ckey = {
317
+ .ruby_stack_id = stack_id,
318
+ .native_stack_id = native_stack_id
319
+ };
320
+ khint_t k = pf2_sample_table_put(session->sample_table, ckey, &absent);
321
+ if (k == kh_end(session->sample_table)) { return false; }
322
+ struct pf2_sample_stats *stats = &kh_val(session->sample_table, k);
323
+ if (absent) {
324
+ // This is the first time this stack was observed. Initialize stats.
325
+ stats->count = 0;
326
+ stats->timestamps = NULL;
327
+ stats->thread_ids = NULL;
328
+ stats->timestamps_count = 0;
329
+ stats->timestamps_capacity = 0;
251
330
  }
252
331
 
253
- session->samples = new_samples;
254
- session->samples_capacity = new_capacity;
332
+ // count
333
+ stats->count += 1;
334
+ // timestamps
335
+ if (stats->timestamps_count == stats->timestamps_capacity) {
336
+ size_t new_cap = stats->timestamps_capacity ? stats->timestamps_capacity * 2 : 16;
337
+ uint64_t *new_ts = realloc(stats->timestamps, sizeof(uint64_t) * new_cap);
338
+ uintptr_t *new_threads = realloc(stats->thread_ids, sizeof(uintptr_t) * new_cap);
339
+ if (new_ts == NULL || new_threads == NULL) {
340
+ free(new_ts);
341
+ free(new_threads);
342
+ return false;
343
+ }
344
+ stats->timestamps = new_ts;
345
+ stats->thread_ids = new_threads;
346
+ stats->timestamps_capacity = new_cap;
347
+ }
348
+ stats->timestamps[stats->timestamps_count] = sample->timestamp_ns;
349
+ stats->thread_ids[stats->timestamps_count] = (uintptr_t)sample->context_pthread;
350
+ stats->timestamps_count++;
255
351
 
256
352
  return true;
257
353
  }
@@ -298,8 +394,8 @@ pf2_session_stop(struct pf2_session *session)
298
394
  if (setitimer(which_timer, &zero_timer, NULL) == -1) {
299
395
  rb_raise(rb_eRuntimeError, "Failed to stop timer");
300
396
  }
301
- global_current_session = NULL;
302
397
  #endif
398
+ global_current_session = NULL;
303
399
 
304
400
  // Terminate the collector thread
305
401
  session->is_running = false;
@@ -356,12 +452,22 @@ pf2_session_alloc(VALUE self)
356
452
  rb_raise(rb_eNoMemError, "Failed to allocate memory");
357
453
  }
358
454
 
359
- // samples, samples_index, samples_capacity
360
- session->samples_index = 0;
361
- session->samples_capacity = 500; // 10 seconds worth of samples at 50 Hz
362
- session->samples = malloc(sizeof(struct pf2_sample) * session->samples_capacity);
363
- if (session->samples == NULL) {
364
- rb_raise(rb_eNoMemError, "Failed to allocate memory");
455
+ // location_table, stack_table, native_stack_table, sample_table
456
+ session->location_table = pf2_location_table_init();
457
+ if (session->location_table == NULL) {
458
+ rb_raise(rb_eNoMemError, "Failed to allocate location table");
459
+ }
460
+ session->stack_table = pf2_stack_table_init();
461
+ if (session->stack_table == NULL) {
462
+ rb_raise(rb_eNoMemError, "Failed to allocate stack table");
463
+ }
464
+ session->native_stack_table = pf2_native_stack_table_init();
465
+ if (session->native_stack_table == NULL) {
466
+ rb_raise(rb_eNoMemError, "Failed to allocate native stack table");
467
+ }
468
+ session->sample_table = pf2_sample_table_init();
469
+ if (session->sample_table == NULL) {
470
+ rb_raise(rb_eNoMemError, "Failed to allocate stack sample table");
365
471
  }
366
472
 
367
473
  // collected_sample_count, dropped_sample_count
@@ -403,11 +509,11 @@ pf2_session_dmark(void *sess)
403
509
  head = (head + 1) % rbuf->size;
404
510
  }
405
511
 
406
- // Iterate over all samples in the samples array and mark them
407
- for (size_t i = 0; i < session->samples_index; i++) {
408
- sample = &session->samples[i];
409
- for (int i = 0; i < sample->depth; i++) {
410
- rb_gc_mark(sample->cmes[i]);
512
+ // Mark Ruby VALUEs stored in location_table keys
513
+ if (session->location_table) {
514
+ khint_t k;
515
+ kh_foreach(session->location_table, k) {
516
+ rb_gc_mark(kh_key(session->location_table, k).cme);
411
517
  }
412
518
  }
413
519
 
@@ -429,7 +535,33 @@ pf2_session_dfree(void *sess)
429
535
 
430
536
  pf2_configuration_free(session->configuration);
431
537
  pf2_ringbuffer_free(session->rbuf);
432
- free(session->samples);
538
+
539
+ if (session->sample_table) {
540
+ khint_t k;
541
+ kh_foreach(session->sample_table, k) {
542
+ free(kh_val(session->sample_table, k).timestamps);
543
+ free(kh_val(session->sample_table, k).thread_ids);
544
+ }
545
+ pf2_sample_table_destroy(session->sample_table);
546
+ }
547
+ if (session->stack_table) {
548
+ khint_t k;
549
+ kh_foreach(session->stack_table, k) {
550
+ free((void *)kh_key(session->stack_table, k).frames);
551
+ }
552
+ pf2_stack_table_destroy(session->stack_table);
553
+ }
554
+ if (session->native_stack_table) {
555
+ khint_t k;
556
+ kh_foreach(session->native_stack_table, k) {
557
+ free((void *)kh_key(session->native_stack_table, k).frames);
558
+ }
559
+ pf2_native_stack_table_destroy(session->native_stack_table);
560
+ }
561
+ if (session->location_table) {
562
+ pf2_location_table_destroy(session->location_table);
563
+ }
564
+
433
565
  free(session->collector_thread);
434
566
  free(session);
435
567
  }
@@ -440,7 +572,6 @@ pf2_session_dsize(const void *sess)
440
572
  const struct pf2_session *session = sess;
441
573
  return (
442
574
  sizeof(struct pf2_session)
443
- + sizeof(struct pf2_sample) * session->samples_capacity
444
575
  + sizeof(struct pf2_sample) * session->rbuf->size
445
576
  );
446
577
  }
data/ext/pf2/session.h CHANGED
@@ -3,6 +3,8 @@
3
3
 
4
4
  #include <pthread.h>
5
5
  #include <stdatomic.h>
6
+ #include <stdint.h>
7
+ #include <limits.h>
6
8
  #include <sys/time.h>
7
9
 
8
10
  #include <ruby.h>
@@ -11,6 +13,152 @@
11
13
  #include "ringbuffer.h"
12
14
  #include "sample.h"
13
15
 
16
+ #include "khashl.h"
17
+
18
+ // Maps for sample storage
19
+
20
+ // BEGIN generic helpers
21
+
22
+ static inline khint_t hash_size_t(size_t v)
23
+ {
24
+ #if SIZE_MAX == UINT_MAX
25
+ return kh_hash_uint32((khint_t)v);
26
+ #else
27
+ return kh_hash_uint64((khint64_t)v);
28
+ #endif
29
+ }
30
+ static inline int eq_size_t(size_t a, size_t b) { return a == b; }
31
+
32
+ // END generic helpers
33
+
34
+ // BEGIN location_table
35
+
36
+ struct pf2_location_key {
37
+ VALUE cme;
38
+ int lineno;
39
+ };
40
+ static inline khint_t hash_location_key(struct pf2_location_key key)
41
+ {
42
+ khint_t h = hash_size_t((size_t)key.cme);
43
+ h ^= (khint_t)key.lineno + 0x9e3779b9U + (h << 6) + (h >> 2);
44
+ return h;
45
+ }
46
+ static inline int eq_location_key(struct pf2_location_key a, struct pf2_location_key b)
47
+ {
48
+ return a.cme == b.cme && a.lineno == b.lineno;
49
+ }
50
+
51
+ // END location_table
52
+
53
+ // BEGIN stack_table (Ruby stack)
54
+
55
+ struct pf2_stack_key {
56
+ const size_t *frames; // pointer to an immutable array of location_ids
57
+ size_t depth;
58
+ };
59
+ static inline khint_t hash_stack_key(struct pf2_stack_key key)
60
+ {
61
+ khint_t h = hash_size_t(key.depth);
62
+ for (size_t i = 0; i < key.depth; i++) {
63
+ h ^= hash_size_t(key.frames[i]) + 0x9e3779b9U + (h << 6) + (h >> 2);
64
+ }
65
+ return h;
66
+ }
67
+ static inline int eq_stack_key(struct pf2_stack_key a, struct pf2_stack_key b)
68
+ {
69
+ if (a.depth != b.depth) return 0;
70
+ for (size_t i = 0; i < a.depth; i++) {
71
+ if (a.frames[i] != b.frames[i]) return 0;
72
+ }
73
+ return 1;
74
+ }
75
+
76
+ // END stack_table
77
+
78
+ // BEGIN native_stack_table (raw PCs)
79
+
80
+ struct pf2_native_stack_key {
81
+ const uintptr_t *frames; // pointer to an immutable array of PCs
82
+ size_t depth;
83
+ };
84
+ static inline khint_t hash_native_stack_key(struct pf2_native_stack_key key)
85
+ {
86
+ khint_t h = hash_size_t(key.depth);
87
+ for (size_t i = 0; i < key.depth; i++) {
88
+ h ^= kh_hash_uint64((khint64_t)key.frames[i]) + 0x9e3779b9U + (h << 6) + (h >> 2);
89
+ }
90
+ return h;
91
+ }
92
+ static inline int eq_native_stack_key(struct pf2_native_stack_key a, struct pf2_native_stack_key b)
93
+ {
94
+ if (a.depth != b.depth) return 0;
95
+ for (size_t i = 0; i < a.depth; i++) {
96
+ if (a.frames[i] != b.frames[i]) return 0;
97
+ }
98
+ return 1;
99
+ }
100
+
101
+ // END native_stack_table
102
+
103
+ // BEGIN combined_sample_table
104
+
105
+ struct pf2_combined_stack_key {
106
+ size_t ruby_stack_id;
107
+ size_t native_stack_id;
108
+ };
109
+ static inline khint_t hash_combined_stack_key(struct pf2_combined_stack_key key)
110
+ {
111
+ khint_t h = hash_size_t(key.ruby_stack_id);
112
+ h ^= hash_size_t(key.native_stack_id) + 0x9e3779b9U + (h << 6) + (h >> 2);
113
+ return h;
114
+ }
115
+ static inline int eq_combined_stack_key(struct pf2_combined_stack_key a, struct pf2_combined_stack_key b)
116
+ {
117
+ return a.ruby_stack_id == b.ruby_stack_id && a.native_stack_id == b.native_stack_id;
118
+ }
119
+
120
+ // END combined_sample_table
121
+
122
+ struct pf2_sample_stats {
123
+ // The number of times this sample was observed.
124
+ size_t count;
125
+ // Timestamps which this sample was observed. This array's length = # of samples.
126
+ // TODO: Make timestamp collection optional?
127
+ uint64_t *timestamps;
128
+ // Thread ids corresponding to each timestamp.
129
+ uintptr_t *thread_ids;
130
+ // timestamps.length
131
+ size_t timestamps_count;
132
+ size_t timestamps_capacity;
133
+ };
134
+
135
+ #pragma GCC diagnostic push
136
+ #pragma GCC diagnostic ignored "-Wunused-function"
137
+ // location table: key = (cme, lineno), val = location_id
138
+ KHASHL_MAP_INIT(static, pf2_location_table, pf2_location_table, struct pf2_location_key, size_t, hash_location_key, eq_location_key)
139
+ // stack table: key = array of location_ids, val = stack_id
140
+ KHASHL_MAP_INIT(static, pf2_stack_table, pf2_stack_table, struct pf2_stack_key, size_t, hash_stack_key, eq_stack_key)
141
+ // native stack table: key = array of PCs, val = native_stack_id
142
+ KHASHL_MAP_INIT(static, pf2_native_stack_table, pf2_native_stack_table, struct pf2_native_stack_key, size_t, hash_native_stack_key, eq_native_stack_key)
143
+ // sample table: key = (ruby_stack_id, native_stack_id), val = aggregated counts/timestamps
144
+ KHASHL_MAP_INIT(static, pf2_sample_table, pf2_sample_table, struct pf2_combined_stack_key, struct pf2_sample_stats, hash_combined_stack_key, eq_combined_stack_key)
145
+ #pragma GCC diagnostic pop
146
+
147
+ struct pf2_sess_sample {
148
+ size_t *stack; // array of location_indexes
149
+ size_t stack_count;
150
+ size_t *native_stack; // array of location_indexes
151
+ size_t native_stack_count;
152
+ uintptr_t ruby_thread_id;
153
+ uint64_t elapsed_ns;
154
+ };
155
+
156
+ struct pf2_sess_location {
157
+ size_t function_index;
158
+ int32_t lineno;
159
+ size_t address;
160
+ };
161
+
14
162
  struct pf2_session {
15
163
  bool is_running;
16
164
  #ifdef HAVE_TIMER_CREATE
@@ -22,9 +170,10 @@ struct pf2_session {
22
170
  atomic_bool is_marking; // Whether garbage collection is in progress
23
171
  pthread_t *collector_thread;
24
172
 
25
- struct pf2_sample *samples; // Dynamic array of samples
26
- size_t samples_index;
27
- size_t samples_capacity; // Current capacity of the samples array
173
+ pf2_location_table *location_table;
174
+ pf2_stack_table *stack_table;
175
+ pf2_native_stack_table *native_stack_table;
176
+ pf2_sample_table *sample_table;
28
177
 
29
178
  struct timespec start_time_realtime;
30
179
  struct timespec start_time; // When profiling started
data/lib/pf2/cli.rb CHANGED
@@ -60,7 +60,6 @@ module Pf2
60
60
 
61
61
  profile = Marshal.load(File.read(argv[0]))
62
62
  report = Pf2::Reporter::FirefoxProfilerSer2.new(profile).emit
63
- report = JSON.generate(report)
64
63
 
65
64
  if options[:output_file]
66
65
  File.write(options[:output_file], report)
@@ -61,7 +61,7 @@ module Pf2
61
61
  counters: [],
62
62
  threads: thread_reports,
63
63
  }
64
- FirefoxProfilerSer2.deep_camelize_keys(report)
64
+ JSON.generate(FirefoxProfilerSer2.deep_camelize_keys(report))
65
65
  end
66
66
 
67
67
  class ThreadReport