pf2 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/pf2/session.c CHANGED
@@ -4,6 +4,7 @@
4
4
  #include <stdbool.h>
5
5
  #include <stdio.h>
6
6
  #include <stdlib.h>
7
+ #include <string.h>
7
8
  #include <sys/time.h>
8
9
  #include <time.h>
9
10
 
@@ -25,8 +26,11 @@ static struct pf2_session *global_current_session = NULL;
25
26
  static void *sample_collector_thread(void *arg);
26
27
  static void drain_ringbuffer(struct pf2_session *session);
27
28
  static void sigprof_handler(int sig, siginfo_t *info, void *ucontext);
28
- bool ensure_sample_capacity(struct pf2_session *session);
29
29
  static void pf2_session_stop(struct pf2_session *session);
30
+ static size_t intern_location(struct pf2_session *session, VALUE cme, int lineno);
31
+ static size_t intern_stack(struct pf2_session *session, const size_t *frames, size_t depth);
32
+ static size_t intern_native_stack(struct pf2_session *session, const uintptr_t *frames, size_t depth);
33
+ static bool insert_sample(struct pf2_session *session, const struct pf2_sample *sample);
30
34
 
31
35
  VALUE
32
36
  rb_pf2_session_initialize(int argc, VALUE *argv, VALUE self)
@@ -157,7 +161,7 @@ sample_collector_thread(void *arg)
157
161
  // Take samples from the ring buffer
158
162
  drain_ringbuffer(session);
159
163
 
160
- // Sleep for 100 ms
164
+ // Sleep for 10 ms
161
165
  // TODO: Replace with high watermark callback
162
166
  struct timespec ts = { .tv_sec = 0, .tv_nsec = 10 * 1000000, }; // 10 ms
163
167
  nanosleep(&ts, NULL);
@@ -171,16 +175,12 @@ drain_ringbuffer(struct pf2_session *session)
171
175
  {
172
176
  struct pf2_sample sample;
173
177
  while (pf2_ringbuffer_pop(session->rbuf, &sample) == true) {
174
- // Ensure we have capacity before adding a new sample
175
- if (!ensure_sample_capacity(session)) {
176
- // Failed to expand buffer
178
+ if (!insert_sample(session, &sample)) {
177
179
  atomic_fetch_add_explicit(&session->dropped_sample_count, 1, memory_order_relaxed);
178
- PF2_DEBUG_LOG("Failed to expand sample buffer. Dropping sample\n");
179
- break;
180
+ PF2_DEBUG_LOG("Failed to record sample. Dropping sample\n");
181
+ } else {
182
+ atomic_fetch_add_explicit(&session->collected_sample_count, 1, memory_order_relaxed);
180
183
  }
181
-
182
- session->samples[session->samples_index++] = sample;
183
- atomic_fetch_add_explicit(&session->collected_sample_count, 1, memory_order_relaxed);
184
184
  }
185
185
  }
186
186
 
@@ -231,27 +231,117 @@ sigprof_handler(int sig, siginfo_t *info, void *ucontext)
231
231
  #endif
232
232
  }
233
233
 
234
- // Ensures that the session's sample array has capacity for at least one more sample
235
- // Returns true if successful, false if memory allocation failed
236
- bool
237
- ensure_sample_capacity(struct pf2_session *session)
234
+ static size_t
235
+ intern_location(struct pf2_session *session, VALUE cme, int lineno)
236
+ {
237
+ struct pf2_location_key key = { .cme = cme, .lineno = lineno };
238
+ int absent;
239
+ khint_t k = pf2_location_table_put(session->location_table, key, &absent);
240
+ if (k == kh_end(session->location_table)) { return (size_t)-1; }
241
+ if (absent) {
242
+ kh_val(session->location_table, k) = kh_size(session->location_table) - 1;
243
+ }
244
+ return kh_val(session->location_table, k);
245
+ }
246
+
247
+ static size_t
248
+ intern_stack(struct pf2_session *session, const size_t *frames, size_t depth)
249
+ {
250
+ struct pf2_stack_key skey = { .frames = frames, .depth = depth };
251
+ int absent;
252
+ khint_t k = pf2_stack_table_put(session->stack_table, skey, &absent);
253
+ if (k == kh_end(session->stack_table)) { return (size_t)-1; }
254
+ if (absent) {
255
+ size_t *copy = NULL;
256
+ if (depth > 0) {
257
+ copy = malloc(sizeof(size_t) * depth);
258
+ // TODO: if allocation fails, remove stack_table entry to avoid dangling stack-local key.
259
+ if (copy == NULL) return (size_t)-1;
260
+ memcpy(copy, frames, sizeof(size_t) * depth);
261
+ }
262
+ kh_key(session->stack_table, k).frames = copy;
263
+ kh_key(session->stack_table, k).depth = depth;
264
+ kh_val(session->stack_table, k) = kh_size(session->stack_table) - 1;
265
+ }
266
+ return kh_val(session->stack_table, k);
267
+ }
268
+
269
+ static size_t
270
+ intern_native_stack(struct pf2_session *session, const uintptr_t *frames, size_t depth)
238
271
  {
239
- // Check if we need to expand
240
- if (session->samples_index < session->samples_capacity) {
241
- return true;
272
+ struct pf2_native_stack_key skey = { .frames = frames, .depth = depth };
273
+ int absent;
274
+ khint_t k = pf2_native_stack_table_put(session->native_stack_table, skey, &absent);
275
+ if (k == kh_end(session->native_stack_table)) { return (size_t)-1; }
276
+ if (absent) {
277
+ uintptr_t *copy = NULL;
278
+ if (depth > 0) {
279
+ copy = malloc(sizeof(uintptr_t) * depth);
280
+ if (copy == NULL) return (size_t)-1;
281
+ memcpy(copy, frames, sizeof(uintptr_t) * depth);
282
+ }
283
+ kh_key(session->native_stack_table, k).frames = copy;
284
+ kh_key(session->native_stack_table, k).depth = depth;
285
+ kh_val(session->native_stack_table, k) = kh_size(session->native_stack_table) - 1;
242
286
  }
287
+ return kh_val(session->native_stack_table, k);
288
+ }
243
289
 
244
- // Calculate new size (double the current size)
245
- size_t new_capacity = session->samples_capacity * 2;
290
+ static bool
291
+ insert_sample(struct pf2_session *session, const struct pf2_sample *sample)
292
+ {
293
+ size_t frame_ids[PF2_SAMPLE_MAX_RUBY_DEPTH];
246
294
 
247
- // Reallocate the array
248
- struct pf2_sample *new_samples = realloc(session->samples, new_capacity * sizeof(struct pf2_sample));
249
- if (new_samples == NULL) {
250
- return false;
295
+ // Convert each frame to a location
296
+ for (int i = 0; i < sample->depth; i++) {
297
+ frame_ids[i] = intern_location(session, sample->cmes[i], sample->linenos[i]);
298
+ if (frame_ids[i] == (size_t)-1) { return false; }
251
299
  }
252
300
 
253
- session->samples = new_samples;
254
- session->samples_capacity = new_capacity;
301
+ // Obtain stack_id for the array of locations
302
+ size_t stack_id = intern_stack(session, frame_ids, (size_t)sample->depth);
303
+ if (stack_id == (size_t)-1) { return false; }
304
+
305
+ size_t native_stack_id = intern_native_stack(session, sample->native_stack, sample->native_stack_depth);
306
+ if (native_stack_id == (size_t)-1) { return false; }
307
+
308
+ // Increment the observation count for this stack_id
309
+ int absent;
310
+ struct pf2_combined_stack_key ckey = {
311
+ .ruby_stack_id = stack_id,
312
+ .native_stack_id = native_stack_id
313
+ };
314
+ khint_t k = pf2_sample_table_put(session->sample_table, ckey, &absent);
315
+ if (k == kh_end(session->sample_table)) { return false; }
316
+ struct pf2_sample_stats *stats = &kh_val(session->sample_table, k);
317
+ if (absent) {
318
+ // This is the first time this stack was observed. Initialize stats.
319
+ stats->count = 0;
320
+ stats->timestamps = NULL;
321
+ stats->thread_ids = NULL;
322
+ stats->timestamps_count = 0;
323
+ stats->timestamps_capacity = 0;
324
+ }
325
+
326
+ // count
327
+ stats->count += 1;
328
+ // timestamps
329
+ if (stats->timestamps_count == stats->timestamps_capacity) {
330
+ size_t new_cap = stats->timestamps_capacity ? stats->timestamps_capacity * 2 : 16;
331
+ uint64_t *new_ts = realloc(stats->timestamps, sizeof(uint64_t) * new_cap);
332
+ uintptr_t *new_threads = realloc(stats->thread_ids, sizeof(uintptr_t) * new_cap);
333
+ if (new_ts == NULL || new_threads == NULL) {
334
+ free(new_ts);
335
+ free(new_threads);
336
+ return false;
337
+ }
338
+ stats->timestamps = new_ts;
339
+ stats->thread_ids = new_threads;
340
+ stats->timestamps_capacity = new_cap;
341
+ }
342
+ stats->timestamps[stats->timestamps_count] = sample->timestamp_ns;
343
+ stats->thread_ids[stats->timestamps_count] = (uintptr_t)sample->context_pthread;
344
+ stats->timestamps_count++;
255
345
 
256
346
  return true;
257
347
  }
@@ -356,12 +446,22 @@ pf2_session_alloc(VALUE self)
356
446
  rb_raise(rb_eNoMemError, "Failed to allocate memory");
357
447
  }
358
448
 
359
- // samples, samples_index, samples_capacity
360
- session->samples_index = 0;
361
- session->samples_capacity = 500; // 10 seconds worth of samples at 50 Hz
362
- session->samples = malloc(sizeof(struct pf2_sample) * session->samples_capacity);
363
- if (session->samples == NULL) {
364
- rb_raise(rb_eNoMemError, "Failed to allocate memory");
449
+ // location_table, stack_table, native_stack_table, sample_table
450
+ session->location_table = pf2_location_table_init();
451
+ if (session->location_table == NULL) {
452
+ rb_raise(rb_eNoMemError, "Failed to allocate location table");
453
+ }
454
+ session->stack_table = pf2_stack_table_init();
455
+ if (session->stack_table == NULL) {
456
+ rb_raise(rb_eNoMemError, "Failed to allocate stack table");
457
+ }
458
+ session->native_stack_table = pf2_native_stack_table_init();
459
+ if (session->native_stack_table == NULL) {
460
+ rb_raise(rb_eNoMemError, "Failed to allocate native stack table");
461
+ }
462
+ session->sample_table = pf2_sample_table_init();
463
+ if (session->sample_table == NULL) {
464
+ rb_raise(rb_eNoMemError, "Failed to allocate stack sample table");
365
465
  }
366
466
 
367
467
  // collected_sample_count, dropped_sample_count
@@ -403,11 +503,11 @@ pf2_session_dmark(void *sess)
403
503
  head = (head + 1) % rbuf->size;
404
504
  }
405
505
 
406
- // Iterate over all samples in the samples array and mark them
407
- for (size_t i = 0; i < session->samples_index; i++) {
408
- sample = &session->samples[i];
409
- for (int i = 0; i < sample->depth; i++) {
410
- rb_gc_mark(sample->cmes[i]);
506
+ // Mark Ruby VALUEs stored in location_table keys
507
+ if (session->location_table) {
508
+ khint_t k;
509
+ kh_foreach(session->location_table, k) {
510
+ rb_gc_mark(kh_key(session->location_table, k).cme);
411
511
  }
412
512
  }
413
513
 
@@ -429,7 +529,33 @@ pf2_session_dfree(void *sess)
429
529
 
430
530
  pf2_configuration_free(session->configuration);
431
531
  pf2_ringbuffer_free(session->rbuf);
432
- free(session->samples);
532
+
533
+ if (session->sample_table) {
534
+ khint_t k;
535
+ kh_foreach(session->sample_table, k) {
536
+ free(kh_val(session->sample_table, k).timestamps);
537
+ free(kh_val(session->sample_table, k).thread_ids);
538
+ }
539
+ pf2_sample_table_destroy(session->sample_table);
540
+ }
541
+ if (session->stack_table) {
542
+ khint_t k;
543
+ kh_foreach(session->stack_table, k) {
544
+ free((void *)kh_key(session->stack_table, k).frames);
545
+ }
546
+ pf2_stack_table_destroy(session->stack_table);
547
+ }
548
+ if (session->native_stack_table) {
549
+ khint_t k;
550
+ kh_foreach(session->native_stack_table, k) {
551
+ free((void *)kh_key(session->native_stack_table, k).frames);
552
+ }
553
+ pf2_native_stack_table_destroy(session->native_stack_table);
554
+ }
555
+ if (session->location_table) {
556
+ pf2_location_table_destroy(session->location_table);
557
+ }
558
+
433
559
  free(session->collector_thread);
434
560
  free(session);
435
561
  }
@@ -440,7 +566,6 @@ pf2_session_dsize(const void *sess)
440
566
  const struct pf2_session *session = sess;
441
567
  return (
442
568
  sizeof(struct pf2_session)
443
- + sizeof(struct pf2_sample) * session->samples_capacity
444
569
  + sizeof(struct pf2_sample) * session->rbuf->size
445
570
  );
446
571
  }
data/ext/pf2/session.h CHANGED
@@ -3,6 +3,8 @@
3
3
 
4
4
  #include <pthread.h>
5
5
  #include <stdatomic.h>
6
+ #include <stdint.h>
7
+ #include <limits.h>
6
8
  #include <sys/time.h>
7
9
 
8
10
  #include <ruby.h>
@@ -11,6 +13,152 @@
11
13
  #include "ringbuffer.h"
12
14
  #include "sample.h"
13
15
 
16
+ #include "khashl.h"
17
+
18
+ // Maps for sample storage
19
+
20
+ // BEGIN generic helpers
21
+
22
+ static inline khint_t hash_size_t(size_t v)
23
+ {
24
+ #if SIZE_MAX == UINT_MAX
25
+ return kh_hash_uint32((khint_t)v);
26
+ #else
27
+ return kh_hash_uint64((khint64_t)v);
28
+ #endif
29
+ }
30
+ static inline int eq_size_t(size_t a, size_t b) { return a == b; }
31
+
32
+ // END generic helpers
33
+
34
+ // BEGIN location_table
35
+
36
+ struct pf2_location_key {
37
+ VALUE cme;
38
+ int lineno;
39
+ };
40
+ static inline khint_t hash_location_key(struct pf2_location_key key)
41
+ {
42
+ khint_t h = hash_size_t((size_t)key.cme);
43
+ h ^= (khint_t)key.lineno + 0x9e3779b9U + (h << 6) + (h >> 2);
44
+ return h;
45
+ }
46
+ static inline int eq_location_key(struct pf2_location_key a, struct pf2_location_key b)
47
+ {
48
+ return a.cme == b.cme && a.lineno == b.lineno;
49
+ }
50
+
51
+ // END location_table
52
+
53
+ // BEGIN stack_table (Ruby stack)
54
+
55
+ struct pf2_stack_key {
56
+ const size_t *frames; // pointer to an immutable array of location_ids
57
+ size_t depth;
58
+ };
59
+ static inline khint_t hash_stack_key(struct pf2_stack_key key)
60
+ {
61
+ khint_t h = hash_size_t(key.depth);
62
+ for (size_t i = 0; i < key.depth; i++) {
63
+ h ^= hash_size_t(key.frames[i]) + 0x9e3779b9U + (h << 6) + (h >> 2);
64
+ }
65
+ return h;
66
+ }
67
+ static inline int eq_stack_key(struct pf2_stack_key a, struct pf2_stack_key b)
68
+ {
69
+ if (a.depth != b.depth) return 0;
70
+ for (size_t i = 0; i < a.depth; i++) {
71
+ if (a.frames[i] != b.frames[i]) return 0;
72
+ }
73
+ return 1;
74
+ }
75
+
76
+ // END stack_table
77
+
78
+ // BEGIN native_stack_table (raw PCs)
79
+
80
+ struct pf2_native_stack_key {
81
+ const uintptr_t *frames; // pointer to an immutable array of PCs
82
+ size_t depth;
83
+ };
84
+ static inline khint_t hash_native_stack_key(struct pf2_native_stack_key key)
85
+ {
86
+ khint_t h = hash_size_t(key.depth);
87
+ for (size_t i = 0; i < key.depth; i++) {
88
+ h ^= kh_hash_uint64((khint64_t)key.frames[i]) + 0x9e3779b9U + (h << 6) + (h >> 2);
89
+ }
90
+ return h;
91
+ }
92
+ static inline int eq_native_stack_key(struct pf2_native_stack_key a, struct pf2_native_stack_key b)
93
+ {
94
+ if (a.depth != b.depth) return 0;
95
+ for (size_t i = 0; i < a.depth; i++) {
96
+ if (a.frames[i] != b.frames[i]) return 0;
97
+ }
98
+ return 1;
99
+ }
100
+
101
+ // END native_stack_table
102
+
103
+ // BEGIN combined_sample_table
104
+
105
+ struct pf2_combined_stack_key {
106
+ size_t ruby_stack_id;
107
+ size_t native_stack_id;
108
+ };
109
+ static inline khint_t hash_combined_stack_key(struct pf2_combined_stack_key key)
110
+ {
111
+ khint_t h = hash_size_t(key.ruby_stack_id);
112
+ h ^= hash_size_t(key.native_stack_id) + 0x9e3779b9U + (h << 6) + (h >> 2);
113
+ return h;
114
+ }
115
+ static inline int eq_combined_stack_key(struct pf2_combined_stack_key a, struct pf2_combined_stack_key b)
116
+ {
117
+ return a.ruby_stack_id == b.ruby_stack_id && a.native_stack_id == b.native_stack_id;
118
+ }
119
+
120
+ // END combined_sample_table
121
+
122
+ struct pf2_sample_stats {
123
+ // The number of times this sample was observed.
124
+ size_t count;
125
+ // Timestamps at which this sample was observed. This array's length = # of samples.
126
+ // TODO: Make timestamp collection optional?
127
+ uint64_t *timestamps;
128
+ // Thread ids corresponding to each timestamp.
129
+ uintptr_t *thread_ids;
130
+ // Number of entries currently stored in timestamps (thread_ids shares this count).
131
+ size_t timestamps_count;
132
+ size_t timestamps_capacity;
133
+ };
134
+
135
+ #pragma GCC diagnostic push
136
+ #pragma GCC diagnostic ignored "-Wunused-function"
137
+ // location table: key = (cme, lineno), val = location_id
138
+ KHASHL_MAP_INIT(static, pf2_location_table, pf2_location_table, struct pf2_location_key, size_t, hash_location_key, eq_location_key)
139
+ // stack table: key = array of location_ids, val = stack_id
140
+ KHASHL_MAP_INIT(static, pf2_stack_table, pf2_stack_table, struct pf2_stack_key, size_t, hash_stack_key, eq_stack_key)
141
+ // native stack table: key = array of PCs, val = native_stack_id
142
+ KHASHL_MAP_INIT(static, pf2_native_stack_table, pf2_native_stack_table, struct pf2_native_stack_key, size_t, hash_native_stack_key, eq_native_stack_key)
143
+ // sample table: key = (ruby_stack_id, native_stack_id), val = aggregated counts/timestamps
144
+ KHASHL_MAP_INIT(static, pf2_sample_table, pf2_sample_table, struct pf2_combined_stack_key, struct pf2_sample_stats, hash_combined_stack_key, eq_combined_stack_key)
145
+ #pragma GCC diagnostic pop
146
+
147
+ struct pf2_sess_sample {
148
+ size_t *stack; // array of location_indexes
149
+ size_t stack_count;
150
+ size_t *native_stack; // array of location_indexes
151
+ size_t native_stack_count;
152
+ uintptr_t ruby_thread_id;
153
+ uint64_t elapsed_ns;
154
+ };
155
+
156
+ struct pf2_sess_location {
157
+ size_t function_index;
158
+ int32_t lineno;
159
+ size_t address;
160
+ };
161
+
14
162
  struct pf2_session {
15
163
  bool is_running;
16
164
  #ifdef HAVE_TIMER_CREATE
@@ -22,9 +170,10 @@ struct pf2_session {
22
170
  atomic_bool is_marking; // Whether garbage collection is in progress
23
171
  pthread_t *collector_thread;
24
172
 
25
- struct pf2_sample *samples; // Dynamic array of samples
26
- size_t samples_index;
27
- size_t samples_capacity; // Current capacity of the samples array
173
+ pf2_location_table *location_table;
174
+ pf2_stack_table *stack_table;
175
+ pf2_native_stack_table *native_stack_table;
176
+ pf2_sample_table *sample_table;
28
177
 
29
178
  struct timespec start_time_realtime;
30
179
  struct timespec start_time; // When profiling started
data/lib/pf2/serve.rb CHANGED
@@ -27,10 +27,9 @@ module Pf2
27
27
  server = WEBrick::HTTPServer.new(CONFIG)
28
28
  server.mount_proc('/profile') do |req, res|
29
29
  profile = Pf2.stop
30
- profile = JSON.parse(profile, symbolize_names: true, max_nesting: false)
31
30
  res.header['Content-Type'] = 'application/json'
32
31
  res.header['Access-Control-Allow-Origin'] = '*'
33
- res.body = JSON.generate(Pf2::Reporter::FirefoxProfiler.new((profile)).emit)
32
+ res.body = JSON.generate(Pf2::Reporter::FirefoxProfilerSer2.new(profile).emit)
34
33
  Pf2.start
35
34
  end
36
35
 
data/lib/pf2/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pf2
4
- VERSION = '0.12.0'
4
+ VERSION = '0.13.0'
5
5
  end
data/lib/pf2.rb CHANGED
@@ -30,5 +30,10 @@ module Pf2
30
30
  result = stop
31
31
  @@session = nil # let GC clean up the session
32
32
  result
33
+ ensure
34
+ if defined?(@@session) && @@session != nil
35
+ stop
36
+ @@session = nil
37
+ end
33
38
  end
34
39
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pf2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daisuke Aritomo
@@ -121,6 +121,7 @@ files:
121
121
  - LICENSE.txt
122
122
  - README.md
123
123
  - Rakefile
124
+ - THIRD_PARTY_LICENSES.txt
124
125
  - doc/development.md
125
126
  - examples/mandelbrot.rb
126
127
  - examples/mandelbrot_ractor.rb
@@ -132,6 +133,7 @@ files:
132
133
  - ext/pf2/configuration.h
133
134
  - ext/pf2/debug.h
134
135
  - ext/pf2/extconf.rb
136
+ - ext/pf2/khashl.h
135
137
  - ext/pf2/pf2.c
136
138
  - ext/pf2/pf2.h
137
139
  - ext/pf2/ringbuffer.c