sperf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/sperf/sperf.c ADDED
@@ -0,0 +1,708 @@
1
+ #include <ruby.h>
2
+ #include <ruby/debug.h>
3
+ #include <ruby/thread.h>
4
+ #include <pthread.h>
5
+ #include <time.h>
6
+ #include <string.h>
7
+ #include <stdlib.h>
8
+ #include <unistd.h>
9
+
10
/* Tunables: max frames captured per sample and initial buffer sizes. */
#define SPERF_MAX_STACK_DEPTH 512
#define SPERF_INITIAL_SAMPLES 1024
#define SPERF_INITIAL_FRAME_POOL (1024 * 1024 / sizeof(VALUE)) /* ~1MB */

/* ---- Data structures ---- */

/* Classification of each recorded sample (stored in sperf_sample_t.type). */
enum sperf_sample_type {
    SPERF_SAMPLE_NORMAL = 0,
    SPERF_SAMPLE_GVL_BLOCKED = 1, /* off-GVL: SUSPENDED → READY */
    SPERF_SAMPLE_GVL_WAIT = 2,    /* GVL wait: READY → RESUMED */
    SPERF_SAMPLE_GC_MARKING = 3,  /* GC marking phase */
    SPERF_SAMPLE_GC_SWEEPING = 4, /* GC sweeping phase */
};

/* Current GC phase as tracked by the GC event hook. */
enum sperf_gc_phase {
    SPERF_GC_NONE = 0,
    SPERF_GC_MARKING = 1,
    SPERF_GC_SWEEPING = 2,
};

/* One sample: a slice of frame_pool plus a nanosecond weight and a type. */
typedef struct sperf_sample {
    int depth;          /* number of frames belonging to this sample */
    size_t frame_start; /* index into frame_pool */
    int64_t weight;     /* elapsed ns attributed to this stack */
    int type;           /* sperf_sample_type */
} sperf_sample_t;

/* Per-thread bookkeeping, attached via rb_internal_thread_specific_set. */
typedef struct sperf_thread_data {
    int64_t prev_cpu_ns;  /* last timestamp in the profiler's mode (cpu OR wall, despite the name) */
    int64_t prev_wall_ns; /* last wall-clock timestamp */
    /* GVL event tracking */
    int64_t suspended_at_ns;      /* wall time at SUSPENDED */
    int64_t ready_at_ns;          /* wall time at READY */
    size_t suspended_frame_start; /* saved stack in frame_pool */
    int suspended_frame_depth;    /* saved stack depth */
} sperf_thread_data_t;

/* Global profiler state — one profiler per process. */
typedef struct sperf_profiler {
    int frequency;        /* target samples per second */
    int mode;             /* 0 = cpu, 1 = wall */
    volatile int running; /* set/cleared by start/stop; read by timer thread and hooks */
    pthread_t timer_thread;
    rb_postponed_job_handle_t pj_handle;
    sperf_sample_t *samples;
    size_t sample_count;
    size_t sample_capacity;
    VALUE *frame_pool; /* raw frame VALUEs from rb_profile_frames */
    size_t frame_pool_count;
    size_t frame_pool_capacity;
    rb_internal_thread_specific_key_t ts_key;
    rb_internal_thread_event_hook_t *thread_hook;
    /* GC tracking */
    int gc_phase;          /* sperf_gc_phase */
    int64_t gc_enter_ns;   /* wall time at GC_ENTER */
    size_t gc_frame_start; /* saved stack at GC_ENTER */
    int gc_frame_depth;    /* saved stack depth */
    /* Sampling overhead stats */
    size_t sampling_count;
    int64_t sampling_total_ns;
} sperf_profiler_t;

static sperf_profiler_t g_profiler;
/* Hidden TypedData object whose dmark keeps frame_pool VALUEs alive. */
static VALUE g_profiler_wrapper = Qnil;
73
+
74
+ /* ---- TypedData for GC marking of frame_pool ---- */
75
+
76
/*
 * GC mark callback for the TypedData wrapper around g_profiler: marks every
 * frame VALUE captured so far so that the objects referenced by pending
 * samples stay alive until stop() resolves them into strings.
 */
static void
sperf_profiler_mark(void *ptr)
{
    sperf_profiler_t *prof = (sperf_profiler_t *)ptr;
    if (prof->frame_pool && prof->frame_pool_count > 0) {
        rb_gc_mark_locations(prof->frame_pool, prof->frame_pool + prof->frame_pool_count);
    }
}

static const rb_data_type_t sperf_profiler_type = {
    .wrap_struct_name = "sperf_profiler",
    .function = {
        .dmark = sperf_profiler_mark,
        .dfree = NULL, /* g_profiler is static storage — never freed */
        .dsize = NULL,
    },
};
93
+
94
+ /* ---- CPU time ---- */
95
+
96
+ static int64_t
97
+ sperf_cpu_time_ns(void)
98
+ {
99
+ struct timespec ts;
100
+ if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) != 0) return -1;
101
+ return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
102
+ }
103
+
104
+ /* ---- Wall time ---- */
105
+
106
+ static int64_t
107
+ sperf_wall_time_ns(void)
108
+ {
109
+ struct timespec ts;
110
+ clock_gettime(CLOCK_MONOTONIC, &ts);
111
+ return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
112
+ }
113
+
114
+ /* ---- Get current thread's time based on profiler mode ---- */
115
+
116
+ static int64_t
117
+ sperf_current_time_ns(sperf_profiler_t *prof, sperf_thread_data_t *td)
118
+ {
119
+ if (prof->mode == 0) {
120
+ return sperf_cpu_time_ns();
121
+ } else {
122
+ return sperf_wall_time_ns();
123
+ }
124
+ }
125
+
126
+ /* ---- Sample buffer ---- */
127
+
128
+ /* Returns 0 on success, -1 on allocation failure */
129
+ static int
130
+ sperf_ensure_sample_capacity(sperf_profiler_t *prof)
131
+ {
132
+ if (prof->sample_count >= prof->sample_capacity) {
133
+ size_t new_cap = prof->sample_capacity * 2;
134
+ sperf_sample_t *new_samples = (sperf_sample_t *)realloc(
135
+ prof->samples,
136
+ new_cap * sizeof(sperf_sample_t));
137
+ if (!new_samples) return -1;
138
+ prof->samples = new_samples;
139
+ prof->sample_capacity = new_cap;
140
+ }
141
+ return 0;
142
+ }
143
+
144
+ /* ---- Frame pool ---- */
145
+
146
+ /* Ensure frame_pool has room for `needed` more entries. Returns 0 on success. */
147
+ static int
148
+ sperf_ensure_frame_pool_capacity(sperf_profiler_t *prof, int needed)
149
+ {
150
+ while (prof->frame_pool_count + (size_t)needed > prof->frame_pool_capacity) {
151
+ size_t new_cap = prof->frame_pool_capacity * 2;
152
+ VALUE *new_pool = (VALUE *)realloc(
153
+ prof->frame_pool,
154
+ new_cap * sizeof(VALUE));
155
+ if (!new_pool) return -1;
156
+ prof->frame_pool = new_pool;
157
+ prof->frame_pool_capacity = new_cap;
158
+ }
159
+ return 0;
160
+ }
161
+
162
+ /* ---- Record a sample ---- */
163
+
164
+ static void
165
+ sperf_record_sample(sperf_profiler_t *prof, size_t frame_start, int depth,
166
+ int64_t weight, int type)
167
+ {
168
+ if (weight <= 0) return;
169
+ if (sperf_ensure_sample_capacity(prof) < 0) return;
170
+
171
+ sperf_sample_t *sample = &prof->samples[prof->sample_count];
172
+ sample->depth = depth;
173
+ sample->frame_start = frame_start;
174
+ sample->weight = weight;
175
+ sample->type = type;
176
+ prof->sample_count++;
177
+ }
178
+
179
+ /* ---- Thread data initialization ---- */
180
+
181
+ /* Create and initialize per-thread data. Must be called on the target thread. */
182
+ static sperf_thread_data_t *
183
+ sperf_thread_data_create(sperf_profiler_t *prof, VALUE thread)
184
+ {
185
+ sperf_thread_data_t *td = (sperf_thread_data_t *)calloc(1, sizeof(sperf_thread_data_t));
186
+ if (!td) return NULL;
187
+ td->prev_cpu_ns = sperf_current_time_ns(prof, td);
188
+ td->prev_wall_ns = sperf_wall_time_ns();
189
+ rb_internal_thread_specific_set(thread, prof->ts_key, td);
190
+ return td;
191
+ }
192
+
193
+ /* ---- Thread event hooks ---- */
194
+
195
/*
 * SUSPENDED hook: the thread is about to release the GVL (e.g. blocking I/O).
 * Runs with the GVL held, so Ruby APIs are safe here.
 *
 * Records the time accumulated since the previous sample against the current
 * stack, then saves that stack plus a wall timestamp so RESUMED can
 * attribute the upcoming off-GVL interval to it.
 */
static void
sperf_handle_suspended(sperf_profiler_t *prof, VALUE thread)
{
    /* Has GVL — safe to call Ruby APIs */
    int64_t wall_now = sperf_wall_time_ns();

    sperf_thread_data_t *td = (sperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
    int is_first = 0;

    if (td == NULL) {
        td = sperf_thread_data_create(prof, thread);
        if (!td) return;
        is_first = 1;
    }

    int64_t time_now = sperf_current_time_ns(prof, td);
    if (time_now < 0) return;

    /* Capture backtrace into frame_pool */
    if (sperf_ensure_frame_pool_capacity(prof, SPERF_MAX_STACK_DEPTH) < 0) return;
    size_t frame_start = prof->frame_pool_count;
    int depth = rb_profile_frames(0, SPERF_MAX_STACK_DEPTH,
                                  &prof->frame_pool[frame_start], NULL);
    if (depth <= 0) return;
    prof->frame_pool_count += depth;

    /* Record normal sample (skip if first time — no previous timestamp to
     * diff against). NOTE: prev_cpu_ns holds the previous timestamp in the
     * profiler's configured mode (cpu OR wall), despite its name. */
    if (!is_first) {
        int64_t weight = time_now - td->prev_cpu_ns;
        sperf_record_sample(prof, frame_start, depth, weight, SPERF_SAMPLE_NORMAL);
    }

    /* Save stack and timestamp so READY/RESUMED can emit GVL samples */
    td->suspended_at_ns = wall_now;
    td->suspended_frame_start = frame_start;
    td->suspended_frame_depth = depth;
    td->prev_cpu_ns = time_now;
    td->prev_wall_ns = wall_now;
}
234
+
235
+ static void
236
+ sperf_handle_ready(sperf_profiler_t *prof, VALUE thread)
237
+ {
238
+ /* May NOT have GVL — only simple C operations allowed */
239
+ sperf_thread_data_t *td = (sperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
240
+ if (!td) return;
241
+
242
+ td->ready_at_ns = sperf_wall_time_ns();
243
+ }
244
+
245
/*
 * RESUMED hook: the thread has re-acquired the GVL (GVL held here).
 *
 * In wall mode, the interval since SUSPENDED is split into two samples
 * charged to the stack saved at suspension: SUSPENDED→READY ("GVL blocked",
 * the off-GVL work itself) and READY→RESUMED ("GVL wait", time spent queued
 * for the GVL). Afterwards the per-thread timestamps are reset so the next
 * sample measures from the resume point.
 */
static void
sperf_handle_resumed(sperf_profiler_t *prof, VALUE thread)
{
    /* Has GVL */
    sperf_thread_data_t *td = (sperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);

    if (td == NULL) {
        td = sperf_thread_data_create(prof, thread);
        if (!td) return;
    }

    int64_t wall_now = sperf_wall_time_ns();

    /* Record GVL blocked/wait samples (wall mode only) */
    if (prof->mode == 1 && td->suspended_frame_depth > 0) {
        if (td->ready_at_ns > 0 && td->ready_at_ns > td->suspended_at_ns) {
            int64_t blocked_ns = td->ready_at_ns - td->suspended_at_ns;
            sperf_record_sample(prof, td->suspended_frame_start,
                                td->suspended_frame_depth, blocked_ns,
                                SPERF_SAMPLE_GVL_BLOCKED);
        }
        if (td->ready_at_ns > 0 && wall_now > td->ready_at_ns) {
            int64_t wait_ns = wall_now - td->ready_at_ns;
            sperf_record_sample(prof, td->suspended_frame_start,
                                td->suspended_frame_depth, wait_ns,
                                SPERF_SAMPLE_GVL_WAIT);
        }
    }

    /* Reset prev times to current — next timer sample measures from resume */
    int64_t time_now = sperf_current_time_ns(prof, td);
    if (time_now >= 0) td->prev_cpu_ns = time_now;
    td->prev_wall_ns = wall_now;

    /* Clear suspended state */
    td->suspended_frame_depth = 0;
    td->ready_at_ns = 0;
}
283
+
284
+ static void
285
+ sperf_handle_exited(sperf_profiler_t *prof, VALUE thread)
286
+ {
287
+ sperf_thread_data_t *td = (sperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
288
+ if (td) {
289
+ free(td);
290
+ rb_internal_thread_specific_set(thread, prof->ts_key, NULL);
291
+ }
292
+ }
293
+
294
+ static void
295
+ sperf_thread_event_hook(rb_event_flag_t event, const rb_internal_thread_event_data_t *data, void *user_data)
296
+ {
297
+ sperf_profiler_t *prof = (sperf_profiler_t *)user_data;
298
+ if (!prof->running) return;
299
+
300
+ VALUE thread = data->thread;
301
+
302
+ if (event & RUBY_INTERNAL_THREAD_EVENT_SUSPENDED)
303
+ sperf_handle_suspended(prof, thread);
304
+ else if (event & RUBY_INTERNAL_THREAD_EVENT_READY)
305
+ sperf_handle_ready(prof, thread);
306
+ else if (event & RUBY_INTERNAL_THREAD_EVENT_RESUMED)
307
+ sperf_handle_resumed(prof, thread);
308
+ else if (event & RUBY_INTERNAL_THREAD_EVENT_EXITED)
309
+ sperf_handle_exited(prof, thread);
310
+ }
311
+
312
+ /* ---- GC event hook ---- */
313
+
314
/*
 * GC event hook. Tracks the current GC phase and, for each GC_ENTER/GC_EXIT
 * pair, records the wall time spent in GC against the Ruby stack that was
 * active when GC began. The sample type reflects the phase reached by EXIT
 * (sweeping once END_MARK has fired, otherwise marking).
 *
 * NOTE(review): RUBY_INTERNAL_EVENT_* hooks run under tight restrictions
 * (no Ruby object allocation); this code assumes rb_profile_frames and
 * plain-C bookkeeping are safe in that context — confirm against the
 * targeted Ruby versions.
 */
static void
sperf_gc_event_hook(rb_event_flag_t event, VALUE data, VALUE self, ID id, VALUE klass)
{
    sperf_profiler_t *prof = &g_profiler;
    if (!prof->running) return;

    if (event & RUBY_INTERNAL_EVENT_GC_START) {
        prof->gc_phase = SPERF_GC_MARKING;
    }
    else if (event & RUBY_INTERNAL_EVENT_GC_END_MARK) {
        prof->gc_phase = SPERF_GC_SWEEPING;
    }
    else if (event & RUBY_INTERNAL_EVENT_GC_END_SWEEP) {
        prof->gc_phase = SPERF_GC_NONE;
    }
    else if (event & RUBY_INTERNAL_EVENT_GC_ENTER) {
        /* Capture backtrace and timestamp at GC entry */
        prof->gc_enter_ns = sperf_wall_time_ns();

        if (sperf_ensure_frame_pool_capacity(prof, SPERF_MAX_STACK_DEPTH) < 0) return;
        size_t frame_start = prof->frame_pool_count;
        int depth = rb_profile_frames(0, SPERF_MAX_STACK_DEPTH,
                                      &prof->frame_pool[frame_start], NULL);
        if (depth <= 0) {
            /* No usable stack — make sure EXIT records nothing. */
            prof->gc_frame_depth = 0;
            return;
        }
        prof->frame_pool_count += depth;
        prof->gc_frame_start = frame_start;
        prof->gc_frame_depth = depth;
    }
    else if (event & RUBY_INTERNAL_EVENT_GC_EXIT) {
        if (prof->gc_frame_depth <= 0) return; /* ENTER failed or was missed */

        int64_t wall_now = sperf_wall_time_ns();
        int64_t weight = wall_now - prof->gc_enter_ns;
        int type = (prof->gc_phase == SPERF_GC_SWEEPING)
            ? SPERF_SAMPLE_GC_SWEEPING
            : SPERF_SAMPLE_GC_MARKING;

        sperf_record_sample(prof, prof->gc_frame_start,
                            prof->gc_frame_depth, weight, type);
        prof->gc_frame_depth = 0;
    }
}
359
+
360
+ /* ---- Sampling callback (postponed job) — current thread only ---- */
361
+
362
/*
 * Postponed-job callback: samples the CURRENT thread only. Triggered by the
 * timer thread via rb_postponed_job_trigger and executed by Ruby at a safe
 * point with the GVL held.
 *
 * Computes the time elapsed (in the profiler's mode) since this thread's
 * previous sample and records it against the current Ruby stack.
 */
static void
sperf_sample_job(void *arg)
{
    sperf_profiler_t *prof = (sperf_profiler_t *)arg;

    if (!prof->running) return;

    /* Measure sampling overhead (CPU time spent inside this callback).
     * NOTE(review): the early returns below skip the accounting at the
     * bottom, so the stats only cover invocations that recorded a sample. */
    struct timespec ts_start, ts_end;
    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_start);

    VALUE thread = rb_thread_current();

    /* Get/create per-thread data */
    sperf_thread_data_t *td = (sperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
    if (td == NULL) {
        td = sperf_thread_data_create(prof, thread);
        if (!td) return;
        return; /* Skip first sample for this thread — no previous timestamp */
    }

    int64_t time_now = sperf_current_time_ns(prof, td);
    if (time_now < 0) return;

    /* prev_cpu_ns holds the previous timestamp in the profiler's mode
     * (cpu OR wall), despite its name. */
    int64_t weight = time_now - td->prev_cpu_ns;
    td->prev_cpu_ns = time_now;
    td->prev_wall_ns = sperf_wall_time_ns();

    if (weight <= 0) return;

    /* Capture backtrace and record sample */
    if (sperf_ensure_frame_pool_capacity(prof, SPERF_MAX_STACK_DEPTH) < 0) return;

    size_t frame_start = prof->frame_pool_count;
    int depth = rb_profile_frames(0, SPERF_MAX_STACK_DEPTH,
                                  &prof->frame_pool[frame_start], NULL);
    if (depth <= 0) return;
    prof->frame_pool_count += depth;

    sperf_record_sample(prof, frame_start, depth, weight, SPERF_SAMPLE_NORMAL);

    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_end);
    prof->sampling_count++;
    prof->sampling_total_ns +=
        ((int64_t)ts_end.tv_sec - ts_start.tv_sec) * 1000000000LL +
        (ts_end.tv_nsec - ts_start.tv_nsec);
}
409
+
410
+ /* ---- Timer thread ---- */
411
+
412
+ static void *
413
+ sperf_timer_func(void *arg)
414
+ {
415
+ sperf_profiler_t *prof = (sperf_profiler_t *)arg;
416
+ struct timespec interval;
417
+ interval.tv_sec = 0;
418
+ interval.tv_nsec = 1000000000L / prof->frequency;
419
+
420
+ while (prof->running) {
421
+ rb_postponed_job_trigger(prof->pj_handle);
422
+ nanosleep(&interval, NULL);
423
+ }
424
+ return NULL;
425
+ }
426
+
427
+ /* ---- Resolve frame VALUE to [path, label] Ruby strings ---- */
428
+
429
+ static VALUE
430
+ sperf_resolve_frame(VALUE fval)
431
+ {
432
+ VALUE path = rb_profile_frame_path(fval);
433
+ VALUE label = rb_profile_frame_full_label(fval);
434
+
435
+ if (NIL_P(path)) path = rb_str_new_lit("<C method>");
436
+
437
+ if (NIL_P(path)) path = rb_str_new_cstr("");
438
+ if (NIL_P(label)) label = rb_str_new_cstr("");
439
+
440
+ return rb_ary_new3(2, path, label);
441
+ }
442
+
443
+ /* ---- Ruby API ---- */
444
+
445
+ static VALUE
446
+ rb_sperf_start(int argc, VALUE *argv, VALUE self)
447
+ {
448
+ VALUE opts;
449
+ int frequency = 1000;
450
+ int mode = 0; /* 0 = cpu, 1 = wall */
451
+
452
+ rb_scan_args(argc, argv, ":", &opts);
453
+ if (!NIL_P(opts)) {
454
+ VALUE vfreq = rb_hash_aref(opts, ID2SYM(rb_intern("frequency")));
455
+ if (!NIL_P(vfreq)) {
456
+ frequency = NUM2INT(vfreq);
457
+ if (frequency <= 0 || frequency > 1000000) {
458
+ rb_raise(rb_eArgError, "frequency must be between 1 and 1000000");
459
+ }
460
+ }
461
+ VALUE vmode = rb_hash_aref(opts, ID2SYM(rb_intern("mode")));
462
+ if (!NIL_P(vmode)) {
463
+ ID mode_id = SYM2ID(vmode);
464
+ if (mode_id == rb_intern("cpu")) {
465
+ mode = 0;
466
+ } else if (mode_id == rb_intern("wall")) {
467
+ mode = 1;
468
+ } else {
469
+ rb_raise(rb_eArgError, "mode must be :cpu or :wall");
470
+ }
471
+ }
472
+ }
473
+
474
+ if (g_profiler.running) {
475
+ rb_raise(rb_eRuntimeError, "Sperf is already running");
476
+ }
477
+
478
+ g_profiler.frequency = frequency;
479
+ g_profiler.mode = mode;
480
+ g_profiler.sample_count = 0;
481
+ g_profiler.sampling_count = 0;
482
+ g_profiler.sampling_total_ns = 0;
483
+ g_profiler.sample_capacity = SPERF_INITIAL_SAMPLES;
484
+ g_profiler.samples = (sperf_sample_t *)calloc(
485
+ g_profiler.sample_capacity, sizeof(sperf_sample_t));
486
+ if (!g_profiler.samples) {
487
+ rb_raise(rb_eNoMemError, "sperf: failed to allocate sample buffer");
488
+ }
489
+
490
+ g_profiler.frame_pool_count = 0;
491
+ g_profiler.frame_pool_capacity = SPERF_INITIAL_FRAME_POOL;
492
+ g_profiler.frame_pool = (VALUE *)calloc(
493
+ g_profiler.frame_pool_capacity, sizeof(VALUE));
494
+ if (!g_profiler.frame_pool) {
495
+ free(g_profiler.samples);
496
+ g_profiler.samples = NULL;
497
+ rb_raise(rb_eNoMemError, "sperf: failed to allocate frame pool");
498
+ }
499
+
500
+ /* Register GC event hook */
501
+ g_profiler.gc_phase = SPERF_GC_NONE;
502
+ g_profiler.gc_frame_depth = 0;
503
+ rb_add_event_hook(sperf_gc_event_hook,
504
+ RUBY_INTERNAL_EVENT_GC_START |
505
+ RUBY_INTERNAL_EVENT_GC_END_MARK |
506
+ RUBY_INTERNAL_EVENT_GC_END_SWEEP |
507
+ RUBY_INTERNAL_EVENT_GC_ENTER |
508
+ RUBY_INTERNAL_EVENT_GC_EXIT,
509
+ Qnil);
510
+
511
+ /* Register thread event hook for all events */
512
+ g_profiler.thread_hook = rb_internal_thread_add_event_hook(
513
+ sperf_thread_event_hook,
514
+ RUBY_INTERNAL_THREAD_EVENT_EXITED |
515
+ RUBY_INTERNAL_THREAD_EVENT_SUSPENDED |
516
+ RUBY_INTERNAL_THREAD_EVENT_READY |
517
+ RUBY_INTERNAL_THREAD_EVENT_RESUMED,
518
+ &g_profiler);
519
+
520
+ /* Pre-initialize current thread's time so the first sample is not skipped */
521
+ {
522
+ VALUE cur_thread = rb_thread_current();
523
+ sperf_thread_data_t *td = sperf_thread_data_create(&g_profiler, cur_thread);
524
+ if (!td) {
525
+ free(g_profiler.samples);
526
+ g_profiler.samples = NULL;
527
+ free(g_profiler.frame_pool);
528
+ g_profiler.frame_pool = NULL;
529
+ rb_internal_thread_remove_event_hook(g_profiler.thread_hook);
530
+ g_profiler.thread_hook = NULL;
531
+ rb_raise(rb_eNoMemError, "sperf: failed to allocate thread data");
532
+ }
533
+ }
534
+
535
+ g_profiler.running = 1;
536
+
537
+ if (pthread_create(&g_profiler.timer_thread, NULL, sperf_timer_func, &g_profiler) != 0) {
538
+ g_profiler.running = 0;
539
+ {
540
+ VALUE cur = rb_thread_current();
541
+ sperf_thread_data_t *td = (sperf_thread_data_t *)rb_internal_thread_specific_get(cur, g_profiler.ts_key);
542
+ if (td) {
543
+ free(td);
544
+ rb_internal_thread_specific_set(cur, g_profiler.ts_key, NULL);
545
+ }
546
+ }
547
+ rb_internal_thread_remove_event_hook(g_profiler.thread_hook);
548
+ g_profiler.thread_hook = NULL;
549
+ free(g_profiler.samples);
550
+ g_profiler.samples = NULL;
551
+ free(g_profiler.frame_pool);
552
+ g_profiler.frame_pool = NULL;
553
+ rb_raise(rb_eRuntimeError, "sperf: failed to create timer thread");
554
+ }
555
+
556
+ return Qtrue;
557
+ }
558
+
559
+ static VALUE
560
+ rb_sperf_stop(VALUE self)
561
+ {
562
+ VALUE result, samples_ary;
563
+ size_t i;
564
+ int j;
565
+
566
+ if (!g_profiler.running) {
567
+ return Qnil;
568
+ }
569
+
570
+ g_profiler.running = 0;
571
+ pthread_join(g_profiler.timer_thread, NULL);
572
+
573
+ if (g_profiler.thread_hook) {
574
+ rb_internal_thread_remove_event_hook(g_profiler.thread_hook);
575
+ g_profiler.thread_hook = NULL;
576
+ }
577
+
578
+ /* Remove GC event hook */
579
+ rb_remove_event_hook(sperf_gc_event_hook);
580
+
581
+ /* Clean up thread-specific data for all live threads */
582
+ {
583
+ VALUE threads = rb_funcall(rb_cThread, rb_intern("list"), 0);
584
+ long tc = RARRAY_LEN(threads);
585
+ long ti;
586
+ for (ti = 0; ti < tc; ti++) {
587
+ VALUE thread = RARRAY_AREF(threads, ti);
588
+ sperf_thread_data_t *td = (sperf_thread_data_t *)rb_internal_thread_specific_get(thread, g_profiler.ts_key);
589
+ if (td) {
590
+ free(td);
591
+ rb_internal_thread_specific_set(thread, g_profiler.ts_key, NULL);
592
+ }
593
+ }
594
+ }
595
+
596
+ /* Build result hash */
597
+ result = rb_hash_new();
598
+
599
+ /* mode */
600
+ rb_hash_aset(result, ID2SYM(rb_intern("mode")),
601
+ ID2SYM(rb_intern(g_profiler.mode == 1 ? "wall" : "cpu")));
602
+
603
+ /* frequency */
604
+ rb_hash_aset(result, ID2SYM(rb_intern("frequency")), INT2NUM(g_profiler.frequency));
605
+
606
+ /* sampling_count, sampling_time_ns */
607
+ rb_hash_aset(result, ID2SYM(rb_intern("sampling_count")), SIZET2NUM(g_profiler.sampling_count));
608
+ rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")), LONG2NUM(g_profiler.sampling_total_ns));
609
+
610
+ /* samples: array of [frames_array, weight]
611
+ * Each frame is [path_string, label_string]
612
+ * GVL blocked/wait samples get synthetic frame prepended (leaf position) */
613
+ samples_ary = rb_ary_new_capa((long)g_profiler.sample_count);
614
+ for (i = 0; i < g_profiler.sample_count; i++) {
615
+ sperf_sample_t *s = &g_profiler.samples[i];
616
+ VALUE frames = rb_ary_new_capa(s->depth + 1);
617
+
618
+ /* Prepend synthetic frame at leaf position (index 0) */
619
+ if (s->type == SPERF_SAMPLE_GVL_BLOCKED) {
620
+ VALUE syn = rb_ary_new3(2, rb_str_new_lit("<GVL>"), rb_str_new_lit("[GVL blocked]"));
621
+ rb_ary_push(frames, syn);
622
+ } else if (s->type == SPERF_SAMPLE_GVL_WAIT) {
623
+ VALUE syn = rb_ary_new3(2, rb_str_new_lit("<GVL>"), rb_str_new_lit("[GVL wait]"));
624
+ rb_ary_push(frames, syn);
625
+ } else if (s->type == SPERF_SAMPLE_GC_MARKING) {
626
+ VALUE syn = rb_ary_new3(2, rb_str_new_lit("<GC>"), rb_str_new_lit("[GC marking]"));
627
+ rb_ary_push(frames, syn);
628
+ } else if (s->type == SPERF_SAMPLE_GC_SWEEPING) {
629
+ VALUE syn = rb_ary_new3(2, rb_str_new_lit("<GC>"), rb_str_new_lit("[GC sweeping]"));
630
+ rb_ary_push(frames, syn);
631
+ }
632
+
633
+ for (j = 0; j < s->depth; j++) {
634
+ VALUE fval = g_profiler.frame_pool[s->frame_start + j];
635
+ rb_ary_push(frames, sperf_resolve_frame(fval));
636
+ }
637
+
638
+ VALUE sample = rb_ary_new3(2, frames, LONG2NUM(s->weight));
639
+ rb_ary_push(samples_ary, sample);
640
+ }
641
+ rb_hash_aset(result, ID2SYM(rb_intern("samples")), samples_ary);
642
+
643
+ /* Cleanup */
644
+ free(g_profiler.samples);
645
+ g_profiler.samples = NULL;
646
+ free(g_profiler.frame_pool);
647
+ g_profiler.frame_pool = NULL;
648
+ g_profiler.frame_pool_count = 0;
649
+
650
+ return result;
651
+ }
652
+
653
+ /* ---- Fork safety ---- */
654
+
655
/*
 * pthread_atfork child handler: the timer thread does not survive fork, so
 * silently disable profiling in the child and drop all parent-owned state.
 *
 * NOTE(review): this runs in the forked child's atfork context; calling
 * Ruby APIs (hook removal) here assumes the fork originated from Ruby with
 * the VM in a consistent state — confirm against supported Ruby versions.
 */
static void
sperf_after_fork_child(void)
{
    if (!g_profiler.running) return;

    /* Mark as not running — timer thread doesn't exist in child */
    g_profiler.running = 0;

    /* Remove hooks so they don't fire with stale state */
    if (g_profiler.thread_hook) {
        rb_internal_thread_remove_event_hook(g_profiler.thread_hook);
        g_profiler.thread_hook = NULL;
    }
    rb_remove_event_hook(sperf_gc_event_hook);

    /* Free sample buffer and frame pool — these hold parent's data */
    free(g_profiler.samples);
    g_profiler.samples = NULL;
    g_profiler.sample_count = 0;
    g_profiler.sample_capacity = 0;

    free(g_profiler.frame_pool);
    g_profiler.frame_pool = NULL;
    g_profiler.frame_pool_count = 0;
    g_profiler.frame_pool_capacity = 0;

    /* Reset GC state */
    g_profiler.gc_phase = 0;

    /* Reset stats */
    g_profiler.sampling_count = 0;
    g_profiler.sampling_total_ns = 0;
}
688
+
689
+ /* ---- Init ---- */
690
+
691
/*
 * Extension entry point: defines Sperf._c_start/_c_stop, zeroes the global
 * profiler, preregisters the postponed sampling job, creates the
 * thread-specific-storage key, wraps g_profiler in a GC-registered
 * TypedData object (so its dmark keeps captured frame VALUEs alive), and
 * installs the fork-safety handler.
 */
void
Init_sperf(void)
{
    VALUE mSperf = rb_define_module("Sperf");
    rb_define_module_function(mSperf, "_c_start", rb_sperf_start, -1);
    rb_define_module_function(mSperf, "_c_stop", rb_sperf_stop, 0);

    memset(&g_profiler, 0, sizeof(g_profiler));
    g_profiler.pj_handle = rb_postponed_job_preregister(0, sperf_sample_job, &g_profiler);
    g_profiler.ts_key = rb_internal_thread_specific_key_create();

    /* TypedData wrapper for GC marking of frame_pool */
    g_profiler_wrapper = TypedData_Wrap_Struct(rb_cObject, &sperf_profiler_type, &g_profiler);
    rb_gc_register_address(&g_profiler_wrapper);

    /* Fork safety: silently stop profiling in child process */
    pthread_atfork(NULL, NULL, sperf_after_fork_child);
}