rperf 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/rperf/rperf.c CHANGED
@@ -36,6 +36,7 @@
  #define RPERF_FRAME_TABLE_OLD_KEYS_INITIAL 16
  #define RPERF_AGG_TABLE_INITIAL 1024
  #define RPERF_STACK_POOL_INITIAL 4096
+ #define RPERF_PAUSED(prof) ((prof)->profile_refcount == 0)

  /* Synthetic frame IDs (reserved in frame_table, 0-based) */
  #define RPERF_SYNTHETIC_GVL_BLOCKED 0
@@ -66,6 +67,7 @@ typedef struct rperf_sample {
  int64_t weight;
  int type; /* rperf_sample_type */
  int thread_seq; /* thread sequence number (1-based) */
+ int label_set_id; /* label set ID (0 = no labels) */
  } rperf_sample_t;

  /* ---- Sample buffer (double-buffered) ---- */
@@ -103,6 +105,7 @@ typedef struct rperf_agg_entry {
  uint32_t frame_start; /* offset into stack_pool */
  int depth; /* includes synthetic frame */
  int thread_seq;
+ int label_set_id; /* label set ID (0 = no labels) */
  int64_t weight; /* accumulated */
  uint32_t hash; /* cached hash value */
  int used; /* 0 = empty, 1 = used */
@@ -124,6 +127,7 @@ typedef struct rperf_thread_data {
  int64_t suspended_at_ns; /* wall time at SUSPENDED */
  int64_t ready_at_ns; /* wall time at READY */
  int thread_seq; /* thread sequence number (1-based) */
+ int label_set_id; /* current label set ID (0 = no labels) */
  } rperf_thread_data_t;

  /* ---- GC tracking state ---- */
@@ -132,6 +136,7 @@ typedef struct rperf_gc_state {
  int phase; /* rperf_gc_phase */
  int64_t enter_ns; /* wall time at GC_ENTER */
  int thread_seq; /* thread_seq at GC_ENTER */
+ int label_set_id; /* label_set_id at GC_ENTER */
  } rperf_gc_state_t;

  /* ---- Sampling overhead stats ---- */
@@ -175,6 +180,14 @@ typedef struct rperf_profiler {
  int next_thread_seq;
  /* Sampling overhead stats */
  rperf_stats_t stats;
+ /* Label sets: Ruby Array of Hash objects, managed from Ruby side.
+ * Index 0 is reserved (no labels). GC-marked via profiler_mark. */
+ VALUE label_sets; /* Ruby Array or Qnil */
+ /* Profile refcount: controls timer active/paused state.
+ * start(defer:false) sets to 1, start(defer:true) sets to 0.
+ * profile_inc/dec transitions 0↔1 arm/disarm the timer.
+ * Modified only under GVL, so plain int is safe. */
+ int profile_refcount;
  } rperf_profiler_t;

  static rperf_profiler_t g_profiler;
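
The profile_refcount field above drives the new pause/resume machinery. A minimal standalone sketch of the edge-triggered discipline described in its comment (illustrative names, not the gem's code): only the 0->1 and 1->0 transitions touch the timer, so nested profiled sections compose without redundant re-arming.

#include <stdio.h>

static int refcount = 0;                 /* stands in for profile_refcount */

static void arm_timer(void)    { puts("timer armed"); }
static void disarm_timer(void) { puts("timer disarmed"); }

static void profile_inc(void) {
    if (++refcount == 1)                 /* 0 -> 1: resume sampling */
        arm_timer();
}

static void profile_dec(void) {
    if (--refcount == 0)                 /* 1 -> 0: pause sampling */
        disarm_timer();
}

int main(void) {
    profile_inc();                       /* arms */
    profile_inc();                       /* nested: refcount is 2, no-op */
    profile_dec();                       /* refcount back to 1, still armed */
    profile_dec();                       /* disarms */
    return 0;
}
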
@@ -195,6 +208,10 @@ rperf_profiler_mark(void *ptr)
  buf->frame_pool + buf->frame_pool_count);
  }
  }
+ /* Mark label_sets array */
+ if (prof->label_sets != Qnil) {
+ rb_gc_mark(prof->label_sets);
+ }
  /* Mark frame_table keys (unique frame VALUEs).
  * Acquire count to synchronize with the release-store in insert,
  * ensuring we see the keys pointer that is valid for [0, count).
@@ -431,7 +448,7 @@ rperf_frame_table_insert(rperf_frame_table_t *ft, VALUE fval)
  /* ---- Aggregation table operations (all malloc-based, no GVL needed) ---- */

  static uint32_t
- rperf_fnv1a_u32(const uint32_t *data, int len, int thread_seq)
+ rperf_fnv1a_u32(const uint32_t *data, int len, int thread_seq, int label_set_id)
  {
  uint32_t h = 2166136261u;
  int i;
@@ -441,6 +458,8 @@ rperf_fnv1a_u32(const uint32_t *data, int len, int thread_seq)
  }
  h ^= (uint32_t)thread_seq;
  h *= 16777619u;
+ h ^= (uint32_t)label_set_id;
+ h *= 16777619u;
  return h;
  }

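The two added lines fold the label set ID into the stack hash exactly as thread_seq already was, so samples that differ only in labels aggregate separately. A self-contained sketch of the function, assuming the elided loop body is standard FNV-1a over 32-bit words (the visible offset basis 2166136261 and prime 16777619 are the standard FNV-1a constants):

#include <stdint.h>
#include <stdio.h>

static uint32_t fnv1a_u32(const uint32_t *data, int len,
                          int thread_seq, int label_set_id)
{
    uint32_t h = 2166136261u;            /* FNV-1a offset basis */
    for (int i = 0; i < len; i++) {
        h ^= data[i];
        h *= 16777619u;                  /* FNV prime */
    }
    h ^= (uint32_t)thread_seq;
    h *= 16777619u;
    h ^= (uint32_t)label_set_id;         /* new: labels participate in identity */
    h *= 16777619u;
    return h;
}

int main(void)
{
    uint32_t stack[] = { 7u, 42u, 9u };
    /* Same stack and thread, different label sets: distinct hashes,
     * so the samples land in distinct aggregation entries. */
    printf("%08x\n", (unsigned)fnv1a_u32(stack, 3, 1, 0));
    printf("%08x\n", (unsigned)fnv1a_u32(stack, 3, 1, 5));
    return 0;
}

The hash alone is not trusted for identity: as the insertion path below shows, the bucket comparison also checks depth, thread_seq, label_set_id, and a memcmp of the frame IDs.
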
@@ -506,7 +525,8 @@ rperf_agg_ensure_stack_pool(rperf_agg_table_t *at, int needed)
  /* Insert or merge a stack into the aggregation table */
  static void
  rperf_agg_table_insert(rperf_agg_table_t *at, const uint32_t *frame_ids,
- int depth, int thread_seq, int64_t weight, uint32_t hash)
+ int depth, int thread_seq, int label_set_id,
+ int64_t weight, uint32_t hash)
  {
  size_t idx = hash % at->bucket_capacity;

@@ -514,6 +534,7 @@ rperf_agg_table_insert(rperf_agg_table_t *at, const uint32_t *frame_ids,
  rperf_agg_entry_t *e = &at->buckets[idx];
  if (!e->used) break;
  if (e->hash == hash && e->depth == depth && e->thread_seq == thread_seq &&
+ e->label_set_id == label_set_id &&
  memcmp(at->stack_pool + e->frame_start, frame_ids,
  depth * sizeof(uint32_t)) == 0) {
  /* Match — merge weight */
@@ -530,6 +551,7 @@ rperf_agg_table_insert(rperf_agg_table_t *at, const uint32_t *frame_ids,
  e->frame_start = (uint32_t)at->stack_pool_count;
  e->depth = depth;
  e->thread_seq = thread_seq;
+ e->label_set_id = label_set_id;
  e->weight = weight;
  e->hash = hash;
  e->used = 1;
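
Pieced together from the two hunks above, the aggregation table is classic open addressing with linear probing: start at hash % capacity, merge the weight on a full key match, or claim the first empty bucket. A condensed standalone sketch under those assumptions (simplified to a bare frame pointer instead of the gem's stack_pool offsets):

#include <stdint.h>
#include <string.h>

typedef struct {
    uint32_t hash;
    int      depth, thread_seq, label_set_id;
    const uint32_t *frames;              /* simplified: pointer, not a pool offset */
    int64_t  weight;
    int      used;
} entry_t;

/* Insert or merge; assumes the table is never full (the gem grows/rehashes). */
static void agg_insert(entry_t *buckets, size_t cap,
                       const uint32_t *frames, int depth,
                       int thread_seq, int label_set_id,
                       int64_t weight, uint32_t hash)
{
    size_t idx = hash % cap;
    for (;;) {
        entry_t *e = &buckets[idx];
        if (!e->used) {                  /* empty bucket: claim it */
            e->hash = hash; e->depth = depth;
            e->thread_seq = thread_seq; e->label_set_id = label_set_id;
            e->frames = frames; e->weight = weight; e->used = 1;
            return;
        }
        if (e->hash == hash && e->depth == depth &&
            e->thread_seq == thread_seq && e->label_set_id == label_set_id &&
            memcmp(e->frames, frames, depth * sizeof(uint32_t)) == 0) {
            e->weight += weight;         /* full key match: merge */
            return;
        }
        idx = (idx + 1) % cap;           /* linear probe */
    }
}
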
@@ -581,10 +603,10 @@ rperf_aggregate_buffer(rperf_profiler_t *prof, rperf_sample_buffer_t *buf)
  if (overflow) break; /* frame_table full, stop aggregating this buffer */

  int total_depth = off + s->depth;
- hash = rperf_fnv1a_u32(temp_ids, total_depth, s->thread_seq);
+ hash = rperf_fnv1a_u32(temp_ids, total_depth, s->thread_seq, s->label_set_id);

  rperf_agg_table_insert(&prof->agg_table, temp_ids, total_depth,
- s->thread_seq, s->weight, hash);
+ s->thread_seq, s->label_set_id, s->weight, hash);
  }

  /* Reset buffer for reuse.
@@ -634,7 +656,7 @@ rperf_try_swap(rperf_profiler_t *prof)
  /* Write a sample into a specific buffer. No swap check. */
  static int
  rperf_write_sample(rperf_sample_buffer_t *buf, size_t frame_start, int depth,
- int64_t weight, int type, int thread_seq)
+ int64_t weight, int type, int thread_seq, int label_set_id)
  {
  if (weight <= 0) return 0;
  if (rperf_ensure_sample_capacity(buf) < 0) return -1;
@@ -645,16 +667,17 @@ rperf_write_sample(rperf_sample_buffer_t *buf, size_t frame_start, int depth,
  sample->weight = weight;
  sample->type = type;
  sample->thread_seq = thread_seq;
+ sample->label_set_id = label_set_id;
  buf->sample_count++;
  return 0;
  }

  static void
  rperf_record_sample(rperf_profiler_t *prof, size_t frame_start, int depth,
- int64_t weight, int type, int thread_seq)
+ int64_t weight, int type, int thread_seq, int label_set_id)
  {
  rperf_sample_buffer_t *buf = &prof->buffers[atomic_load_explicit(&prof->active_idx, memory_order_relaxed)];
- rperf_write_sample(buf, frame_start, depth, weight, type, thread_seq);
+ rperf_write_sample(buf, frame_start, depth, weight, type, thread_seq, label_set_id);
  rperf_try_swap(prof);
  }

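rperf_record_sample appends to whichever buffer active_idx selects, then offers a swap. The body of rperf_try_swap is not part of this diff, so the following is only a plausible single-writer, single-reader sketch of the double-buffer handoff named in the comments, not the gem's implementation: the writer flips active_idx and raises swap_ready with a release store; the reader drains the standby buffer under an acquire load and lowers the flag.

#include <stdatomic.h>
#include <stdio.h>

#define CAP 4

typedef struct { int items[CAP]; int count; } buffer_t;

static buffer_t buffers[2];
static _Atomic int active_idx;
static _Atomic int swap_ready;

/* Writer side: append, and hand off the full buffer if the reader is done. */
static void write_item(int v)
{
    int idx = atomic_load_explicit(&active_idx, memory_order_relaxed);
    buffer_t *buf = &buffers[idx];
    buf->items[buf->count++] = v;
    if (buf->count == CAP &&
        !atomic_load_explicit(&swap_ready, memory_order_acquire)) {
        atomic_store_explicit(&active_idx, idx ^ 1, memory_order_relaxed);
        atomic_store_explicit(&swap_ready, 1, memory_order_release);
    }
}

/* Reader side: drain the standby buffer once the writer has published it. */
static void drain_standby(void)
{
    if (atomic_load_explicit(&swap_ready, memory_order_acquire)) {
        int idx = atomic_load_explicit(&active_idx, memory_order_relaxed);
        buffer_t *standby = &buffers[idx ^ 1];
        printf("draining %d items\n", standby->count);
        standby->count = 0;
        atomic_store_explicit(&swap_ready, 0, memory_order_release);
    }
}

int main(void)
{
    for (int i = 0; i < 12; i++) { write_item(i); drain_standby(); }
    return 0;
}
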
@@ -676,12 +699,11 @@ rperf_thread_data_create(rperf_profiler_t *prof, VALUE thread)
  /* ---- Thread event hooks ---- */

  static void
- rperf_handle_suspended(rperf_profiler_t *prof, VALUE thread)
+ rperf_handle_suspended(rperf_profiler_t *prof, VALUE thread, rperf_thread_data_t *td)
  {
  /* Has GVL — safe to call Ruby APIs */
  int64_t wall_now = rperf_wall_time_ns();

- rperf_thread_data_t *td = (rperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
  int is_first = 0;

  if (td == NULL) {
@@ -702,10 +724,10 @@
  if (depth <= 0) return;
  buf->frame_pool_count += depth;

- /* Record normal sample (skip if first time — no prev_time) */
- if (!is_first) {
+ /* Record normal sample (skip if first time — no prev_time, or if paused) */
+ if (!is_first && !RPERF_PAUSED(prof)) {
  int64_t weight = time_now - td->prev_time_ns;
- rperf_record_sample(prof, frame_start, depth, weight, RPERF_SAMPLE_NORMAL, td->thread_seq);
+ rperf_record_sample(prof, frame_start, depth, weight, RPERF_SAMPLE_NORMAL, td->thread_seq, td->label_set_id);
  }

  /* Save timestamp for READY/RESUMED */
@@ -715,21 +737,18 @@
  }

  static void
- rperf_handle_ready(rperf_profiler_t *prof, VALUE thread)
+ rperf_handle_ready(rperf_thread_data_t *td)
  {
  /* May NOT have GVL — only simple C operations allowed */
- rperf_thread_data_t *td = (rperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
  if (!td) return;

  td->ready_at_ns = rperf_wall_time_ns();
  }

  static void
- rperf_handle_resumed(rperf_profiler_t *prof, VALUE thread)
+ rperf_handle_resumed(rperf_profiler_t *prof, VALUE thread, rperf_thread_data_t *td)
  {
  /* Has GVL */
- rperf_thread_data_t *td = (rperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
-
  if (td == NULL) {
  td = rperf_thread_data_create(prof, thread);
  if (!td) return;
@@ -745,7 +764,7 @@
  * Both samples are written directly into the same buffer before calling
  * rperf_try_swap, so that a swap triggered by the first sample cannot
  * move the second into a different buffer with a stale frame_start. */
- if (prof->mode == 1 && td->suspended_at_ns > 0) {
+ if (prof->mode == 1 && td->suspended_at_ns > 0 && !RPERF_PAUSED(prof)) {
  rperf_sample_buffer_t *buf = &prof->buffers[atomic_load_explicit(&prof->active_idx, memory_order_relaxed)];
  if (rperf_ensure_frame_pool_capacity(buf, RPERF_MAX_STACK_DEPTH) < 0) goto skip_gvl;
  size_t frame_start = buf->frame_pool_count;
@@ -758,12 +777,12 @@
  if (td->ready_at_ns > 0 && td->ready_at_ns > td->suspended_at_ns) {
  int64_t blocked_ns = td->ready_at_ns - td->suspended_at_ns;
  rperf_write_sample(buf, frame_start, depth, blocked_ns,
- RPERF_SAMPLE_GVL_BLOCKED, td->thread_seq);
+ RPERF_SAMPLE_GVL_BLOCKED, td->thread_seq, td->label_set_id);
  }
  if (td->ready_at_ns > 0 && wall_now > td->ready_at_ns) {
  int64_t wait_ns = wall_now - td->ready_at_ns;
  rperf_write_sample(buf, frame_start, depth, wait_ns,
- RPERF_SAMPLE_GVL_WAIT, td->thread_seq);
+ RPERF_SAMPLE_GVL_WAIT, td->thread_seq, td->label_set_id);
  }

  rperf_try_swap(prof);
@@ -781,9 +800,8 @@ skip_gvl:
  }

  static void
- rperf_handle_exited(rperf_profiler_t *prof, VALUE thread)
+ rperf_handle_exited(rperf_profiler_t *prof, VALUE thread, rperf_thread_data_t *td)
  {
- rperf_thread_data_t *td = (rperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
  if (td) {
  free(td);
  rb_internal_thread_specific_set(thread, prof->ts_key, NULL);
@@ -797,15 +815,16 @@ rperf_thread_event_hook(rb_event_flag_t event, const rb_internal_thread_event_da
  if (!prof->running) return;

  VALUE thread = data->thread;
+ rperf_thread_data_t *td = (rperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);

  if (event & RUBY_INTERNAL_THREAD_EVENT_SUSPENDED)
- rperf_handle_suspended(prof, thread);
+ rperf_handle_suspended(prof, thread, td);
  else if (event & RUBY_INTERNAL_THREAD_EVENT_READY)
- rperf_handle_ready(prof, thread);
+ rperf_handle_ready(td);
  else if (event & RUBY_INTERNAL_THREAD_EVENT_RESUMED)
- rperf_handle_resumed(prof, thread);
+ rperf_handle_resumed(prof, thread, td);
  else if (event & RUBY_INTERNAL_THREAD_EVENT_EXITED)
- rperf_handle_exited(prof, thread);
+ rperf_handle_exited(prof, thread, td);
  }

  /* ---- GC event hook ---- */
@@ -826,17 +845,19 @@ rperf_gc_event_hook(rb_event_flag_t event, VALUE data, VALUE self, ID id, VALUE
  prof->gc.phase = RPERF_GC_NONE;
  }
  else if (event & RUBY_INTERNAL_EVENT_GC_ENTER) {
- /* Save timestamp and thread_seq; backtrace is captured at GC_EXIT
+ /* Save timestamp, thread_seq, and label_set_id; backtrace is captured at GC_EXIT
  * to avoid buffer mismatch after a double-buffer swap. */
  prof->gc.enter_ns = rperf_wall_time_ns();
  {
  VALUE thread = rb_thread_current();
  rperf_thread_data_t *td = (rperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
  prof->gc.thread_seq = td ? td->thread_seq : 0;
+ prof->gc.label_set_id = td ? td->label_set_id : 0;
  }
  }
  else if (event & RUBY_INTERNAL_EVENT_GC_EXIT) {
  if (prof->gc.enter_ns <= 0) return;
+ if (RPERF_PAUSED(prof)) { prof->gc.enter_ns = 0; return; }

  int64_t wall_now = rperf_wall_time_ns();
  int64_t weight = wall_now - prof->gc.enter_ns;
@@ -861,7 +882,7 @@
  }
  buf->frame_pool_count += depth;

- rperf_record_sample(prof, frame_start, depth, weight, type, prof->gc.thread_seq);
+ rperf_record_sample(prof, frame_start, depth, weight, type, prof->gc.thread_seq, prof->gc.label_set_id);
  prof->gc.enter_ns = 0;
  }
  }
@@ -874,6 +895,7 @@ rperf_sample_job(void *arg)
  rperf_profiler_t *prof = (rperf_profiler_t *)arg;

  if (!prof->running) return;
+ if (RPERF_PAUSED(prof)) return;

  /* Measure sampling overhead */
  struct timespec ts_start, ts_end;
@@ -908,7 +930,7 @@
  if (depth <= 0) return;
  buf->frame_pool_count += depth;

- rperf_record_sample(prof, frame_start, depth, weight, RPERF_SAMPLE_NORMAL, td->thread_seq);
+ rperf_record_sample(prof, frame_start, depth, weight, RPERF_SAMPLE_NORMAL, td->thread_seq, td->label_set_id);

  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_end);
  prof->stats.sampling_count++;
@@ -971,20 +993,32 @@ rperf_worker_nanosleep_func(void *arg)

  CHECKED(pthread_mutex_lock(&prof->worker_mutex));
  while (prof->running) {
- int ret = pthread_cond_timedwait(&prof->worker_cond, &prof->worker_mutex, &deadline);
- if (ret != 0 && ret != ETIMEDOUT) {
- fprintf(stderr, "rperf: pthread_cond_timedwait failed: %s\n", strerror(ret));
- abort();
- }
- if (ret == ETIMEDOUT) {
- prof->stats.trigger_count++;
- rb_postponed_job_trigger(prof->pj_handle);
- /* Advance deadline by interval */
+ if (RPERF_PAUSED(prof)) {
+ /* Paused: wait indefinitely until signaled (resume or stop) */
+ CHECKED(pthread_cond_wait(&prof->worker_cond, &prof->worker_mutex));
+ /* Reset deadline on wake to avoid burst of catch-up triggers */
+ clock_gettime(CLOCK_REALTIME, &deadline);
  deadline.tv_nsec += interval_ns;
  if (deadline.tv_nsec >= 1000000000L) {
  deadline.tv_sec++;
  deadline.tv_nsec -= 1000000000L;
  }
+ } else {
+ int ret = pthread_cond_timedwait(&prof->worker_cond, &prof->worker_mutex, &deadline);
+ if (ret != 0 && ret != ETIMEDOUT) {
+ fprintf(stderr, "rperf: pthread_cond_timedwait failed: %s\n", strerror(ret));
+ abort();
+ }
+ if (ret == ETIMEDOUT) {
+ prof->stats.trigger_count++;
+ rb_postponed_job_trigger(prof->pj_handle);
+ /* Advance deadline by interval */
+ deadline.tv_nsec += interval_ns;
+ if (deadline.tv_nsec >= 1000000000L) {
+ deadline.tv_sec++;
+ deadline.tv_nsec -= 1000000000L;
+ }
+ }
  }
  rperf_try_aggregate(prof);
  }
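
Two details of the reworked loop are worth spelling out. pthread_cond_timedwait takes an absolute CLOCK_REALTIME deadline, and the loop advances that deadline by one interval per tick instead of re-reading the clock, which prevents drift; on wake from a pause it resets the deadline so a backlog of missed ticks does not fire in a burst. The carry normalization is the fiddly part, shown here in isolation (a sketch; the gem inlines the equivalent):

#include <time.h>

/* Advance an absolute timespec deadline by interval_ns, keeping
 * tv_nsec in [0, 1e9). A while loop also handles intervals over 1s. */
static void advance_deadline(struct timespec *deadline, long interval_ns)
{
    deadline->tv_nsec += interval_ns;
    while (deadline->tv_nsec >= 1000000000L) {
        deadline->tv_sec++;
        deadline->tv_nsec -= 1000000000L;
    }
}
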
@@ -1006,16 +1040,105 @@ rperf_resolve_frame(VALUE fval)
  return rb_ary_new3(2, path, label);
  }

+ /* ---- Shared helpers for stop/snapshot ---- */
+
+ /* Flush pending sample buffers into agg_table.
+ * Caller must ensure no concurrent access (worker joined or mutex held). */
+ static void
+ rperf_flush_buffers(rperf_profiler_t *prof)
+ {
+ int cur_idx = atomic_load_explicit(&prof->active_idx, memory_order_acquire);
+ if (atomic_load_explicit(&prof->swap_ready, memory_order_acquire)) {
+ int standby_idx = cur_idx ^ 1;
+ rperf_aggregate_buffer(prof, &prof->buffers[standby_idx]);
+ atomic_store_explicit(&prof->swap_ready, 0, memory_order_release);
+ }
+ rperf_aggregate_buffer(prof, &prof->buffers[cur_idx]);
+ }
+
+ /* Build result hash from aggregated data (agg_table + frame_table).
+ * Does NOT free any resources. Caller must hold GVL. */
+ static VALUE
+ rperf_build_aggregated_result(rperf_profiler_t *prof)
+ {
+ VALUE result, samples_ary;
+ size_t i;
+ int j;
+
+ result = rb_hash_new();
+
+ rb_hash_aset(result, ID2SYM(rb_intern("mode")),
+ ID2SYM(rb_intern(prof->mode == 1 ? "wall" : "cpu")));
+ rb_hash_aset(result, ID2SYM(rb_intern("frequency")), INT2NUM(prof->frequency));
+ rb_hash_aset(result, ID2SYM(rb_intern("trigger_count")), SIZET2NUM(prof->stats.trigger_count));
+ rb_hash_aset(result, ID2SYM(rb_intern("sampling_count")), SIZET2NUM(prof->stats.sampling_count));
+ rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")), LONG2NUM(prof->stats.sampling_total_ns));
+ rb_hash_aset(result, ID2SYM(rb_intern("detected_thread_count")), INT2NUM(prof->next_thread_seq));
+ rb_hash_aset(result, ID2SYM(rb_intern("unique_frames")),
+ SIZET2NUM(prof->frame_table.count - RPERF_SYNTHETIC_COUNT));
+ rb_hash_aset(result, ID2SYM(rb_intern("unique_stacks")),
+ SIZET2NUM(prof->agg_table.count));
+
+ {
+ struct timespec now_monotonic;
+ int64_t start_ns, duration_ns;
+ clock_gettime(CLOCK_MONOTONIC, &now_monotonic);
+ start_ns = (int64_t)prof->start_realtime.tv_sec * 1000000000LL
+ + (int64_t)prof->start_realtime.tv_nsec;
+ duration_ns = ((int64_t)now_monotonic.tv_sec - (int64_t)prof->start_monotonic.tv_sec) * 1000000000LL
+ + ((int64_t)now_monotonic.tv_nsec - (int64_t)prof->start_monotonic.tv_nsec);
+ rb_hash_aset(result, ID2SYM(rb_intern("start_time_ns")), LONG2NUM(start_ns));
+ rb_hash_aset(result, ID2SYM(rb_intern("duration_ns")), LONG2NUM(duration_ns));
+ }
+
+ {
+ rperf_frame_table_t *ft = &prof->frame_table;
+ VALUE resolved_ary = rb_ary_new_capa((long)ft->count);
+ rb_ary_push(resolved_ary, rb_ary_new3(2, rb_str_new_lit("<GVL>"), rb_str_new_lit("[GVL blocked]")));
+ rb_ary_push(resolved_ary, rb_ary_new3(2, rb_str_new_lit("<GVL>"), rb_str_new_lit("[GVL wait]")));
+ rb_ary_push(resolved_ary, rb_ary_new3(2, rb_str_new_lit("<GC>"), rb_str_new_lit("[GC marking]")));
+ rb_ary_push(resolved_ary, rb_ary_new3(2, rb_str_new_lit("<GC>"), rb_str_new_lit("[GC sweeping]")));
+ for (i = RPERF_SYNTHETIC_COUNT; i < ft->count; i++) {
+ rb_ary_push(resolved_ary, rperf_resolve_frame(atomic_load_explicit(&ft->keys, memory_order_relaxed)[i]));
+ }
+
+ rperf_agg_table_t *at = &prof->agg_table;
+ samples_ary = rb_ary_new();
+ for (i = 0; i < at->bucket_capacity; i++) {
+ rperf_agg_entry_t *e = &at->buckets[i];
+ if (!e->used) continue;
+
+ VALUE frames = rb_ary_new_capa(e->depth);
+ for (j = 0; j < e->depth; j++) {
+ uint32_t fid = at->stack_pool[e->frame_start + j];
+ rb_ary_push(frames, RARRAY_AREF(resolved_ary, fid));
+ }
+
+ VALUE sample = rb_ary_new3(4, frames, LONG2NUM(e->weight), INT2NUM(e->thread_seq), INT2NUM(e->label_set_id));
+ rb_ary_push(samples_ary, sample);
+ }
+ }
+
+ rb_hash_aset(result, ID2SYM(rb_intern("aggregated_samples")), samples_ary);
+
+ if (prof->label_sets != Qnil) {
+ rb_hash_aset(result, ID2SYM(rb_intern("label_sets")), prof->label_sets);
+ }
+
+ return result;
+ }
+
  /* ---- Ruby API ---- */

- /* _c_start(frequency, mode, aggregate, signal)
+ /* _c_start(frequency, mode, aggregate, signal, defer)
  * frequency: Integer (Hz)
  * mode: 0 = cpu, 1 = wall
  * aggregate: 0 or 1
  * signal: Integer (RT signal number, 0 = nanosleep, -1 = default)
+ * defer: if truthy, start with timer paused (profile_refcount = 0)
  */
  static VALUE
- rb_rperf_start(VALUE self, VALUE vfreq, VALUE vmode, VALUE vagg, VALUE vsig)
+ rb_rperf_start(VALUE self, VALUE vfreq, VALUE vmode, VALUE vagg, VALUE vsig, VALUE vdefer)
  {
  int frequency = NUM2INT(vfreq);
  int mode = NUM2INT(vmode);
@@ -1038,6 +1161,7 @@ rb_rperf_start(VALUE self, VALUE vfreq, VALUE vmode, VALUE vagg, VALUE vsig)
  g_profiler.stats.trigger_count = 0;
  atomic_store_explicit(&g_profiler.active_idx, 0, memory_order_relaxed);
  atomic_store_explicit(&g_profiler.swap_ready, 0, memory_order_relaxed);
+ g_profiler.label_sets = Qnil;

  /* Initialize worker mutex/cond */
  CHECKED(pthread_mutex_init(&g_profiler.worker_mutex, NULL));
@@ -1119,6 +1243,7 @@ rb_rperf_start(VALUE self, VALUE vfreq, VALUE vmode, VALUE vagg, VALUE vsig)
  clock_gettime(CLOCK_MONOTONIC, &g_profiler.start_monotonic);

  g_profiler.running = 1;
+ g_profiler.profile_refcount = RTEST(vdefer) ? 0 : 1;

  #if RPERF_USE_TIMER_SIGNAL
  g_profiler.timer_signal = timer_signal;
@@ -1166,7 +1291,12 @@ rb_rperf_start(VALUE self, VALUE vfreq, VALUE vmode, VALUE vagg, VALUE vsig)
  }

  its.it_value.tv_sec = 0;
- its.it_value.tv_nsec = 1000000000L / g_profiler.frequency;
+ if (RPERF_PAUSED(&g_profiler)) {
+ /* defer mode: create timer but don't arm it */
+ its.it_value.tv_nsec = 0;
+ } else {
+ its.it_value.tv_nsec = 1000000000L / g_profiler.frequency;
+ }
  its.it_interval = its.it_value;
  if (timer_settime(g_profiler.timer_id, 0, &its, NULL) != 0) {
  timer_delete(g_profiler.timer_id);
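
The defer branch relies on a property of POSIX per-process timers: passing an all-zero it_value to timer_settime leaves the timer created but disarmed, so pausing and resuming never needs timer_delete plus timer_create. A minimal sketch of the pattern (error handling omitted; the rperf_arm_timer and rperf_disarm_timer helpers added later in this diff do the same thing):

#include <string.h>
#include <time.h>

/* Arm: first expiry after period_ns, then periodic at the same rate. */
static void arm_periodic(timer_t timer_id, long period_ns)
{
    struct itimerspec its;
    its.it_value.tv_sec = 0;
    its.it_value.tv_nsec = period_ns;
    its.it_interval = its.it_value;
    timer_settime(timer_id, 0, &its, NULL);
}

/* Disarm: an all-zero it_value stops the timer without deleting it. */
static void disarm(timer_t timer_id)
{
    struct itimerspec its;
    memset(&its, 0, sizeof(its));
    timer_settime(timer_id, 0, &its, NULL);
}
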
@@ -1259,15 +1389,8 @@ rb_rperf_stop(VALUE self)
  rb_remove_event_hook(rperf_gc_event_hook);

  if (g_profiler.aggregate) {
- /* Worker thread is joined; no concurrent access to these atomics. */
- int cur_idx = atomic_load_explicit(&g_profiler.active_idx, memory_order_relaxed);
- /* Aggregate remaining samples from both buffers */
- if (atomic_load_explicit(&g_profiler.swap_ready, memory_order_relaxed)) {
- int standby_idx = cur_idx ^ 1;
- rperf_aggregate_buffer(&g_profiler, &g_profiler.buffers[standby_idx]);
- atomic_store_explicit(&g_profiler.swap_ready, 0, memory_order_relaxed);
- }
- rperf_aggregate_buffer(&g_profiler, &g_profiler.buffers[cur_idx]);
+ /* Worker thread is joined; no concurrent access. */
+ rperf_flush_buffers(&g_profiler);
  }

  /* Clean up thread-specific data for all live threads */
@@ -1285,73 +1408,8 @@ rb_rperf_stop(VALUE self)
  }
  }

- /* Build result hash */
- result = rb_hash_new();
-
- /* mode */
- rb_hash_aset(result, ID2SYM(rb_intern("mode")),
- ID2SYM(rb_intern(g_profiler.mode == 1 ? "wall" : "cpu")));
-
- /* frequency */
- rb_hash_aset(result, ID2SYM(rb_intern("frequency")), INT2NUM(g_profiler.frequency));
-
- /* trigger_count, sampling_count, sampling_time_ns, detected_thread_count */
- rb_hash_aset(result, ID2SYM(rb_intern("trigger_count")), SIZET2NUM(g_profiler.stats.trigger_count));
- rb_hash_aset(result, ID2SYM(rb_intern("sampling_count")), SIZET2NUM(g_profiler.stats.sampling_count));
- rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")), LONG2NUM(g_profiler.stats.sampling_total_ns));
- rb_hash_aset(result, ID2SYM(rb_intern("detected_thread_count")), INT2NUM(g_profiler.next_thread_seq));
-
- /* aggregation stats */
- if (g_profiler.aggregate) {
- rb_hash_aset(result, ID2SYM(rb_intern("unique_frames")),
- SIZET2NUM(g_profiler.frame_table.count - RPERF_SYNTHETIC_COUNT));
- rb_hash_aset(result, ID2SYM(rb_intern("unique_stacks")),
- SIZET2NUM(g_profiler.agg_table.count));
- }
-
- /* start_time_ns (CLOCK_REALTIME epoch nanos), duration_ns (CLOCK_MONOTONIC delta) */
- {
- struct timespec stop_monotonic;
- int64_t start_ns, duration_ns;
- clock_gettime(CLOCK_MONOTONIC, &stop_monotonic);
- start_ns = (int64_t)g_profiler.start_realtime.tv_sec * 1000000000LL
- + (int64_t)g_profiler.start_realtime.tv_nsec;
- duration_ns = ((int64_t)stop_monotonic.tv_sec - (int64_t)g_profiler.start_monotonic.tv_sec) * 1000000000LL
- + ((int64_t)stop_monotonic.tv_nsec - (int64_t)g_profiler.start_monotonic.tv_nsec);
- rb_hash_aset(result, ID2SYM(rb_intern("start_time_ns")), LONG2NUM(start_ns));
- rb_hash_aset(result, ID2SYM(rb_intern("duration_ns")), LONG2NUM(duration_ns));
- }
-
  if (g_profiler.aggregate) {
- /* Build samples from aggregation table.
- * Use a Ruby array for resolved frames so GC protects them. */
- rperf_frame_table_t *ft = &g_profiler.frame_table;
- VALUE resolved_ary = rb_ary_new_capa((long)ft->count);
- /* Synthetic frames */
- rb_ary_push(resolved_ary, rb_ary_new3(2, rb_str_new_lit("<GVL>"), rb_str_new_lit("[GVL blocked]")));
- rb_ary_push(resolved_ary, rb_ary_new3(2, rb_str_new_lit("<GVL>"), rb_str_new_lit("[GVL wait]")));
- rb_ary_push(resolved_ary, rb_ary_new3(2, rb_str_new_lit("<GC>"), rb_str_new_lit("[GC marking]")));
- rb_ary_push(resolved_ary, rb_ary_new3(2, rb_str_new_lit("<GC>"), rb_str_new_lit("[GC sweeping]")));
- /* Real frames */
- for (i = RPERF_SYNTHETIC_COUNT; i < ft->count; i++) {
- rb_ary_push(resolved_ary, rperf_resolve_frame(atomic_load_explicit(&ft->keys, memory_order_relaxed)[i]));
- }
-
- rperf_agg_table_t *at = &g_profiler.agg_table;
- samples_ary = rb_ary_new();
- for (i = 0; i < at->bucket_capacity; i++) {
- rperf_agg_entry_t *e = &at->buckets[i];
- if (!e->used) continue;
-
- VALUE frames = rb_ary_new_capa(e->depth);
- for (j = 0; j < e->depth; j++) {
- uint32_t fid = at->stack_pool[e->frame_start + j];
- rb_ary_push(frames, RARRAY_AREF(resolved_ary, fid));
- }
-
- VALUE sample = rb_ary_new3(3, frames, LONG2NUM(e->weight), INT2NUM(e->thread_seq));
- rb_ary_push(samples_ary, sample);
- }
+ result = rperf_build_aggregated_result(&g_profiler);

  rperf_sample_buffer_free(&g_profiler.buffers[1]);
  rperf_frame_table_free(&g_profiler.frame_table);
@@ -1359,6 +1417,27 @@ rb_rperf_stop(VALUE self)
  } else {
  /* Raw samples path (aggregate: false) */
  rperf_sample_buffer_t *buf = &g_profiler.buffers[0];
+
+ result = rb_hash_new();
+ rb_hash_aset(result, ID2SYM(rb_intern("mode")),
+ ID2SYM(rb_intern(g_profiler.mode == 1 ? "wall" : "cpu")));
+ rb_hash_aset(result, ID2SYM(rb_intern("frequency")), INT2NUM(g_profiler.frequency));
+ rb_hash_aset(result, ID2SYM(rb_intern("trigger_count")), SIZET2NUM(g_profiler.stats.trigger_count));
+ rb_hash_aset(result, ID2SYM(rb_intern("sampling_count")), SIZET2NUM(g_profiler.stats.sampling_count));
+ rb_hash_aset(result, ID2SYM(rb_intern("sampling_time_ns")), LONG2NUM(g_profiler.stats.sampling_total_ns));
+ rb_hash_aset(result, ID2SYM(rb_intern("detected_thread_count")), INT2NUM(g_profiler.next_thread_seq));
+ {
+ struct timespec stop_monotonic;
+ int64_t start_ns, duration_ns;
+ clock_gettime(CLOCK_MONOTONIC, &stop_monotonic);
+ start_ns = (int64_t)g_profiler.start_realtime.tv_sec * 1000000000LL
+ + (int64_t)g_profiler.start_realtime.tv_nsec;
+ duration_ns = ((int64_t)stop_monotonic.tv_sec - (int64_t)g_profiler.start_monotonic.tv_sec) * 1000000000LL
+ + ((int64_t)stop_monotonic.tv_nsec - (int64_t)g_profiler.start_monotonic.tv_nsec);
+ rb_hash_aset(result, ID2SYM(rb_intern("start_time_ns")), LONG2NUM(start_ns));
+ rb_hash_aset(result, ID2SYM(rb_intern("duration_ns")), LONG2NUM(duration_ns));
+ }
+
  samples_ary = rb_ary_new_capa((long)buf->sample_count);
  for (i = 0; i < buf->sample_count; i++) {
  rperf_sample_t *s = &buf->samples[i];
@@ -1384,13 +1463,14 @@ rb_rperf_stop(VALUE self)
  rb_ary_push(frames, rperf_resolve_frame(fval));
  }

- VALUE sample = rb_ary_new3(3, frames, LONG2NUM(s->weight), INT2NUM(s->thread_seq));
+ VALUE sample = rb_ary_new3(4, frames, LONG2NUM(s->weight), INT2NUM(s->thread_seq), INT2NUM(s->label_set_id));
  rb_ary_push(samples_ary, sample);
  }
+ rb_hash_aset(result, ID2SYM(rb_intern("raw_samples")), samples_ary);
+ if (g_profiler.label_sets != Qnil) {
+ rb_hash_aset(result, ID2SYM(rb_intern("label_sets")), g_profiler.label_sets);
+ }
  }
- rb_hash_aset(result,
- ID2SYM(rb_intern(g_profiler.aggregate ? "aggregated_samples" : "raw_samples")),
- samples_ary);

  /* Cleanup */
  rperf_sample_buffer_free(&g_profiler.buffers[0]);
@@ -1398,6 +1478,201 @@ rb_rperf_stop(VALUE self)
  return result;
  }

+ /* ---- Snapshot: read aggregated data without stopping ---- */
+
+ /* Clear aggregated data for the next interval.
+ * Caller must hold GVL + worker_mutex.
+ * Keeps allocations intact for reuse. Does NOT touch frame_table
+ * (frame IDs must stay stable — dmark may be iterating keys outside GVL,
+ * and existing threads reference frame IDs via their thread_data). */
+ static void
+ rperf_clear_aggregated_data(rperf_profiler_t *prof)
+ {
+ /* Clear agg_table entries (keep allocation) */
+ memset(prof->agg_table.buckets, 0,
+ prof->agg_table.bucket_capacity * sizeof(rperf_agg_entry_t));
+ prof->agg_table.count = 0;
+ prof->agg_table.stack_pool_count = 0;
+
+ /* Reset stats */
+ prof->stats.trigger_count = 0;
+ prof->stats.sampling_count = 0;
+ prof->stats.sampling_total_ns = 0;
+
+ /* Reset start timestamps so next snapshot's duration_ns covers
+ * only the period since this clear. */
+ clock_gettime(CLOCK_REALTIME, &prof->start_realtime);
+ clock_gettime(CLOCK_MONOTONIC, &prof->start_monotonic);
+ }
+
+ static VALUE
+ rb_rperf_snapshot(VALUE self, VALUE vclear)
+ {
+ VALUE result;
+
+ if (!g_profiler.running) {
+ return Qnil;
+ }
+
+ if (!g_profiler.aggregate) {
+ rb_raise(rb_eRuntimeError, "snapshot requires aggregate mode (aggregate: true)");
+ }
+
+ /* GVL is held → no postponed jobs fire → no new samples written.
+ * Lock worker_mutex to pause worker thread's aggregation. */
+ CHECKED(pthread_mutex_lock(&g_profiler.worker_mutex));
+ rperf_flush_buffers(&g_profiler);
+
+ /* Build result while mutex is held. If clear is requested, we must
+ * also clear under the same lock to avoid a window where the worker
+ * could aggregate into the table between build and clear. */
+ result = rperf_build_aggregated_result(&g_profiler);
+
+ if (RTEST(vclear)) {
+ rperf_clear_aggregated_data(&g_profiler);
+ }
+
+ CHECKED(pthread_mutex_unlock(&g_profiler.worker_mutex));
+
+ return result;
+ }
+
+ /* ---- Label API ---- */
+
+ /* _c_set_label(label_set_id) — set current thread's label_set_id.
+ * Called from Ruby with GVL held. */
+ static VALUE
+ rb_rperf_set_label(VALUE self, VALUE vid)
+ {
+ if (!g_profiler.running) return vid;
+
+ int label_set_id = NUM2INT(vid);
+ VALUE thread = rb_thread_current();
+ rperf_thread_data_t *td = (rperf_thread_data_t *)rb_internal_thread_specific_get(thread, g_profiler.ts_key);
+ if (td == NULL) {
+ td = rperf_thread_data_create(&g_profiler, thread);
+ if (!td) rb_raise(rb_eNoMemError, "rperf: failed to allocate thread data");
+ }
+ td->label_set_id = label_set_id;
+ return vid;
+ }
+
+ /* _c_get_label() — get current thread's label_set_id.
+ * Returns 0 if not profiling or thread not yet seen. */
+ static VALUE
+ rb_rperf_get_label(VALUE self)
+ {
+ if (!g_profiler.running) return INT2FIX(0);
+
+ VALUE thread = rb_thread_current();
+ rperf_thread_data_t *td = (rperf_thread_data_t *)rb_internal_thread_specific_get(thread, g_profiler.ts_key);
+ if (td == NULL) return INT2FIX(0);
+ return INT2NUM(td->label_set_id);
+ }
+
+ /* _c_set_label_sets(ary) — store label_sets Ruby Array for result building */
+ static VALUE
+ rb_rperf_set_label_sets(VALUE self, VALUE ary)
+ {
+ g_profiler.label_sets = ary;
+ return ary;
+ }
+
+ /* _c_get_label_sets() — get label_sets Ruby Array */
+ static VALUE
+ rb_rperf_get_label_sets(VALUE self)
+ {
+ return g_profiler.label_sets;
+ }
+
+ /* ---- Profile refcount API (timer pause/resume) ---- */
+
+ /* Helper: arm the timer with the configured interval */
+ static void
+ rperf_arm_timer(rperf_profiler_t *prof)
+ {
+ #if RPERF_USE_TIMER_SIGNAL
+ if (prof->timer_signal > 0) {
+ struct itimerspec its;
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 1000000000L / prof->frequency;
+ its.it_interval = its.it_value;
+ timer_settime(prof->timer_id, 0, &its, NULL);
+ return;
+ }
+ #endif
+ /* nanosleep mode: signal the worker to wake from cond_wait */
+ CHECKED(pthread_mutex_lock(&prof->worker_mutex));
+ CHECKED(pthread_cond_signal(&prof->worker_cond));
+ CHECKED(pthread_mutex_unlock(&prof->worker_mutex));
+ }
+
+ /* Helper: disarm the timer (stop firing) */
+ static void
+ rperf_disarm_timer(rperf_profiler_t *prof)
+ {
+ #if RPERF_USE_TIMER_SIGNAL
+ if (prof->timer_signal > 0) {
+ struct itimerspec its;
+ memset(&its, 0, sizeof(its));
+ timer_settime(prof->timer_id, 0, &its, NULL);
+ return;
+ }
+ #endif
+ /* nanosleep mode: worker will see RPERF_PAUSED on next iteration */
+ }
+
+ /* Helper: reset prev_time_ns for all threads (called on resume to avoid
+ * inflated weight from pause duration). Must be called with GVL held. */
+ static void
+ rperf_reset_thread_times(rperf_profiler_t *prof)
+ {
+ VALUE threads = rb_funcall(rb_cThread, rb_intern("list"), 0);
+ long tc = RARRAY_LEN(threads);
+ for (long i = 0; i < tc; i++) {
+ VALUE thread = RARRAY_AREF(threads, i);
+ rperf_thread_data_t *td = (rperf_thread_data_t *)rb_internal_thread_specific_get(thread, prof->ts_key);
+ if (td) {
+ td->prev_time_ns = rperf_current_time_ns(prof, td);
+ td->prev_wall_ns = rperf_wall_time_ns();
+ }
+ }
+ }
+
+ /* _c_profile_inc() — increment profile refcount; resume timer on 0→1.
+ * Called with GVL held. */
+ static VALUE
+ rb_rperf_profile_inc(VALUE self)
+ {
+ if (!g_profiler.running) return Qfalse;
+ g_profiler.profile_refcount++;
+ if (g_profiler.profile_refcount == 1) {
+ rperf_reset_thread_times(&g_profiler);
+ rperf_arm_timer(&g_profiler);
+ }
+ return Qtrue;
+ }
+
+ /* _c_profile_dec() — decrement profile refcount; pause timer on 1→0.
+ * Called with GVL held. */
+ static VALUE
+ rb_rperf_profile_dec(VALUE self)
+ {
+ if (!g_profiler.running) return Qfalse;
+ g_profiler.profile_refcount--;
+ if (g_profiler.profile_refcount == 0) {
+ rperf_disarm_timer(&g_profiler);
+ }
+ return Qtrue;
+ }
+
+ /* _c_running?() — check if profiler is running. */
+ static VALUE
+ rb_rperf_running_p(VALUE self)
+ {
+ return g_profiler.running ? Qtrue : Qfalse;
+ }
+
  /* ---- Fork safety ---- */

  static void
@@ -1448,6 +1723,7 @@ rperf_after_fork_child(void)
  /* Reset stats */
  g_profiler.stats.sampling_count = 0;
  g_profiler.stats.sampling_total_ns = 0;
+ g_profiler.profile_refcount = 0;
  atomic_store_explicit(&g_profiler.swap_ready, 0, memory_order_relaxed);
  }

@@ -1457,10 +1733,19 @@
  Init_rperf(void)
  {
  VALUE mRperf = rb_define_module("Rperf");
- rb_define_module_function(mRperf, "_c_start", rb_rperf_start, 4);
+ rb_define_module_function(mRperf, "_c_start", rb_rperf_start, 5);
  rb_define_module_function(mRperf, "_c_stop", rb_rperf_stop, 0);
+ rb_define_module_function(mRperf, "_c_snapshot", rb_rperf_snapshot, 1);
+ rb_define_module_function(mRperf, "_c_set_label", rb_rperf_set_label, 1);
+ rb_define_module_function(mRperf, "_c_get_label", rb_rperf_get_label, 0);
+ rb_define_module_function(mRperf, "_c_set_label_sets", rb_rperf_set_label_sets, 1);
+ rb_define_module_function(mRperf, "_c_get_label_sets", rb_rperf_get_label_sets, 0);
+ rb_define_module_function(mRperf, "_c_profile_inc", rb_rperf_profile_inc, 0);
+ rb_define_module_function(mRperf, "_c_profile_dec", rb_rperf_profile_dec, 0);
+ rb_define_module_function(mRperf, "_c_running?", rb_rperf_running_p, 0);

  memset(&g_profiler, 0, sizeof(g_profiler));
+ g_profiler.label_sets = Qnil;
  g_profiler.pj_handle = rb_postponed_job_preregister(0, rperf_sample_job, &g_profiler);
  g_profiler.ts_key = rb_internal_thread_specific_key_create();