vernier 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6050bca74116d0e90f98025fe23d7fbd40c6107c085e3f768d606d4f418ebc60
4
- data.tar.gz: e302542d8b06852d28d0ec1e2528f23bba46d5ef9ea9a828c9dbcdb985a292a3
3
+ metadata.gz: c3190b81748262d9620de74e12f6adca9b2c4126741781f9860076d96c96a123
4
+ data.tar.gz: b54848781f0b17c16074fd630d0aae6b6ea206e47679713ce858e6e3f08525a5
5
5
  SHA512:
6
- metadata.gz: 9d20e5f9d9c894a253bc4aeb6b9da6cb35d916e881abedf996ce19757a9cd92efe43bcc76fda0aa4ce5e334be36582adf7c716a998c4c5e1f403dc57364fb5ab
7
- data.tar.gz: 7cf03df7bcb4f961b5b456eb781fd5818de7e18501baa01592ce17528f5eae7b272e90dcff200f861495ebb1a83c87f9146306db66ce20a9d56467ab87bce3c6
6
+ metadata.gz: de91010589471c0b4a7cfddb37bab92262392ef33da2af44930cdab848a6fd290468abe76c147485ba032d89cdac33b631937eea01bd15af8e89b03a5200e69e
7
+ data.tar.gz: 82b40e4d93685ab8c560a995df31421c06ff00f8c27f33d6b37296bed96ca5b37cacda3be60e64464f535dd52d273f80d4885705f591e7246d7b3fb2269c151f
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Vernier
2
2
 
3
- Next-generation Ruby sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
3
+ Next-generation Ruby 3.2.1+ sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
4
4
 
5
5
  ## Examples
6
6
 
@@ -20,6 +20,8 @@ Rails benchmark - lobste.rs (time)
20
20
 
21
21
  ## Installation
22
22
 
23
+ Vernier requires Ruby version 3.2.1 or greater
24
+
23
25
  ```ruby
24
26
  gem 'vernier'
25
27
  ```
@@ -35,6 +37,9 @@ Vernier.trace(out: "time_profile.json") { some_slow_method }
35
37
 
36
38
  The output can then be viewed in the Firefox Profiler (demo) or the [`profile-viewer` gem](https://github.com/tenderlove/profiler/tree/ruby) (a Ruby-customized version of the Firefox Profiler).
37
39
 
40
+ - **Flame Graph**: Shows proportionally how much time is spent within particular stack frames. Frames are grouped together, which means that x-axis / left-to-right order is not meaningful.
41
+ - **Stack Chart**: Shows the stack at each sample with the x-axis representing time and can be read left-to-right.
42
+
38
43
  ### Retained memory
39
44
 
40
45
  Record a flamegraph of all **retained** allocations from loading `irb`.
@@ -43,6 +48,7 @@ Record a flamegraph of all **retained** allocations from loading `irb`.
43
48
  ruby -r vernier -e 'Vernier.trace_retained(out: "irb_profile.json") { require "irb" }'
44
49
  ```
45
50
 
51
+ Retained-memory flamegraphs must be interpreted a little differently than a typical profiling flamegraph. In a retained-memory flamegraph, the x-axis represents a proportion of memory in bytes, _not time or samples_. The topmost boxes on the y-axis represent the retained objects, with their stacktrace below; their width represents the percentage of overall retained memory each object occupies.
46
52
 
47
53
  ## Development
48
54
 
@@ -5,4 +5,11 @@ require "mkmf"
5
5
  $CXXFLAGS += " -std=c++14 "
6
6
  $CXXFLAGS += " -ggdb3 -Og "
7
7
 
8
+ have_header("ruby/thread.h")
9
+ have_struct_member("rb_internal_thread_event_data_t", "thread", ["ruby/thread.h"])
10
+
11
+ have_func("rb_profile_thread_frames", "ruby/debug.h")
12
+
13
+ have_func("pthread_setname_np")
14
+
8
15
  create_makefile("vernier/vernier")
@@ -1,3 +1,5 @@
1
+ // vim: expandtab:ts=4:sw=4
2
+
1
3
  #include <iostream>
2
4
  #include <iomanip>
3
5
  #include <vector>
@@ -27,6 +29,9 @@
27
29
  #include "ruby/debug.h"
28
30
  #include "ruby/thread.h"
29
31
 
32
+ #undef assert
33
+ #define assert RUBY_ASSERT_ALWAYS
34
+
30
35
  # define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
31
36
 
32
37
  // Internal TracePoint events we'll monitor during profiling
@@ -53,6 +58,22 @@ static VALUE rb_cVernierResult;
53
58
  static VALUE rb_mVernierMarkerType;
54
59
  static VALUE rb_cVernierCollector;
55
60
 
61
+ static const char *gvl_event_name(rb_event_flag_t event) {
62
+ switch (event) {
63
+ case RUBY_INTERNAL_THREAD_EVENT_STARTED:
64
+ return "started";
65
+ case RUBY_INTERNAL_THREAD_EVENT_READY:
66
+ return "ready";
67
+ case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
68
+ return "resumed";
69
+ case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
70
+ return "suspended";
71
+ case RUBY_INTERNAL_THREAD_EVENT_EXITED:
72
+ return "exited";
73
+ }
74
+ return "no-event";
75
+ }
76
+
56
77
  class TimeStamp {
57
78
  static const uint64_t nanoseconds_per_second = 1000000000;
58
79
  uint64_t value_ns;
@@ -85,8 +106,16 @@ class TimeStamp {
85
106
  } while (target_time > TimeStamp::Now());
86
107
  }
87
108
 
109
+ static TimeStamp from_seconds(uint64_t s) {
110
+ return TimeStamp::from_milliseconds(s * 1000);
111
+ }
112
+
113
+ static TimeStamp from_milliseconds(uint64_t ms) {
114
+ return TimeStamp::from_microseconds(ms * 1000);
115
+ }
116
+
88
117
  static TimeStamp from_microseconds(uint64_t us) {
89
- return TimeStamp(us * 1000);
118
+ return TimeStamp::from_nanoseconds(us * 1000);
90
119
  }
91
120
 
92
121
  static TimeStamp from_nanoseconds(uint64_t ns) {
@@ -266,6 +295,10 @@ class SamplerSemaphore {
266
295
  #ifdef __APPLE__
267
296
  dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
268
297
  #else
298
+ // Use sem_timedwait so that we get a crash instead of a deadlock for
299
+ // easier debugging
300
+ auto ts = (TimeStamp::Now() + TimeStamp::from_seconds(5)).timespec();
301
+
269
302
  int ret;
270
303
  do {
271
304
  ret = sem_wait(&sem);
@@ -304,16 +337,15 @@ struct RawSample {
304
337
  }
305
338
 
306
339
  void sample() {
340
+ clear();
341
+
307
342
  if (!ruby_native_thread_p()) {
308
- clear();
309
343
  return;
310
344
  }
311
345
 
312
346
  if (rb_during_gc()) {
313
347
  gc = true;
314
- len = 0;
315
348
  } else {
316
- gc = false;
317
349
  len = rb_profile_frames(0, MAX_LEN, frames, lines);
318
350
  }
319
351
  }
@@ -602,12 +634,13 @@ class Marker {
602
634
  Phase phase;
603
635
  TimeStamp timestamp;
604
636
  TimeStamp finish;
605
- native_thread_id_t thread_id;
637
+ // VALUE ruby_thread_id;
638
+ //native_thread_id_t thread_id;
606
639
  int stack_index = -1;
607
640
 
608
641
  VALUE to_array() {
609
642
  VALUE record[6] = {0};
610
- record[0] = ULL2NUM(thread_id);
643
+ record[0] = Qnil; // FIXME
611
644
  record[1] = INT2NUM(type);
612
645
  record[2] = INT2NUM(phase);
613
646
  record[3] = ULL2NUM(timestamp.nanoseconds());
@@ -625,30 +658,33 @@ class Marker {
625
658
  };
626
659
 
627
660
  class MarkerTable {
628
- TimeStamp last_gc_entry;
629
-
630
661
  public:
631
662
  std::vector<Marker> list;
632
663
  std::mutex mutex;
633
664
 
634
- void record_gc_entered() {
635
- last_gc_entry = TimeStamp::Now();
636
- }
637
-
638
- void record_gc_leave() {
639
- list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), get_native_thread_id(), -1 });
640
- }
641
-
642
665
  void record_interval(Marker::Type type, TimeStamp from, TimeStamp to, int stack_index = -1) {
643
666
  const std::lock_guard<std::mutex> lock(mutex);
644
667
 
645
- list.push_back({ type, Marker::INTERVAL, from, to, get_native_thread_id(), stack_index });
668
+ list.push_back({ type, Marker::INTERVAL, from, to, stack_index });
646
669
  }
647
670
 
648
671
  void record(Marker::Type type, int stack_index = -1) {
649
672
  const std::lock_guard<std::mutex> lock(mutex);
650
673
 
651
- list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), get_native_thread_id(), stack_index });
674
+ list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), stack_index });
675
+ }
676
+ };
677
+
678
+ class GCMarkerTable: public MarkerTable {
679
+ TimeStamp last_gc_entry;
680
+
681
+ public:
682
+ void record_gc_entered() {
683
+ last_gc_entry = TimeStamp::Now();
684
+ }
685
+
686
+ void record_gc_leave() {
687
+ list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), -1 });
652
688
  }
653
689
  };
654
690
 
@@ -731,6 +767,8 @@ class Thread {
731
767
  STOPPED
732
768
  };
733
769
 
770
+ VALUE ruby_thread;
771
+ VALUE ruby_thread_id;
734
772
  pthread_t pthread_id;
735
773
  native_thread_id_t native_tid;
736
774
  State state;
@@ -742,18 +780,33 @@ class Thread {
742
780
  int stack_on_suspend_idx;
743
781
  SampleTranslator translator;
744
782
 
745
- std::string name;
783
+ MarkerTable *markers;
746
784
 
747
- Thread(State state) : state(state), stack_on_suspend_idx(-1) {
748
- pthread_id = pthread_self();
785
+ std::string name;
786
+
787
+ // FIXME: don't use pthread at start
788
+ Thread(State state, pthread_t pthread_id, VALUE ruby_thread) : pthread_id(pthread_id), ruby_thread(ruby_thread), state(state), stack_on_suspend_idx(-1) {
789
+ name = Qnil;
790
+ ruby_thread_id = rb_obj_id(ruby_thread);
791
+ //ruby_thread_id = ULL2NUM(ruby_thread);
749
792
  native_tid = get_native_thread_id();
750
793
  started_at = state_changed_at = TimeStamp::Now();
794
+ name = "";
795
+ markers = new MarkerTable();
796
+
797
+ if (state == State::STARTED) {
798
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
799
+ }
751
800
  }
752
801
 
753
- void set_state(State new_state, MarkerTable *markers) {
802
+ void set_state(State new_state) {
754
803
  if (state == Thread::State::STOPPED) {
755
804
  return;
756
805
  }
806
+ if (new_state == Thread::State::SUSPENDED && state == new_state) {
807
+ // on Ruby 3.2 (only?) we may see duplicate suspended states
808
+ return;
809
+ }
757
810
 
758
811
  TimeStamp from = state_changed_at;
759
812
  auto now = TimeStamp::Now();
@@ -764,10 +817,13 @@ class Thread {
764
817
 
765
818
  switch (new_state) {
766
819
  case State::STARTED:
767
- new_state = State::RUNNING;
820
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
821
+ return; // no mutation of current state
768
822
  break;
769
823
  case State::RUNNING:
770
- assert(state == State::READY);
824
+ assert(state == State::READY || state == State::RUNNING);
825
+ pthread_id = pthread_self();
826
+ native_tid = get_native_thread_id();
771
827
 
772
828
  // If the GVL is immediately ready, and we measure no times
773
829
  // stalled, skip emitting the interval.
@@ -783,23 +839,25 @@ class Thread {
783
839
  // Threads can be preempted, which means they will have been in "Running"
784
840
  // state, and then the VM was like "no I need to stop you from working,
785
841
  // so I'll put you in the 'ready' (or stalled) state"
786
- assert(state == State::SUSPENDED || state == State::RUNNING);
842
+ assert(state == State::STARTED || state == State::SUSPENDED || state == State::RUNNING);
787
843
  if (state == State::SUSPENDED) {
788
844
  markers->record_interval(Marker::Type::MARKER_THREAD_SUSPENDED, from, now, stack_on_suspend_idx);
789
845
  }
790
- else {
846
+ else if (state == State::RUNNING) {
791
847
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
792
848
  }
793
849
  break;
794
850
  case State::SUSPENDED:
795
851
  // We can go from RUNNING or STARTED to SUSPENDED
796
- assert(state == State::RUNNING || state == State::STARTED);
852
+ assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
797
853
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
798
854
  break;
799
855
  case State::STOPPED:
800
856
  // We can go from RUNNING or STARTED to STOPPED
801
857
  assert(state == State::RUNNING || state == State::STARTED);
802
858
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
859
+ markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
860
+
803
861
  stopped_at = now;
804
862
  capture_name();
805
863
 
@@ -815,10 +873,13 @@ class Thread {
815
873
  }
816
874
 
817
875
  void capture_name() {
818
- char buf[128];
819
- int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
820
- if (rc == 0)
821
- name = std::string(buf);
876
+ //char buf[128];
877
+ //int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
878
+ //if (rc == 0)
879
+ // name = std::string(buf);
880
+ }
881
+
882
+ void mark() {
822
883
  }
823
884
  };
824
885
 
@@ -832,40 +893,46 @@ class ThreadTable {
832
893
  ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
833
894
  }
834
895
 
835
- void started(MarkerTable *markers) {
836
- //const std::lock_guard<std::mutex> lock(mutex);
896
+ void mark() {
897
+ for (auto &thread : list) {
898
+ thread.mark();
899
+ }
900
+ }
837
901
 
902
+ void started(VALUE th) {
838
903
  //list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
839
- markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
840
- set_state(Thread::State::STARTED, markers);
904
+ set_state(Thread::State::STARTED, th);
841
905
  }
842
906
 
843
- void ready(MarkerTable *markers) {
844
- set_state(Thread::State::READY, markers);
907
+ void ready(VALUE th) {
908
+ set_state(Thread::State::READY, th);
845
909
  }
846
910
 
847
- void resumed(MarkerTable *markers) {
848
- set_state(Thread::State::RUNNING, markers);
911
+ void resumed(VALUE th) {
912
+ set_state(Thread::State::RUNNING, th);
849
913
  }
850
914
 
851
- void suspended(MarkerTable *markers) {
852
- set_state(Thread::State::SUSPENDED, markers);
915
+ void suspended(VALUE th) {
916
+ set_state(Thread::State::SUSPENDED, th);
853
917
  }
854
918
 
855
- void stopped(MarkerTable *markers) {
856
- markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
857
- set_state(Thread::State::STOPPED, markers);
919
+ void stopped(VALUE th) {
920
+ set_state(Thread::State::STOPPED, th);
858
921
  }
859
922
 
860
923
  private:
861
- void set_state(Thread::State new_state, MarkerTable *markers) {
924
+ void set_state(Thread::State new_state, VALUE th) {
862
925
  const std::lock_guard<std::mutex> lock(mutex);
863
926
 
864
- pthread_t current_thread = pthread_self();
865
927
  //cerr << "set state=" << new_state << " thread=" << gettid() << endl;
866
928
 
929
+ pid_t native_tid = get_native_thread_id();
930
+ pthread_t pthread_id = pthread_self();
931
+
932
+ //fprintf(stderr, "th %p (tid: %i) from %s to %s\n", (void *)th, native_tid, gvl_event_name(state), gvl_event_name(new_state));
933
+
867
934
  for (auto &thread : list) {
868
- if (pthread_equal(current_thread, thread.pthread_id)) {
935
+ if (thread_equal(th, thread.ruby_thread)) {
869
936
  if (new_state == Thread::State::SUSPENDED) {
870
937
 
871
938
  RawSample sample;
@@ -875,14 +942,27 @@ class ThreadTable {
875
942
  //cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
876
943
  }
877
944
 
878
- thread.set_state(new_state, markers);
945
+ thread.set_state(new_state);
946
+
947
+ if (thread.state == Thread::State::RUNNING) {
948
+ thread.pthread_id = pthread_self();
949
+ thread.native_tid = get_native_thread_id();
950
+ } else {
951
+ thread.pthread_id = 0;
952
+ thread.native_tid = 0;
953
+ }
954
+
879
955
 
880
956
  return;
881
957
  }
882
958
  }
883
959
 
884
- pid_t native_tid = get_native_thread_id();
885
- list.emplace_back(new_state);
960
+ //fprintf(stderr, "NEW THREAD: th: %p, state: %i\n", th, new_state);
961
+ list.emplace_back(new_state, pthread_self(), th);
962
+ }
963
+
964
+ bool thread_equal(VALUE a, VALUE b) {
965
+ return a == b;
886
966
  }
887
967
  };
888
968
 
@@ -1151,6 +1231,8 @@ class GlobalSignalHandler {
1151
1231
  void record_sample(LiveSample &sample, pthread_t pthread_id) {
1152
1232
  const std::lock_guard<std::mutex> lock(mutex);
1153
1233
 
1234
+ assert(pthread_id);
1235
+
1154
1236
  live_sample = &sample;
1155
1237
  if (pthread_kill(pthread_id, SIGPROF)) {
1156
1238
  rb_bug("pthread_kill failed");
@@ -1187,7 +1269,7 @@ class GlobalSignalHandler {
1187
1269
  LiveSample *GlobalSignalHandler::live_sample;
1188
1270
 
1189
1271
  class TimeCollector : public BaseCollector {
1190
- MarkerTable markers;
1272
+ GCMarkerTable gc_markers;
1191
1273
  ThreadTable threads;
1192
1274
 
1193
1275
  pthread_t sample_thread;
@@ -1216,10 +1298,22 @@ class TimeCollector : public BaseCollector {
1216
1298
  }
1217
1299
 
1218
1300
  VALUE get_markers() {
1219
- VALUE list = rb_ary_new2(this->markers.list.size());
1301
+ VALUE list = rb_ary_new();
1302
+ VALUE main_thread = rb_thread_main();
1303
+ VALUE main_thread_id = rb_obj_id(main_thread);
1304
+
1305
+ for (auto& marker: this->gc_markers.list) {
1306
+ VALUE ary = marker.to_array();
1220
1307
 
1221
- for (auto& marker: this->markers.list) {
1222
- rb_ary_push(list, marker.to_array());
1308
+ RARRAY_ASET(ary, 0, main_thread_id);
1309
+ rb_ary_push(list, ary);
1310
+ }
1311
+ for (auto &thread : threads.list) {
1312
+ for (auto& marker: thread.markers->list) {
1313
+ VALUE ary = marker.to_array();
1314
+ RARRAY_ASET(ary, 0, thread.ruby_thread_id);
1315
+ rb_ary_push(list, ary);
1316
+ }
1223
1317
  }
1224
1318
 
1225
1319
  return list;
@@ -1235,7 +1329,9 @@ class TimeCollector : public BaseCollector {
1235
1329
  threads.mutex.lock();
1236
1330
  for (auto &thread : threads.list) {
1237
1331
  //if (thread.state == Thread::State::RUNNING) {
1238
- if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1332
+ //if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1333
+ if (thread.state == Thread::State::RUNNING) {
1334
+ //fprintf(stderr, "sampling %p on tid:%i\n", thread.ruby_thread, thread.native_tid);
1239
1335
  GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
1240
1336
 
1241
1337
  if (sample.sample.gc) {
@@ -1271,6 +1367,13 @@ class TimeCollector : public BaseCollector {
1271
1367
  }
1272
1368
 
1273
1369
  static void *sample_thread_entry(void *arg) {
1370
+ #if HAVE_PTHREAD_SETNAME_NP
1371
+ #ifdef __APPLE__
1372
+ pthread_setname_np("Vernier profiler");
1373
+ #else
1374
+ pthread_setname_np(pthread_self(), "Vernier profiler");
1375
+ #endif
1376
+ #endif
1274
1377
  TimeCollector *collector = static_cast<TimeCollector *>(arg);
1275
1378
  collector->sample_thread_run();
1276
1379
  return NULL;
@@ -1281,10 +1384,10 @@ class TimeCollector : public BaseCollector {
1281
1384
 
1282
1385
  switch (event) {
1283
1386
  case RUBY_EVENT_THREAD_BEGIN:
1284
- collector->threads.started(&collector->markers);
1387
+ collector->threads.started(self);
1285
1388
  break;
1286
1389
  case RUBY_EVENT_THREAD_END:
1287
- collector->threads.stopped(&collector->markers);
1390
+ collector->threads.stopped(self);
1288
1391
  break;
1289
1392
  }
1290
1393
  }
@@ -1294,36 +1397,57 @@ class TimeCollector : public BaseCollector {
1294
1397
 
1295
1398
  switch (event) {
1296
1399
  case RUBY_INTERNAL_EVENT_GC_START:
1297
- collector->markers.record(Marker::Type::MARKER_GC_START);
1400
+ collector->gc_markers.record(Marker::Type::MARKER_GC_START);
1298
1401
  break;
1299
1402
  case RUBY_INTERNAL_EVENT_GC_END_MARK:
1300
- collector->markers.record(Marker::Type::MARKER_GC_END_MARK);
1403
+ collector->gc_markers.record(Marker::Type::MARKER_GC_END_MARK);
1301
1404
  break;
1302
1405
  case RUBY_INTERNAL_EVENT_GC_END_SWEEP:
1303
- collector->markers.record(Marker::Type::MARKER_GC_END_SWEEP);
1406
+ collector->gc_markers.record(Marker::Type::MARKER_GC_END_SWEEP);
1304
1407
  break;
1305
1408
  case RUBY_INTERNAL_EVENT_GC_ENTER:
1306
- collector->markers.record_gc_entered();
1409
+ collector->gc_markers.record_gc_entered();
1307
1410
  break;
1308
1411
  case RUBY_INTERNAL_EVENT_GC_EXIT:
1309
- collector->markers.record_gc_leave();
1412
+ collector->gc_markers.record_gc_leave();
1310
1413
  break;
1311
1414
  }
1312
1415
  }
1313
1416
 
1314
1417
  static void internal_thread_event_cb(rb_event_flag_t event, const rb_internal_thread_event_data_t *event_data, void *data) {
1315
1418
  TimeCollector *collector = static_cast<TimeCollector *>(data);
1419
+ VALUE thread = Qnil;
1420
+
1421
+ #if HAVE_RB_INTERNAL_THREAD_EVENT_DATA_T_THREAD
1422
+ thread = event_data->thread;
1423
+ #else
1424
+ // We may arrive here when starting a thread with
1425
+ // RUBY_INTERNAL_THREAD_EVENT_READY before the thread is actually set up.
1426
+ if (!ruby_native_thread_p()) return;
1427
+
1428
+ thread = rb_thread_current();
1429
+ #endif
1430
+
1431
+ auto native_tid = get_native_thread_id();
1316
1432
  //cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
1433
+ //fprintf(stderr, "(%i) th %p to %s\n", native_tid, (void *)thread, gvl_event_name(event));
1434
+
1317
1435
 
1318
1436
  switch (event) {
1437
+ case RUBY_INTERNAL_THREAD_EVENT_STARTED:
1438
+ collector->threads.started(thread);
1439
+ break;
1440
+ case RUBY_INTERNAL_THREAD_EVENT_EXITED:
1441
+ collector->threads.stopped(thread);
1442
+ break;
1319
1443
  case RUBY_INTERNAL_THREAD_EVENT_READY:
1320
- collector->threads.ready(&collector->markers);
1444
+ collector->threads.ready(thread);
1321
1445
  break;
1322
1446
  case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
1323
- collector->threads.resumed(&collector->markers);
1447
+ collector->threads.resumed(thread);
1324
1448
  break;
1325
1449
  case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
1326
- collector->threads.suspended(&collector->markers);
1450
+ collector->threads.suspended(thread);
1327
1451
  break;
1328
1452
 
1329
1453
  }
@@ -1351,7 +1475,7 @@ class TimeCollector : public BaseCollector {
1351
1475
  // have at least one thread in our thread list because it's possible
1352
1476
  // that the profile might be such that we don't get any thread switch
1353
1477
  // events and we need at least one
1354
- this->threads.resumed(&this->markers);
1478
+ this->threads.resumed(rb_thread_current());
1355
1479
 
1356
1480
  thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
1357
1481
  rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
@@ -1398,7 +1522,7 @@ class TimeCollector : public BaseCollector {
1398
1522
  VALUE hash = rb_hash_new();
1399
1523
  thread.samples.write_result(hash);
1400
1524
 
1401
- rb_hash_aset(threads, ULL2NUM(thread.native_tid), hash);
1525
+ rb_hash_aset(threads, thread.ruby_thread_id, hash);
1402
1526
  rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
1403
1527
  rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
1404
1528
  if (!thread.stopped_at.zero()) {
@@ -1415,6 +1539,7 @@ class TimeCollector : public BaseCollector {
1415
1539
 
1416
1540
  void mark() {
1417
1541
  frame_list.mark_frames();
1542
+ threads.mark();
1418
1543
 
1419
1544
  //for (int i = 0; i < queued_length; i++) {
1420
1545
  // rb_gc_mark(queued_frames[i]);
@@ -19,7 +19,7 @@ module Vernier
19
19
  Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
20
20
  end
21
21
 
22
- def add_marker(name:, start:, finish:, thread: Thread.current.native_thread_id, phase: Marker::Phase::INTERVAL, data: nil)
22
+ def add_marker(name:, start:, finish:, thread: Thread.current.object_id, phase: Marker::Phase::INTERVAL, data: nil)
23
23
  @markers << [thread,
24
24
  name,
25
25
  start,
@@ -39,7 +39,7 @@ module Vernier
39
39
  start:,
40
40
  finish: current_time,
41
41
  phase: Marker::Phase::INTERVAL,
42
- thread: Thread.current.native_thread_id,
42
+ thread: Thread.current.object_id,
43
43
  data: { :type => 'UserTiming', :entryType => 'measure', :name => name }
44
44
  )
45
45
  end
@@ -99,9 +99,10 @@ module Vernier
99
99
  def data
100
100
  markers_by_thread = profile.markers.group_by { |marker| marker[0] }
101
101
 
102
- thread_data = profile.threads.map do |tid, thread_info|
103
- markers = markers_by_thread[tid] || []
102
+ thread_data = profile.threads.map do |ruby_thread_id, thread_info|
103
+ markers = markers_by_thread[ruby_thread_id] || []
104
104
  Thread.new(
105
+ ruby_thread_id,
105
106
  profile,
106
107
  @categorizer,
107
108
  markers: markers,
@@ -157,7 +158,8 @@ module Vernier
157
158
  class Thread
158
159
  attr_reader :profile
159
160
 
160
- def initialize(profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
161
+ def initialize(ruby_thread_id, profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
162
+ @ruby_thread_id = ruby_thread_id
161
163
  @profile = profile
162
164
  @categorizer = categorizer
163
165
  @tid = tid
@@ -212,7 +214,7 @@ module Vernier
212
214
  def data
213
215
  {
214
216
  name: @name,
215
- isMainThread: (@tid == ::Thread.main.native_thread_id) || (profile.threads.size == 1),
217
+ isMainThread: @ruby_thread_id == ::Thread.main.object_id || (profile.threads.size == 1),
216
218
  processStartupTime: 0, # FIXME
217
219
  processShutdownTime: nil, # FIXME
218
220
  registerTime: (@started_at - 0) / 1_000_000.0,
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vernier
4
- VERSION = "0.3.1"
4
+ VERSION = "0.4.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vernier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Hawthorn
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-05 00:00:00.000000000 Z
11
+ date: 2024-01-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: An experimental profiler
14
14
  email: