vernier 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6050bca74116d0e90f98025fe23d7fbd40c6107c085e3f768d606d4f418ebc60
4
- data.tar.gz: e302542d8b06852d28d0ec1e2528f23bba46d5ef9ea9a828c9dbcdb985a292a3
3
+ metadata.gz: f552d6fe2d529de743412cbc9975bfbd4ac87b894777a44ce26ef07dddc8e032
4
+ data.tar.gz: b9ddcd3e4ce0acb5ac53363e041d1bd9e3a2d4c5bf21b2bbe56c48ed5fef4cdd
5
5
  SHA512:
6
- metadata.gz: 9d20e5f9d9c894a253bc4aeb6b9da6cb35d916e881abedf996ce19757a9cd92efe43bcc76fda0aa4ce5e334be36582adf7c716a998c4c5e1f403dc57364fb5ab
7
- data.tar.gz: 7cf03df7bcb4f961b5b456eb781fd5818de7e18501baa01592ce17528f5eae7b272e90dcff200f861495ebb1a83c87f9146306db66ce20a9d56467ab87bce3c6
6
+ metadata.gz: 1b0808ee6ae8e64866e81e7ba8ed4847788421a00517c00e7aacb54c2fdff16287f92eb10132b5802fcb93955c3e4cf1a8fe4cfc97f4a9742a8130341bea75f7
7
+ data.tar.gz: 686a7397043be44451cccf9380473cda1e350ee342878ca2428d8bcd6c69aeea36244d15c92e3f998d4b6245c46c37d74ac3aac18ed58c783c288501d0cf7243
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Vernier
2
2
 
3
- Next-generation Ruby sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
3
+ Next-generation Ruby 3.2.1+ sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
4
4
 
5
5
  ## Examples
6
6
 
@@ -20,6 +20,8 @@ Rails benchmark - lobste.rs (time)
20
20
 
21
21
  ## Installation
22
22
 
23
+ Vernier requires Ruby version 3.2.1 or greater.
24
+
23
25
  ```ruby
24
26
  gem 'vernier'
25
27
  ```
@@ -35,6 +37,9 @@ Vernier.trace(out: "time_profile.json") { some_slow_method }
35
37
 
36
38
  The output can then be viewed in the Firefox Profiler (demo) or the [`profile-viewer` gem](https://github.com/tenderlove/profiler/tree/ruby) (a Ruby-customized version of the Firefox Profiler).
37
39
 
40
+ - **Flame Graph**: Shows proportionally how much time is spent within particular stack frames. Frames are grouped together, which means that x-axis / left-to-right order is not meaningful.
41
+ - **Stack Chart**: Shows the stack at each sample with the x-axis representing time and can be read left-to-right.
42
+
38
43
  ### Retained memory
39
44
 
40
45
  Record a flamegraph of all **retained** allocations from loading `irb`.
@@ -43,6 +48,7 @@ Record a flamegraph of all **retained** allocations from loading `irb`.
43
48
  ruby -r vernier -e 'Vernier.trace_retained(out: "irb_profile.json") { require "irb" }'
44
49
  ```
45
50
 
51
+ Retained-memory flamegraphs must be interpreted a little differently than a typical profiling flamegraph. In a retained-memory flamegraph, the x-axis represents a proportion of memory in bytes, _not time or samples_. The topmost boxes on the y-axis represent the retained objects, with their stacktrace below; their width represents the percentage of overall retained memory each object occupies.
46
52
 
47
53
  ## Development
48
54
 
@@ -5,4 +5,11 @@ require "mkmf"
5
5
  $CXXFLAGS += " -std=c++14 "
6
6
  $CXXFLAGS += " -ggdb3 -Og "
7
7
 
8
+ have_header("ruby/thread.h")
9
+ have_struct_member("rb_internal_thread_event_data_t", "thread", ["ruby/thread.h"])
10
+
11
+ have_func("rb_profile_thread_frames", "ruby/debug.h")
12
+
13
+ have_func("pthread_setname_np")
14
+
8
15
  create_makefile("vernier/vernier")
@@ -1,3 +1,5 @@
1
+ // vim: expandtab:ts=4:sw=4
2
+
1
3
  #include <iostream>
2
4
  #include <iomanip>
3
5
  #include <vector>
@@ -27,6 +29,9 @@
27
29
  #include "ruby/debug.h"
28
30
  #include "ruby/thread.h"
29
31
 
32
+ #undef assert
33
+ #define assert RUBY_ASSERT_ALWAYS
34
+
30
35
  # define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
31
36
 
32
37
  // Internal TracePoint events we'll monitor during profiling
@@ -53,6 +58,22 @@ static VALUE rb_cVernierResult;
53
58
  static VALUE rb_mVernierMarkerType;
54
59
  static VALUE rb_cVernierCollector;
55
60
 
61
+ static const char *gvl_event_name(rb_event_flag_t event) {
62
+ switch (event) {
63
+ case RUBY_INTERNAL_THREAD_EVENT_STARTED:
64
+ return "started";
65
+ case RUBY_INTERNAL_THREAD_EVENT_READY:
66
+ return "ready";
67
+ case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
68
+ return "resumed";
69
+ case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
70
+ return "suspended";
71
+ case RUBY_INTERNAL_THREAD_EVENT_EXITED:
72
+ return "exited";
73
+ }
74
+ return "no-event";
75
+ }
76
+
56
77
  class TimeStamp {
57
78
  static const uint64_t nanoseconds_per_second = 1000000000;
58
79
  uint64_t value_ns;
@@ -85,8 +106,16 @@ class TimeStamp {
85
106
  } while (target_time > TimeStamp::Now());
86
107
  }
87
108
 
109
+ static TimeStamp from_seconds(uint64_t s) {
110
+ return TimeStamp::from_milliseconds(s * 1000);
111
+ }
112
+
113
+ static TimeStamp from_milliseconds(uint64_t ms) {
114
+ return TimeStamp::from_microseconds(ms * 1000);
115
+ }
116
+
88
117
  static TimeStamp from_microseconds(uint64_t us) {
89
- return TimeStamp(us * 1000);
118
+ return TimeStamp::from_nanoseconds(us * 1000);
90
119
  }
91
120
 
92
121
  static TimeStamp from_nanoseconds(uint64_t ns) {
@@ -266,6 +295,10 @@ class SamplerSemaphore {
266
295
  #ifdef __APPLE__
267
296
  dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
268
297
  #else
298
+ // Use sem_timedwait so that we get a crash instead of a deadlock for
299
+ // easier debugging
300
+ auto ts = (TimeStamp::Now() + TimeStamp::from_seconds(5)).timespec();
301
+
269
302
  int ret;
270
303
  do {
271
304
  ret = sem_wait(&sem);
@@ -304,16 +337,15 @@ struct RawSample {
304
337
  }
305
338
 
306
339
  void sample() {
340
+ clear();
341
+
307
342
  if (!ruby_native_thread_p()) {
308
- clear();
309
343
  return;
310
344
  }
311
345
 
312
346
  if (rb_during_gc()) {
313
347
  gc = true;
314
- len = 0;
315
348
  } else {
316
- gc = false;
317
349
  len = rb_profile_frames(0, MAX_LEN, frames, lines);
318
350
  }
319
351
  }
@@ -602,12 +634,13 @@ class Marker {
602
634
  Phase phase;
603
635
  TimeStamp timestamp;
604
636
  TimeStamp finish;
605
- native_thread_id_t thread_id;
637
+ // VALUE ruby_thread_id;
638
+ //native_thread_id_t thread_id;
606
639
  int stack_index = -1;
607
640
 
608
641
  VALUE to_array() {
609
642
  VALUE record[6] = {0};
610
- record[0] = ULL2NUM(thread_id);
643
+ record[0] = Qnil; // FIXME
611
644
  record[1] = INT2NUM(type);
612
645
  record[2] = INT2NUM(phase);
613
646
  record[3] = ULL2NUM(timestamp.nanoseconds());
@@ -625,30 +658,33 @@ class Marker {
625
658
  };
626
659
 
627
660
  class MarkerTable {
628
- TimeStamp last_gc_entry;
629
-
630
661
  public:
631
662
  std::vector<Marker> list;
632
663
  std::mutex mutex;
633
664
 
634
- void record_gc_entered() {
635
- last_gc_entry = TimeStamp::Now();
636
- }
637
-
638
- void record_gc_leave() {
639
- list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), get_native_thread_id(), -1 });
640
- }
641
-
642
665
  void record_interval(Marker::Type type, TimeStamp from, TimeStamp to, int stack_index = -1) {
643
666
  const std::lock_guard<std::mutex> lock(mutex);
644
667
 
645
- list.push_back({ type, Marker::INTERVAL, from, to, get_native_thread_id(), stack_index });
668
+ list.push_back({ type, Marker::INTERVAL, from, to, stack_index });
646
669
  }
647
670
 
648
671
  void record(Marker::Type type, int stack_index = -1) {
649
672
  const std::lock_guard<std::mutex> lock(mutex);
650
673
 
651
- list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), get_native_thread_id(), stack_index });
674
+ list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), stack_index });
675
+ }
676
+ };
677
+
678
+ class GCMarkerTable: public MarkerTable {
679
+ TimeStamp last_gc_entry;
680
+
681
+ public:
682
+ void record_gc_entered() {
683
+ last_gc_entry = TimeStamp::Now();
684
+ }
685
+
686
+ void record_gc_leave() {
687
+ list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), -1 });
652
688
  }
653
689
  };
654
690
 
@@ -731,6 +767,8 @@ class Thread {
731
767
  STOPPED
732
768
  };
733
769
 
770
+ VALUE ruby_thread;
771
+ VALUE ruby_thread_id;
734
772
  pthread_t pthread_id;
735
773
  native_thread_id_t native_tid;
736
774
  State state;
@@ -742,18 +780,33 @@ class Thread {
742
780
  int stack_on_suspend_idx;
743
781
  SampleTranslator translator;
744
782
 
745
- std::string name;
783
+ MarkerTable *markers;
784
+
785
+ std::string name;
746
786
 
747
- Thread(State state) : state(state), stack_on_suspend_idx(-1) {
748
- pthread_id = pthread_self();
787
+ // FIXME: don't use pthread at start
788
+ Thread(State state, pthread_t pthread_id, VALUE ruby_thread) : pthread_id(pthread_id), ruby_thread(ruby_thread), state(state), stack_on_suspend_idx(-1) {
789
+ name = Qnil;
790
+ ruby_thread_id = rb_obj_id(ruby_thread);
791
+ //ruby_thread_id = ULL2NUM(ruby_thread);
749
792
  native_tid = get_native_thread_id();
750
793
  started_at = state_changed_at = TimeStamp::Now();
794
+ name = "";
795
+ markers = new MarkerTable();
796
+
797
+ if (state == State::STARTED) {
798
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
799
+ }
751
800
  }
752
801
 
753
- void set_state(State new_state, MarkerTable *markers) {
802
+ void set_state(State new_state) {
754
803
  if (state == Thread::State::STOPPED) {
755
804
  return;
756
805
  }
806
+ if (new_state == Thread::State::SUSPENDED && state == new_state) {
807
+ // on Ruby 3.2 (only?) we may see duplicate suspended states
808
+ return;
809
+ }
757
810
 
758
811
  TimeStamp from = state_changed_at;
759
812
  auto now = TimeStamp::Now();
@@ -764,10 +817,13 @@ class Thread {
764
817
 
765
818
  switch (new_state) {
766
819
  case State::STARTED:
767
- new_state = State::RUNNING;
820
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
821
+ return; // no mutation of current state
768
822
  break;
769
823
  case State::RUNNING:
770
- assert(state == State::READY);
824
+ assert(state == State::READY || state == State::RUNNING);
825
+ pthread_id = pthread_self();
826
+ native_tid = get_native_thread_id();
771
827
 
772
828
  // If the GVL is immediately ready, and we measure no times
773
829
  // stalled, skip emitting the interval.
@@ -783,25 +839,26 @@ class Thread {
783
839
  // Threads can be preempted, which means they will have been in "Running"
784
840
  // state, and then the VM was like "no I need to stop you from working,
785
841
  // so I'll put you in the 'ready' (or stalled) state"
786
- assert(state == State::SUSPENDED || state == State::RUNNING);
842
+ assert(state == State::STARTED || state == State::SUSPENDED || state == State::RUNNING);
787
843
  if (state == State::SUSPENDED) {
788
844
  markers->record_interval(Marker::Type::MARKER_THREAD_SUSPENDED, from, now, stack_on_suspend_idx);
789
845
  }
790
- else {
846
+ else if (state == State::RUNNING) {
791
847
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
792
848
  }
793
849
  break;
794
850
  case State::SUSPENDED:
795
851
  // We can go from RUNNING or STARTED to SUSPENDED
796
- assert(state == State::RUNNING || state == State::STARTED);
852
+ assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
797
853
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
798
854
  break;
799
855
  case State::STOPPED:
800
- // We can go from RUNNING or STARTED to STOPPED
801
- assert(state == State::RUNNING || state == State::STARTED);
856
+ // We can go from RUNNING or STARTED or SUSPENDED to STOPPED
857
+ assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
802
858
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
859
+ markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
860
+
803
861
  stopped_at = now;
804
- capture_name();
805
862
 
806
863
  break;
807
864
  }
@@ -814,11 +871,7 @@ class Thread {
814
871
  return state != State::STOPPED;
815
872
  }
816
873
 
817
- void capture_name() {
818
- char buf[128];
819
- int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
820
- if (rc == 0)
821
- name = std::string(buf);
874
+ void mark() {
822
875
  }
823
876
  };
824
877
 
@@ -832,40 +885,46 @@ class ThreadTable {
832
885
  ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
833
886
  }
834
887
 
835
- void started(MarkerTable *markers) {
836
- //const std::lock_guard<std::mutex> lock(mutex);
888
+ void mark() {
889
+ for (auto &thread : list) {
890
+ thread.mark();
891
+ }
892
+ }
837
893
 
894
+ void started(VALUE th) {
838
895
  //list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
839
- markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
840
- set_state(Thread::State::STARTED, markers);
896
+ set_state(Thread::State::STARTED, th);
841
897
  }
842
898
 
843
- void ready(MarkerTable *markers) {
844
- set_state(Thread::State::READY, markers);
899
+ void ready(VALUE th) {
900
+ set_state(Thread::State::READY, th);
845
901
  }
846
902
 
847
- void resumed(MarkerTable *markers) {
848
- set_state(Thread::State::RUNNING, markers);
903
+ void resumed(VALUE th) {
904
+ set_state(Thread::State::RUNNING, th);
849
905
  }
850
906
 
851
- void suspended(MarkerTable *markers) {
852
- set_state(Thread::State::SUSPENDED, markers);
907
+ void suspended(VALUE th) {
908
+ set_state(Thread::State::SUSPENDED, th);
853
909
  }
854
910
 
855
- void stopped(MarkerTable *markers) {
856
- markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
857
- set_state(Thread::State::STOPPED, markers);
911
+ void stopped(VALUE th) {
912
+ set_state(Thread::State::STOPPED, th);
858
913
  }
859
914
 
860
915
  private:
861
- void set_state(Thread::State new_state, MarkerTable *markers) {
916
+ void set_state(Thread::State new_state, VALUE th) {
862
917
  const std::lock_guard<std::mutex> lock(mutex);
863
918
 
864
- pthread_t current_thread = pthread_self();
865
919
  //cerr << "set state=" << new_state << " thread=" << gettid() << endl;
866
920
 
921
+ pid_t native_tid = get_native_thread_id();
922
+ pthread_t pthread_id = pthread_self();
923
+
924
+ //fprintf(stderr, "th %p (tid: %i) from %s to %s\n", (void *)th, native_tid, gvl_event_name(state), gvl_event_name(new_state));
925
+
867
926
  for (auto &thread : list) {
868
- if (pthread_equal(current_thread, thread.pthread_id)) {
927
+ if (thread_equal(th, thread.ruby_thread)) {
869
928
  if (new_state == Thread::State::SUSPENDED) {
870
929
 
871
930
  RawSample sample;
@@ -875,14 +934,27 @@ class ThreadTable {
875
934
  //cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
876
935
  }
877
936
 
878
- thread.set_state(new_state, markers);
937
+ thread.set_state(new_state);
938
+
939
+ if (thread.state == Thread::State::RUNNING) {
940
+ thread.pthread_id = pthread_self();
941
+ thread.native_tid = get_native_thread_id();
942
+ } else {
943
+ thread.pthread_id = 0;
944
+ thread.native_tid = 0;
945
+ }
946
+
879
947
 
880
948
  return;
881
949
  }
882
950
  }
883
951
 
884
- pid_t native_tid = get_native_thread_id();
885
- list.emplace_back(new_state);
952
+ //fprintf(stderr, "NEW THREAD: th: %p, state: %i\n", th, new_state);
953
+ list.emplace_back(new_state, pthread_self(), th);
954
+ }
955
+
956
+ bool thread_equal(VALUE a, VALUE b) {
957
+ return a == b;
886
958
  }
887
959
  };
888
960
 
@@ -1002,6 +1074,12 @@ class RetainedCollector : public BaseCollector {
1002
1074
  void record(VALUE obj) {
1003
1075
  RawSample sample;
1004
1076
  sample.sample();
1077
+ if (sample.empty()) {
1078
+ // During thread allocation we allocate one object without a frame
1079
+ // (as of Ruby 3.3)
1080
+ // Ideally we'd allow empty samples to be represented
1081
+ return;
1082
+ }
1005
1083
  int stack_index = frame_list.stack_index(sample);
1006
1084
 
1007
1085
  object_list.push_back(obj);
@@ -1151,6 +1229,8 @@ class GlobalSignalHandler {
1151
1229
  void record_sample(LiveSample &sample, pthread_t pthread_id) {
1152
1230
  const std::lock_guard<std::mutex> lock(mutex);
1153
1231
 
1232
+ assert(pthread_id);
1233
+
1154
1234
  live_sample = &sample;
1155
1235
  if (pthread_kill(pthread_id, SIGPROF)) {
1156
1236
  rb_bug("pthread_kill failed");
@@ -1187,7 +1267,7 @@ class GlobalSignalHandler {
1187
1267
  LiveSample *GlobalSignalHandler::live_sample;
1188
1268
 
1189
1269
  class TimeCollector : public BaseCollector {
1190
- MarkerTable markers;
1270
+ GCMarkerTable gc_markers;
1191
1271
  ThreadTable threads;
1192
1272
 
1193
1273
  pthread_t sample_thread;
@@ -1216,10 +1296,22 @@ class TimeCollector : public BaseCollector {
1216
1296
  }
1217
1297
 
1218
1298
  VALUE get_markers() {
1219
- VALUE list = rb_ary_new2(this->markers.list.size());
1299
+ VALUE list = rb_ary_new();
1300
+ VALUE main_thread = rb_thread_main();
1301
+ VALUE main_thread_id = rb_obj_id(main_thread);
1302
+
1303
+ for (auto& marker: this->gc_markers.list) {
1304
+ VALUE ary = marker.to_array();
1220
1305
 
1221
- for (auto& marker: this->markers.list) {
1222
- rb_ary_push(list, marker.to_array());
1306
+ RARRAY_ASET(ary, 0, main_thread_id);
1307
+ rb_ary_push(list, ary);
1308
+ }
1309
+ for (auto &thread : threads.list) {
1310
+ for (auto& marker: thread.markers->list) {
1311
+ VALUE ary = marker.to_array();
1312
+ RARRAY_ASET(ary, 0, thread.ruby_thread_id);
1313
+ rb_ary_push(list, ary);
1314
+ }
1223
1315
  }
1224
1316
 
1225
1317
  return list;
@@ -1235,7 +1327,9 @@ class TimeCollector : public BaseCollector {
1235
1327
  threads.mutex.lock();
1236
1328
  for (auto &thread : threads.list) {
1237
1329
  //if (thread.state == Thread::State::RUNNING) {
1238
- if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1330
+ //if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1331
+ if (thread.state == Thread::State::RUNNING) {
1332
+ //fprintf(stderr, "sampling %p on tid:%i\n", thread.ruby_thread, thread.native_tid);
1239
1333
  GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
1240
1334
 
1241
1335
  if (sample.sample.gc) {
@@ -1271,6 +1365,13 @@ class TimeCollector : public BaseCollector {
1271
1365
  }
1272
1366
 
1273
1367
  static void *sample_thread_entry(void *arg) {
1368
+ #if HAVE_PTHREAD_SETNAME_NP
1369
+ #ifdef __APPLE__
1370
+ pthread_setname_np("Vernier profiler");
1371
+ #else
1372
+ pthread_setname_np(pthread_self(), "Vernier profiler");
1373
+ #endif
1374
+ #endif
1274
1375
  TimeCollector *collector = static_cast<TimeCollector *>(arg);
1275
1376
  collector->sample_thread_run();
1276
1377
  return NULL;
@@ -1281,10 +1382,10 @@ class TimeCollector : public BaseCollector {
1281
1382
 
1282
1383
  switch (event) {
1283
1384
  case RUBY_EVENT_THREAD_BEGIN:
1284
- collector->threads.started(&collector->markers);
1385
+ collector->threads.started(self);
1285
1386
  break;
1286
1387
  case RUBY_EVENT_THREAD_END:
1287
- collector->threads.stopped(&collector->markers);
1388
+ collector->threads.stopped(self);
1288
1389
  break;
1289
1390
  }
1290
1391
  }
@@ -1294,36 +1395,57 @@ class TimeCollector : public BaseCollector {
1294
1395
 
1295
1396
  switch (event) {
1296
1397
  case RUBY_INTERNAL_EVENT_GC_START:
1297
- collector->markers.record(Marker::Type::MARKER_GC_START);
1398
+ collector->gc_markers.record(Marker::Type::MARKER_GC_START);
1298
1399
  break;
1299
1400
  case RUBY_INTERNAL_EVENT_GC_END_MARK:
1300
- collector->markers.record(Marker::Type::MARKER_GC_END_MARK);
1401
+ collector->gc_markers.record(Marker::Type::MARKER_GC_END_MARK);
1301
1402
  break;
1302
1403
  case RUBY_INTERNAL_EVENT_GC_END_SWEEP:
1303
- collector->markers.record(Marker::Type::MARKER_GC_END_SWEEP);
1404
+ collector->gc_markers.record(Marker::Type::MARKER_GC_END_SWEEP);
1304
1405
  break;
1305
1406
  case RUBY_INTERNAL_EVENT_GC_ENTER:
1306
- collector->markers.record_gc_entered();
1407
+ collector->gc_markers.record_gc_entered();
1307
1408
  break;
1308
1409
  case RUBY_INTERNAL_EVENT_GC_EXIT:
1309
- collector->markers.record_gc_leave();
1410
+ collector->gc_markers.record_gc_leave();
1310
1411
  break;
1311
1412
  }
1312
1413
  }
1313
1414
 
1314
1415
  static void internal_thread_event_cb(rb_event_flag_t event, const rb_internal_thread_event_data_t *event_data, void *data) {
1315
1416
  TimeCollector *collector = static_cast<TimeCollector *>(data);
1417
+ VALUE thread = Qnil;
1418
+
1419
+ #if HAVE_RB_INTERNAL_THREAD_EVENT_DATA_T_THREAD
1420
+ thread = event_data->thread;
1421
+ #else
1422
+ // We may arrive here when starting a thread with
1423
+ // RUBY_INTERNAL_THREAD_EVENT_READY before the thread is actually set up.
1424
+ if (!ruby_native_thread_p()) return;
1425
+
1426
+ thread = rb_thread_current();
1427
+ #endif
1428
+
1429
+ auto native_tid = get_native_thread_id();
1316
1430
  //cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
1431
+ //fprintf(stderr, "(%i) th %p to %s\n", native_tid, (void *)thread, gvl_event_name(event));
1432
+
1317
1433
 
1318
1434
  switch (event) {
1435
+ case RUBY_INTERNAL_THREAD_EVENT_STARTED:
1436
+ collector->threads.started(thread);
1437
+ break;
1438
+ case RUBY_INTERNAL_THREAD_EVENT_EXITED:
1439
+ collector->threads.stopped(thread);
1440
+ break;
1319
1441
  case RUBY_INTERNAL_THREAD_EVENT_READY:
1320
- collector->threads.ready(&collector->markers);
1442
+ collector->threads.ready(thread);
1321
1443
  break;
1322
1444
  case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
1323
- collector->threads.resumed(&collector->markers);
1445
+ collector->threads.resumed(thread);
1324
1446
  break;
1325
1447
  case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
1326
- collector->threads.suspended(&collector->markers);
1448
+ collector->threads.suspended(thread);
1327
1449
  break;
1328
1450
 
1329
1451
  }
@@ -1351,7 +1473,7 @@ class TimeCollector : public BaseCollector {
1351
1473
  // have at least one thread in our thread list because it's possible
1352
1474
  // that the profile might be such that we don't get any thread switch
1353
1475
  // events and we need at least one
1354
- this->threads.resumed(&this->markers);
1476
+ this->threads.resumed(rb_thread_current());
1355
1477
 
1356
1478
  thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
1357
1479
  rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
@@ -1372,13 +1494,6 @@ class TimeCollector : public BaseCollector {
1372
1494
  rb_remove_event_hook(internal_gc_event_cb);
1373
1495
  rb_remove_event_hook(internal_thread_event_cb);
1374
1496
 
1375
- // capture thread names
1376
- for (auto& thread: this->threads.list) {
1377
- if (thread.running()) {
1378
- thread.capture_name();
1379
- }
1380
- }
1381
-
1382
1497
  frame_list.finalize();
1383
1498
 
1384
1499
  VALUE result = build_collector_result();
@@ -1398,7 +1513,7 @@ class TimeCollector : public BaseCollector {
1398
1513
  VALUE hash = rb_hash_new();
1399
1514
  thread.samples.write_result(hash);
1400
1515
 
1401
- rb_hash_aset(threads, ULL2NUM(thread.native_tid), hash);
1516
+ rb_hash_aset(threads, thread.ruby_thread_id, hash);
1402
1517
  rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
1403
1518
  rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
1404
1519
  if (!thread.stopped_at.zero()) {
@@ -1415,6 +1530,7 @@ class TimeCollector : public BaseCollector {
1415
1530
 
1416
1531
  void mark() {
1417
1532
  frame_list.mark_frames();
1533
+ threads.mark();
1418
1534
 
1419
1535
  //for (int i = 0; i < queued_length; i++) {
1420
1536
  // rb_gc_mark(queued_frames[i]);
@@ -19,7 +19,7 @@ module Vernier
19
19
  Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
20
20
  end
21
21
 
22
- def add_marker(name:, start:, finish:, thread: Thread.current.native_thread_id, phase: Marker::Phase::INTERVAL, data: nil)
22
+ def add_marker(name:, start:, finish:, thread: Thread.current.object_id, phase: Marker::Phase::INTERVAL, data: nil)
23
23
  @markers << [thread,
24
24
  name,
25
25
  start,
@@ -39,7 +39,7 @@ module Vernier
39
39
  start:,
40
40
  finish: current_time,
41
41
  phase: Marker::Phase::INTERVAL,
42
- thread: Thread.current.native_thread_id,
42
+ thread: Thread.current.object_id,
43
43
  data: { :type => 'UserTiming', :entryType => 'measure', :name => name }
44
44
  )
45
45
  end
@@ -99,9 +99,10 @@ module Vernier
99
99
  def data
100
100
  markers_by_thread = profile.markers.group_by { |marker| marker[0] }
101
101
 
102
- thread_data = profile.threads.map do |tid, thread_info|
103
- markers = markers_by_thread[tid] || []
102
+ thread_data = profile.threads.map do |ruby_thread_id, thread_info|
103
+ markers = markers_by_thread[ruby_thread_id] || []
104
104
  Thread.new(
105
+ ruby_thread_id,
105
106
  profile,
106
107
  @categorizer,
107
108
  markers: markers,
@@ -157,11 +158,12 @@ module Vernier
157
158
  class Thread
158
159
  attr_reader :profile
159
160
 
160
- def initialize(profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
161
+ def initialize(ruby_thread_id, profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
162
+ @ruby_thread_id = ruby_thread_id
161
163
  @profile = profile
162
164
  @categorizer = categorizer
163
165
  @tid = tid
164
- @name = name
166
+ @name = pretty_name(name)
165
167
 
166
168
  timestamps ||= [0] * samples.size
167
169
  @samples, @weights, @timestamps = samples, weights, timestamps
@@ -212,7 +214,7 @@ module Vernier
212
214
  def data
213
215
  {
214
216
  name: @name,
215
- isMainThread: (@tid == ::Thread.main.native_thread_id) || (profile.threads.size == 1),
217
+ isMainThread: @ruby_thread_id == ::Thread.main.object_id || (profile.threads.size == 1),
216
218
  processStartupTime: 0, # FIXME
217
219
  processShutdownTime: nil, # FIXME
218
220
  registerTime: (@started_at - 0) / 1_000_000.0,
@@ -383,6 +385,25 @@ module Vernier
383
385
 
384
386
  private
385
387
 
388
+ def pretty_name(name)
389
+ if name.empty?
390
+ begin
391
+ tr = ObjectSpace._id2ref(@ruby_thread_id)
392
+ name = tr.inspect if tr
393
+ rescue RangeError
394
+ # Thread was already GC'd
395
+ end
396
+ end
397
+ return name unless name.start_with?("#<Thread")
398
+ pretty = []
399
+ obj_address = name[/Thread:(0x\w+)/,1]
400
+ best_id = name[/\#<Thread:0x\w+@?\s?(.*)\s+\S+>/,1] || ""
401
+ Gem.path.each { |gem_dir| best_id = best_id.gsub(gem_dir, "...") }
402
+ pretty << best_id unless best_id.empty?
403
+ pretty << "(#{obj_address})"
404
+ pretty.join(' ')
405
+ end
406
+
386
407
  def gc_category
387
408
  @categorizer.get_category("GC")
388
409
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vernier
4
- VERSION = "0.3.1"
4
+ VERSION = "0.5.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vernier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Hawthorn
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-05 00:00:00.000000000 Z
11
+ date: 2024-02-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: An experimental profiler
14
14
  email: