vernier 0.3.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6050bca74116d0e90f98025fe23d7fbd40c6107c085e3f768d606d4f418ebc60
4
- data.tar.gz: e302542d8b06852d28d0ec1e2528f23bba46d5ef9ea9a828c9dbcdb985a292a3
3
+ metadata.gz: f552d6fe2d529de743412cbc9975bfbd4ac87b894777a44ce26ef07dddc8e032
4
+ data.tar.gz: b9ddcd3e4ce0acb5ac53363e041d1bd9e3a2d4c5bf21b2bbe56c48ed5fef4cdd
5
5
  SHA512:
6
- metadata.gz: 9d20e5f9d9c894a253bc4aeb6b9da6cb35d916e881abedf996ce19757a9cd92efe43bcc76fda0aa4ce5e334be36582adf7c716a998c4c5e1f403dc57364fb5ab
7
- data.tar.gz: 7cf03df7bcb4f961b5b456eb781fd5818de7e18501baa01592ce17528f5eae7b272e90dcff200f861495ebb1a83c87f9146306db66ce20a9d56467ab87bce3c6
6
+ metadata.gz: 1b0808ee6ae8e64866e81e7ba8ed4847788421a00517c00e7aacb54c2fdff16287f92eb10132b5802fcb93955c3e4cf1a8fe4cfc97f4a9742a8130341bea75f7
7
+ data.tar.gz: 686a7397043be44451cccf9380473cda1e350ee342878ca2428d8bcd6c69aeea36244d15c92e3f998d4b6245c46c37d74ac3aac18ed58c783c288501d0cf7243
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Vernier
2
2
 
3
- Next-generation Ruby sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
3
+ Next-generation Ruby 3.2.1+ sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
4
4
 
5
5
  ## Examples
6
6
 
@@ -20,6 +20,8 @@ Rails benchmark - lobste.rs (time)
20
20
 
21
21
  ## Installation
22
22
 
23
+ Vernier requires Ruby version 3.2.1 or greater.
24
+
23
25
  ```ruby
24
26
  gem 'vernier'
25
27
  ```
@@ -35,6 +37,9 @@ Vernier.trace(out: "time_profile.json") { some_slow_method }
35
37
 
36
38
  The output can then be viewed in the Firefox Profiler (demo) or the [`profile-viewer` gem](https://github.com/tenderlove/profiler/tree/ruby) (a Ruby-customized version of the Firefox Profiler).
37
39
 
40
+ - **Flame Graph**: Shows proportionally how much time is spent within particular stack frames. Frames are grouped together, which means that the x-axis / left-to-right order is not meaningful.
41
+ - **Stack Chart**: Shows the stack at each sample, with the x-axis representing time, and can be read left-to-right.
42
+
38
43
  ### Retained memory
39
44
 
40
45
  Record a flamegraph of all **retained** allocations from loading `irb`.
@@ -43,6 +48,7 @@ Record a flamegraph of all **retained** allocations from loading `irb`.
43
48
  ruby -r vernier -e 'Vernier.trace_retained(out: "irb_profile.json") { require "irb" }'
44
49
  ```
45
50
 
51
+ Retained-memory flamegraphs must be interpreted a little differently than a typical profiling flamegraph. In a retained-memory flamegraph, the x-axis represents a proportion of memory in bytes, _not time or samples_. The topmost boxes on the y-axis represent the retained objects, with their stacktrace below; their width represents the percentage of overall retained memory each object occupies.
46
52
 
47
53
  ## Development
48
54
 
@@ -5,4 +5,11 @@ require "mkmf"
5
5
  $CXXFLAGS += " -std=c++14 "
6
6
  $CXXFLAGS += " -ggdb3 -Og "
7
7
 
8
+ have_header("ruby/thread.h")
9
+ have_struct_member("rb_internal_thread_event_data_t", "thread", ["ruby/thread.h"])
10
+
11
+ have_func("rb_profile_thread_frames", "ruby/debug.h")
12
+
13
+ have_func("pthread_setname_np")
14
+
8
15
  create_makefile("vernier/vernier")
@@ -1,3 +1,5 @@
1
+ // vim: expandtab:ts=4:sw=4
2
+
1
3
  #include <iostream>
2
4
  #include <iomanip>
3
5
  #include <vector>
@@ -27,6 +29,9 @@
27
29
  #include "ruby/debug.h"
28
30
  #include "ruby/thread.h"
29
31
 
32
+ #undef assert
33
+ #define assert RUBY_ASSERT_ALWAYS
34
+
30
35
  # define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
31
36
 
32
37
  // Internal TracePoint events we'll monitor during profiling
@@ -53,6 +58,22 @@ static VALUE rb_cVernierResult;
53
58
  static VALUE rb_mVernierMarkerType;
54
59
  static VALUE rb_cVernierCollector;
55
60
 
61
+ static const char *gvl_event_name(rb_event_flag_t event) {
62
+ switch (event) {
63
+ case RUBY_INTERNAL_THREAD_EVENT_STARTED:
64
+ return "started";
65
+ case RUBY_INTERNAL_THREAD_EVENT_READY:
66
+ return "ready";
67
+ case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
68
+ return "resumed";
69
+ case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
70
+ return "suspended";
71
+ case RUBY_INTERNAL_THREAD_EVENT_EXITED:
72
+ return "exited";
73
+ }
74
+ return "no-event";
75
+ }
76
+
56
77
  class TimeStamp {
57
78
  static const uint64_t nanoseconds_per_second = 1000000000;
58
79
  uint64_t value_ns;
@@ -85,8 +106,16 @@ class TimeStamp {
85
106
  } while (target_time > TimeStamp::Now());
86
107
  }
87
108
 
109
+ static TimeStamp from_seconds(uint64_t s) {
110
+ return TimeStamp::from_milliseconds(s * 1000);
111
+ }
112
+
113
+ static TimeStamp from_milliseconds(uint64_t ms) {
114
+ return TimeStamp::from_microseconds(ms * 1000);
115
+ }
116
+
88
117
  static TimeStamp from_microseconds(uint64_t us) {
89
- return TimeStamp(us * 1000);
118
+ return TimeStamp::from_nanoseconds(us * 1000);
90
119
  }
91
120
 
92
121
  static TimeStamp from_nanoseconds(uint64_t ns) {
@@ -266,6 +295,10 @@ class SamplerSemaphore {
266
295
  #ifdef __APPLE__
267
296
  dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
268
297
  #else
298
+ // Use sem_timedwait so that we get a crash instead of a deadlock for
299
+ // easier debugging
300
+ auto ts = (TimeStamp::Now() + TimeStamp::from_seconds(5)).timespec();
301
+
269
302
  int ret;
270
303
  do {
271
304
  ret = sem_wait(&sem);
@@ -304,16 +337,15 @@ struct RawSample {
304
337
  }
305
338
 
306
339
  void sample() {
340
+ clear();
341
+
307
342
  if (!ruby_native_thread_p()) {
308
- clear();
309
343
  return;
310
344
  }
311
345
 
312
346
  if (rb_during_gc()) {
313
347
  gc = true;
314
- len = 0;
315
348
  } else {
316
- gc = false;
317
349
  len = rb_profile_frames(0, MAX_LEN, frames, lines);
318
350
  }
319
351
  }
@@ -602,12 +634,13 @@ class Marker {
602
634
  Phase phase;
603
635
  TimeStamp timestamp;
604
636
  TimeStamp finish;
605
- native_thread_id_t thread_id;
637
+ // VALUE ruby_thread_id;
638
+ //native_thread_id_t thread_id;
606
639
  int stack_index = -1;
607
640
 
608
641
  VALUE to_array() {
609
642
  VALUE record[6] = {0};
610
- record[0] = ULL2NUM(thread_id);
643
+ record[0] = Qnil; // FIXME
611
644
  record[1] = INT2NUM(type);
612
645
  record[2] = INT2NUM(phase);
613
646
  record[3] = ULL2NUM(timestamp.nanoseconds());
@@ -625,30 +658,33 @@ class Marker {
625
658
  };
626
659
 
627
660
  class MarkerTable {
628
- TimeStamp last_gc_entry;
629
-
630
661
  public:
631
662
  std::vector<Marker> list;
632
663
  std::mutex mutex;
633
664
 
634
- void record_gc_entered() {
635
- last_gc_entry = TimeStamp::Now();
636
- }
637
-
638
- void record_gc_leave() {
639
- list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), get_native_thread_id(), -1 });
640
- }
641
-
642
665
  void record_interval(Marker::Type type, TimeStamp from, TimeStamp to, int stack_index = -1) {
643
666
  const std::lock_guard<std::mutex> lock(mutex);
644
667
 
645
- list.push_back({ type, Marker::INTERVAL, from, to, get_native_thread_id(), stack_index });
668
+ list.push_back({ type, Marker::INTERVAL, from, to, stack_index });
646
669
  }
647
670
 
648
671
  void record(Marker::Type type, int stack_index = -1) {
649
672
  const std::lock_guard<std::mutex> lock(mutex);
650
673
 
651
- list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), get_native_thread_id(), stack_index });
674
+ list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), stack_index });
675
+ }
676
+ };
677
+
678
+ class GCMarkerTable: public MarkerTable {
679
+ TimeStamp last_gc_entry;
680
+
681
+ public:
682
+ void record_gc_entered() {
683
+ last_gc_entry = TimeStamp::Now();
684
+ }
685
+
686
+ void record_gc_leave() {
687
+ list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), -1 });
652
688
  }
653
689
  };
654
690
 
@@ -731,6 +767,8 @@ class Thread {
731
767
  STOPPED
732
768
  };
733
769
 
770
+ VALUE ruby_thread;
771
+ VALUE ruby_thread_id;
734
772
  pthread_t pthread_id;
735
773
  native_thread_id_t native_tid;
736
774
  State state;
@@ -742,18 +780,33 @@ class Thread {
742
780
  int stack_on_suspend_idx;
743
781
  SampleTranslator translator;
744
782
 
745
- std::string name;
783
+ MarkerTable *markers;
784
+
785
+ std::string name;
746
786
 
747
- Thread(State state) : state(state), stack_on_suspend_idx(-1) {
748
- pthread_id = pthread_self();
787
+ // FIXME: don't use pthread at start
788
+ Thread(State state, pthread_t pthread_id, VALUE ruby_thread) : pthread_id(pthread_id), ruby_thread(ruby_thread), state(state), stack_on_suspend_idx(-1) {
789
+ name = Qnil;
790
+ ruby_thread_id = rb_obj_id(ruby_thread);
791
+ //ruby_thread_id = ULL2NUM(ruby_thread);
749
792
  native_tid = get_native_thread_id();
750
793
  started_at = state_changed_at = TimeStamp::Now();
794
+ name = "";
795
+ markers = new MarkerTable();
796
+
797
+ if (state == State::STARTED) {
798
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
799
+ }
751
800
  }
752
801
 
753
- void set_state(State new_state, MarkerTable *markers) {
802
+ void set_state(State new_state) {
754
803
  if (state == Thread::State::STOPPED) {
755
804
  return;
756
805
  }
806
+ if (new_state == Thread::State::SUSPENDED && state == new_state) {
807
+ // on Ruby 3.2 (only?) we may see duplicate suspended states
808
+ return;
809
+ }
757
810
 
758
811
  TimeStamp from = state_changed_at;
759
812
  auto now = TimeStamp::Now();
@@ -764,10 +817,13 @@ class Thread {
764
817
 
765
818
  switch (new_state) {
766
819
  case State::STARTED:
767
- new_state = State::RUNNING;
820
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
821
+ return; // no mutation of current state
768
822
  break;
769
823
  case State::RUNNING:
770
- assert(state == State::READY);
824
+ assert(state == State::READY || state == State::RUNNING);
825
+ pthread_id = pthread_self();
826
+ native_tid = get_native_thread_id();
771
827
 
772
828
  // If the GVL is immediately ready, and we measure no times
773
829
  // stalled, skip emitting the interval.
@@ -783,25 +839,26 @@ class Thread {
783
839
  // Threads can be preempted, which means they will have been in "Running"
784
840
  // state, and then the VM was like "no I need to stop you from working,
785
841
  // so I'll put you in the 'ready' (or stalled) state"
786
- assert(state == State::SUSPENDED || state == State::RUNNING);
842
+ assert(state == State::STARTED || state == State::SUSPENDED || state == State::RUNNING);
787
843
  if (state == State::SUSPENDED) {
788
844
  markers->record_interval(Marker::Type::MARKER_THREAD_SUSPENDED, from, now, stack_on_suspend_idx);
789
845
  }
790
- else {
846
+ else if (state == State::RUNNING) {
791
847
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
792
848
  }
793
849
  break;
794
850
  case State::SUSPENDED:
795
851
  // We can go from RUNNING or STARTED to SUSPENDED
796
- assert(state == State::RUNNING || state == State::STARTED);
852
+ assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
797
853
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
798
854
  break;
799
855
  case State::STOPPED:
800
- // We can go from RUNNING or STARTED to STOPPED
801
- assert(state == State::RUNNING || state == State::STARTED);
856
+ // We can go from RUNNING or STARTED or SUSPENDED to STOPPED
857
+ assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
802
858
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
859
+ markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
860
+
803
861
  stopped_at = now;
804
- capture_name();
805
862
 
806
863
  break;
807
864
  }
@@ -814,11 +871,7 @@ class Thread {
814
871
  return state != State::STOPPED;
815
872
  }
816
873
 
817
- void capture_name() {
818
- char buf[128];
819
- int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
820
- if (rc == 0)
821
- name = std::string(buf);
874
+ void mark() {
822
875
  }
823
876
  };
824
877
 
@@ -832,40 +885,46 @@ class ThreadTable {
832
885
  ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
833
886
  }
834
887
 
835
- void started(MarkerTable *markers) {
836
- //const std::lock_guard<std::mutex> lock(mutex);
888
+ void mark() {
889
+ for (auto &thread : list) {
890
+ thread.mark();
891
+ }
892
+ }
837
893
 
894
+ void started(VALUE th) {
838
895
  //list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
839
- markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
840
- set_state(Thread::State::STARTED, markers);
896
+ set_state(Thread::State::STARTED, th);
841
897
  }
842
898
 
843
- void ready(MarkerTable *markers) {
844
- set_state(Thread::State::READY, markers);
899
+ void ready(VALUE th) {
900
+ set_state(Thread::State::READY, th);
845
901
  }
846
902
 
847
- void resumed(MarkerTable *markers) {
848
- set_state(Thread::State::RUNNING, markers);
903
+ void resumed(VALUE th) {
904
+ set_state(Thread::State::RUNNING, th);
849
905
  }
850
906
 
851
- void suspended(MarkerTable *markers) {
852
- set_state(Thread::State::SUSPENDED, markers);
907
+ void suspended(VALUE th) {
908
+ set_state(Thread::State::SUSPENDED, th);
853
909
  }
854
910
 
855
- void stopped(MarkerTable *markers) {
856
- markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
857
- set_state(Thread::State::STOPPED, markers);
911
+ void stopped(VALUE th) {
912
+ set_state(Thread::State::STOPPED, th);
858
913
  }
859
914
 
860
915
  private:
861
- void set_state(Thread::State new_state, MarkerTable *markers) {
916
+ void set_state(Thread::State new_state, VALUE th) {
862
917
  const std::lock_guard<std::mutex> lock(mutex);
863
918
 
864
- pthread_t current_thread = pthread_self();
865
919
  //cerr << "set state=" << new_state << " thread=" << gettid() << endl;
866
920
 
921
+ pid_t native_tid = get_native_thread_id();
922
+ pthread_t pthread_id = pthread_self();
923
+
924
+ //fprintf(stderr, "th %p (tid: %i) from %s to %s\n", (void *)th, native_tid, gvl_event_name(state), gvl_event_name(new_state));
925
+
867
926
  for (auto &thread : list) {
868
- if (pthread_equal(current_thread, thread.pthread_id)) {
927
+ if (thread_equal(th, thread.ruby_thread)) {
869
928
  if (new_state == Thread::State::SUSPENDED) {
870
929
 
871
930
  RawSample sample;
@@ -875,14 +934,27 @@ class ThreadTable {
875
934
  //cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
876
935
  }
877
936
 
878
- thread.set_state(new_state, markers);
937
+ thread.set_state(new_state);
938
+
939
+ if (thread.state == Thread::State::RUNNING) {
940
+ thread.pthread_id = pthread_self();
941
+ thread.native_tid = get_native_thread_id();
942
+ } else {
943
+ thread.pthread_id = 0;
944
+ thread.native_tid = 0;
945
+ }
946
+
879
947
 
880
948
  return;
881
949
  }
882
950
  }
883
951
 
884
- pid_t native_tid = get_native_thread_id();
885
- list.emplace_back(new_state);
952
+ //fprintf(stderr, "NEW THREAD: th: %p, state: %i\n", th, new_state);
953
+ list.emplace_back(new_state, pthread_self(), th);
954
+ }
955
+
956
+ bool thread_equal(VALUE a, VALUE b) {
957
+ return a == b;
886
958
  }
887
959
  };
888
960
 
@@ -1002,6 +1074,12 @@ class RetainedCollector : public BaseCollector {
1002
1074
  void record(VALUE obj) {
1003
1075
  RawSample sample;
1004
1076
  sample.sample();
1077
+ if (sample.empty()) {
1078
+ // During thread allocation we allocate one object without a frame
1079
+ // (as of Ruby 3.3)
1080
+ // Ideally we'd allow empty samples to be represented
1081
+ return;
1082
+ }
1005
1083
  int stack_index = frame_list.stack_index(sample);
1006
1084
 
1007
1085
  object_list.push_back(obj);
@@ -1151,6 +1229,8 @@ class GlobalSignalHandler {
1151
1229
  void record_sample(LiveSample &sample, pthread_t pthread_id) {
1152
1230
  const std::lock_guard<std::mutex> lock(mutex);
1153
1231
 
1232
+ assert(pthread_id);
1233
+
1154
1234
  live_sample = &sample;
1155
1235
  if (pthread_kill(pthread_id, SIGPROF)) {
1156
1236
  rb_bug("pthread_kill failed");
@@ -1187,7 +1267,7 @@ class GlobalSignalHandler {
1187
1267
  LiveSample *GlobalSignalHandler::live_sample;
1188
1268
 
1189
1269
  class TimeCollector : public BaseCollector {
1190
- MarkerTable markers;
1270
+ GCMarkerTable gc_markers;
1191
1271
  ThreadTable threads;
1192
1272
 
1193
1273
  pthread_t sample_thread;
@@ -1216,10 +1296,22 @@ class TimeCollector : public BaseCollector {
1216
1296
  }
1217
1297
 
1218
1298
  VALUE get_markers() {
1219
- VALUE list = rb_ary_new2(this->markers.list.size());
1299
+ VALUE list = rb_ary_new();
1300
+ VALUE main_thread = rb_thread_main();
1301
+ VALUE main_thread_id = rb_obj_id(main_thread);
1302
+
1303
+ for (auto& marker: this->gc_markers.list) {
1304
+ VALUE ary = marker.to_array();
1220
1305
 
1221
- for (auto& marker: this->markers.list) {
1222
- rb_ary_push(list, marker.to_array());
1306
+ RARRAY_ASET(ary, 0, main_thread_id);
1307
+ rb_ary_push(list, ary);
1308
+ }
1309
+ for (auto &thread : threads.list) {
1310
+ for (auto& marker: thread.markers->list) {
1311
+ VALUE ary = marker.to_array();
1312
+ RARRAY_ASET(ary, 0, thread.ruby_thread_id);
1313
+ rb_ary_push(list, ary);
1314
+ }
1223
1315
  }
1224
1316
 
1225
1317
  return list;
@@ -1235,7 +1327,9 @@ class TimeCollector : public BaseCollector {
1235
1327
  threads.mutex.lock();
1236
1328
  for (auto &thread : threads.list) {
1237
1329
  //if (thread.state == Thread::State::RUNNING) {
1238
- if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1330
+ //if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1331
+ if (thread.state == Thread::State::RUNNING) {
1332
+ //fprintf(stderr, "sampling %p on tid:%i\n", thread.ruby_thread, thread.native_tid);
1239
1333
  GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
1240
1334
 
1241
1335
  if (sample.sample.gc) {
@@ -1271,6 +1365,13 @@ class TimeCollector : public BaseCollector {
1271
1365
  }
1272
1366
 
1273
1367
  static void *sample_thread_entry(void *arg) {
1368
+ #if HAVE_PTHREAD_SETNAME_NP
1369
+ #ifdef __APPLE__
1370
+ pthread_setname_np("Vernier profiler");
1371
+ #else
1372
+ pthread_setname_np(pthread_self(), "Vernier profiler");
1373
+ #endif
1374
+ #endif
1274
1375
  TimeCollector *collector = static_cast<TimeCollector *>(arg);
1275
1376
  collector->sample_thread_run();
1276
1377
  return NULL;
@@ -1281,10 +1382,10 @@ class TimeCollector : public BaseCollector {
1281
1382
 
1282
1383
  switch (event) {
1283
1384
  case RUBY_EVENT_THREAD_BEGIN:
1284
- collector->threads.started(&collector->markers);
1385
+ collector->threads.started(self);
1285
1386
  break;
1286
1387
  case RUBY_EVENT_THREAD_END:
1287
- collector->threads.stopped(&collector->markers);
1388
+ collector->threads.stopped(self);
1288
1389
  break;
1289
1390
  }
1290
1391
  }
@@ -1294,36 +1395,57 @@ class TimeCollector : public BaseCollector {
1294
1395
 
1295
1396
  switch (event) {
1296
1397
  case RUBY_INTERNAL_EVENT_GC_START:
1297
- collector->markers.record(Marker::Type::MARKER_GC_START);
1398
+ collector->gc_markers.record(Marker::Type::MARKER_GC_START);
1298
1399
  break;
1299
1400
  case RUBY_INTERNAL_EVENT_GC_END_MARK:
1300
- collector->markers.record(Marker::Type::MARKER_GC_END_MARK);
1401
+ collector->gc_markers.record(Marker::Type::MARKER_GC_END_MARK);
1301
1402
  break;
1302
1403
  case RUBY_INTERNAL_EVENT_GC_END_SWEEP:
1303
- collector->markers.record(Marker::Type::MARKER_GC_END_SWEEP);
1404
+ collector->gc_markers.record(Marker::Type::MARKER_GC_END_SWEEP);
1304
1405
  break;
1305
1406
  case RUBY_INTERNAL_EVENT_GC_ENTER:
1306
- collector->markers.record_gc_entered();
1407
+ collector->gc_markers.record_gc_entered();
1307
1408
  break;
1308
1409
  case RUBY_INTERNAL_EVENT_GC_EXIT:
1309
- collector->markers.record_gc_leave();
1410
+ collector->gc_markers.record_gc_leave();
1310
1411
  break;
1311
1412
  }
1312
1413
  }
1313
1414
 
1314
1415
  static void internal_thread_event_cb(rb_event_flag_t event, const rb_internal_thread_event_data_t *event_data, void *data) {
1315
1416
  TimeCollector *collector = static_cast<TimeCollector *>(data);
1417
+ VALUE thread = Qnil;
1418
+
1419
+ #if HAVE_RB_INTERNAL_THREAD_EVENT_DATA_T_THREAD
1420
+ thread = event_data->thread;
1421
+ #else
1422
+ // We may arrive here when starting a thread with
1423
+ // RUBY_INTERNAL_THREAD_EVENT_READY before the thread is actually set up.
1424
+ if (!ruby_native_thread_p()) return;
1425
+
1426
+ thread = rb_thread_current();
1427
+ #endif
1428
+
1429
+ auto native_tid = get_native_thread_id();
1316
1430
  //cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
1431
+ //fprintf(stderr, "(%i) th %p to %s\n", native_tid, (void *)thread, gvl_event_name(event));
1432
+
1317
1433
 
1318
1434
  switch (event) {
1435
+ case RUBY_INTERNAL_THREAD_EVENT_STARTED:
1436
+ collector->threads.started(thread);
1437
+ break;
1438
+ case RUBY_INTERNAL_THREAD_EVENT_EXITED:
1439
+ collector->threads.stopped(thread);
1440
+ break;
1319
1441
  case RUBY_INTERNAL_THREAD_EVENT_READY:
1320
- collector->threads.ready(&collector->markers);
1442
+ collector->threads.ready(thread);
1321
1443
  break;
1322
1444
  case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
1323
- collector->threads.resumed(&collector->markers);
1445
+ collector->threads.resumed(thread);
1324
1446
  break;
1325
1447
  case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
1326
- collector->threads.suspended(&collector->markers);
1448
+ collector->threads.suspended(thread);
1327
1449
  break;
1328
1450
 
1329
1451
  }
@@ -1351,7 +1473,7 @@ class TimeCollector : public BaseCollector {
1351
1473
  // have at least one thread in our thread list because it's possible
1352
1474
  // that the profile might be such that we don't get any thread switch
1353
1475
  // events and we need at least one
1354
- this->threads.resumed(&this->markers);
1476
+ this->threads.resumed(rb_thread_current());
1355
1477
 
1356
1478
  thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
1357
1479
  rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
@@ -1372,13 +1494,6 @@ class TimeCollector : public BaseCollector {
1372
1494
  rb_remove_event_hook(internal_gc_event_cb);
1373
1495
  rb_remove_event_hook(internal_thread_event_cb);
1374
1496
 
1375
- // capture thread names
1376
- for (auto& thread: this->threads.list) {
1377
- if (thread.running()) {
1378
- thread.capture_name();
1379
- }
1380
- }
1381
-
1382
1497
  frame_list.finalize();
1383
1498
 
1384
1499
  VALUE result = build_collector_result();
@@ -1398,7 +1513,7 @@ class TimeCollector : public BaseCollector {
1398
1513
  VALUE hash = rb_hash_new();
1399
1514
  thread.samples.write_result(hash);
1400
1515
 
1401
- rb_hash_aset(threads, ULL2NUM(thread.native_tid), hash);
1516
+ rb_hash_aset(threads, thread.ruby_thread_id, hash);
1402
1517
  rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
1403
1518
  rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
1404
1519
  if (!thread.stopped_at.zero()) {
@@ -1415,6 +1530,7 @@ class TimeCollector : public BaseCollector {
1415
1530
 
1416
1531
  void mark() {
1417
1532
  frame_list.mark_frames();
1533
+ threads.mark();
1418
1534
 
1419
1535
  //for (int i = 0; i < queued_length; i++) {
1420
1536
  // rb_gc_mark(queued_frames[i]);
@@ -19,7 +19,7 @@ module Vernier
19
19
  Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
20
20
  end
21
21
 
22
- def add_marker(name:, start:, finish:, thread: Thread.current.native_thread_id, phase: Marker::Phase::INTERVAL, data: nil)
22
+ def add_marker(name:, start:, finish:, thread: Thread.current.object_id, phase: Marker::Phase::INTERVAL, data: nil)
23
23
  @markers << [thread,
24
24
  name,
25
25
  start,
@@ -39,7 +39,7 @@ module Vernier
39
39
  start:,
40
40
  finish: current_time,
41
41
  phase: Marker::Phase::INTERVAL,
42
- thread: Thread.current.native_thread_id,
42
+ thread: Thread.current.object_id,
43
43
  data: { :type => 'UserTiming', :entryType => 'measure', :name => name }
44
44
  )
45
45
  end
@@ -99,9 +99,10 @@ module Vernier
99
99
  def data
100
100
  markers_by_thread = profile.markers.group_by { |marker| marker[0] }
101
101
 
102
- thread_data = profile.threads.map do |tid, thread_info|
103
- markers = markers_by_thread[tid] || []
102
+ thread_data = profile.threads.map do |ruby_thread_id, thread_info|
103
+ markers = markers_by_thread[ruby_thread_id] || []
104
104
  Thread.new(
105
+ ruby_thread_id,
105
106
  profile,
106
107
  @categorizer,
107
108
  markers: markers,
@@ -157,11 +158,12 @@ module Vernier
157
158
  class Thread
158
159
  attr_reader :profile
159
160
 
160
- def initialize(profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
161
+ def initialize(ruby_thread_id, profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
162
+ @ruby_thread_id = ruby_thread_id
161
163
  @profile = profile
162
164
  @categorizer = categorizer
163
165
  @tid = tid
164
- @name = name
166
+ @name = pretty_name(name)
165
167
 
166
168
  timestamps ||= [0] * samples.size
167
169
  @samples, @weights, @timestamps = samples, weights, timestamps
@@ -212,7 +214,7 @@ module Vernier
212
214
  def data
213
215
  {
214
216
  name: @name,
215
- isMainThread: (@tid == ::Thread.main.native_thread_id) || (profile.threads.size == 1),
217
+ isMainThread: @ruby_thread_id == ::Thread.main.object_id || (profile.threads.size == 1),
216
218
  processStartupTime: 0, # FIXME
217
219
  processShutdownTime: nil, # FIXME
218
220
  registerTime: (@started_at - 0) / 1_000_000.0,
@@ -383,6 +385,25 @@ module Vernier
383
385
 
384
386
  private
385
387
 
388
+ def pretty_name(name)
389
+ if name.empty?
390
+ begin
391
+ tr = ObjectSpace._id2ref(@ruby_thread_id)
392
+ name = tr.inspect if tr
393
+ rescue RangeError
394
+ # Thread was already GC'd
395
+ end
396
+ end
397
+ return name unless name.start_with?("#<Thread")
398
+ pretty = []
399
+ obj_address = name[/Thread:(0x\w+)/,1]
400
+ best_id = name[/\#<Thread:0x\w+@?\s?(.*)\s+\S+>/,1] || ""
401
+ Gem.path.each { |gem_dir| best_id = best_id.gsub(gem_dir, "...") }
402
+ pretty << best_id unless best_id.empty?
403
+ pretty << "(#{obj_address})"
404
+ pretty.join(' ')
405
+ end
406
+
386
407
  def gc_category
387
408
  @categorizer.get_category("GC")
388
409
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vernier
4
- VERSION = "0.3.1"
4
+ VERSION = "0.5.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vernier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Hawthorn
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-12-05 00:00:00.000000000 Z
11
+ date: 2024-02-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: An experimental profiler
14
14
  email: