vernier 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52f738e495accaaab1af3f33a806a7b20467aa8f2a93f4b7bf8d7300fdaf88ad
4
- data.tar.gz: e6877e7315fb990e8db9482b940a96068770987989fec85a804c75ce79b62022
3
+ metadata.gz: c3190b81748262d9620de74e12f6adca9b2c4126741781f9860076d96c96a123
4
+ data.tar.gz: b54848781f0b17c16074fd630d0aae6b6ea206e47679713ce858e6e3f08525a5
5
5
  SHA512:
6
- metadata.gz: 4dcf1c2ebcbfcfd1b5a456fbc9ffbfd82da1f96622b8dc889d9ece105a9f461768dd688b6e0ee73102db104bff527a9c940f6d74689d520005ea216823444cbe
7
- data.tar.gz: 97e0116ec7de5ee084512520debd37d0262555ca6a09d427b9147c3e08303492c925e8b1f81c9299915aff6322122a82e8737d1ef22823da8689aab2ce63aa6f
6
+ metadata.gz: de91010589471c0b4a7cfddb37bab92262392ef33da2af44930cdab848a6fd290468abe76c147485ba032d89cdac33b631937eea01bd15af8e89b03a5200e69e
7
+ data.tar.gz: 82b40e4d93685ab8c560a995df31421c06ff00f8c27f33d6b37296bed96ca5b37cacda3be60e64464f535dd52d273f80d4885705f591e7246d7b3fb2269c151f
data/README.md CHANGED
@@ -1,33 +1,55 @@
1
1
  # Vernier
2
2
 
3
- Experimental next-generation Ruby sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
3
+ Next-generation Ruby 3.2.1+ sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
4
+
5
+ ## Examples
6
+
7
+ [Livestreamed demo: Pairin' with Aaron (YouTube)](https://www.youtube.com/watch?v=9nvX3OHykGQ#t=27m43)
8
+
9
+ Sidekiq jobs from Mastodon (time, threaded)
10
+ : https://share.firefox.dev/44jZRf3
11
+
12
+ Puma web requests from Mastodon (time, threaded)
13
+ : https://share.firefox.dev/48FOTnF
14
+
15
+ Rails benchmark - lobste.rs (time)
16
+ : https://share.firefox.dev/3Ld89id
17
+
18
+ `require "irb"` (retained memory)
19
+ : https://share.firefox.dev/3DhLsFa
4
20
 
5
21
  ## Installation
6
22
 
23
+ Vernier requires Ruby version 3.2.1 or greater.
24
+
7
25
  ```ruby
8
26
  gem 'vernier'
9
27
  ```
10
28
 
11
29
  ## Usage
12
30
 
13
- ### Retained memory
14
31
 
15
- Record a flamegraph of all **retained** allocations from loading `irb`.
32
+ ### Time
16
33
 
17
34
  ```
18
- ruby -r vernier -e 'Vernier.trace_retained(out: "irb_profile.json") { require "irb" }'
35
+ Vernier.trace(out: "time_profile.json") { some_slow_method }
19
36
  ```
20
37
 
21
- The output can then be viewed in the [Firefox Profiler (demo)](https://share.firefox.dev/3DhLsFa) or the [`profile-viewer` gem](https://github.com/tenderlove/profiler/tree/ruby) (a Ruby-customized version of the firefox profiler.
38
+ The output can then be viewed in the Firefox Profiler (demo) or the [`profile-viewer` gem](https://github.com/tenderlove/profiler/tree/ruby) (a Ruby-customized version of the Firefox Profiler).
22
39
 
23
- ![Screenshot 2023-07-16 at 21-06-19 Ruby_Vernier 1970-01-01 12 00 00 a m UTC Firefox Profiler](https://github.com/jhawthorn/vernier/assets/131752/9ca0b593-70fb-4c8b-aed9-cb33e0e0bc06)
40
+ - **Flame Graph**: Shows proportionally how much time is spent within particular stack frames. Frames are grouped together, which means that x-axis / left-to-right order is not meaningful.
41
+ - **Stack Chart**: Shows the stack at each sample with the x-axis representing time and can be read left-to-right.
24
42
 
25
- ### Time
43
+ ### Retained memory
44
+
45
+ Record a flamegraph of all **retained** allocations from loading `irb`.
26
46
 
27
47
  ```
28
- Vernier.trace(out: "time_profile.json") { some_slow_method }
48
+ ruby -r vernier -e 'Vernier.trace_retained(out: "irb_profile.json") { require "irb" }'
29
49
  ```
30
50
 
51
+ Retained-memory flamegraphs must be interpreted a little differently than a typical profiling flamegraph. In a retained-memory flamegraph, the x-axis represents a proportion of memory in bytes, _not time or samples_. The topmost boxes on the y-axis represent the retained objects, with their stacktrace below; their width represents the percentage of overall retained memory each object occupies.
52
+
31
53
  ## Development
32
54
 
33
55
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -5,4 +5,11 @@ require "mkmf"
5
5
  $CXXFLAGS += " -std=c++14 "
6
6
  $CXXFLAGS += " -ggdb3 -Og "
7
7
 
8
+ have_header("ruby/thread.h")
9
+ have_struct_member("rb_internal_thread_event_data_t", "thread", ["ruby/thread.h"])
10
+
11
+ have_func("rb_profile_thread_frames", "ruby/debug.h")
12
+
13
+ have_func("pthread_setname_np")
14
+
8
15
  create_makefile("vernier/vernier")
@@ -1,3 +1,5 @@
1
+ // vim: expandtab:ts=4:sw=4
2
+
1
3
  #include <iostream>
2
4
  #include <iomanip>
3
5
  #include <vector>
@@ -27,6 +29,9 @@
27
29
  #include "ruby/debug.h"
28
30
  #include "ruby/thread.h"
29
31
 
32
+ #undef assert
33
+ #define assert RUBY_ASSERT_ALWAYS
34
+
30
35
  # define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
31
36
 
32
37
  // Internal TracePoint events we'll monitor during profiling
@@ -53,6 +58,22 @@ static VALUE rb_cVernierResult;
53
58
  static VALUE rb_mVernierMarkerType;
54
59
  static VALUE rb_cVernierCollector;
55
60
 
61
+ static const char *gvl_event_name(rb_event_flag_t event) {
62
+ switch (event) {
63
+ case RUBY_INTERNAL_THREAD_EVENT_STARTED:
64
+ return "started";
65
+ case RUBY_INTERNAL_THREAD_EVENT_READY:
66
+ return "ready";
67
+ case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
68
+ return "resumed";
69
+ case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
70
+ return "suspended";
71
+ case RUBY_INTERNAL_THREAD_EVENT_EXITED:
72
+ return "exited";
73
+ }
74
+ return "no-event";
75
+ }
76
+
56
77
  class TimeStamp {
57
78
  static const uint64_t nanoseconds_per_second = 1000000000;
58
79
  uint64_t value_ns;
@@ -72,17 +93,29 @@ class TimeStamp {
72
93
  return TimeStamp(0);
73
94
  }
74
95
 
75
- static void Sleep(const TimeStamp &time) {
76
- struct timespec ts = time.timespec();
96
+ // SleepUntil a specified timestamp
97
+ // Highly accurate manual sleep time
98
+ static void SleepUntil(const TimeStamp &target_time) {
99
+ if (target_time.zero()) return;
100
+ struct timespec ts = target_time.timespec();
77
101
 
78
102
  int res;
79
103
  do {
80
- res = nanosleep(&ts, &ts);
81
- } while (res && errno == EINTR);
104
+ // do nothing until it's time :)
105
+ sleep(0);
106
+ } while (target_time > TimeStamp::Now());
107
+ }
108
+
109
+ static TimeStamp from_seconds(uint64_t s) {
110
+ return TimeStamp::from_milliseconds(s * 1000);
111
+ }
112
+
113
+ static TimeStamp from_milliseconds(uint64_t ms) {
114
+ return TimeStamp::from_microseconds(ms * 1000);
82
115
  }
83
116
 
84
117
  static TimeStamp from_microseconds(uint64_t us) {
85
- return TimeStamp(us * 1000);
118
+ return TimeStamp::from_nanoseconds(us * 1000);
86
119
  }
87
120
 
88
121
  static TimeStamp from_nanoseconds(uint64_t ns) {
@@ -262,6 +295,10 @@ class SamplerSemaphore {
262
295
  #ifdef __APPLE__
263
296
  dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
264
297
  #else
298
+ // Use sem_timedwait so that we get a crash instead of a deadlock for
299
+ // easier debugging
300
+ auto ts = (TimeStamp::Now() + TimeStamp::from_seconds(5)).timespec();
301
+
265
302
  int ret;
266
303
  do {
267
304
  ret = sem_wait(&sem);
@@ -300,16 +337,15 @@ struct RawSample {
300
337
  }
301
338
 
302
339
  void sample() {
340
+ clear();
341
+
303
342
  if (!ruby_native_thread_p()) {
304
- clear();
305
343
  return;
306
344
  }
307
345
 
308
346
  if (rb_during_gc()) {
309
347
  gc = true;
310
- len = 0;
311
348
  } else {
312
- gc = false;
313
349
  len = rb_profile_frames(0, MAX_LEN, frames, lines);
314
350
  }
315
351
  }
@@ -598,12 +634,13 @@ class Marker {
598
634
  Phase phase;
599
635
  TimeStamp timestamp;
600
636
  TimeStamp finish;
601
- native_thread_id_t thread_id;
637
+ // VALUE ruby_thread_id;
638
+ //native_thread_id_t thread_id;
602
639
  int stack_index = -1;
603
640
 
604
641
  VALUE to_array() {
605
642
  VALUE record[6] = {0};
606
- record[0] = ULL2NUM(thread_id);
643
+ record[0] = Qnil; // FIXME
607
644
  record[1] = INT2NUM(type);
608
645
  record[2] = INT2NUM(phase);
609
646
  record[3] = ULL2NUM(timestamp.nanoseconds());
@@ -621,30 +658,33 @@ class Marker {
621
658
  };
622
659
 
623
660
  class MarkerTable {
624
- TimeStamp last_gc_entry;
625
-
626
661
  public:
627
662
  std::vector<Marker> list;
628
663
  std::mutex mutex;
629
664
 
630
- void record_gc_entered() {
631
- last_gc_entry = TimeStamp::Now();
632
- }
633
-
634
- void record_gc_leave() {
635
- list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), get_native_thread_id(), -1 });
636
- }
637
-
638
665
  void record_interval(Marker::Type type, TimeStamp from, TimeStamp to, int stack_index = -1) {
639
666
  const std::lock_guard<std::mutex> lock(mutex);
640
667
 
641
- list.push_back({ type, Marker::INTERVAL, from, to, get_native_thread_id(), stack_index });
668
+ list.push_back({ type, Marker::INTERVAL, from, to, stack_index });
642
669
  }
643
670
 
644
671
  void record(Marker::Type type, int stack_index = -1) {
645
672
  const std::lock_guard<std::mutex> lock(mutex);
646
673
 
647
- list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), get_native_thread_id(), stack_index });
674
+ list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), stack_index });
675
+ }
676
+ };
677
+
678
+ class GCMarkerTable: public MarkerTable {
679
+ TimeStamp last_gc_entry;
680
+
681
+ public:
682
+ void record_gc_entered() {
683
+ last_gc_entry = TimeStamp::Now();
684
+ }
685
+
686
+ void record_gc_leave() {
687
+ list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), -1 });
648
688
  }
649
689
  };
650
690
 
@@ -727,6 +767,8 @@ class Thread {
727
767
  STOPPED
728
768
  };
729
769
 
770
+ VALUE ruby_thread;
771
+ VALUE ruby_thread_id;
730
772
  pthread_t pthread_id;
731
773
  native_thread_id_t native_tid;
732
774
  State state;
@@ -738,18 +780,33 @@ class Thread {
738
780
  int stack_on_suspend_idx;
739
781
  SampleTranslator translator;
740
782
 
741
- std::string name;
783
+ MarkerTable *markers;
784
+
785
+ std::string name;
742
786
 
743
- Thread(State state) : state(state), stack_on_suspend_idx(-1) {
744
- pthread_id = pthread_self();
787
+ // FIXME: don't use pthread at start
788
+ Thread(State state, pthread_t pthread_id, VALUE ruby_thread) : pthread_id(pthread_id), ruby_thread(ruby_thread), state(state), stack_on_suspend_idx(-1) {
789
+ name = Qnil;
790
+ ruby_thread_id = rb_obj_id(ruby_thread);
791
+ //ruby_thread_id = ULL2NUM(ruby_thread);
745
792
  native_tid = get_native_thread_id();
746
793
  started_at = state_changed_at = TimeStamp::Now();
794
+ name = "";
795
+ markers = new MarkerTable();
796
+
797
+ if (state == State::STARTED) {
798
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
799
+ }
747
800
  }
748
801
 
749
- void set_state(State new_state, MarkerTable *markers) {
802
+ void set_state(State new_state) {
750
803
  if (state == Thread::State::STOPPED) {
751
804
  return;
752
805
  }
806
+ if (new_state == Thread::State::SUSPENDED && state == new_state) {
807
+ // on Ruby 3.2 (only?) we may see duplicate suspended states
808
+ return;
809
+ }
753
810
 
754
811
  TimeStamp from = state_changed_at;
755
812
  auto now = TimeStamp::Now();
@@ -760,10 +817,13 @@ class Thread {
760
817
 
761
818
  switch (new_state) {
762
819
  case State::STARTED:
763
- new_state = State::RUNNING;
820
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
821
+ return; // no mutation of current state
764
822
  break;
765
823
  case State::RUNNING:
766
- assert(state == State::READY);
824
+ assert(state == State::READY || state == State::RUNNING);
825
+ pthread_id = pthread_self();
826
+ native_tid = get_native_thread_id();
767
827
 
768
828
  // If the GVL is immediately ready, and we measure no times
769
829
  // stalled, skip emitting the interval.
@@ -779,23 +839,25 @@ class Thread {
779
839
  // Threads can be preempted, which means they will have been in "Running"
780
840
  // state, and then the VM was like "no I need to stop you from working,
781
841
  // so I'll put you in the 'ready' (or stalled) state"
782
- assert(state == State::SUSPENDED || state == State::RUNNING);
842
+ assert(state == State::STARTED || state == State::SUSPENDED || state == State::RUNNING);
783
843
  if (state == State::SUSPENDED) {
784
844
  markers->record_interval(Marker::Type::MARKER_THREAD_SUSPENDED, from, now, stack_on_suspend_idx);
785
845
  }
786
- else {
846
+ else if (state == State::RUNNING) {
787
847
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
788
848
  }
789
849
  break;
790
850
  case State::SUSPENDED:
791
851
  // We can go from RUNNING or STARTED to SUSPENDED
792
- assert(state == State::RUNNING || state == State::STARTED);
852
+ assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
793
853
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
794
854
  break;
795
855
  case State::STOPPED:
796
856
  // We can go from RUNNING or STARTED to STOPPED
797
857
  assert(state == State::RUNNING || state == State::STARTED);
798
858
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
859
+ markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
860
+
799
861
  stopped_at = now;
800
862
  capture_name();
801
863
 
@@ -811,10 +873,13 @@ class Thread {
811
873
  }
812
874
 
813
875
  void capture_name() {
814
- char buf[128];
815
- int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
816
- if (rc == 0)
817
- name = std::string(buf);
876
+ //char buf[128];
877
+ //int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
878
+ //if (rc == 0)
879
+ // name = std::string(buf);
880
+ }
881
+
882
+ void mark() {
818
883
  }
819
884
  };
820
885
 
@@ -828,40 +893,46 @@ class ThreadTable {
828
893
  ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
829
894
  }
830
895
 
831
- void started(MarkerTable *markers) {
832
- //const std::lock_guard<std::mutex> lock(mutex);
896
+ void mark() {
897
+ for (auto &thread : list) {
898
+ thread.mark();
899
+ }
900
+ }
833
901
 
902
+ void started(VALUE th) {
834
903
  //list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
835
- markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
836
- set_state(Thread::State::STARTED, markers);
904
+ set_state(Thread::State::STARTED, th);
837
905
  }
838
906
 
839
- void ready(MarkerTable *markers) {
840
- set_state(Thread::State::READY, markers);
907
+ void ready(VALUE th) {
908
+ set_state(Thread::State::READY, th);
841
909
  }
842
910
 
843
- void resumed(MarkerTable *markers) {
844
- set_state(Thread::State::RUNNING, markers);
911
+ void resumed(VALUE th) {
912
+ set_state(Thread::State::RUNNING, th);
845
913
  }
846
914
 
847
- void suspended(MarkerTable *markers) {
848
- set_state(Thread::State::SUSPENDED, markers);
915
+ void suspended(VALUE th) {
916
+ set_state(Thread::State::SUSPENDED, th);
849
917
  }
850
918
 
851
- void stopped(MarkerTable *markers) {
852
- markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
853
- set_state(Thread::State::STOPPED, markers);
919
+ void stopped(VALUE th) {
920
+ set_state(Thread::State::STOPPED, th);
854
921
  }
855
922
 
856
923
  private:
857
- void set_state(Thread::State new_state, MarkerTable *markers) {
924
+ void set_state(Thread::State new_state, VALUE th) {
858
925
  const std::lock_guard<std::mutex> lock(mutex);
859
926
 
860
- pthread_t current_thread = pthread_self();
861
927
  //cerr << "set state=" << new_state << " thread=" << gettid() << endl;
862
928
 
929
+ pid_t native_tid = get_native_thread_id();
930
+ pthread_t pthread_id = pthread_self();
931
+
932
+ //fprintf(stderr, "th %p (tid: %i) from %s to %s\n", (void *)th, native_tid, gvl_event_name(state), gvl_event_name(new_state));
933
+
863
934
  for (auto &thread : list) {
864
- if (pthread_equal(current_thread, thread.pthread_id)) {
935
+ if (thread_equal(th, thread.ruby_thread)) {
865
936
  if (new_state == Thread::State::SUSPENDED) {
866
937
 
867
938
  RawSample sample;
@@ -871,14 +942,27 @@ class ThreadTable {
871
942
  //cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
872
943
  }
873
944
 
874
- thread.set_state(new_state, markers);
945
+ thread.set_state(new_state);
946
+
947
+ if (thread.state == Thread::State::RUNNING) {
948
+ thread.pthread_id = pthread_self();
949
+ thread.native_tid = get_native_thread_id();
950
+ } else {
951
+ thread.pthread_id = 0;
952
+ thread.native_tid = 0;
953
+ }
954
+
875
955
 
876
956
  return;
877
957
  }
878
958
  }
879
959
 
880
- pid_t native_tid = get_native_thread_id();
881
- list.emplace_back(new_state);
960
+ //fprintf(stderr, "NEW THREAD: th: %p, state: %i\n", th, new_state);
961
+ list.emplace_back(new_state, pthread_self(), th);
962
+ }
963
+
964
+ bool thread_equal(VALUE a, VALUE b) {
965
+ return a == b;
882
966
  }
883
967
  };
884
968
 
@@ -1091,6 +1175,9 @@ class RetainedCollector : public BaseCollector {
1091
1175
  VALUE weights = rb_ary_new();
1092
1176
  rb_hash_aset(thread_hash, sym("weights"), weights);
1093
1177
 
1178
+ rb_hash_aset(thread_hash, sym("name"), rb_str_new_cstr("retained memory"));
1179
+ rb_hash_aset(thread_hash, sym("started_at"), ULL2NUM(collector->started_at.nanoseconds()));
1180
+
1094
1181
  for (auto& obj: collector->object_list) {
1095
1182
  const auto search = collector->object_frames.find(obj);
1096
1183
  if (search != collector->object_frames.end()) {
@@ -1144,6 +1231,8 @@ class GlobalSignalHandler {
1144
1231
  void record_sample(LiveSample &sample, pthread_t pthread_id) {
1145
1232
  const std::lock_guard<std::mutex> lock(mutex);
1146
1233
 
1234
+ assert(pthread_id);
1235
+
1147
1236
  live_sample = &sample;
1148
1237
  if (pthread_kill(pthread_id, SIGPROF)) {
1149
1238
  rb_bug("pthread_kill failed");
@@ -1180,7 +1269,7 @@ class GlobalSignalHandler {
1180
1269
  LiveSample *GlobalSignalHandler::live_sample;
1181
1270
 
1182
1271
  class TimeCollector : public BaseCollector {
1183
- MarkerTable markers;
1272
+ GCMarkerTable gc_markers;
1184
1273
  ThreadTable threads;
1185
1274
 
1186
1275
  pthread_t sample_thread;
@@ -1209,10 +1298,22 @@ class TimeCollector : public BaseCollector {
1209
1298
  }
1210
1299
 
1211
1300
  VALUE get_markers() {
1212
- VALUE list = rb_ary_new2(this->markers.list.size());
1301
+ VALUE list = rb_ary_new();
1302
+ VALUE main_thread = rb_thread_main();
1303
+ VALUE main_thread_id = rb_obj_id(main_thread);
1304
+
1305
+ for (auto& marker: this->gc_markers.list) {
1306
+ VALUE ary = marker.to_array();
1213
1307
 
1214
- for (auto& marker: this->markers.list) {
1215
- rb_ary_push(list, marker.to_array());
1308
+ RARRAY_ASET(ary, 0, main_thread_id);
1309
+ rb_ary_push(list, ary);
1310
+ }
1311
+ for (auto &thread : threads.list) {
1312
+ for (auto& marker: thread.markers->list) {
1313
+ VALUE ary = marker.to_array();
1314
+ RARRAY_ASET(ary, 0, thread.ruby_thread_id);
1315
+ rb_ary_push(list, ary);
1316
+ }
1216
1317
  }
1217
1318
 
1218
1319
  return list;
@@ -1228,7 +1329,9 @@ class TimeCollector : public BaseCollector {
1228
1329
  threads.mutex.lock();
1229
1330
  for (auto &thread : threads.list) {
1230
1331
  //if (thread.state == Thread::State::RUNNING) {
1231
- if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1332
+ //if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1333
+ if (thread.state == Thread::State::RUNNING) {
1334
+ //fprintf(stderr, "sampling %p on tid:%i\n", thread.ruby_thread, thread.native_tid);
1232
1335
  GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
1233
1336
 
1234
1337
  if (sample.sample.gc) {
@@ -1252,19 +1355,25 @@ class TimeCollector : public BaseCollector {
1252
1355
 
1253
1356
  next_sample_schedule += interval;
1254
1357
 
1358
+ // If sampling falls behind, restart, and check in another interval
1255
1359
  if (next_sample_schedule < sample_complete) {
1256
- //fprintf(stderr, "fell behind by %ius\n", (sample_complete - next_sample_schedule).microseconds());
1257
1360
  next_sample_schedule = sample_complete + interval;
1258
1361
  }
1259
1362
 
1260
- TimeStamp sleep_time = next_sample_schedule - sample_complete;
1261
- TimeStamp::Sleep(sleep_time);
1363
+ TimeStamp::SleepUntil(next_sample_schedule);
1262
1364
  }
1263
1365
 
1264
1366
  thread_stopped.post();
1265
1367
  }
1266
1368
 
1267
1369
  static void *sample_thread_entry(void *arg) {
1370
+ #if HAVE_PTHREAD_SETNAME_NP
1371
+ #ifdef __APPLE__
1372
+ pthread_setname_np("Vernier profiler");
1373
+ #else
1374
+ pthread_setname_np(pthread_self(), "Vernier profiler");
1375
+ #endif
1376
+ #endif
1268
1377
  TimeCollector *collector = static_cast<TimeCollector *>(arg);
1269
1378
  collector->sample_thread_run();
1270
1379
  return NULL;
@@ -1275,10 +1384,10 @@ class TimeCollector : public BaseCollector {
1275
1384
 
1276
1385
  switch (event) {
1277
1386
  case RUBY_EVENT_THREAD_BEGIN:
1278
- collector->threads.started(&collector->markers);
1387
+ collector->threads.started(self);
1279
1388
  break;
1280
1389
  case RUBY_EVENT_THREAD_END:
1281
- collector->threads.stopped(&collector->markers);
1390
+ collector->threads.stopped(self);
1282
1391
  break;
1283
1392
  }
1284
1393
  }
@@ -1288,36 +1397,57 @@ class TimeCollector : public BaseCollector {
1288
1397
 
1289
1398
  switch (event) {
1290
1399
  case RUBY_INTERNAL_EVENT_GC_START:
1291
- collector->markers.record(Marker::Type::MARKER_GC_START);
1400
+ collector->gc_markers.record(Marker::Type::MARKER_GC_START);
1292
1401
  break;
1293
1402
  case RUBY_INTERNAL_EVENT_GC_END_MARK:
1294
- collector->markers.record(Marker::Type::MARKER_GC_END_MARK);
1403
+ collector->gc_markers.record(Marker::Type::MARKER_GC_END_MARK);
1295
1404
  break;
1296
1405
  case RUBY_INTERNAL_EVENT_GC_END_SWEEP:
1297
- collector->markers.record(Marker::Type::MARKER_GC_END_SWEEP);
1406
+ collector->gc_markers.record(Marker::Type::MARKER_GC_END_SWEEP);
1298
1407
  break;
1299
1408
  case RUBY_INTERNAL_EVENT_GC_ENTER:
1300
- collector->markers.record_gc_entered();
1409
+ collector->gc_markers.record_gc_entered();
1301
1410
  break;
1302
1411
  case RUBY_INTERNAL_EVENT_GC_EXIT:
1303
- collector->markers.record_gc_leave();
1412
+ collector->gc_markers.record_gc_leave();
1304
1413
  break;
1305
1414
  }
1306
1415
  }
1307
1416
 
1308
1417
  static void internal_thread_event_cb(rb_event_flag_t event, const rb_internal_thread_event_data_t *event_data, void *data) {
1309
1418
  TimeCollector *collector = static_cast<TimeCollector *>(data);
1419
+ VALUE thread = Qnil;
1420
+
1421
+ #if HAVE_RB_INTERNAL_THREAD_EVENT_DATA_T_THREAD
1422
+ thread = event_data->thread;
1423
+ #else
1424
+ // We may arrive here when starting a thread with
1425
+ // RUBY_INTERNAL_THREAD_EVENT_READY before the thread is actually set up.
1426
+ if (!ruby_native_thread_p()) return;
1427
+
1428
+ thread = rb_thread_current();
1429
+ #endif
1430
+
1431
+ auto native_tid = get_native_thread_id();
1310
1432
  //cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
1433
+ //fprintf(stderr, "(%i) th %p to %s\n", native_tid, (void *)thread, gvl_event_name(event));
1434
+
1311
1435
 
1312
1436
  switch (event) {
1437
+ case RUBY_INTERNAL_THREAD_EVENT_STARTED:
1438
+ collector->threads.started(thread);
1439
+ break;
1440
+ case RUBY_INTERNAL_THREAD_EVENT_EXITED:
1441
+ collector->threads.stopped(thread);
1442
+ break;
1313
1443
  case RUBY_INTERNAL_THREAD_EVENT_READY:
1314
- collector->threads.ready(&collector->markers);
1444
+ collector->threads.ready(thread);
1315
1445
  break;
1316
1446
  case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
1317
- collector->threads.resumed(&collector->markers);
1447
+ collector->threads.resumed(thread);
1318
1448
  break;
1319
1449
  case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
1320
- collector->threads.suspended(&collector->markers);
1450
+ collector->threads.suspended(thread);
1321
1451
  break;
1322
1452
 
1323
1453
  }
@@ -1345,7 +1475,7 @@ class TimeCollector : public BaseCollector {
1345
1475
  // have at least one thread in our thread list because it's possible
1346
1476
  // that the profile might be such that we don't get any thread switch
1347
1477
  // events and we need at least one
1348
- this->threads.resumed(&this->markers);
1478
+ this->threads.resumed(rb_thread_current());
1349
1479
 
1350
1480
  thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
1351
1481
  rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
@@ -1392,7 +1522,7 @@ class TimeCollector : public BaseCollector {
1392
1522
  VALUE hash = rb_hash_new();
1393
1523
  thread.samples.write_result(hash);
1394
1524
 
1395
- rb_hash_aset(threads, ULL2NUM(thread.native_tid), hash);
1525
+ rb_hash_aset(threads, thread.ruby_thread_id, hash);
1396
1526
  rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
1397
1527
  rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
1398
1528
  if (!thread.stopped_at.zero()) {
@@ -1409,6 +1539,7 @@ class TimeCollector : public BaseCollector {
1409
1539
 
1410
1540
  void mark() {
1411
1541
  frame_list.mark_frames();
1542
+ threads.mark();
1412
1543
 
1413
1544
  //for (int i = 0; i < queued_length; i++) {
1414
1545
  // rb_gc_mark(queued_frames[i]);
@@ -19,7 +19,7 @@ module Vernier
19
19
  Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
20
20
  end
21
21
 
22
- def add_marker(name:, start:, finish:, thread: Thread.current.native_thread_id, phase: Marker::Phase::INTERVAL, data: nil)
22
+ def add_marker(name:, start:, finish:, thread: Thread.current.object_id, phase: Marker::Phase::INTERVAL, data: nil)
23
23
  @markers << [thread,
24
24
  name,
25
25
  start,
@@ -39,7 +39,7 @@ module Vernier
39
39
  start:,
40
40
  finish: current_time,
41
41
  phase: Marker::Phase::INTERVAL,
42
- thread: Thread.current.native_thread_id,
42
+ thread: Thread.current.object_id,
43
43
  data: { :type => 'UserTiming', :entryType => 'measure', :name => name }
44
44
  )
45
45
  end
@@ -99,9 +99,10 @@ module Vernier
99
99
  def data
100
100
  markers_by_thread = profile.markers.group_by { |marker| marker[0] }
101
101
 
102
- thread_data = profile.threads.map do |tid, thread_info|
103
- markers = markers_by_thread[tid] || []
102
+ thread_data = profile.threads.map do |ruby_thread_id, thread_info|
103
+ markers = markers_by_thread[ruby_thread_id] || []
104
104
  Thread.new(
105
+ ruby_thread_id,
105
106
  profile,
106
107
  @categorizer,
107
108
  markers: markers,
@@ -157,14 +158,16 @@ module Vernier
157
158
  class Thread
158
159
  attr_reader :profile
159
160
 
160
- def initialize(profile, categorizer, name:, tid:, samples:, weights:, timestamps:, sample_categories:, markers:, started_at:, stopped_at: nil)
161
+ def initialize(ruby_thread_id, profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
162
+ @ruby_thread_id = ruby_thread_id
161
163
  @profile = profile
162
164
  @categorizer = categorizer
163
165
  @tid = tid
164
166
  @name = name
165
167
 
168
+ timestamps ||= [0] * samples.size
166
169
  @samples, @weights, @timestamps = samples, weights, timestamps
167
- @sample_categories = sample_categories
170
+ @sample_categories = sample_categories || ([0] * samples.size)
168
171
  @markers = markers
169
172
 
170
173
  @started_at, @stopped_at = started_at, stopped_at
@@ -211,7 +214,7 @@ module Vernier
211
214
  def data
212
215
  {
213
216
  name: @name,
214
- isMainThread: @tid == ::Thread.main.native_thread_id,
217
+ isMainThread: @ruby_thread_id == ::Thread.main.object_id || (profile.threads.size == 1),
215
218
  processStartupTime: 0, # FIXME
216
219
  processShutdownTime: nil, # FIXME
217
220
  registerTime: (@started_at - 0) / 1_000_000.0,
@@ -237,8 +240,6 @@ module Vernier
237
240
  end
238
241
 
239
242
  def markers_table
240
- size = @markers.size
241
-
242
243
  string_indexes = []
243
244
  start_times = []
244
245
  end_times = []
@@ -292,7 +293,6 @@ module Vernier
292
293
  times = (0...size).to_a
293
294
  end
294
295
 
295
- raise unless samples.size == size
296
296
  raise unless weights.size == size
297
297
  raise unless times.size == size
298
298
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vernier
4
- VERSION = "0.3.0"
4
+ VERSION = "0.4.0"
5
5
  end
data/lib/vernier.rb CHANGED
@@ -19,11 +19,11 @@ module Vernier
19
19
  yield collector
20
20
  ensure
21
21
  result = collector.stop
22
+ if out
23
+ File.write(out, Output::Firefox.new(result).output)
24
+ end
22
25
  end
23
26
 
24
- if out
25
- File.write(out, Output::Firefox.new(result).output)
26
- end
27
27
  result
28
28
  end
29
29
 
data/vernier.gemspec CHANGED
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
12
12
  spec.description = spec.summary
13
13
  spec.homepage = "https://github.com/jhawthorn/vernier"
14
14
  spec.license = "MIT"
15
- spec.required_ruby_version = ">= 3.2.0"
15
+ spec.required_ruby_version = ">= 3.2.1"
16
16
 
17
17
  spec.metadata["homepage_uri"] = spec.homepage
18
18
  spec.metadata["source_code_uri"] = spec.homepage
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vernier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Hawthorn
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-29 00:00:00.000000000 Z
11
+ date: 2024-01-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: An experimental profiler
14
14
  email:
@@ -60,7 +60,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
60
60
  requirements:
61
61
  - - ">="
62
62
  - !ruby/object:Gem::Version
63
- version: 3.2.0
63
+ version: 3.2.1
64
64
  required_rubygems_version: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - ">="