vernier 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52f738e495accaaab1af3f33a806a7b20467aa8f2a93f4b7bf8d7300fdaf88ad
4
- data.tar.gz: e6877e7315fb990e8db9482b940a96068770987989fec85a804c75ce79b62022
3
+ metadata.gz: c3190b81748262d9620de74e12f6adca9b2c4126741781f9860076d96c96a123
4
+ data.tar.gz: b54848781f0b17c16074fd630d0aae6b6ea206e47679713ce858e6e3f08525a5
5
5
  SHA512:
6
- metadata.gz: 4dcf1c2ebcbfcfd1b5a456fbc9ffbfd82da1f96622b8dc889d9ece105a9f461768dd688b6e0ee73102db104bff527a9c940f6d74689d520005ea216823444cbe
7
- data.tar.gz: 97e0116ec7de5ee084512520debd37d0262555ca6a09d427b9147c3e08303492c925e8b1f81c9299915aff6322122a82e8737d1ef22823da8689aab2ce63aa6f
6
+ metadata.gz: de91010589471c0b4a7cfddb37bab92262392ef33da2af44930cdab848a6fd290468abe76c147485ba032d89cdac33b631937eea01bd15af8e89b03a5200e69e
7
+ data.tar.gz: 82b40e4d93685ab8c560a995df31421c06ff00f8c27f33d6b37296bed96ca5b37cacda3be60e64464f535dd52d273f80d4885705f591e7246d7b3fb2269c151f
data/README.md CHANGED
@@ -1,33 +1,55 @@
1
1
  # Vernier
2
2
 
3
- Experimental next-generation Ruby sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
3
+ Next-generation Ruby 3.2.1+ sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
4
+
5
+ ## Examples
6
+
7
+ [Livestreamed demo: Pairin' with Aaron (YouTube)](https://www.youtube.com/watch?v=9nvX3OHykGQ#t=27m43)
8
+
9
+ Sidekiq jobs from Mastodon (time, threaded)
10
+ : https://share.firefox.dev/44jZRf3
11
+
12
+ Puma web requests from Mastodon (time, threaded)
13
+ : https://share.firefox.dev/48FOTnF
14
+
15
+ Rails benchmark - lobste.rs (time)
16
+ : https://share.firefox.dev/3Ld89id
17
+
18
+ `require "irb"` (retained memory)
19
+ : https://share.firefox.dev/3DhLsFa
4
20
 
5
21
  ## Installation
6
22
 
23
+ Vernier requires Ruby version 3.2.1 or greater.
24
+
7
25
  ```ruby
8
26
  gem 'vernier'
9
27
  ```
10
28
 
11
29
  ## Usage
12
30
 
13
- ### Retained memory
14
31
 
15
- Record a flamegraph of all **retained** allocations from loading `irb`.
32
+ ### Time
16
33
 
17
34
  ```
18
- ruby -r vernier -e 'Vernier.trace_retained(out: "irb_profile.json") { require "irb" }'
35
+ Vernier.trace(out: "time_profile.json") { some_slow_method }
19
36
  ```
20
37
 
21
- The output can then be viewed in the [Firefox Profiler (demo)](https://share.firefox.dev/3DhLsFa) or the [`profile-viewer` gem](https://github.com/tenderlove/profiler/tree/ruby) (a Ruby-customized version of the firefox profiler.
38
+ The output can then be viewed in the Firefox Profiler (demo) or the [`profile-viewer` gem](https://github.com/tenderlove/profiler/tree/ruby) (a Ruby-customized version of the Firefox Profiler).
22
39
 
23
- ![Screenshot 2023-07-16 at 21-06-19 Ruby_Vernier 1970-01-01 12 00 00 a m UTC Firefox Profiler](https://github.com/jhawthorn/vernier/assets/131752/9ca0b593-70fb-4c8b-aed9-cb33e0e0bc06)
40
+ - **Flame Graph**: Shows proportionally how much time is spent within particular stack frames. Frames are grouped together, which means that x-axis / left-to-right order is not meaningful.
41
+ - **Stack Chart**: Shows the stack at each sample with the x-axis representing time and can be read left-to-right.
24
42
 
25
- ### Time
43
+ ### Retained memory
44
+
45
+ Record a flamegraph of all **retained** allocations from loading `irb`.
26
46
 
27
47
  ```
28
- Vernier.trace(out: "time_profile.json") { some_slow_method }
48
+ ruby -r vernier -e 'Vernier.trace_retained(out: "irb_profile.json") { require "irb" }'
29
49
  ```
30
50
 
51
+ Retained-memory flamegraphs must be interpreted a little differently than a typical profiling flamegraph. In a retained-memory flamegraph, the x-axis represents a proportion of memory in bytes, _not time or samples_. The topmost boxes on the y-axis represent the retained objects, with their stacktrace below; their width represents the percentage of overall retained memory each object occupies.
52
+
31
53
  ## Development
32
54
 
33
55
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -5,4 +5,11 @@ require "mkmf"
5
5
  $CXXFLAGS += " -std=c++14 "
6
6
  $CXXFLAGS += " -ggdb3 -Og "
7
7
 
8
+ have_header("ruby/thread.h")
9
+ have_struct_member("rb_internal_thread_event_data_t", "thread", ["ruby/thread.h"])
10
+
11
+ have_func("rb_profile_thread_frames", "ruby/debug.h")
12
+
13
+ have_func("pthread_setname_np")
14
+
8
15
  create_makefile("vernier/vernier")
@@ -1,3 +1,5 @@
1
+ // vim: expandtab:ts=4:sw=4
2
+
1
3
  #include <iostream>
2
4
  #include <iomanip>
3
5
  #include <vector>
@@ -27,6 +29,9 @@
27
29
  #include "ruby/debug.h"
28
30
  #include "ruby/thread.h"
29
31
 
32
+ #undef assert
33
+ #define assert RUBY_ASSERT_ALWAYS
34
+
30
35
  # define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
31
36
 
32
37
  // Internal TracePoint events we'll monitor during profiling
@@ -53,6 +58,22 @@ static VALUE rb_cVernierResult;
53
58
  static VALUE rb_mVernierMarkerType;
54
59
  static VALUE rb_cVernierCollector;
55
60
 
61
+ static const char *gvl_event_name(rb_event_flag_t event) {
62
+ switch (event) {
63
+ case RUBY_INTERNAL_THREAD_EVENT_STARTED:
64
+ return "started";
65
+ case RUBY_INTERNAL_THREAD_EVENT_READY:
66
+ return "ready";
67
+ case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
68
+ return "resumed";
69
+ case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
70
+ return "suspended";
71
+ case RUBY_INTERNAL_THREAD_EVENT_EXITED:
72
+ return "exited";
73
+ }
74
+ return "no-event";
75
+ }
76
+
56
77
  class TimeStamp {
57
78
  static const uint64_t nanoseconds_per_second = 1000000000;
58
79
  uint64_t value_ns;
@@ -72,17 +93,29 @@ class TimeStamp {
72
93
  return TimeStamp(0);
73
94
  }
74
95
 
75
- static void Sleep(const TimeStamp &time) {
76
- struct timespec ts = time.timespec();
96
+ // SleepUntil a specified timestamp
97
+ // Highly accurate manual sleep time
98
+ static void SleepUntil(const TimeStamp &target_time) {
99
+ if (target_time.zero()) return;
100
+ struct timespec ts = target_time.timespec();
77
101
 
78
102
  int res;
79
103
  do {
80
- res = nanosleep(&ts, &ts);
81
- } while (res && errno == EINTR);
104
+ // do nothing until it's time :)
105
+ sleep(0);
106
+ } while (target_time > TimeStamp::Now());
107
+ }
108
+
109
+ static TimeStamp from_seconds(uint64_t s) {
110
+ return TimeStamp::from_milliseconds(s * 1000);
111
+ }
112
+
113
+ static TimeStamp from_milliseconds(uint64_t ms) {
114
+ return TimeStamp::from_microseconds(ms * 1000);
82
115
  }
83
116
 
84
117
  static TimeStamp from_microseconds(uint64_t us) {
85
- return TimeStamp(us * 1000);
118
+ return TimeStamp::from_nanoseconds(us * 1000);
86
119
  }
87
120
 
88
121
  static TimeStamp from_nanoseconds(uint64_t ns) {
@@ -262,6 +295,10 @@ class SamplerSemaphore {
262
295
  #ifdef __APPLE__
263
296
  dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
264
297
  #else
298
+ // Use sem_timedwait so that we get a crash instead of a deadlock for
299
+ // easier debugging
300
+ auto ts = (TimeStamp::Now() + TimeStamp::from_seconds(5)).timespec();
301
+
265
302
  int ret;
266
303
  do {
267
304
  ret = sem_wait(&sem);
@@ -300,16 +337,15 @@ struct RawSample {
300
337
  }
301
338
 
302
339
  void sample() {
340
+ clear();
341
+
303
342
  if (!ruby_native_thread_p()) {
304
- clear();
305
343
  return;
306
344
  }
307
345
 
308
346
  if (rb_during_gc()) {
309
347
  gc = true;
310
- len = 0;
311
348
  } else {
312
- gc = false;
313
349
  len = rb_profile_frames(0, MAX_LEN, frames, lines);
314
350
  }
315
351
  }
@@ -598,12 +634,13 @@ class Marker {
598
634
  Phase phase;
599
635
  TimeStamp timestamp;
600
636
  TimeStamp finish;
601
- native_thread_id_t thread_id;
637
+ // VALUE ruby_thread_id;
638
+ //native_thread_id_t thread_id;
602
639
  int stack_index = -1;
603
640
 
604
641
  VALUE to_array() {
605
642
  VALUE record[6] = {0};
606
- record[0] = ULL2NUM(thread_id);
643
+ record[0] = Qnil; // FIXME
607
644
  record[1] = INT2NUM(type);
608
645
  record[2] = INT2NUM(phase);
609
646
  record[3] = ULL2NUM(timestamp.nanoseconds());
@@ -621,30 +658,33 @@ class Marker {
621
658
  };
622
659
 
623
660
  class MarkerTable {
624
- TimeStamp last_gc_entry;
625
-
626
661
  public:
627
662
  std::vector<Marker> list;
628
663
  std::mutex mutex;
629
664
 
630
- void record_gc_entered() {
631
- last_gc_entry = TimeStamp::Now();
632
- }
633
-
634
- void record_gc_leave() {
635
- list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), get_native_thread_id(), -1 });
636
- }
637
-
638
665
  void record_interval(Marker::Type type, TimeStamp from, TimeStamp to, int stack_index = -1) {
639
666
  const std::lock_guard<std::mutex> lock(mutex);
640
667
 
641
- list.push_back({ type, Marker::INTERVAL, from, to, get_native_thread_id(), stack_index });
668
+ list.push_back({ type, Marker::INTERVAL, from, to, stack_index });
642
669
  }
643
670
 
644
671
  void record(Marker::Type type, int stack_index = -1) {
645
672
  const std::lock_guard<std::mutex> lock(mutex);
646
673
 
647
- list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), get_native_thread_id(), stack_index });
674
+ list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), stack_index });
675
+ }
676
+ };
677
+
678
+ class GCMarkerTable: public MarkerTable {
679
+ TimeStamp last_gc_entry;
680
+
681
+ public:
682
+ void record_gc_entered() {
683
+ last_gc_entry = TimeStamp::Now();
684
+ }
685
+
686
+ void record_gc_leave() {
687
+ list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), -1 });
648
688
  }
649
689
  };
650
690
 
@@ -727,6 +767,8 @@ class Thread {
727
767
  STOPPED
728
768
  };
729
769
 
770
+ VALUE ruby_thread;
771
+ VALUE ruby_thread_id;
730
772
  pthread_t pthread_id;
731
773
  native_thread_id_t native_tid;
732
774
  State state;
@@ -738,18 +780,33 @@ class Thread {
738
780
  int stack_on_suspend_idx;
739
781
  SampleTranslator translator;
740
782
 
741
- std::string name;
783
+ MarkerTable *markers;
784
+
785
+ std::string name;
742
786
 
743
- Thread(State state) : state(state), stack_on_suspend_idx(-1) {
744
- pthread_id = pthread_self();
787
+ // FIXME: don't use pthread at start
788
+ Thread(State state, pthread_t pthread_id, VALUE ruby_thread) : pthread_id(pthread_id), ruby_thread(ruby_thread), state(state), stack_on_suspend_idx(-1) {
789
+ name = Qnil;
790
+ ruby_thread_id = rb_obj_id(ruby_thread);
791
+ //ruby_thread_id = ULL2NUM(ruby_thread);
745
792
  native_tid = get_native_thread_id();
746
793
  started_at = state_changed_at = TimeStamp::Now();
794
+ name = "";
795
+ markers = new MarkerTable();
796
+
797
+ if (state == State::STARTED) {
798
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
799
+ }
747
800
  }
748
801
 
749
- void set_state(State new_state, MarkerTable *markers) {
802
+ void set_state(State new_state) {
750
803
  if (state == Thread::State::STOPPED) {
751
804
  return;
752
805
  }
806
+ if (new_state == Thread::State::SUSPENDED && state == new_state) {
807
+ // on Ruby 3.2 (only?) we may see duplicate suspended states
808
+ return;
809
+ }
753
810
 
754
811
  TimeStamp from = state_changed_at;
755
812
  auto now = TimeStamp::Now();
@@ -760,10 +817,13 @@ class Thread {
760
817
 
761
818
  switch (new_state) {
762
819
  case State::STARTED:
763
- new_state = State::RUNNING;
820
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
821
+ return; // no mutation of current state
764
822
  break;
765
823
  case State::RUNNING:
766
- assert(state == State::READY);
824
+ assert(state == State::READY || state == State::RUNNING);
825
+ pthread_id = pthread_self();
826
+ native_tid = get_native_thread_id();
767
827
 
768
828
  // If the GVL is immediately ready, and we measure no times
769
829
  // stalled, skip emitting the interval.
@@ -779,23 +839,25 @@ class Thread {
779
839
  // Threads can be preempted, which means they will have been in "Running"
780
840
  // state, and then the VM was like "no I need to stop you from working,
781
841
  // so I'll put you in the 'ready' (or stalled) state"
782
- assert(state == State::SUSPENDED || state == State::RUNNING);
842
+ assert(state == State::STARTED || state == State::SUSPENDED || state == State::RUNNING);
783
843
  if (state == State::SUSPENDED) {
784
844
  markers->record_interval(Marker::Type::MARKER_THREAD_SUSPENDED, from, now, stack_on_suspend_idx);
785
845
  }
786
- else {
846
+ else if (state == State::RUNNING) {
787
847
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
788
848
  }
789
849
  break;
790
850
  case State::SUSPENDED:
791
851
  // We can go from RUNNING or STARTED to SUSPENDED
792
- assert(state == State::RUNNING || state == State::STARTED);
852
+ assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
793
853
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
794
854
  break;
795
855
  case State::STOPPED:
796
856
  // We can go from RUNNING or STARTED to STOPPED
797
857
  assert(state == State::RUNNING || state == State::STARTED);
798
858
  markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
859
+ markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
860
+
799
861
  stopped_at = now;
800
862
  capture_name();
801
863
 
@@ -811,10 +873,13 @@ class Thread {
811
873
  }
812
874
 
813
875
  void capture_name() {
814
- char buf[128];
815
- int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
816
- if (rc == 0)
817
- name = std::string(buf);
876
+ //char buf[128];
877
+ //int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
878
+ //if (rc == 0)
879
+ // name = std::string(buf);
880
+ }
881
+
882
+ void mark() {
818
883
  }
819
884
  };
820
885
 
@@ -828,40 +893,46 @@ class ThreadTable {
828
893
  ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
829
894
  }
830
895
 
831
- void started(MarkerTable *markers) {
832
- //const std::lock_guard<std::mutex> lock(mutex);
896
+ void mark() {
897
+ for (auto &thread : list) {
898
+ thread.mark();
899
+ }
900
+ }
833
901
 
902
+ void started(VALUE th) {
834
903
  //list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
835
- markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
836
- set_state(Thread::State::STARTED, markers);
904
+ set_state(Thread::State::STARTED, th);
837
905
  }
838
906
 
839
- void ready(MarkerTable *markers) {
840
- set_state(Thread::State::READY, markers);
907
+ void ready(VALUE th) {
908
+ set_state(Thread::State::READY, th);
841
909
  }
842
910
 
843
- void resumed(MarkerTable *markers) {
844
- set_state(Thread::State::RUNNING, markers);
911
+ void resumed(VALUE th) {
912
+ set_state(Thread::State::RUNNING, th);
845
913
  }
846
914
 
847
- void suspended(MarkerTable *markers) {
848
- set_state(Thread::State::SUSPENDED, markers);
915
+ void suspended(VALUE th) {
916
+ set_state(Thread::State::SUSPENDED, th);
849
917
  }
850
918
 
851
- void stopped(MarkerTable *markers) {
852
- markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
853
- set_state(Thread::State::STOPPED, markers);
919
+ void stopped(VALUE th) {
920
+ set_state(Thread::State::STOPPED, th);
854
921
  }
855
922
 
856
923
  private:
857
- void set_state(Thread::State new_state, MarkerTable *markers) {
924
+ void set_state(Thread::State new_state, VALUE th) {
858
925
  const std::lock_guard<std::mutex> lock(mutex);
859
926
 
860
- pthread_t current_thread = pthread_self();
861
927
  //cerr << "set state=" << new_state << " thread=" << gettid() << endl;
862
928
 
929
+ pid_t native_tid = get_native_thread_id();
930
+ pthread_t pthread_id = pthread_self();
931
+
932
+ //fprintf(stderr, "th %p (tid: %i) from %s to %s\n", (void *)th, native_tid, gvl_event_name(state), gvl_event_name(new_state));
933
+
863
934
  for (auto &thread : list) {
864
- if (pthread_equal(current_thread, thread.pthread_id)) {
935
+ if (thread_equal(th, thread.ruby_thread)) {
865
936
  if (new_state == Thread::State::SUSPENDED) {
866
937
 
867
938
  RawSample sample;
@@ -871,14 +942,27 @@ class ThreadTable {
871
942
  //cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
872
943
  }
873
944
 
874
- thread.set_state(new_state, markers);
945
+ thread.set_state(new_state);
946
+
947
+ if (thread.state == Thread::State::RUNNING) {
948
+ thread.pthread_id = pthread_self();
949
+ thread.native_tid = get_native_thread_id();
950
+ } else {
951
+ thread.pthread_id = 0;
952
+ thread.native_tid = 0;
953
+ }
954
+
875
955
 
876
956
  return;
877
957
  }
878
958
  }
879
959
 
880
- pid_t native_tid = get_native_thread_id();
881
- list.emplace_back(new_state);
960
+ //fprintf(stderr, "NEW THREAD: th: %p, state: %i\n", th, new_state);
961
+ list.emplace_back(new_state, pthread_self(), th);
962
+ }
963
+
964
+ bool thread_equal(VALUE a, VALUE b) {
965
+ return a == b;
882
966
  }
883
967
  };
884
968
 
@@ -1091,6 +1175,9 @@ class RetainedCollector : public BaseCollector {
1091
1175
  VALUE weights = rb_ary_new();
1092
1176
  rb_hash_aset(thread_hash, sym("weights"), weights);
1093
1177
 
1178
+ rb_hash_aset(thread_hash, sym("name"), rb_str_new_cstr("retained memory"));
1179
+ rb_hash_aset(thread_hash, sym("started_at"), ULL2NUM(collector->started_at.nanoseconds()));
1180
+
1094
1181
  for (auto& obj: collector->object_list) {
1095
1182
  const auto search = collector->object_frames.find(obj);
1096
1183
  if (search != collector->object_frames.end()) {
@@ -1144,6 +1231,8 @@ class GlobalSignalHandler {
1144
1231
  void record_sample(LiveSample &sample, pthread_t pthread_id) {
1145
1232
  const std::lock_guard<std::mutex> lock(mutex);
1146
1233
 
1234
+ assert(pthread_id);
1235
+
1147
1236
  live_sample = &sample;
1148
1237
  if (pthread_kill(pthread_id, SIGPROF)) {
1149
1238
  rb_bug("pthread_kill failed");
@@ -1180,7 +1269,7 @@ class GlobalSignalHandler {
1180
1269
  LiveSample *GlobalSignalHandler::live_sample;
1181
1270
 
1182
1271
  class TimeCollector : public BaseCollector {
1183
- MarkerTable markers;
1272
+ GCMarkerTable gc_markers;
1184
1273
  ThreadTable threads;
1185
1274
 
1186
1275
  pthread_t sample_thread;
@@ -1209,10 +1298,22 @@ class TimeCollector : public BaseCollector {
1209
1298
  }
1210
1299
 
1211
1300
  VALUE get_markers() {
1212
- VALUE list = rb_ary_new2(this->markers.list.size());
1301
+ VALUE list = rb_ary_new();
1302
+ VALUE main_thread = rb_thread_main();
1303
+ VALUE main_thread_id = rb_obj_id(main_thread);
1304
+
1305
+ for (auto& marker: this->gc_markers.list) {
1306
+ VALUE ary = marker.to_array();
1213
1307
 
1214
- for (auto& marker: this->markers.list) {
1215
- rb_ary_push(list, marker.to_array());
1308
+ RARRAY_ASET(ary, 0, main_thread_id);
1309
+ rb_ary_push(list, ary);
1310
+ }
1311
+ for (auto &thread : threads.list) {
1312
+ for (auto& marker: thread.markers->list) {
1313
+ VALUE ary = marker.to_array();
1314
+ RARRAY_ASET(ary, 0, thread.ruby_thread_id);
1315
+ rb_ary_push(list, ary);
1316
+ }
1216
1317
  }
1217
1318
 
1218
1319
  return list;
@@ -1228,7 +1329,9 @@ class TimeCollector : public BaseCollector {
1228
1329
  threads.mutex.lock();
1229
1330
  for (auto &thread : threads.list) {
1230
1331
  //if (thread.state == Thread::State::RUNNING) {
1231
- if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1332
+ //if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1333
+ if (thread.state == Thread::State::RUNNING) {
1334
+ //fprintf(stderr, "sampling %p on tid:%i\n", thread.ruby_thread, thread.native_tid);
1232
1335
  GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
1233
1336
 
1234
1337
  if (sample.sample.gc) {
@@ -1252,19 +1355,25 @@ class TimeCollector : public BaseCollector {
1252
1355
 
1253
1356
  next_sample_schedule += interval;
1254
1357
 
1358
+ // If sampling falls behind, restart, and check in another interval
1255
1359
  if (next_sample_schedule < sample_complete) {
1256
- //fprintf(stderr, "fell behind by %ius\n", (sample_complete - next_sample_schedule).microseconds());
1257
1360
  next_sample_schedule = sample_complete + interval;
1258
1361
  }
1259
1362
 
1260
- TimeStamp sleep_time = next_sample_schedule - sample_complete;
1261
- TimeStamp::Sleep(sleep_time);
1363
+ TimeStamp::SleepUntil(next_sample_schedule);
1262
1364
  }
1263
1365
 
1264
1366
  thread_stopped.post();
1265
1367
  }
1266
1368
 
1267
1369
  static void *sample_thread_entry(void *arg) {
1370
+ #if HAVE_PTHREAD_SETNAME_NP
1371
+ #ifdef __APPLE__
1372
+ pthread_setname_np("Vernier profiler");
1373
+ #else
1374
+ pthread_setname_np(pthread_self(), "Vernier profiler");
1375
+ #endif
1376
+ #endif
1268
1377
  TimeCollector *collector = static_cast<TimeCollector *>(arg);
1269
1378
  collector->sample_thread_run();
1270
1379
  return NULL;
@@ -1275,10 +1384,10 @@ class TimeCollector : public BaseCollector {
1275
1384
 
1276
1385
  switch (event) {
1277
1386
  case RUBY_EVENT_THREAD_BEGIN:
1278
- collector->threads.started(&collector->markers);
1387
+ collector->threads.started(self);
1279
1388
  break;
1280
1389
  case RUBY_EVENT_THREAD_END:
1281
- collector->threads.stopped(&collector->markers);
1390
+ collector->threads.stopped(self);
1282
1391
  break;
1283
1392
  }
1284
1393
  }
@@ -1288,36 +1397,57 @@ class TimeCollector : public BaseCollector {
1288
1397
 
1289
1398
  switch (event) {
1290
1399
  case RUBY_INTERNAL_EVENT_GC_START:
1291
- collector->markers.record(Marker::Type::MARKER_GC_START);
1400
+ collector->gc_markers.record(Marker::Type::MARKER_GC_START);
1292
1401
  break;
1293
1402
  case RUBY_INTERNAL_EVENT_GC_END_MARK:
1294
- collector->markers.record(Marker::Type::MARKER_GC_END_MARK);
1403
+ collector->gc_markers.record(Marker::Type::MARKER_GC_END_MARK);
1295
1404
  break;
1296
1405
  case RUBY_INTERNAL_EVENT_GC_END_SWEEP:
1297
- collector->markers.record(Marker::Type::MARKER_GC_END_SWEEP);
1406
+ collector->gc_markers.record(Marker::Type::MARKER_GC_END_SWEEP);
1298
1407
  break;
1299
1408
  case RUBY_INTERNAL_EVENT_GC_ENTER:
1300
- collector->markers.record_gc_entered();
1409
+ collector->gc_markers.record_gc_entered();
1301
1410
  break;
1302
1411
  case RUBY_INTERNAL_EVENT_GC_EXIT:
1303
- collector->markers.record_gc_leave();
1412
+ collector->gc_markers.record_gc_leave();
1304
1413
  break;
1305
1414
  }
1306
1415
  }
1307
1416
 
1308
1417
  static void internal_thread_event_cb(rb_event_flag_t event, const rb_internal_thread_event_data_t *event_data, void *data) {
1309
1418
  TimeCollector *collector = static_cast<TimeCollector *>(data);
1419
+ VALUE thread = Qnil;
1420
+
1421
+ #if HAVE_RB_INTERNAL_THREAD_EVENT_DATA_T_THREAD
1422
+ thread = event_data->thread;
1423
+ #else
1424
+ // We may arrive here when starting a thread with
1425
+ // RUBY_INTERNAL_THREAD_EVENT_READY before the thread is actually set up.
1426
+ if (!ruby_native_thread_p()) return;
1427
+
1428
+ thread = rb_thread_current();
1429
+ #endif
1430
+
1431
+ auto native_tid = get_native_thread_id();
1310
1432
  //cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
1433
+ //fprintf(stderr, "(%i) th %p to %s\n", native_tid, (void *)thread, gvl_event_name(event));
1434
+
1311
1435
 
1312
1436
  switch (event) {
1437
+ case RUBY_INTERNAL_THREAD_EVENT_STARTED:
1438
+ collector->threads.started(thread);
1439
+ break;
1440
+ case RUBY_INTERNAL_THREAD_EVENT_EXITED:
1441
+ collector->threads.stopped(thread);
1442
+ break;
1313
1443
  case RUBY_INTERNAL_THREAD_EVENT_READY:
1314
- collector->threads.ready(&collector->markers);
1444
+ collector->threads.ready(thread);
1315
1445
  break;
1316
1446
  case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
1317
- collector->threads.resumed(&collector->markers);
1447
+ collector->threads.resumed(thread);
1318
1448
  break;
1319
1449
  case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
1320
- collector->threads.suspended(&collector->markers);
1450
+ collector->threads.suspended(thread);
1321
1451
  break;
1322
1452
 
1323
1453
  }
@@ -1345,7 +1475,7 @@ class TimeCollector : public BaseCollector {
1345
1475
  // have at least one thread in our thread list because it's possible
1346
1476
  // that the profile might be such that we don't get any thread switch
1347
1477
  // events and we need at least one
1348
- this->threads.resumed(&this->markers);
1478
+ this->threads.resumed(rb_thread_current());
1349
1479
 
1350
1480
  thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
1351
1481
  rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
@@ -1392,7 +1522,7 @@ class TimeCollector : public BaseCollector {
1392
1522
  VALUE hash = rb_hash_new();
1393
1523
  thread.samples.write_result(hash);
1394
1524
 
1395
- rb_hash_aset(threads, ULL2NUM(thread.native_tid), hash);
1525
+ rb_hash_aset(threads, thread.ruby_thread_id, hash);
1396
1526
  rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
1397
1527
  rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
1398
1528
  if (!thread.stopped_at.zero()) {
@@ -1409,6 +1539,7 @@ class TimeCollector : public BaseCollector {
1409
1539
 
1410
1540
  void mark() {
1411
1541
  frame_list.mark_frames();
1542
+ threads.mark();
1412
1543
 
1413
1544
  //for (int i = 0; i < queued_length; i++) {
1414
1545
  // rb_gc_mark(queued_frames[i]);
@@ -19,7 +19,7 @@ module Vernier
19
19
  Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
20
20
  end
21
21
 
22
- def add_marker(name:, start:, finish:, thread: Thread.current.native_thread_id, phase: Marker::Phase::INTERVAL, data: nil)
22
+ def add_marker(name:, start:, finish:, thread: Thread.current.object_id, phase: Marker::Phase::INTERVAL, data: nil)
23
23
  @markers << [thread,
24
24
  name,
25
25
  start,
@@ -39,7 +39,7 @@ module Vernier
39
39
  start:,
40
40
  finish: current_time,
41
41
  phase: Marker::Phase::INTERVAL,
42
- thread: Thread.current.native_thread_id,
42
+ thread: Thread.current.object_id,
43
43
  data: { :type => 'UserTiming', :entryType => 'measure', :name => name }
44
44
  )
45
45
  end
@@ -99,9 +99,10 @@ module Vernier
99
99
  def data
100
100
  markers_by_thread = profile.markers.group_by { |marker| marker[0] }
101
101
 
102
- thread_data = profile.threads.map do |tid, thread_info|
103
- markers = markers_by_thread[tid] || []
102
+ thread_data = profile.threads.map do |ruby_thread_id, thread_info|
103
+ markers = markers_by_thread[ruby_thread_id] || []
104
104
  Thread.new(
105
+ ruby_thread_id,
105
106
  profile,
106
107
  @categorizer,
107
108
  markers: markers,
@@ -157,14 +158,16 @@ module Vernier
157
158
  class Thread
158
159
  attr_reader :profile
159
160
 
160
- def initialize(profile, categorizer, name:, tid:, samples:, weights:, timestamps:, sample_categories:, markers:, started_at:, stopped_at: nil)
161
+ def initialize(ruby_thread_id, profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
162
+ @ruby_thread_id = ruby_thread_id
161
163
  @profile = profile
162
164
  @categorizer = categorizer
163
165
  @tid = tid
164
166
  @name = name
165
167
 
168
+ timestamps ||= [0] * samples.size
166
169
  @samples, @weights, @timestamps = samples, weights, timestamps
167
- @sample_categories = sample_categories
170
+ @sample_categories = sample_categories || ([0] * samples.size)
168
171
  @markers = markers
169
172
 
170
173
  @started_at, @stopped_at = started_at, stopped_at
@@ -211,7 +214,7 @@ module Vernier
211
214
  def data
212
215
  {
213
216
  name: @name,
214
- isMainThread: @tid == ::Thread.main.native_thread_id,
217
+ isMainThread: @ruby_thread_id == ::Thread.main.object_id || (profile.threads.size == 1),
215
218
  processStartupTime: 0, # FIXME
216
219
  processShutdownTime: nil, # FIXME
217
220
  registerTime: (@started_at - 0) / 1_000_000.0,
@@ -237,8 +240,6 @@ module Vernier
237
240
  end
238
241
 
239
242
  def markers_table
240
- size = @markers.size
241
-
242
243
  string_indexes = []
243
244
  start_times = []
244
245
  end_times = []
@@ -292,7 +293,6 @@ module Vernier
292
293
  times = (0...size).to_a
293
294
  end
294
295
 
295
- raise unless samples.size == size
296
296
  raise unless weights.size == size
297
297
  raise unless times.size == size
298
298
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vernier
4
- VERSION = "0.3.0"
4
+ VERSION = "0.4.0"
5
5
  end
data/lib/vernier.rb CHANGED
@@ -19,11 +19,11 @@ module Vernier
19
19
  yield collector
20
20
  ensure
21
21
  result = collector.stop
22
+ if out
23
+ File.write(out, Output::Firefox.new(result).output)
24
+ end
22
25
  end
23
26
 
24
- if out
25
- File.write(out, Output::Firefox.new(result).output)
26
- end
27
27
  result
28
28
  end
29
29
 
data/vernier.gemspec CHANGED
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
12
12
  spec.description = spec.summary
13
13
  spec.homepage = "https://github.com/jhawthorn/vernier"
14
14
  spec.license = "MIT"
15
- spec.required_ruby_version = ">= 3.2.0"
15
+ spec.required_ruby_version = ">= 3.2.1"
16
16
 
17
17
  spec.metadata["homepage_uri"] = spec.homepage
18
18
  spec.metadata["source_code_uri"] = spec.homepage
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vernier
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Hawthorn
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-08-29 00:00:00.000000000 Z
11
+ date: 2024-01-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: An experimental profiler
14
14
  email:
@@ -60,7 +60,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
60
60
  requirements:
61
61
  - - ">="
62
62
  - !ruby/object:Gem::Version
63
- version: 3.2.0
63
+ version: 3.2.1
64
64
  required_rubygems_version: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - ">="