vernier 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,14 +27,20 @@
27
27
  #include "ruby/debug.h"
28
28
  #include "ruby/thread.h"
29
29
 
30
- // GC event's we'll monitor during profiling
31
- #define RUBY_GC_PHASE_EVENTS \
30
+ # define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
31
+
32
+ // Internal TracePoint events we'll monitor during profiling
33
+ #define RUBY_INTERNAL_EVENTS \
32
34
  RUBY_INTERNAL_EVENT_GC_START | \
33
35
  RUBY_INTERNAL_EVENT_GC_END_MARK | \
34
36
  RUBY_INTERNAL_EVENT_GC_END_SWEEP | \
35
37
  RUBY_INTERNAL_EVENT_GC_ENTER | \
36
38
  RUBY_INTERNAL_EVENT_GC_EXIT
37
39
 
40
+ #define RUBY_NORMAL_EVENTS \
41
+ RUBY_EVENT_THREAD_BEGIN | \
42
+ RUBY_EVENT_THREAD_END
43
+
38
44
  #define sym(name) ID2SYM(rb_intern_const(name))
39
45
 
40
46
  // HACK: This isn't public, but the objspace ext uses it
@@ -125,6 +131,14 @@ class TimeStamp {
125
131
  return value_ns >= other.value_ns;
126
132
  }
127
133
 
134
+ bool operator==(const TimeStamp &other) const {
135
+ return value_ns == other.value_ns;
136
+ }
137
+
138
+ bool operator!=(const TimeStamp &other) const {
139
+ return value_ns != other.value_ns;
140
+ }
141
+
128
142
  uint64_t nanoseconds() const {
129
143
  return value_ns;
130
144
  }
@@ -202,6 +216,10 @@ bool operator==(const Frame& lhs, const Frame& rhs) noexcept {
202
216
  return lhs.frame == rhs.frame && lhs.line == rhs.line;
203
217
  }
204
218
 
219
+ bool operator!=(const Frame& lhs, const Frame& rhs) noexcept {
220
+ return !(lhs == rhs);
221
+ }
222
+
205
223
  namespace std {
206
224
  template<>
207
225
  struct hash<Frame>
@@ -275,7 +293,9 @@ struct RawSample {
275
293
  }
276
294
 
277
295
  Frame frame(int i) const {
278
- const Frame frame = {frames[i], lines[i]};
296
+ int idx = len - i - 1;
297
+ if (idx < 0) throw std::out_of_range("out of range");
298
+ const Frame frame = {frames[idx], lines[idx]};
279
299
  return frame;
280
300
  }
281
301
 
@@ -337,24 +357,6 @@ struct LiveSample {
337
357
  }
338
358
  };
339
359
 
340
- struct TraceArg {
341
- rb_trace_arg_t *tparg;
342
- VALUE obj;
343
- VALUE path;
344
- VALUE line;
345
- VALUE mid;
346
- VALUE klass;
347
-
348
- TraceArg(VALUE tpval) {
349
- tparg = rb_tracearg_from_tracepoint(tpval);
350
- obj = rb_tracearg_object(tparg);
351
- path = rb_tracearg_path(tparg);
352
- line = rb_tracearg_lineno(tparg);
353
- mid = rb_tracearg_method_id(tparg);
354
- klass = rb_tracearg_defined_class(tparg);
355
- }
356
- };
357
-
358
360
  struct FrameList {
359
361
  std::unordered_map<std::string, int> string_to_idx;
360
362
  std::vector<std::string> string_list;
@@ -412,27 +414,29 @@ struct FrameList {
412
414
  }
413
415
 
414
416
  StackNode *node = &root_stack_node;
415
- for (int i = stack.size() - 1; i >= 0; i--) {
416
- const Frame &frame = stack.frame(i);
417
+ for (int i = 0; i < stack.size(); i++) {
418
+ Frame frame = stack.frame(i);
417
419
  node = next_stack_node(node, frame);
418
420
  }
419
421
  return node->index;
420
422
  }
421
423
 
422
- StackNode *next_stack_node(StackNode *node, const Frame &frame) {
423
- int next_node_idx = node->children[frame];
424
- if (next_node_idx == 0) {
424
+ StackNode *next_stack_node(StackNode *node, Frame frame) {
425
+ auto search = node->children.find(frame);
426
+ if (search == node->children.end()) {
425
427
  // insert a new node
426
- next_node_idx = stack_node_list.size();
428
+ int next_node_idx = stack_node_list.size();
427
429
  node->children[frame] = next_node_idx;
428
430
  stack_node_list.emplace_back(
429
431
  frame,
430
432
  next_node_idx,
431
433
  node->index
432
434
  );
435
+ return &stack_node_list[next_node_idx];
436
+ } else {
437
+ int node_idx = search->second;
438
+ return &stack_node_list[node_idx];
433
439
  }
434
-
435
- return &stack_node_list[next_node_idx];
436
440
  }
437
441
 
438
442
  // Converts Frames from stacks other tables. "Symbolicates" the frames
@@ -512,6 +516,372 @@ struct FrameList {
512
516
  }
513
517
  };
514
518
 
519
+ class SampleTranslator {
520
+ public:
521
+ int last_stack_index;
522
+
523
+ Frame frames[RawSample::MAX_LEN];
524
+ int frame_indexes[RawSample::MAX_LEN];
525
+ int len;
526
+
527
+ SampleTranslator() : len(0), last_stack_index(-1) {
528
+ }
529
+
530
+ int translate(FrameList &frame_list, const RawSample &sample) {
531
+ int i = 0;
532
+ for (; i < len && i < sample.size(); i++) {
533
+ if (frames[i] != sample.frame(i)) {
534
+ break;
535
+ }
536
+ }
537
+
538
+ FrameList::StackNode *node = i == 0 ? &frame_list.root_stack_node : &frame_list.stack_node_list[frame_indexes[i - 1]];
539
+
540
+ for (; i < sample.size(); i++) {
541
+ Frame frame = sample.frame(i);
542
+ node = frame_list.next_stack_node(node, frame);
543
+
544
+ frames[i] = frame;
545
+ frame_indexes[i] = node->index;
546
+ }
547
+ len = i;
548
+
549
+ last_stack_index = node->index;
550
+ return last_stack_index;
551
+ }
552
+ };
553
+
554
+ typedef uint64_t native_thread_id_t;
555
+ static native_thread_id_t get_native_thread_id() {
556
+ #ifdef __APPLE__
557
+ uint64_t thread_id;
558
+ int e = pthread_threadid_np(pthread_self(), &thread_id);
559
+ if (e != 0) rb_syserr_fail(e, "pthread_threadid_np");
560
+ return thread_id;
561
+ #else
562
+ // gettid() is only available as of glibc 2.30
563
+ pid_t tid = syscall(SYS_gettid);
564
+ return tid;
565
+ #endif
566
+ }
567
+
568
+
569
+ class Marker {
570
+ public:
571
+ enum Type {
572
+ MARKER_GVL_THREAD_STARTED,
573
+ MARKER_GVL_THREAD_EXITED,
574
+
575
+ MARKER_GC_START,
576
+ MARKER_GC_END_MARK,
577
+ MARKER_GC_END_SWEEP,
578
+ MARKER_GC_ENTER,
579
+ MARKER_GC_EXIT,
580
+ MARKER_GC_PAUSE,
581
+
582
+ MARKER_THREAD_RUNNING,
583
+ MARKER_THREAD_STALLED,
584
+ MARKER_THREAD_SUSPENDED,
585
+
586
+ MARKER_MAX,
587
+ };
588
+
589
+ // Must match phase types from Gecko
590
+ enum Phase {
591
+ INSTANT,
592
+ INTERVAL,
593
+ INTERVAL_START,
594
+ INTERVAL_END
595
+ };
596
+
597
+ Type type;
598
+ Phase phase;
599
+ TimeStamp timestamp;
600
+ TimeStamp finish;
601
+ native_thread_id_t thread_id;
602
+ int stack_index = -1;
603
+
604
+ VALUE to_array() {
605
+ VALUE record[6] = {0};
606
+ record[0] = ULL2NUM(thread_id);
607
+ record[1] = INT2NUM(type);
608
+ record[2] = INT2NUM(phase);
609
+ record[3] = ULL2NUM(timestamp.nanoseconds());
610
+
611
+ if (phase == Marker::Phase::INTERVAL) {
612
+ record[4] = ULL2NUM(finish.nanoseconds());
613
+ }
614
+ else {
615
+ record[4] = Qnil;
616
+ }
617
+ record[5] = stack_index == -1 ? Qnil : INT2NUM(stack_index);
618
+
619
+ return rb_ary_new_from_values(6, record);
620
+ }
621
+ };
622
+
623
+ class MarkerTable {
624
+ TimeStamp last_gc_entry;
625
+
626
+ public:
627
+ std::vector<Marker> list;
628
+ std::mutex mutex;
629
+
630
+ void record_gc_entered() {
631
+ last_gc_entry = TimeStamp::Now();
632
+ }
633
+
634
+ void record_gc_leave() {
635
+ list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), get_native_thread_id(), -1 });
636
+ }
637
+
638
+ void record_interval(Marker::Type type, TimeStamp from, TimeStamp to, int stack_index = -1) {
639
+ const std::lock_guard<std::mutex> lock(mutex);
640
+
641
+ list.push_back({ type, Marker::INTERVAL, from, to, get_native_thread_id(), stack_index });
642
+ }
643
+
644
+ void record(Marker::Type type, int stack_index = -1) {
645
+ const std::lock_guard<std::mutex> lock(mutex);
646
+
647
+ list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), get_native_thread_id(), stack_index });
648
+ }
649
+ };
650
+
651
+ enum Category{
652
+ CATEGORY_NORMAL,
653
+ CATEGORY_IDLE
654
+ };
655
+
656
+ class SampleList {
657
+ public:
658
+
659
+ std::vector<int> stacks;
660
+ std::vector<TimeStamp> timestamps;
661
+ std::vector<native_thread_id_t> threads;
662
+ std::vector<Category> categories;
663
+ std::vector<int> weights;
664
+
665
+ size_t size() {
666
+ return stacks.size();
667
+ }
668
+
669
+ bool empty() {
670
+ return size() == 0;
671
+ }
672
+
673
+ void record_sample(int stack_index, TimeStamp time, native_thread_id_t thread_id, Category category) {
674
+ if (
675
+ !empty() &&
676
+ stacks.back() == stack_index &&
677
+ threads.back() == thread_id &&
678
+ categories.back() == category)
679
+ {
680
+ // We don't compare timestamps for de-duplication
681
+ weights.back() += 1;
682
+ } else {
683
+ stacks.push_back(stack_index);
684
+ timestamps.push_back(time);
685
+ threads.push_back(thread_id);
686
+ categories.push_back(category);
687
+ weights.push_back(1);
688
+ }
689
+ }
690
+
691
+ void write_result(VALUE result) const {
692
+ VALUE samples = rb_ary_new();
693
+ rb_hash_aset(result, sym("samples"), samples);
694
+ for (auto& stack_index: this->stacks) {
695
+ rb_ary_push(samples, INT2NUM(stack_index));
696
+ }
697
+
698
+ VALUE weights = rb_ary_new();
699
+ rb_hash_aset(result, sym("weights"), weights);
700
+ for (auto& weight: this->weights) {
701
+ rb_ary_push(weights, INT2NUM(weight));
702
+ }
703
+
704
+ VALUE timestamps = rb_ary_new();
705
+ rb_hash_aset(result, sym("timestamps"), timestamps);
706
+ for (auto& timestamp: this->timestamps) {
707
+ rb_ary_push(timestamps, ULL2NUM(timestamp.nanoseconds()));
708
+ }
709
+
710
+ VALUE sample_categories = rb_ary_new();
711
+ rb_hash_aset(result, sym("sample_categories"), sample_categories);
712
+ for (auto& cat: this->categories) {
713
+ rb_ary_push(sample_categories, INT2NUM(cat));
714
+ }
715
+ }
716
+ };
717
+
718
+ class Thread {
719
+ public:
720
+ SampleList samples;
721
+
722
+ enum State {
723
+ STARTED,
724
+ RUNNING,
725
+ READY,
726
+ SUSPENDED,
727
+ STOPPED
728
+ };
729
+
730
+ pthread_t pthread_id;
731
+ native_thread_id_t native_tid;
732
+ State state;
733
+
734
+ TimeStamp state_changed_at;
735
+ TimeStamp started_at;
736
+ TimeStamp stopped_at;
737
+
738
+ int stack_on_suspend_idx;
739
+ SampleTranslator translator;
740
+
741
+ std::string name;
742
+
743
+ Thread(State state) : state(state), stack_on_suspend_idx(-1) {
744
+ pthread_id = pthread_self();
745
+ native_tid = get_native_thread_id();
746
+ started_at = state_changed_at = TimeStamp::Now();
747
+ }
748
+
749
+ void set_state(State new_state, MarkerTable *markers) {
750
+ if (state == Thread::State::STOPPED) {
751
+ return;
752
+ }
753
+
754
+ TimeStamp from = state_changed_at;
755
+ auto now = TimeStamp::Now();
756
+
757
+ if (started_at.zero()) {
758
+ started_at = now;
759
+ }
760
+
761
+ switch (new_state) {
762
+ case State::STARTED:
763
+ new_state = State::RUNNING;
764
+ break;
765
+ case State::RUNNING:
766
+ assert(state == State::READY);
767
+
768
+ // If the GVL is immediately ready, and we measure no times
769
+ // stalled, skip emitting the interval.
770
+ if (from != now) {
771
+ markers->record_interval(Marker::Type::MARKER_THREAD_STALLED, from, now);
772
+ }
773
+ break;
774
+ case State::READY:
775
+ // The ready state means "I would like to do some work, but I can't
776
+ // do it right now either because I blocked on IO and now I want the GVL back,
777
+ // or because the VM timer put me to sleep"
778
+ //
779
+ // Threads can be preempted, which means they will have been in "Running"
780
+ // state, and then the VM was like "no I need to stop you from working,
781
+ // so I'll put you in the 'ready' (or stalled) state"
782
+ assert(state == State::SUSPENDED || state == State::RUNNING);
783
+ if (state == State::SUSPENDED) {
784
+ markers->record_interval(Marker::Type::MARKER_THREAD_SUSPENDED, from, now, stack_on_suspend_idx);
785
+ }
786
+ else {
787
+ markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
788
+ }
789
+ break;
790
+ case State::SUSPENDED:
791
+ // We can go from RUNNING or STARTED to SUSPENDED
792
+ assert(state == State::RUNNING || state == State::STARTED);
793
+ markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
794
+ break;
795
+ case State::STOPPED:
796
+ // We can go from RUNNING or STARTED to STOPPED
797
+ assert(state == State::RUNNING || state == State::STARTED);
798
+ markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
799
+ stopped_at = now;
800
+ capture_name();
801
+
802
+ break;
803
+ }
804
+
805
+ state = new_state;
806
+ state_changed_at = now;
807
+ }
808
+
809
+ bool running() {
810
+ return state != State::STOPPED;
811
+ }
812
+
813
+ void capture_name() {
814
+ char buf[128];
815
+ int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
816
+ if (rc == 0)
817
+ name = std::string(buf);
818
+ }
819
+ };
820
+
821
+ class ThreadTable {
822
+ public:
823
+ FrameList &frame_list;
824
+
825
+ std::vector<Thread> list;
826
+ std::mutex mutex;
827
+
828
+ ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
829
+ }
830
+
831
+ void started(MarkerTable *markers) {
832
+ //const std::lock_guard<std::mutex> lock(mutex);
833
+
834
+ //list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
835
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
836
+ set_state(Thread::State::STARTED, markers);
837
+ }
838
+
839
+ void ready(MarkerTable *markers) {
840
+ set_state(Thread::State::READY, markers);
841
+ }
842
+
843
+ void resumed(MarkerTable *markers) {
844
+ set_state(Thread::State::RUNNING, markers);
845
+ }
846
+
847
+ void suspended(MarkerTable *markers) {
848
+ set_state(Thread::State::SUSPENDED, markers);
849
+ }
850
+
851
+ void stopped(MarkerTable *markers) {
852
+ markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
853
+ set_state(Thread::State::STOPPED, markers);
854
+ }
855
+
856
+ private:
857
+ void set_state(Thread::State new_state, MarkerTable *markers) {
858
+ const std::lock_guard<std::mutex> lock(mutex);
859
+
860
+ pthread_t current_thread = pthread_self();
861
+ //cerr << "set state=" << new_state << " thread=" << gettid() << endl;
862
+
863
+ for (auto &thread : list) {
864
+ if (pthread_equal(current_thread, thread.pthread_id)) {
865
+ if (new_state == Thread::State::SUSPENDED) {
866
+
867
+ RawSample sample;
868
+ sample.sample();
869
+
870
+ thread.stack_on_suspend_idx = thread.translator.translate(frame_list, sample);
871
+ //cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
872
+ }
873
+
874
+ thread.set_state(new_state, markers);
875
+
876
+ return;
877
+ }
878
+ }
879
+
880
+ pid_t native_tid = get_native_thread_id();
881
+ list.emplace_back(new_state);
882
+ }
883
+ };
884
+
515
885
  class BaseCollector {
516
886
  protected:
517
887
 
@@ -523,15 +893,19 @@ class BaseCollector {
523
893
  bool running = false;
524
894
  FrameList frame_list;
525
895
 
896
+ TimeStamp started_at;
897
+
526
898
  virtual ~BaseCollector() {}
527
899
 
528
900
  virtual bool start() {
529
901
  if (running) {
530
902
  return false;
531
- } else {
532
- running = true;
533
- return true;
534
903
  }
904
+
905
+ started_at = TimeStamp::Now();
906
+
907
+ running = true;
908
+ return true;
535
909
  }
536
910
 
537
911
  virtual VALUE stop() {
@@ -543,6 +917,21 @@ class BaseCollector {
543
917
  return Qnil;
544
918
  }
545
919
 
920
+ void write_meta(VALUE result) {
921
+ VALUE meta = rb_hash_new();
922
+ rb_ivar_set(result, rb_intern("@meta"), meta);
923
+ rb_hash_aset(meta, sym("started_at"), ULL2NUM(started_at.nanoseconds()));
924
+
925
+ }
926
+
927
+ virtual VALUE build_collector_result() {
928
+ VALUE result = rb_obj_alloc(rb_cVernierResult);
929
+
930
+ write_meta(result);
931
+
932
+ return result;
933
+ }
934
+
546
935
  virtual void sample() {
547
936
  rb_raise(rb_eRuntimeError, "collector doesn't support manual sampling");
548
937
  };
@@ -557,14 +946,15 @@ class BaseCollector {
557
946
  };
558
947
 
559
948
  class CustomCollector : public BaseCollector {
560
- std::vector<int> samples;
949
+ SampleList samples;
561
950
 
562
951
  void sample() {
563
952
  RawSample sample;
564
953
  sample.sample();
565
954
  int stack_index = frame_list.stack_index(sample);
566
955
 
567
- samples.push_back(stack_index);
956
+ native_thread_id_t thread_id = 0;
957
+ samples.record_sample(stack_index, TimeStamp::Now(), thread_id, CATEGORY_NORMAL);
568
958
  }
569
959
 
570
960
  VALUE stop() {
@@ -580,17 +970,16 @@ class CustomCollector : public BaseCollector {
580
970
  }
581
971
 
582
972
  VALUE build_collector_result() {
583
- VALUE result = rb_obj_alloc(rb_cVernierResult);
973
+ VALUE result = BaseCollector::build_collector_result();
584
974
 
585
- VALUE samples = rb_ary_new();
586
- rb_ivar_set(result, rb_intern("@samples"), samples);
587
- VALUE weights = rb_ary_new();
588
- rb_ivar_set(result, rb_intern("@weights"), weights);
975
+ VALUE threads = rb_hash_new();
976
+ rb_ivar_set(result, rb_intern("@threads"), threads);
589
977
 
590
- for (auto& stack_index: this->samples) {
591
- rb_ary_push(samples, INT2NUM(stack_index));
592
- rb_ary_push(weights, INT2NUM(1));
593
- }
978
+ VALUE thread_hash = rb_hash_new();
979
+ samples.write_result(thread_hash);
980
+
981
+ rb_hash_aset(threads, ULL2NUM(0), thread_hash);
982
+ rb_hash_aset(thread_hash, sym("tid"), ULL2NUM(0));
594
983
 
595
984
  frame_list.write_result(result);
596
985
 
@@ -623,16 +1012,18 @@ class RetainedCollector : public BaseCollector {
623
1012
 
624
1013
  static void newobj_i(VALUE tpval, void *data) {
625
1014
  RetainedCollector *collector = static_cast<RetainedCollector *>(data);
626
- TraceArg tp(tpval);
1015
+ rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval);
1016
+ VALUE obj = rb_tracearg_object(tparg);
627
1017
 
628
- collector->record(tp.obj);
1018
+ collector->record(obj);
629
1019
  }
630
1020
 
631
1021
  static void freeobj_i(VALUE tpval, void *data) {
632
1022
  RetainedCollector *collector = static_cast<RetainedCollector *>(data);
633
- TraceArg tp(tpval);
1023
+ rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval);
1024
+ VALUE obj = rb_tracearg_object(tparg);
634
1025
 
635
- collector->object_frames.erase(tp.obj);
1026
+ collector->object_frames.erase(obj);
636
1027
  }
637
1028
 
638
1029
  public:
@@ -687,12 +1078,18 @@ class RetainedCollector : public BaseCollector {
687
1078
  RetainedCollector *collector = this;
688
1079
  FrameList &frame_list = collector->frame_list;
689
1080
 
690
- VALUE result = rb_obj_alloc(rb_cVernierResult);
1081
+ VALUE result = BaseCollector::build_collector_result();
691
1082
 
1083
+ VALUE threads = rb_hash_new();
1084
+ rb_ivar_set(result, rb_intern("@threads"), threads);
1085
+ VALUE thread_hash = rb_hash_new();
1086
+ rb_hash_aset(threads, ULL2NUM(0), thread_hash);
1087
+
1088
+ rb_hash_aset(thread_hash, sym("tid"), ULL2NUM(0));
692
1089
  VALUE samples = rb_ary_new();
693
- rb_ivar_set(result, rb_intern("@samples"), samples);
1090
+ rb_hash_aset(thread_hash, sym("samples"), samples);
694
1091
  VALUE weights = rb_ary_new();
695
- rb_ivar_set(result, rb_intern("@weights"), weights);
1092
+ rb_hash_aset(thread_hash, sym("weights"), weights);
696
1093
 
697
1094
  for (auto& obj: collector->object_list) {
698
1095
  const auto search = collector->object_frames.find(obj);
@@ -721,162 +1118,68 @@ class RetainedCollector : public BaseCollector {
721
1118
  }
722
1119
  };
723
1120
 
724
- typedef uint64_t native_thread_id_t;
1121
+ class GlobalSignalHandler {
1122
+ static LiveSample *live_sample;
725
1123
 
726
- class Thread {
727
1124
  public:
728
- static native_thread_id_t get_native_thread_id() {
729
- #ifdef __APPLE__
730
- uint64_t thread_id;
731
- int e = pthread_threadid_np(pthread_self(), &thread_id);
732
- if (e != 0) rb_syserr_fail(e, "pthread_threadid_np");
733
- return thread_id;
734
- #else
735
- // gettid() is only available as of glibc 2.30
736
- pid_t tid = syscall(SYS_gettid);
737
- return tid;
738
- #endif
739
- }
740
-
741
- enum State {
742
- STARTED,
743
- RUNNING,
744
- SUSPENDED,
745
- STOPPED
746
- };
747
-
748
- pthread_t pthread_id;
749
- native_thread_id_t native_tid;
750
- State state;
751
-
752
- TimeStamp state_changed_at;
753
- TimeStamp started_at;
754
- TimeStamp stopped_at;
755
-
756
- RawSample stack_on_suspend;
757
-
758
- std::string name;
759
-
760
- Thread(State state) : state(state) {
761
- pthread_id = pthread_self();
762
- native_tid = get_native_thread_id();
763
- started_at = state_changed_at = TimeStamp::Now();
1125
+ static GlobalSignalHandler *get_instance() {
1126
+ static GlobalSignalHandler instance;
1127
+ return &instance;
764
1128
  }
765
1129
 
766
- void set_state(State new_state) {
767
- if (state == Thread::State::STOPPED) {
768
- return;
769
- }
770
-
771
- auto now = TimeStamp::Now();
772
-
773
- state = new_state;
774
- state_changed_at = now;
775
- if (new_state == State::STARTED) {
776
- if (started_at.zero()) {
777
- started_at = now;
778
- }
779
- } else if (new_state == State::STOPPED) {
780
- stopped_at = now;
1130
+ void install() {
1131
+ const std::lock_guard<std::mutex> lock(mutex);
1132
+ count++;
781
1133
 
782
- capture_name();
783
- }
1134
+ if (count == 1) setup_signal_handler();
784
1135
  }
785
1136
 
786
- bool running() {
787
- return state != State::STOPPED;
788
- }
1137
+ void uninstall() {
1138
+ const std::lock_guard<std::mutex> lock(mutex);
1139
+ count--;
789
1140
 
790
- void capture_name() {
791
- char buf[128];
792
- int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
793
- if (rc == 0)
794
- name = std::string(buf);
1141
+ if (count == 0) clear_signal_handler();
795
1142
  }
796
- };
797
-
798
- class Marker {
799
- public:
800
- enum Type {
801
- MARKER_GVL_THREAD_STARTED,
802
- MARKER_GVL_THREAD_READY,
803
- MARKER_GVL_THREAD_RESUMED,
804
- MARKER_GVL_THREAD_SUSPENDED,
805
- MARKER_GVL_THREAD_EXITED,
806
1143
 
807
- MARKER_GC_START,
808
- MARKER_GC_END_MARK,
809
- MARKER_GC_END_SWEEP,
810
- MARKER_GC_ENTER,
811
- MARKER_GC_EXIT,
812
-
813
- MARKER_MAX,
814
- };
815
- Type type;
816
- TimeStamp timestamp;
817
- native_thread_id_t thread_id;
818
- };
819
-
820
- class MarkerTable {
821
- public:
822
- std::vector<Marker> list;
823
- std::mutex mutex;
824
-
825
- void record(Marker::Type type) {
1144
+ void record_sample(LiveSample &sample, pthread_t pthread_id) {
826
1145
  const std::lock_guard<std::mutex> lock(mutex);
827
1146
 
828
- list.push_back({ type, TimeStamp::Now(), Thread::get_native_thread_id() });
1147
+ live_sample = &sample;
1148
+ if (pthread_kill(pthread_id, SIGPROF)) {
1149
+ rb_bug("pthread_kill failed");
1150
+ }
1151
+ sample.wait();
1152
+ live_sample = NULL;
829
1153
  }
830
- };
831
1154
 
832
- extern "C" int ruby_thread_has_gvl_p(void);
833
-
834
- class ThreadTable {
835
- public:
836
- std::vector<Thread> list;
1155
+ private:
837
1156
  std::mutex mutex;
1157
+ int count;
838
1158
 
839
- void started() {
840
- //const std::lock_guard<std::mutex> lock(mutex);
841
-
842
- //list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
843
- set_state(Thread::State::STARTED);
1159
+ static void signal_handler(int sig, siginfo_t* sinfo, void* ucontext) {
1160
+ assert(live_sample);
1161
+ live_sample->sample_current_thread();
844
1162
  }
845
1163
 
846
- void set_state(Thread::State new_state) {
847
- const std::lock_guard<std::mutex> lock(mutex);
848
-
849
- pthread_t current_thread = pthread_self();
850
- //cerr << "set state=" << new_state << " thread=" << gettid() << endl;
851
-
852
- for (auto &thread : list) {
853
- if (pthread_equal(current_thread, thread.pthread_id)) {
854
- thread.set_state(new_state);
855
-
856
- if (new_state == Thread::State::SUSPENDED) {
857
- thread.stack_on_suspend.sample();
858
- //cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
859
- }
860
- return;
861
- }
862
- }
863
-
864
- pid_t native_tid = Thread::get_native_thread_id();
865
- list.emplace_back(new_state);
1164
+ void setup_signal_handler() {
1165
+ struct sigaction sa;
1166
+ sa.sa_sigaction = signal_handler;
1167
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
1168
+ sigemptyset(&sa.sa_mask);
1169
+ sigaction(SIGPROF, &sa, NULL);
866
1170
  }
867
- };
868
1171
 
869
- enum Category{
870
- CATEGORY_NORMAL,
871
- CATEGORY_IDLE
1172
+ void clear_signal_handler() {
1173
+ struct sigaction sa;
1174
+ sa.sa_handler = SIG_IGN;
1175
+ sa.sa_flags = SA_RESTART;
1176
+ sigemptyset(&sa.sa_mask);
1177
+ sigaction(SIGPROF, &sa, NULL);
1178
+ }
872
1179
  };
1180
+ LiveSample *GlobalSignalHandler::live_sample;
873
1181
 
874
1182
  class TimeCollector : public BaseCollector {
875
- std::vector<int> samples;
876
- std::vector<TimeStamp> timestamps;
877
- std::vector<native_thread_id_t> sample_threads;
878
- std::vector<Category> sample_categories;
879
-
880
1183
  MarkerTable markers;
881
1184
  ThreadTable threads;
882
1185
 
@@ -885,41 +1188,31 @@ class TimeCollector : public BaseCollector {
885
1188
  atomic_bool running;
886
1189
  SamplerSemaphore thread_stopped;
887
1190
 
888
- static LiveSample *live_sample;
889
-
890
- TimeStamp started_at;
891
1191
  TimeStamp interval;
892
1192
 
893
1193
  public:
894
- TimeCollector(TimeStamp interval) : interval(interval) {
1194
+ TimeCollector(TimeStamp interval) : interval(interval), threads(frame_list) {
895
1195
  }
896
1196
 
897
1197
  private:
898
1198
 
899
- void record_sample(const RawSample &sample, TimeStamp time, const Thread &thread, Category category) {
1199
+ void record_sample(const RawSample &sample, TimeStamp time, Thread &thread, Category category) {
900
1200
  if (!sample.empty()) {
901
- int stack_index = frame_list.stack_index(sample);
902
- samples.push_back(stack_index);
903
- timestamps.push_back(time);
904
- sample_threads.push_back(thread.native_tid);
905
- sample_categories.push_back(category);
1201
+ int stack_index = thread.translator.translate(frame_list, sample);
1202
+ thread.samples.record_sample(
1203
+ stack_index,
1204
+ time,
1205
+ thread.native_tid,
1206
+ category
1207
+ );
906
1208
  }
907
1209
  }
908
1210
 
909
- static void signal_handler(int sig, siginfo_t* sinfo, void* ucontext) {
910
- assert(live_sample);
911
- live_sample->sample_current_thread();
912
- }
913
-
914
1211
  VALUE get_markers() {
915
- VALUE list = rb_ary_new();
1212
+ VALUE list = rb_ary_new2(this->markers.list.size());
916
1213
 
917
1214
  for (auto& marker: this->markers.list) {
918
- VALUE record[3] = {0};
919
- record[0] = ULL2NUM(marker.thread_id);
920
- record[1] = INT2NUM(marker.type);
921
- record[2] = ULL2NUM(marker.timestamp.nanoseconds());
922
- rb_ary_push(list, rb_ary_new_from_values(3, record));
1215
+ rb_ary_push(list, marker.to_array());
923
1216
  }
924
1217
 
925
1218
  return list;
@@ -927,20 +1220,16 @@ class TimeCollector : public BaseCollector {
927
1220
 
928
1221
  void sample_thread_run() {
929
1222
  LiveSample sample;
930
- live_sample = &sample;
931
1223
 
932
1224
  TimeStamp next_sample_schedule = TimeStamp::Now();
933
1225
  while (running) {
934
1226
  TimeStamp sample_start = TimeStamp::Now();
935
1227
 
936
1228
  threads.mutex.lock();
937
- for (auto thread : threads.list) {
1229
+ for (auto &thread : threads.list) {
938
1230
  //if (thread.state == Thread::State::RUNNING) {
939
- if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend.size() == 0)) {
940
- if (pthread_kill(thread.pthread_id, SIGPROF)) {
941
- rb_bug("pthread_kill failed");
942
- }
943
- sample.wait();
1231
+ if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1232
+ GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
944
1233
 
945
1234
  if (sample.sample.gc) {
946
1235
  // fprintf(stderr, "skipping GC sample\n");
@@ -948,10 +1237,15 @@ class TimeCollector : public BaseCollector {
948
1237
  record_sample(sample.sample, sample_start, thread, CATEGORY_NORMAL);
949
1238
  }
950
1239
  } else if (thread.state == Thread::State::SUSPENDED) {
951
- record_sample(thread.stack_on_suspend, sample_start, thread, CATEGORY_IDLE);
1240
+ thread.samples.record_sample(
1241
+ thread.stack_on_suspend_idx,
1242
+ sample_start,
1243
+ thread.native_tid,
1244
+ CATEGORY_IDLE);
952
1245
  } else {
953
1246
  }
954
1247
  }
1248
+
955
1249
  threads.mutex.unlock();
956
1250
 
957
1251
  TimeStamp sample_complete = TimeStamp::Now();
@@ -967,8 +1261,6 @@ class TimeCollector : public BaseCollector {
967
1261
  TimeStamp::Sleep(sleep_time);
968
1262
  }
969
1263
 
970
- live_sample = NULL;
971
-
972
1264
  thread_stopped.post();
973
1265
  }
974
1266
 
@@ -978,10 +1270,21 @@ class TimeCollector : public BaseCollector {
978
1270
  return NULL;
979
1271
  }
980
1272
 
981
- static void internal_gc_event_cb(VALUE tpval, void *data) {
982
- TimeCollector *collector = static_cast<TimeCollector *>(data);
983
- rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval);
984
- int event = rb_tracearg_event_flag(tparg);
1273
+ static void internal_thread_event_cb(rb_event_flag_t event, VALUE data, VALUE self, ID mid, VALUE klass) {
1274
+ TimeCollector *collector = static_cast<TimeCollector *>((void *)NUM2ULL(data));
1275
+
1276
+ switch (event) {
1277
+ case RUBY_EVENT_THREAD_BEGIN:
1278
+ collector->threads.started(&collector->markers);
1279
+ break;
1280
+ case RUBY_EVENT_THREAD_END:
1281
+ collector->threads.stopped(&collector->markers);
1282
+ break;
1283
+ }
1284
+ }
1285
+
1286
+ static void internal_gc_event_cb(rb_event_flag_t event, VALUE data, VALUE self, ID mid, VALUE klass) {
1287
+ TimeCollector *collector = static_cast<TimeCollector *>((void *)NUM2ULL(data));
985
1288
 
986
1289
  switch (event) {
987
1290
  case RUBY_INTERNAL_EVENT_GC_START:
@@ -994,10 +1297,10 @@ class TimeCollector : public BaseCollector {
994
1297
  collector->markers.record(Marker::Type::MARKER_GC_END_SWEEP);
995
1298
  break;
996
1299
  case RUBY_INTERNAL_EVENT_GC_ENTER:
997
- collector->markers.record(Marker::Type::MARKER_GC_ENTER);
1300
+ collector->markers.record_gc_entered();
998
1301
  break;
999
1302
  case RUBY_INTERNAL_EVENT_GC_EXIT:
1000
- collector->markers.record(Marker::Type::MARKER_GC_EXIT);
1303
+ collector->markers.record_gc_leave();
1001
1304
  break;
1002
1305
  }
1003
1306
  }
@@ -1007,44 +1310,27 @@ class TimeCollector : public BaseCollector {
1007
1310
  //cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
1008
1311
 
1009
1312
  switch (event) {
1010
- case RUBY_INTERNAL_THREAD_EVENT_STARTED:
1011
- collector->markers.record(Marker::Type::MARKER_GVL_THREAD_STARTED);
1012
- collector->threads.started();
1013
- break;
1014
1313
  case RUBY_INTERNAL_THREAD_EVENT_READY:
1015
- collector->markers.record(Marker::Type::MARKER_GVL_THREAD_READY);
1314
+ collector->threads.ready(&collector->markers);
1016
1315
  break;
1017
1316
  case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
1018
- collector->markers.record(Marker::Type::MARKER_GVL_THREAD_RESUMED);
1019
- collector->threads.set_state(Thread::State::RUNNING);
1317
+ collector->threads.resumed(&collector->markers);
1020
1318
  break;
1021
1319
  case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
1022
- collector->markers.record(Marker::Type::MARKER_GVL_THREAD_SUSPENDED);
1023
- collector->threads.set_state(Thread::State::SUSPENDED);
1024
- break;
1025
- case RUBY_INTERNAL_THREAD_EVENT_EXITED:
1026
- collector->markers.record(Marker::Type::MARKER_GVL_THREAD_EXITED);
1027
- collector->threads.set_state(Thread::State::STOPPED);
1320
+ collector->threads.suspended(&collector->markers);
1028
1321
  break;
1029
1322
 
1030
1323
  }
1031
1324
  }
1032
1325
 
1033
1326
  rb_internal_thread_event_hook_t *thread_hook;
1034
- VALUE gc_hook;
1035
1327
 
1036
1328
  bool start() {
1037
1329
  if (!BaseCollector::start()) {
1038
1330
  return false;
1039
1331
  }
1040
1332
 
1041
- started_at = TimeStamp::Now();
1042
-
1043
- struct sigaction sa;
1044
- sa.sa_sigaction = signal_handler;
1045
- sa.sa_flags = SA_RESTART | SA_SIGINFO;
1046
- sigemptyset(&sa.sa_mask);
1047
- sigaction(SIGPROF, &sa, NULL);
1333
+ GlobalSignalHandler::get_instance()->install();
1048
1334
 
1049
1335
  running = true;
1050
1336
 
@@ -1054,15 +1340,16 @@ class TimeCollector : public BaseCollector {
1054
1340
  rb_bug("pthread_create");
1055
1341
  }
1056
1342
 
1057
- // Set the state of the current Ruby thread to RUNNING.
1058
- // We want to have at least one thread in our thread list because it's
1059
- // possible that the profile might be such that we don't get any
1060
- // thread switch events and we need at least one
1061
- this->threads.set_state(Thread::State::RUNNING);
1343
+ // Set the state of the current Ruby thread to RUNNING, which we know it
1344
+ // is as it must have held the GVL to start the collector. We want to
1345
+ // have at least one thread in our thread list because it's possible
1346
+ // that the profile might be such that we don't get any thread switch
1347
+ // events and we need at least one
1348
+ this->threads.resumed(&this->markers);
1062
1349
 
1063
1350
  thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
1064
- gc_hook = rb_tracepoint_new(0, RUBY_GC_PHASE_EVENTS, internal_gc_event_cb, (void *)this);
1065
- rb_tracepoint_enable(gc_hook);
1351
+ rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
1352
+ rb_add_event_hook(internal_thread_event_cb, RUBY_NORMAL_EVENTS, PTR2NUM((void *)this));
1066
1353
 
1067
1354
  return true;
1068
1355
  }
@@ -1073,14 +1360,11 @@ class TimeCollector : public BaseCollector {
1073
1360
  running = false;
1074
1361
  thread_stopped.wait();
1075
1362
 
1076
- struct sigaction sa;
1077
- sa.sa_handler = SIG_IGN;
1078
- sa.sa_flags = SA_RESTART;
1079
- sigemptyset(&sa.sa_mask);
1080
- sigaction(SIGPROF, &sa, NULL);
1363
+ GlobalSignalHandler::get_instance()->uninstall();
1081
1364
 
1082
1365
  rb_internal_thread_remove_event_hook(thread_hook);
1083
- rb_tracepoint_disable(gc_hook);
1366
+ rb_remove_event_hook(internal_gc_event_cb);
1367
+ rb_remove_event_hook(internal_thread_event_cb);
1084
1368
 
1085
1369
  // capture thread names
1086
1370
  for (auto& thread: this->threads.list) {
@@ -1099,45 +1383,15 @@ class TimeCollector : public BaseCollector {
1099
1383
  }
1100
1384
 
1101
1385
  VALUE build_collector_result() {
1102
- VALUE result = rb_obj_alloc(rb_cVernierResult);
1103
-
1104
- VALUE meta = rb_hash_new();
1105
- rb_ivar_set(result, rb_intern("@meta"), meta);
1106
- rb_hash_aset(meta, sym("started_at"), ULL2NUM(started_at.nanoseconds()));
1107
-
1108
- VALUE samples = rb_ary_new();
1109
- rb_ivar_set(result, rb_intern("@samples"), samples);
1110
- VALUE weights = rb_ary_new();
1111
- rb_ivar_set(result, rb_intern("@weights"), weights);
1112
- for (auto& stack_index: this->samples) {
1113
- rb_ary_push(samples, INT2NUM(stack_index));
1114
- rb_ary_push(weights, INT2NUM(1));
1115
- }
1116
-
1117
- VALUE timestamps = rb_ary_new();
1118
- rb_ivar_set(result, rb_intern("@timestamps"), timestamps);
1119
-
1120
- for (auto& timestamp: this->timestamps) {
1121
- rb_ary_push(timestamps, ULL2NUM(timestamp.nanoseconds()));
1122
- }
1123
-
1124
- VALUE sample_threads = rb_ary_new();
1125
- rb_ivar_set(result, rb_intern("@sample_threads"), sample_threads);
1126
- for (auto& thread: this->sample_threads) {
1127
- rb_ary_push(sample_threads, ULL2NUM(thread));
1128
- }
1129
-
1130
- VALUE sample_categories = rb_ary_new();
1131
- rb_ivar_set(result, rb_intern("@sample_categories"), sample_categories);
1132
- for (auto& cat: this->sample_categories) {
1133
- rb_ary_push(sample_categories, INT2NUM(cat));
1134
- }
1386
+ VALUE result = BaseCollector::build_collector_result();
1135
1387
 
1136
1388
  VALUE threads = rb_hash_new();
1137
1389
  rb_ivar_set(result, rb_intern("@threads"), threads);
1138
1390
 
1139
1391
  for (const auto& thread: this->threads.list) {
1140
1392
  VALUE hash = rb_hash_new();
1393
+ thread.samples.write_result(hash);
1394
+
1141
1395
  rb_hash_aset(threads, ULL2NUM(thread.native_tid), hash);
1142
1396
  rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
1143
1397
  rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
@@ -1155,7 +1409,6 @@ class TimeCollector : public BaseCollector {
1155
1409
 
1156
1410
  void mark() {
1157
1411
  frame_list.mark_frames();
1158
- rb_gc_mark(gc_hook);
1159
1412
 
1160
1413
  //for (int i = 0; i < queued_length; i++) {
1161
1414
  // rb_gc_mark(queued_frames[i]);
@@ -1165,8 +1418,6 @@ class TimeCollector : public BaseCollector {
1165
1418
  }
1166
1419
  };
1167
1420
 
1168
- LiveSample *TimeCollector::live_sample;
1169
-
1170
1421
  static void
1171
1422
  collector_mark(void *data) {
1172
1423
  BaseCollector *collector = static_cast<BaseCollector *>(data);
@@ -1252,14 +1503,11 @@ static VALUE collector_new(VALUE self, VALUE mode, VALUE options) {
1252
1503
  }
1253
1504
 
1254
1505
  static void
1255
- Init_consts() {
1506
+ Init_consts(VALUE rb_mVernierMarkerPhase) {
1256
1507
  #define MARKER_CONST(name) \
1257
1508
  rb_define_const(rb_mVernierMarkerType, #name, INT2NUM(Marker::Type::MARKER_##name))
1258
1509
 
1259
1510
  MARKER_CONST(GVL_THREAD_STARTED);
1260
- MARKER_CONST(GVL_THREAD_READY);
1261
- MARKER_CONST(GVL_THREAD_RESUMED);
1262
- MARKER_CONST(GVL_THREAD_SUSPENDED);
1263
1511
  MARKER_CONST(GVL_THREAD_EXITED);
1264
1512
 
1265
1513
  MARKER_CONST(GC_START);
@@ -1267,8 +1515,22 @@ Init_consts() {
1267
1515
  MARKER_CONST(GC_END_SWEEP);
1268
1516
  MARKER_CONST(GC_ENTER);
1269
1517
  MARKER_CONST(GC_EXIT);
1518
+ MARKER_CONST(GC_PAUSE);
1519
+
1520
+ MARKER_CONST(THREAD_RUNNING);
1521
+ MARKER_CONST(THREAD_STALLED);
1522
+ MARKER_CONST(THREAD_SUSPENDED);
1270
1523
 
1271
1524
  #undef MARKER_CONST
1525
+
1526
+ #define PHASE_CONST(name) \
1527
+ rb_define_const(rb_mVernierMarkerPhase, #name, INT2NUM(Marker::Phase::name))
1528
+
1529
+ PHASE_CONST(INSTANT);
1530
+ PHASE_CONST(INTERVAL);
1531
+ PHASE_CONST(INTERVAL_START);
1532
+ PHASE_CONST(INTERVAL_END);
1533
+ #undef PHASE_CONST
1272
1534
  }
1273
1535
 
1274
1536
  extern "C" void
@@ -1277,6 +1539,7 @@ Init_vernier(void)
1277
1539
  rb_mVernier = rb_define_module("Vernier");
1278
1540
  rb_cVernierResult = rb_define_class_under(rb_mVernier, "Result", rb_cObject);
1279
1541
  VALUE rb_mVernierMarker = rb_define_module_under(rb_mVernier, "Marker");
1542
+ VALUE rb_mVernierMarkerPhase = rb_define_module_under(rb_mVernierMarker, "Phase");
1280
1543
  rb_mVernierMarkerType = rb_define_module_under(rb_mVernierMarker, "Type");
1281
1544
 
1282
1545
  rb_cVernierCollector = rb_define_class_under(rb_mVernier, "Collector", rb_cObject);
@@ -1287,7 +1550,7 @@ Init_vernier(void)
1287
1550
  rb_define_private_method(rb_cVernierCollector, "finish", collector_stop, 0);
1288
1551
  rb_define_private_method(rb_cVernierCollector, "markers", markers, 0);
1289
1552
 
1290
- Init_consts();
1553
+ Init_consts(rb_mVernierMarkerPhase);
1291
1554
 
1292
1555
  //static VALUE gc_hook = Data_Wrap_Struct(rb_cObject, collector_mark, NULL, &_collector);
1293
1556
  //rb_global_variable(&gc_hook);