vernier 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -27,14 +27,20 @@
27
27
  #include "ruby/debug.h"
28
28
  #include "ruby/thread.h"
29
29
 
30
- // GC event's we'll monitor during profiling
31
- #define RUBY_GC_PHASE_EVENTS \
30
+ # define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
31
+
32
+ // Internal TracePoint events we'll monitor during profiling
33
+ #define RUBY_INTERNAL_EVENTS \
32
34
  RUBY_INTERNAL_EVENT_GC_START | \
33
35
  RUBY_INTERNAL_EVENT_GC_END_MARK | \
34
36
  RUBY_INTERNAL_EVENT_GC_END_SWEEP | \
35
37
  RUBY_INTERNAL_EVENT_GC_ENTER | \
36
38
  RUBY_INTERNAL_EVENT_GC_EXIT
37
39
 
40
+ #define RUBY_NORMAL_EVENTS \
41
+ RUBY_EVENT_THREAD_BEGIN | \
42
+ RUBY_EVENT_THREAD_END
43
+
38
44
  #define sym(name) ID2SYM(rb_intern_const(name))
39
45
 
40
46
  // HACK: This isn't public, but the objspace ext uses it
@@ -125,6 +131,14 @@ class TimeStamp {
125
131
  return value_ns >= other.value_ns;
126
132
  }
127
133
 
134
+ bool operator==(const TimeStamp &other) const {
135
+ return value_ns == other.value_ns;
136
+ }
137
+
138
+ bool operator!=(const TimeStamp &other) const {
139
+ return value_ns != other.value_ns;
140
+ }
141
+
128
142
  uint64_t nanoseconds() const {
129
143
  return value_ns;
130
144
  }
@@ -202,6 +216,10 @@ bool operator==(const Frame& lhs, const Frame& rhs) noexcept {
202
216
  return lhs.frame == rhs.frame && lhs.line == rhs.line;
203
217
  }
204
218
 
219
+ bool operator!=(const Frame& lhs, const Frame& rhs) noexcept {
220
+ return !(lhs == rhs);
221
+ }
222
+
205
223
  namespace std {
206
224
  template<>
207
225
  struct hash<Frame>
@@ -275,7 +293,9 @@ struct RawSample {
275
293
  }
276
294
 
277
295
  Frame frame(int i) const {
278
- const Frame frame = {frames[i], lines[i]};
296
+ int idx = len - i - 1;
297
+ if (idx < 0) throw std::out_of_range("out of range");
298
+ const Frame frame = {frames[idx], lines[idx]};
279
299
  return frame;
280
300
  }
281
301
 
@@ -337,24 +357,6 @@ struct LiveSample {
337
357
  }
338
358
  };
339
359
 
340
- struct TraceArg {
341
- rb_trace_arg_t *tparg;
342
- VALUE obj;
343
- VALUE path;
344
- VALUE line;
345
- VALUE mid;
346
- VALUE klass;
347
-
348
- TraceArg(VALUE tpval) {
349
- tparg = rb_tracearg_from_tracepoint(tpval);
350
- obj = rb_tracearg_object(tparg);
351
- path = rb_tracearg_path(tparg);
352
- line = rb_tracearg_lineno(tparg);
353
- mid = rb_tracearg_method_id(tparg);
354
- klass = rb_tracearg_defined_class(tparg);
355
- }
356
- };
357
-
358
360
  struct FrameList {
359
361
  std::unordered_map<std::string, int> string_to_idx;
360
362
  std::vector<std::string> string_list;
@@ -412,27 +414,29 @@ struct FrameList {
412
414
  }
413
415
 
414
416
  StackNode *node = &root_stack_node;
415
- for (int i = stack.size() - 1; i >= 0; i--) {
416
- const Frame &frame = stack.frame(i);
417
+ for (int i = 0; i < stack.size(); i++) {
418
+ Frame frame = stack.frame(i);
417
419
  node = next_stack_node(node, frame);
418
420
  }
419
421
  return node->index;
420
422
  }
421
423
 
422
- StackNode *next_stack_node(StackNode *node, const Frame &frame) {
423
- int next_node_idx = node->children[frame];
424
- if (next_node_idx == 0) {
424
+ StackNode *next_stack_node(StackNode *node, Frame frame) {
425
+ auto search = node->children.find(frame);
426
+ if (search == node->children.end()) {
425
427
  // insert a new node
426
- next_node_idx = stack_node_list.size();
428
+ int next_node_idx = stack_node_list.size();
427
429
  node->children[frame] = next_node_idx;
428
430
  stack_node_list.emplace_back(
429
431
  frame,
430
432
  next_node_idx,
431
433
  node->index
432
434
  );
435
+ return &stack_node_list[next_node_idx];
436
+ } else {
437
+ int node_idx = search->second;
438
+ return &stack_node_list[node_idx];
433
439
  }
434
-
435
- return &stack_node_list[next_node_idx];
436
440
  }
437
441
 
438
442
  // Converts Frames from stacks other tables. "Symbolicates" the frames
@@ -512,6 +516,372 @@ struct FrameList {
512
516
  }
513
517
  };
514
518
 
519
+ class SampleTranslator { // Turns RawSamples into FrameList stack-node indexes, caching the previous sample's path so a shared leading run of frames is not walked again.
520
+ public:
521
+ int last_stack_index; // Result of the most recent translate(); -1 until the first call.
522
+
523
+ Frame frames[RawSample::MAX_LEN]; // Frames of the previously translated sample, in sample.frame(i) order.
524
+ int frame_indexes[RawSample::MAX_LEN]; // frame_indexes[i] is the index of the stack node reached after frames[0..i].
525
+ int len; // Number of valid entries in frames / frame_indexes.
526
+
527
+ SampleTranslator() : len(0), last_stack_index(-1) { // NOTE(review): initializer order differs from declaration order (last_stack_index is declared first); harmless here, but compilers may warn (-Wreorder).
528
+ }
529
+
530
+ int translate(FrameList &frame_list, const RawSample &sample) { // Returns the stack-node index for `sample` and refreshes the cache with its frames.
531
+ int i = 0;
532
+ for (; i < len && i < sample.size(); i++) { // Measure how many leading frames match the cached sample.
533
+ if (frames[i] != sample.frame(i)) {
534
+ break;
535
+ }
536
+ }
537
+
538
+ FrameList::StackNode *node = i == 0 ? &frame_list.root_stack_node : &frame_list.stack_node_list[frame_indexes[i - 1]]; // Resume from the node cached for the last matching frame, or from the root when nothing matched.
539
+
540
+ for (; i < sample.size(); i++) { // Walk the remaining frames through the FrameList, recording each step in the cache.
541
+ Frame frame = sample.frame(i);
542
+ node = frame_list.next_stack_node(node, frame);
543
+
544
+ frames[i] = frame;
545
+ frame_indexes[i] = node->index;
546
+ }
547
+ len = i; // The cache now covers the frames of this sample only.
548
+
549
+ last_stack_index = node->index; // For a sample with no frames this is the root node's index.
550
+ return last_stack_index;
551
+ }
552
+ };
553
+
554
+ typedef uint64_t native_thread_id_t; // Integer type used throughout for OS-level thread ids.
555
+ static native_thread_id_t get_native_thread_id() { // Returns the OS-level id of the calling thread.
556
+ #ifdef __APPLE__
557
+ uint64_t thread_id;
558
+ int e = pthread_threadid_np(pthread_self(), &thread_id);
559
+ if (e != 0) rb_syserr_fail(e, "pthread_threadid_np"); // A non-zero return code is handed to Ruby via rb_syserr_fail.
560
+ return thread_id;
561
+ #else
562
+ // gettid() is only available as of glibc 2.30
563
+ pid_t tid = syscall(SYS_gettid); // Raw syscall is used instead of the gettid() wrapper, per the comment above.
564
+ return tid; // Converted from pid_t to the 64-bit native_thread_id_t on return.
565
+ #endif
566
+ }
567
+
568
+
569
+ class Marker { // One profiling event (instant or interval) tagged with a type, the recording thread and an optional stack index.
570
+ public:
571
+ enum Type { // Emitted as an integer by to_array(), so the numeric order of these values is part of the output.
572
+ MARKER_GVL_THREAD_STARTED,
573
+ MARKER_GVL_THREAD_EXITED,
574
+
575
+ MARKER_GC_START,
576
+ MARKER_GC_END_MARK,
577
+ MARKER_GC_END_SWEEP,
578
+ MARKER_GC_ENTER,
579
+ MARKER_GC_EXIT,
580
+ MARKER_GC_PAUSE, // Interval recorded by MarkerTable::record_gc_leave(), spanning the last GC entry to now.
581
+
582
+ MARKER_THREAD_RUNNING, // The three MARKER_THREAD_* values are recorded as intervals by Thread::set_state().
583
+ MARKER_THREAD_STALLED,
584
+ MARKER_THREAD_SUSPENDED,
585
+
586
+ MARKER_MAX,
587
+ };
588
+
589
+ // Must match phase types from Gecko
590
+ enum Phase { // Also emitted as an integer by to_array().
591
+ INSTANT,
592
+ INTERVAL,
593
+ INTERVAL_START,
594
+ INTERVAL_END
595
+ };
596
+
597
+ Type type;
598
+ Phase phase;
599
+ TimeStamp timestamp; // Time of an instant marker, or start of an interval.
600
+ TimeStamp finish; // End of an interval; only emitted when phase == INTERVAL.
601
+ native_thread_id_t thread_id; // Native id of the thread that recorded the marker.
602
+ int stack_index = -1; // -1 means "no stack" and is emitted as nil.
603
+
604
+ VALUE to_array() { // Builds the Ruby array [thread_id, type, phase, timestamp_ns, finish_ns or nil, stack_index or nil].
605
+ VALUE record[6] = {0};
606
+ record[0] = ULL2NUM(thread_id);
607
+ record[1] = INT2NUM(type);
608
+ record[2] = INT2NUM(phase);
609
+ record[3] = ULL2NUM(timestamp.nanoseconds());
610
+
611
+ if (phase == Marker::Phase::INTERVAL) { // Only INTERVAL markers carry an end time.
612
+ record[4] = ULL2NUM(finish.nanoseconds());
613
+ }
614
+ else {
615
+ record[4] = Qnil;
616
+ }
617
+ record[5] = stack_index == -1 ? Qnil : INT2NUM(stack_index);
618
+
619
+ return rb_ary_new_from_values(6, record);
620
+ }
621
+ };
622
+
623
+ class MarkerTable { // Append-only list of Markers plus the mutex that record() and record_interval() take while appending.
624
+ TimeStamp last_gc_entry; // Set by record_gc_entered(); used as the start of the next MARKER_GC_PAUSE interval.
625
+
626
+ public:
627
+ std::vector<Marker> list;
628
+ std::mutex mutex;
629
+
630
+ void record_gc_entered() { // Remembers the current time as the start of a GC pause; appends nothing.
631
+ last_gc_entry = TimeStamp::Now();
632
+ }
633
+
634
+ void record_gc_leave() { // Appends a MARKER_GC_PAUSE interval from the last recorded GC entry to now.
635
+ list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), get_native_thread_id(), -1 }); // NOTE(review): unlike record()/record_interval(), this appends without taking `mutex` — presumably deliberate because it runs inside a GC hook; confirm it cannot race with other threads' appends.
636
+ }
637
+
638
+ void record_interval(Marker::Type type, TimeStamp from, TimeStamp to, int stack_index = -1) { // Appends an INTERVAL marker [from, to] for the calling thread, under the mutex.
639
+ const std::lock_guard<std::mutex> lock(mutex);
640
+
641
+ list.push_back({ type, Marker::INTERVAL, from, to, get_native_thread_id(), stack_index });
642
+ }
643
+
644
+ void record(Marker::Type type, int stack_index = -1) { // Appends an INSTANT marker stamped with the current time for the calling thread, under the mutex.
645
+ const std::lock_guard<std::mutex> lock(mutex);
646
+
647
+ list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), get_native_thread_id(), stack_index }); // `finish` is a default-constructed TimeStamp; to_array() emits nil for it on INSTANT markers.
648
+ }
649
+ };
650
+
651
+ enum Category{ // Per-sample category; SampleList::write_result() emits it as an integer, so the numeric values are part of the output.
651
+ CATEGORY_NORMAL, // Used for samples captured from a live stack.
652
+ CATEGORY_IDLE // Used when a suspended thread's saved stack index is re-recorded.
653
+ };
655
+
656
+ class SampleList { // Samples stored as parallel vectors: element i of each vector describes the same sample.
657
+ public:
658
+
659
+ std::vector<int> stacks; // Stack index of each sample.
660
+ std::vector<TimeStamp> timestamps; // Time of the first occurrence when consecutive samples were merged.
661
+ std::vector<native_thread_id_t> threads; // NOTE(review): recorded and used for de-duplication, but not emitted by write_result().
662
+ std::vector<Category> categories;
663
+ std::vector<int> weights; // Number of consecutive identical samples merged into each entry.
664
+
665
+ size_t size() { // Number of stored entries after merging.
666
+ return stacks.size();
667
+ }
668
+
669
+ bool empty() {
670
+ return size() == 0;
671
+ }
672
+
673
+ void record_sample(int stack_index, TimeStamp time, native_thread_id_t thread_id, Category category) { // Adds a sample, merging it into the previous entry when stack, thread and category all match.
674
+ if (
675
+ !empty() &&
676
+ stacks.back() == stack_index &&
677
+ threads.back() == thread_id &&
678
+ categories.back() == category)
679
+ {
680
+ // We don't compare timestamps for de-duplication
681
+ weights.back() += 1; // `time` is dropped here; the merged entry keeps its original timestamp.
682
+ } else {
683
+ stacks.push_back(stack_index);
684
+ timestamps.push_back(time);
685
+ threads.push_back(thread_id);
686
+ categories.push_back(category);
687
+ weights.push_back(1);
688
+ }
689
+ }
690
+
691
+ void write_result(VALUE result) const { // Stores four Ruby arrays in the hash `result` under :samples, :weights, :timestamps and :sample_categories.
692
+ VALUE samples = rb_ary_new();
693
+ rb_hash_aset(result, sym("samples"), samples);
694
+ for (auto& stack_index: this->stacks) {
695
+ rb_ary_push(samples, INT2NUM(stack_index));
696
+ }
697
+
698
+ VALUE weights = rb_ary_new(); // Local shadows the member of the same name, hence `this->weights` below.
699
+ rb_hash_aset(result, sym("weights"), weights);
700
+ for (auto& weight: this->weights) {
701
+ rb_ary_push(weights, INT2NUM(weight));
702
+ }
703
+
704
+ VALUE timestamps = rb_ary_new(); // Local shadows the member of the same name, hence `this->timestamps` below.
705
+ rb_hash_aset(result, sym("timestamps"), timestamps);
706
+ for (auto& timestamp: this->timestamps) {
707
+ rb_ary_push(timestamps, ULL2NUM(timestamp.nanoseconds())); // Timestamps are emitted in nanoseconds.
708
+ }
709
+
710
+ VALUE sample_categories = rb_ary_new();
711
+ rb_hash_aset(result, sym("sample_categories"), sample_categories);
712
+ for (auto& cat: this->categories) {
713
+ rb_ary_push(sample_categories, INT2NUM(cat));
714
+ }
715
+ }
716
+ };
717
+
718
+ class Thread { // Per-thread profiling state: its samples, its current scheduling state, and the timestamps of its transitions.
719
+ public:
720
+ SampleList samples; // Samples attributed to this thread.
721
+
722
+ enum State {
723
+ STARTED,
724
+ RUNNING,
725
+ READY,
726
+ SUSPENDED,
727
+ STOPPED // Terminal: set_state() ignores every request once this is reached.
728
+ };
729
+
730
+ pthread_t pthread_id; // Captured with pthread_self() in the constructor.
731
+ native_thread_id_t native_tid; // Captured with get_native_thread_id() in the constructor.
732
+ State state;
733
+
734
+ TimeStamp state_changed_at; // Time of the last state change; used as the start of the next interval marker.
735
+ TimeStamp started_at;
736
+ TimeStamp stopped_at; // Assigned only on the transition to STOPPED.
737
+
738
+ int stack_on_suspend_idx; // Stack index saved by ThreadTable when the thread is suspended; -1 until then.
739
+ SampleTranslator translator; // Per-thread translator, so its cache only ever holds this thread's previous stack.
740
+
741
+ std::string name; // Filled by capture_name(), which set_state() calls on the transition to STOPPED.
742
+
743
+ Thread(State state) : state(state), stack_on_suspend_idx(-1) { // Must run on the thread it describes: the ids below are those of the calling thread.
744
+ pthread_id = pthread_self();
745
+ native_tid = get_native_thread_id();
746
+ started_at = state_changed_at = TimeStamp::Now();
747
+ }
748
+
749
+ void set_state(State new_state, MarkerTable *markers) { // Moves to new_state and records an interval marker for the time spent in the state being left.
750
+ if (state == Thread::State::STOPPED) {
751
+ return;
752
+ }
753
+
754
+ TimeStamp from = state_changed_at; // Start of the interval being closed.
755
+ auto now = TimeStamp::Now(); // End of the interval being closed.
756
+
757
+ if (started_at.zero()) {
758
+ started_at = now;
759
+ }
760
+
761
+ switch (new_state) {
762
+ case State::STARTED:
763
+ new_state = State::RUNNING; // A STARTED request is stored as RUNNING; no interval marker is recorded for it.
764
+ break;
765
+ case State::RUNNING:
766
+ assert(state == State::READY);
767
+
768
+ // If the GVL is immediately ready, and we measure no time
769
+ // stalled, skip emitting the interval.
770
+ if (from != now) {
771
+ markers->record_interval(Marker::Type::MARKER_THREAD_STALLED, from, now);
772
+ }
773
+ break;
774
+ case State::READY:
775
+ // The ready state means "I would like to do some work, but I can't
776
+ // do it right now either because I blocked on IO and now I want the GVL back,
777
+ // or because the VM timer put me to sleep"
778
+ //
779
+ // Threads can be preempted, which means they will have been in "Running"
780
+ // state, and then the VM was like "no I need to stop you from working,
781
+ // so I'll put you in the 'ready' (or stalled) state"
782
+ assert(state == State::SUSPENDED || state == State::RUNNING);
783
+ if (state == State::SUSPENDED) {
784
+ markers->record_interval(Marker::Type::MARKER_THREAD_SUSPENDED, from, now, stack_on_suspend_idx); // The suspended interval carries the stack saved at suspension.
785
+ }
786
+ else {
787
+ markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
788
+ }
789
+ break;
790
+ case State::SUSPENDED:
791
+ // We can go from RUNNING or STARTED to SUSPENDED
792
+ assert(state == State::RUNNING || state == State::STARTED);
793
+ markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
794
+ break;
795
+ case State::STOPPED:
796
+ // We can go from RUNNING or STARTED to STOPPED
797
+ assert(state == State::RUNNING || state == State::STARTED);
798
+ markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
799
+ stopped_at = now;
800
+ capture_name(); // Capture the name as part of stopping; later set_state() calls return early and never reach this.
801
+
802
+ break;
803
+ }
804
+
805
+ state = new_state;
806
+ state_changed_at = now;
807
+ }
808
+
809
+ bool running() { // True for every state except STOPPED.
810
+ return state != State::STOPPED;
811
+ }
812
+
813
+ void capture_name() { // Stores the pthread name of pthread_id in `name`.
814
+ char buf[128];
815
+ int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
816
+ if (rc == 0) // On failure `name` keeps its previous value.
817
+ name = std::string(buf);
818
+ }
819
+ };
820
+
821
+ class ThreadTable { // Registry of Thread entries; every transition applies to the entry whose pthread_id matches the calling thread.
822
+ public:
823
+ FrameList &frame_list; // Used to translate the stack captured when a thread is suspended.
824
+
825
+ std::vector<Thread> list; // NOTE(review): nothing in this class removes entries; if the platform reuses a pthread_t for a later thread it would match an old STOPPED entry, whose set_state() is a no-op — confirm whether that can happen.
826
+ std::mutex mutex; // Taken by set_state() for the whole lookup and transition.
827
+
828
+ ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
829
+ }
830
+
831
+ void started(MarkerTable *markers) { // Records a MARKER_GVL_THREAD_STARTED instant, then requests STARTED for the calling thread.
832
+ //const std::lock_guard<std::mutex> lock(mutex);
833
+
834
+ //list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
835
+ markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
836
+ set_state(Thread::State::STARTED, markers);
837
+ }
838
+
839
+ void ready(MarkerTable *markers) { // Requests READY for the calling thread.
840
+ set_state(Thread::State::READY, markers);
841
+ }
842
+
843
+ void resumed(MarkerTable *markers) { // Requests RUNNING for the calling thread.
844
+ set_state(Thread::State::RUNNING, markers);
845
+ }
846
+
847
+ void suspended(MarkerTable *markers) { // Requests SUSPENDED for the calling thread; set_state() also captures its stack.
848
+ set_state(Thread::State::SUSPENDED, markers);
849
+ }
850
+
851
+ void stopped(MarkerTable *markers) { // Records a MARKER_GVL_THREAD_EXITED instant, then requests STOPPED for the calling thread.
852
+ markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
853
+ set_state(Thread::State::STOPPED, markers);
854
+ }
855
+
856
+ private:
857
+ void set_state(Thread::State new_state, MarkerTable *markers) { // Applies new_state to the calling thread's entry, or registers the thread if it has none.
858
+ const std::lock_guard<std::mutex> lock(mutex);
859
+
860
+ pthread_t current_thread = pthread_self();
861
+ //cerr << "set state=" << new_state << " thread=" << gettid() << endl;
862
+
863
+ for (auto &thread : list) {
864
+ if (pthread_equal(current_thread, thread.pthread_id)) {
865
+ if (new_state == Thread::State::SUSPENDED) { // Save the stack first, so the later MARKER_THREAD_SUSPENDED interval can reference it.
866
+
867
+ RawSample sample;
868
+ sample.sample();
869
+
870
+ thread.stack_on_suspend_idx = thread.translator.translate(frame_list, sample);
871
+ //cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
872
+ }
873
+
874
+ thread.set_state(new_state, markers);
875
+
876
+ return;
877
+ }
878
+ }
879
+
880
+ pid_t native_tid = get_native_thread_id(); // NOTE(review): unused local, and it narrows the 64-bit id to pid_t; the Thread constructor fetches the id itself.
881
+ list.emplace_back(new_state); // First sighting of this thread: the entry is built directly in new_state, so Thread::set_state() is not called and no interval marker is recorded.
882
+ }
883
+ };
884
+
515
885
  class BaseCollector {
516
886
  protected:
517
887
 
@@ -523,15 +893,19 @@ class BaseCollector {
523
893
  bool running = false;
524
894
  FrameList frame_list;
525
895
 
896
+ TimeStamp started_at;
897
+
526
898
  virtual ~BaseCollector() {}
527
899
 
528
900
  virtual bool start() {
529
901
  if (running) {
530
902
  return false;
531
- } else {
532
- running = true;
533
- return true;
534
903
  }
904
+
905
+ started_at = TimeStamp::Now();
906
+
907
+ running = true;
908
+ return true;
535
909
  }
536
910
 
537
911
  virtual VALUE stop() {
@@ -543,6 +917,21 @@ class BaseCollector {
543
917
  return Qnil;
544
918
  }
545
919
 
920
+ void write_meta(VALUE result) {
921
+ VALUE meta = rb_hash_new();
922
+ rb_ivar_set(result, rb_intern("@meta"), meta);
923
+ rb_hash_aset(meta, sym("started_at"), ULL2NUM(started_at.nanoseconds()));
924
+
925
+ }
926
+
927
+ virtual VALUE build_collector_result() {
928
+ VALUE result = rb_obj_alloc(rb_cVernierResult);
929
+
930
+ write_meta(result);
931
+
932
+ return result;
933
+ }
934
+
546
935
  virtual void sample() {
547
936
  rb_raise(rb_eRuntimeError, "collector doesn't support manual sampling");
548
937
  };
@@ -557,14 +946,15 @@ class BaseCollector {
557
946
  };
558
947
 
559
948
  class CustomCollector : public BaseCollector {
560
- std::vector<int> samples;
949
+ SampleList samples;
561
950
 
562
951
  void sample() {
563
952
  RawSample sample;
564
953
  sample.sample();
565
954
  int stack_index = frame_list.stack_index(sample);
566
955
 
567
- samples.push_back(stack_index);
956
+ native_thread_id_t thread_id = 0;
957
+ samples.record_sample(stack_index, TimeStamp::Now(), thread_id, CATEGORY_NORMAL);
568
958
  }
569
959
 
570
960
  VALUE stop() {
@@ -580,17 +970,16 @@ class CustomCollector : public BaseCollector {
580
970
  }
581
971
 
582
972
  VALUE build_collector_result() {
583
- VALUE result = rb_obj_alloc(rb_cVernierResult);
973
+ VALUE result = BaseCollector::build_collector_result();
584
974
 
585
- VALUE samples = rb_ary_new();
586
- rb_ivar_set(result, rb_intern("@samples"), samples);
587
- VALUE weights = rb_ary_new();
588
- rb_ivar_set(result, rb_intern("@weights"), weights);
975
+ VALUE threads = rb_hash_new();
976
+ rb_ivar_set(result, rb_intern("@threads"), threads);
589
977
 
590
- for (auto& stack_index: this->samples) {
591
- rb_ary_push(samples, INT2NUM(stack_index));
592
- rb_ary_push(weights, INT2NUM(1));
593
- }
978
+ VALUE thread_hash = rb_hash_new();
979
+ samples.write_result(thread_hash);
980
+
981
+ rb_hash_aset(threads, ULL2NUM(0), thread_hash);
982
+ rb_hash_aset(thread_hash, sym("tid"), ULL2NUM(0));
594
983
 
595
984
  frame_list.write_result(result);
596
985
 
@@ -623,16 +1012,18 @@ class RetainedCollector : public BaseCollector {
623
1012
 
624
1013
  static void newobj_i(VALUE tpval, void *data) {
625
1014
  RetainedCollector *collector = static_cast<RetainedCollector *>(data);
626
- TraceArg tp(tpval);
1015
+ rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval);
1016
+ VALUE obj = rb_tracearg_object(tparg);
627
1017
 
628
- collector->record(tp.obj);
1018
+ collector->record(obj);
629
1019
  }
630
1020
 
631
1021
  static void freeobj_i(VALUE tpval, void *data) {
632
1022
  RetainedCollector *collector = static_cast<RetainedCollector *>(data);
633
- TraceArg tp(tpval);
1023
+ rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval);
1024
+ VALUE obj = rb_tracearg_object(tparg);
634
1025
 
635
- collector->object_frames.erase(tp.obj);
1026
+ collector->object_frames.erase(obj);
636
1027
  }
637
1028
 
638
1029
  public:
@@ -687,12 +1078,18 @@ class RetainedCollector : public BaseCollector {
687
1078
  RetainedCollector *collector = this;
688
1079
  FrameList &frame_list = collector->frame_list;
689
1080
 
690
- VALUE result = rb_obj_alloc(rb_cVernierResult);
1081
+ VALUE result = BaseCollector::build_collector_result();
691
1082
 
1083
+ VALUE threads = rb_hash_new();
1084
+ rb_ivar_set(result, rb_intern("@threads"), threads);
1085
+ VALUE thread_hash = rb_hash_new();
1086
+ rb_hash_aset(threads, ULL2NUM(0), thread_hash);
1087
+
1088
+ rb_hash_aset(thread_hash, sym("tid"), ULL2NUM(0));
692
1089
  VALUE samples = rb_ary_new();
693
- rb_ivar_set(result, rb_intern("@samples"), samples);
1090
+ rb_hash_aset(thread_hash, sym("samples"), samples);
694
1091
  VALUE weights = rb_ary_new();
695
- rb_ivar_set(result, rb_intern("@weights"), weights);
1092
+ rb_hash_aset(thread_hash, sym("weights"), weights);
696
1093
 
697
1094
  for (auto& obj: collector->object_list) {
698
1095
  const auto search = collector->object_frames.find(obj);
@@ -721,162 +1118,68 @@ class RetainedCollector : public BaseCollector {
721
1118
  }
722
1119
  };
723
1120
 
724
- typedef uint64_t native_thread_id_t;
1121
+ class GlobalSignalHandler {
1122
+ static LiveSample *live_sample;
725
1123
 
726
- class Thread {
727
1124
  public:
728
- static native_thread_id_t get_native_thread_id() {
729
- #ifdef __APPLE__
730
- uint64_t thread_id;
731
- int e = pthread_threadid_np(pthread_self(), &thread_id);
732
- if (e != 0) rb_syserr_fail(e, "pthread_threadid_np");
733
- return thread_id;
734
- #else
735
- // gettid() is only available as of glibc 2.30
736
- pid_t tid = syscall(SYS_gettid);
737
- return tid;
738
- #endif
739
- }
740
-
741
- enum State {
742
- STARTED,
743
- RUNNING,
744
- SUSPENDED,
745
- STOPPED
746
- };
747
-
748
- pthread_t pthread_id;
749
- native_thread_id_t native_tid;
750
- State state;
751
-
752
- TimeStamp state_changed_at;
753
- TimeStamp started_at;
754
- TimeStamp stopped_at;
755
-
756
- RawSample stack_on_suspend;
757
-
758
- std::string name;
759
-
760
- Thread(State state) : state(state) {
761
- pthread_id = pthread_self();
762
- native_tid = get_native_thread_id();
763
- started_at = state_changed_at = TimeStamp::Now();
1125
+ static GlobalSignalHandler *get_instance() {
1126
+ static GlobalSignalHandler instance;
1127
+ return &instance;
764
1128
  }
765
1129
 
766
- void set_state(State new_state) {
767
- if (state == Thread::State::STOPPED) {
768
- return;
769
- }
770
-
771
- auto now = TimeStamp::Now();
772
-
773
- state = new_state;
774
- state_changed_at = now;
775
- if (new_state == State::STARTED) {
776
- if (started_at.zero()) {
777
- started_at = now;
778
- }
779
- } else if (new_state == State::STOPPED) {
780
- stopped_at = now;
1130
+ void install() {
1131
+ const std::lock_guard<std::mutex> lock(mutex);
1132
+ count++;
781
1133
 
782
- capture_name();
783
- }
1134
+ if (count == 1) setup_signal_handler();
784
1135
  }
785
1136
 
786
- bool running() {
787
- return state != State::STOPPED;
788
- }
1137
+ void uninstall() {
1138
+ const std::lock_guard<std::mutex> lock(mutex);
1139
+ count--;
789
1140
 
790
- void capture_name() {
791
- char buf[128];
792
- int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
793
- if (rc == 0)
794
- name = std::string(buf);
1141
+ if (count == 0) clear_signal_handler();
795
1142
  }
796
- };
797
-
798
- class Marker {
799
- public:
800
- enum Type {
801
- MARKER_GVL_THREAD_STARTED,
802
- MARKER_GVL_THREAD_READY,
803
- MARKER_GVL_THREAD_RESUMED,
804
- MARKER_GVL_THREAD_SUSPENDED,
805
- MARKER_GVL_THREAD_EXITED,
806
1143
 
807
- MARKER_GC_START,
808
- MARKER_GC_END_MARK,
809
- MARKER_GC_END_SWEEP,
810
- MARKER_GC_ENTER,
811
- MARKER_GC_EXIT,
812
-
813
- MARKER_MAX,
814
- };
815
- Type type;
816
- TimeStamp timestamp;
817
- native_thread_id_t thread_id;
818
- };
819
-
820
- class MarkerTable {
821
- public:
822
- std::vector<Marker> list;
823
- std::mutex mutex;
824
-
825
- void record(Marker::Type type) {
1144
+ void record_sample(LiveSample &sample, pthread_t pthread_id) {
826
1145
  const std::lock_guard<std::mutex> lock(mutex);
827
1146
 
828
- list.push_back({ type, TimeStamp::Now(), Thread::get_native_thread_id() });
1147
+ live_sample = &sample;
1148
+ if (pthread_kill(pthread_id, SIGPROF)) {
1149
+ rb_bug("pthread_kill failed");
1150
+ }
1151
+ sample.wait();
1152
+ live_sample = NULL;
829
1153
  }
830
- };
831
1154
 
832
- extern "C" int ruby_thread_has_gvl_p(void);
833
-
834
- class ThreadTable {
835
- public:
836
- std::vector<Thread> list;
1155
+ private:
837
1156
  std::mutex mutex;
1157
+ int count;
838
1158
 
839
- void started() {
840
- //const std::lock_guard<std::mutex> lock(mutex);
841
-
842
- //list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
843
- set_state(Thread::State::STARTED);
1159
+ static void signal_handler(int sig, siginfo_t* sinfo, void* ucontext) {
1160
+ assert(live_sample);
1161
+ live_sample->sample_current_thread();
844
1162
  }
845
1163
 
846
- void set_state(Thread::State new_state) {
847
- const std::lock_guard<std::mutex> lock(mutex);
848
-
849
- pthread_t current_thread = pthread_self();
850
- //cerr << "set state=" << new_state << " thread=" << gettid() << endl;
851
-
852
- for (auto &thread : list) {
853
- if (pthread_equal(current_thread, thread.pthread_id)) {
854
- thread.set_state(new_state);
855
-
856
- if (new_state == Thread::State::SUSPENDED) {
857
- thread.stack_on_suspend.sample();
858
- //cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
859
- }
860
- return;
861
- }
862
- }
863
-
864
- pid_t native_tid = Thread::get_native_thread_id();
865
- list.emplace_back(new_state);
1164
+ void setup_signal_handler() {
1165
+ struct sigaction sa;
1166
+ sa.sa_sigaction = signal_handler;
1167
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
1168
+ sigemptyset(&sa.sa_mask);
1169
+ sigaction(SIGPROF, &sa, NULL);
866
1170
  }
867
- };
868
1171
 
869
- enum Category{
870
- CATEGORY_NORMAL,
871
- CATEGORY_IDLE
1172
+ void clear_signal_handler() {
1173
+ struct sigaction sa;
1174
+ sa.sa_handler = SIG_IGN;
1175
+ sa.sa_flags = SA_RESTART;
1176
+ sigemptyset(&sa.sa_mask);
1177
+ sigaction(SIGPROF, &sa, NULL);
1178
+ }
872
1179
  };
1180
+ LiveSample *GlobalSignalHandler::live_sample;
873
1181
 
874
1182
  class TimeCollector : public BaseCollector {
875
- std::vector<int> samples;
876
- std::vector<TimeStamp> timestamps;
877
- std::vector<native_thread_id_t> sample_threads;
878
- std::vector<Category> sample_categories;
879
-
880
1183
  MarkerTable markers;
881
1184
  ThreadTable threads;
882
1185
 
@@ -885,41 +1188,31 @@ class TimeCollector : public BaseCollector {
885
1188
  atomic_bool running;
886
1189
  SamplerSemaphore thread_stopped;
887
1190
 
888
- static LiveSample *live_sample;
889
-
890
- TimeStamp started_at;
891
1191
  TimeStamp interval;
892
1192
 
893
1193
  public:
894
- TimeCollector(TimeStamp interval) : interval(interval) {
1194
+ TimeCollector(TimeStamp interval) : interval(interval), threads(frame_list) {
895
1195
  }
896
1196
 
897
1197
  private:
898
1198
 
899
- void record_sample(const RawSample &sample, TimeStamp time, const Thread &thread, Category category) {
1199
+ void record_sample(const RawSample &sample, TimeStamp time, Thread &thread, Category category) {
900
1200
  if (!sample.empty()) {
901
- int stack_index = frame_list.stack_index(sample);
902
- samples.push_back(stack_index);
903
- timestamps.push_back(time);
904
- sample_threads.push_back(thread.native_tid);
905
- sample_categories.push_back(category);
1201
+ int stack_index = thread.translator.translate(frame_list, sample);
1202
+ thread.samples.record_sample(
1203
+ stack_index,
1204
+ time,
1205
+ thread.native_tid,
1206
+ category
1207
+ );
906
1208
  }
907
1209
  }
908
1210
 
909
- static void signal_handler(int sig, siginfo_t* sinfo, void* ucontext) {
910
- assert(live_sample);
911
- live_sample->sample_current_thread();
912
- }
913
-
914
1211
  VALUE get_markers() {
915
- VALUE list = rb_ary_new();
1212
+ VALUE list = rb_ary_new2(this->markers.list.size());
916
1213
 
917
1214
  for (auto& marker: this->markers.list) {
918
- VALUE record[3] = {0};
919
- record[0] = ULL2NUM(marker.thread_id);
920
- record[1] = INT2NUM(marker.type);
921
- record[2] = ULL2NUM(marker.timestamp.nanoseconds());
922
- rb_ary_push(list, rb_ary_new_from_values(3, record));
1215
+ rb_ary_push(list, marker.to_array());
923
1216
  }
924
1217
 
925
1218
  return list;
@@ -927,20 +1220,16 @@ class TimeCollector : public BaseCollector {
927
1220
 
928
1221
  void sample_thread_run() {
929
1222
  LiveSample sample;
930
- live_sample = &sample;
931
1223
 
932
1224
  TimeStamp next_sample_schedule = TimeStamp::Now();
933
1225
  while (running) {
934
1226
  TimeStamp sample_start = TimeStamp::Now();
935
1227
 
936
1228
  threads.mutex.lock();
937
- for (auto thread : threads.list) {
1229
+ for (auto &thread : threads.list) {
938
1230
  //if (thread.state == Thread::State::RUNNING) {
939
- if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend.size() == 0)) {
940
- if (pthread_kill(thread.pthread_id, SIGPROF)) {
941
- rb_bug("pthread_kill failed");
942
- }
943
- sample.wait();
1231
+ if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
1232
+ GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
944
1233
 
945
1234
  if (sample.sample.gc) {
946
1235
  // fprintf(stderr, "skipping GC sample\n");
@@ -948,10 +1237,15 @@ class TimeCollector : public BaseCollector {
948
1237
  record_sample(sample.sample, sample_start, thread, CATEGORY_NORMAL);
949
1238
  }
950
1239
  } else if (thread.state == Thread::State::SUSPENDED) {
951
- record_sample(thread.stack_on_suspend, sample_start, thread, CATEGORY_IDLE);
1240
+ thread.samples.record_sample(
1241
+ thread.stack_on_suspend_idx,
1242
+ sample_start,
1243
+ thread.native_tid,
1244
+ CATEGORY_IDLE);
952
1245
  } else {
953
1246
  }
954
1247
  }
1248
+
955
1249
  threads.mutex.unlock();
956
1250
 
957
1251
  TimeStamp sample_complete = TimeStamp::Now();
@@ -967,8 +1261,6 @@ class TimeCollector : public BaseCollector {
967
1261
  TimeStamp::Sleep(sleep_time);
968
1262
  }
969
1263
 
970
- live_sample = NULL;
971
-
972
1264
  thread_stopped.post();
973
1265
  }
974
1266
 
@@ -978,10 +1270,21 @@ class TimeCollector : public BaseCollector {
978
1270
  return NULL;
979
1271
  }
980
1272
 
981
- static void internal_gc_event_cb(VALUE tpval, void *data) {
982
- TimeCollector *collector = static_cast<TimeCollector *>(data);
983
- rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval);
984
- int event = rb_tracearg_event_flag(tparg);
1273
+ static void internal_thread_event_cb(rb_event_flag_t event, VALUE data, VALUE self, ID mid, VALUE klass) {
1274
+ TimeCollector *collector = static_cast<TimeCollector *>((void *)NUM2ULL(data));
1275
+
1276
+ switch (event) {
1277
+ case RUBY_EVENT_THREAD_BEGIN:
1278
+ collector->threads.started(&collector->markers);
1279
+ break;
1280
+ case RUBY_EVENT_THREAD_END:
1281
+ collector->threads.stopped(&collector->markers);
1282
+ break;
1283
+ }
1284
+ }
1285
+
1286
+ static void internal_gc_event_cb(rb_event_flag_t event, VALUE data, VALUE self, ID mid, VALUE klass) {
1287
+ TimeCollector *collector = static_cast<TimeCollector *>((void *)NUM2ULL(data));
985
1288
 
986
1289
  switch (event) {
987
1290
  case RUBY_INTERNAL_EVENT_GC_START:
@@ -994,10 +1297,10 @@ class TimeCollector : public BaseCollector {
994
1297
  collector->markers.record(Marker::Type::MARKER_GC_END_SWEEP);
995
1298
  break;
996
1299
  case RUBY_INTERNAL_EVENT_GC_ENTER:
997
- collector->markers.record(Marker::Type::MARKER_GC_ENTER);
1300
+ collector->markers.record_gc_entered();
998
1301
  break;
999
1302
  case RUBY_INTERNAL_EVENT_GC_EXIT:
1000
- collector->markers.record(Marker::Type::MARKER_GC_EXIT);
1303
+ collector->markers.record_gc_leave();
1001
1304
  break;
1002
1305
  }
1003
1306
  }
@@ -1007,44 +1310,27 @@ class TimeCollector : public BaseCollector {
1007
1310
  //cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
1008
1311
 
1009
1312
  switch (event) {
1010
- case RUBY_INTERNAL_THREAD_EVENT_STARTED:
1011
- collector->markers.record(Marker::Type::MARKER_GVL_THREAD_STARTED);
1012
- collector->threads.started();
1013
- break;
1014
1313
  case RUBY_INTERNAL_THREAD_EVENT_READY:
1015
- collector->markers.record(Marker::Type::MARKER_GVL_THREAD_READY);
1314
+ collector->threads.ready(&collector->markers);
1016
1315
  break;
1017
1316
  case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
1018
- collector->markers.record(Marker::Type::MARKER_GVL_THREAD_RESUMED);
1019
- collector->threads.set_state(Thread::State::RUNNING);
1317
+ collector->threads.resumed(&collector->markers);
1020
1318
  break;
1021
1319
  case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
1022
- collector->markers.record(Marker::Type::MARKER_GVL_THREAD_SUSPENDED);
1023
- collector->threads.set_state(Thread::State::SUSPENDED);
1024
- break;
1025
- case RUBY_INTERNAL_THREAD_EVENT_EXITED:
1026
- collector->markers.record(Marker::Type::MARKER_GVL_THREAD_EXITED);
1027
- collector->threads.set_state(Thread::State::STOPPED);
1320
+ collector->threads.suspended(&collector->markers);
1028
1321
  break;
1029
1322
 
1030
1323
  }
1031
1324
  }
1032
1325
 
1033
1326
  rb_internal_thread_event_hook_t *thread_hook;
1034
- VALUE gc_hook;
1035
1327
 
1036
1328
  bool start() {
1037
1329
  if (!BaseCollector::start()) {
1038
1330
  return false;
1039
1331
  }
1040
1332
 
1041
- started_at = TimeStamp::Now();
1042
-
1043
- struct sigaction sa;
1044
- sa.sa_sigaction = signal_handler;
1045
- sa.sa_flags = SA_RESTART | SA_SIGINFO;
1046
- sigemptyset(&sa.sa_mask);
1047
- sigaction(SIGPROF, &sa, NULL);
1333
+ GlobalSignalHandler::get_instance()->install();
1048
1334
 
1049
1335
  running = true;
1050
1336
 
@@ -1054,15 +1340,16 @@ class TimeCollector : public BaseCollector {
1054
1340
  rb_bug("pthread_create");
1055
1341
  }
1056
1342
 
1057
- // Set the state of the current Ruby thread to RUNNING.
1058
- // We want to have at least one thread in our thread list because it's
1059
- // possible that the profile might be such that we don't get any
1060
- // thread switch events and we need at least one
1061
- this->threads.set_state(Thread::State::RUNNING);
1343
+ // Set the state of the current Ruby thread to RUNNING, which we know it
1344
+ // is as it must have held the GVL to start the collector. We want to
1345
+ // have at least one thread in our thread list because it's possible
1346
+ // that the profile might be such that we don't get any thread switch
1347
+ // events and we need at least one
1348
+ this->threads.resumed(&this->markers);
1062
1349
 
1063
1350
  thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
1064
- gc_hook = rb_tracepoint_new(0, RUBY_GC_PHASE_EVENTS, internal_gc_event_cb, (void *)this);
1065
- rb_tracepoint_enable(gc_hook);
1351
+ rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
1352
+ rb_add_event_hook(internal_thread_event_cb, RUBY_NORMAL_EVENTS, PTR2NUM((void *)this));
1066
1353
 
1067
1354
  return true;
1068
1355
  }
@@ -1073,14 +1360,11 @@ class TimeCollector : public BaseCollector {
1073
1360
  running = false;
1074
1361
  thread_stopped.wait();
1075
1362
 
1076
- struct sigaction sa;
1077
- sa.sa_handler = SIG_IGN;
1078
- sa.sa_flags = SA_RESTART;
1079
- sigemptyset(&sa.sa_mask);
1080
- sigaction(SIGPROF, &sa, NULL);
1363
+ GlobalSignalHandler::get_instance()->uninstall();
1081
1364
 
1082
1365
  rb_internal_thread_remove_event_hook(thread_hook);
1083
- rb_tracepoint_disable(gc_hook);
1366
+ rb_remove_event_hook(internal_gc_event_cb);
1367
+ rb_remove_event_hook(internal_thread_event_cb);
1084
1368
 
1085
1369
  // capture thread names
1086
1370
  for (auto& thread: this->threads.list) {
@@ -1099,45 +1383,15 @@ class TimeCollector : public BaseCollector {
1099
1383
  }
1100
1384
 
1101
1385
  VALUE build_collector_result() {
1102
- VALUE result = rb_obj_alloc(rb_cVernierResult);
1103
-
1104
- VALUE meta = rb_hash_new();
1105
- rb_ivar_set(result, rb_intern("@meta"), meta);
1106
- rb_hash_aset(meta, sym("started_at"), ULL2NUM(started_at.nanoseconds()));
1107
-
1108
- VALUE samples = rb_ary_new();
1109
- rb_ivar_set(result, rb_intern("@samples"), samples);
1110
- VALUE weights = rb_ary_new();
1111
- rb_ivar_set(result, rb_intern("@weights"), weights);
1112
- for (auto& stack_index: this->samples) {
1113
- rb_ary_push(samples, INT2NUM(stack_index));
1114
- rb_ary_push(weights, INT2NUM(1));
1115
- }
1116
-
1117
- VALUE timestamps = rb_ary_new();
1118
- rb_ivar_set(result, rb_intern("@timestamps"), timestamps);
1119
-
1120
- for (auto& timestamp: this->timestamps) {
1121
- rb_ary_push(timestamps, ULL2NUM(timestamp.nanoseconds()));
1122
- }
1123
-
1124
- VALUE sample_threads = rb_ary_new();
1125
- rb_ivar_set(result, rb_intern("@sample_threads"), sample_threads);
1126
- for (auto& thread: this->sample_threads) {
1127
- rb_ary_push(sample_threads, ULL2NUM(thread));
1128
- }
1129
-
1130
- VALUE sample_categories = rb_ary_new();
1131
- rb_ivar_set(result, rb_intern("@sample_categories"), sample_categories);
1132
- for (auto& cat: this->sample_categories) {
1133
- rb_ary_push(sample_categories, INT2NUM(cat));
1134
- }
1386
+ VALUE result = BaseCollector::build_collector_result();
1135
1387
 
1136
1388
  VALUE threads = rb_hash_new();
1137
1389
  rb_ivar_set(result, rb_intern("@threads"), threads);
1138
1390
 
1139
1391
  for (const auto& thread: this->threads.list) {
1140
1392
  VALUE hash = rb_hash_new();
1393
+ thread.samples.write_result(hash);
1394
+
1141
1395
  rb_hash_aset(threads, ULL2NUM(thread.native_tid), hash);
1142
1396
  rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
1143
1397
  rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
@@ -1155,7 +1409,6 @@ class TimeCollector : public BaseCollector {
1155
1409
 
1156
1410
  void mark() {
1157
1411
  frame_list.mark_frames();
1158
- rb_gc_mark(gc_hook);
1159
1412
 
1160
1413
  //for (int i = 0; i < queued_length; i++) {
1161
1414
  // rb_gc_mark(queued_frames[i]);
@@ -1165,8 +1418,6 @@ class TimeCollector : public BaseCollector {
1165
1418
  }
1166
1419
  };
1167
1420
 
1168
- LiveSample *TimeCollector::live_sample;
1169
-
1170
1421
  static void
1171
1422
  collector_mark(void *data) {
1172
1423
  BaseCollector *collector = static_cast<BaseCollector *>(data);
@@ -1252,14 +1503,11 @@ static VALUE collector_new(VALUE self, VALUE mode, VALUE options) {
1252
1503
  }
1253
1504
 
1254
1505
  static void
1255
- Init_consts() {
1506
+ Init_consts(VALUE rb_mVernierMarkerPhase) {
1256
1507
  #define MARKER_CONST(name) \
1257
1508
  rb_define_const(rb_mVernierMarkerType, #name, INT2NUM(Marker::Type::MARKER_##name))
1258
1509
 
1259
1510
  MARKER_CONST(GVL_THREAD_STARTED);
1260
- MARKER_CONST(GVL_THREAD_READY);
1261
- MARKER_CONST(GVL_THREAD_RESUMED);
1262
- MARKER_CONST(GVL_THREAD_SUSPENDED);
1263
1511
  MARKER_CONST(GVL_THREAD_EXITED);
1264
1512
 
1265
1513
  MARKER_CONST(GC_START);
@@ -1267,8 +1515,22 @@ Init_consts() {
1267
1515
  MARKER_CONST(GC_END_SWEEP);
1268
1516
  MARKER_CONST(GC_ENTER);
1269
1517
  MARKER_CONST(GC_EXIT);
1518
+ MARKER_CONST(GC_PAUSE);
1519
+
1520
+ MARKER_CONST(THREAD_RUNNING);
1521
+ MARKER_CONST(THREAD_STALLED);
1522
+ MARKER_CONST(THREAD_SUSPENDED);
1270
1523
 
1271
1524
  #undef MARKER_CONST
1525
+
1526
+ #define PHASE_CONST(name) \
1527
+ rb_define_const(rb_mVernierMarkerPhase, #name, INT2NUM(Marker::Phase::name))
1528
+
1529
+ PHASE_CONST(INSTANT);
1530
+ PHASE_CONST(INTERVAL);
1531
+ PHASE_CONST(INTERVAL_START);
1532
+ PHASE_CONST(INTERVAL_END);
1533
+ #undef PHASE_CONST
1272
1534
  }
1273
1535
 
1274
1536
  extern "C" void
@@ -1277,6 +1539,7 @@ Init_vernier(void)
1277
1539
  rb_mVernier = rb_define_module("Vernier");
1278
1540
  rb_cVernierResult = rb_define_class_under(rb_mVernier, "Result", rb_cObject);
1279
1541
  VALUE rb_mVernierMarker = rb_define_module_under(rb_mVernier, "Marker");
1542
+ VALUE rb_mVernierMarkerPhase = rb_define_module_under(rb_mVernierMarker, "Phase");
1280
1543
  rb_mVernierMarkerType = rb_define_module_under(rb_mVernierMarker, "Type");
1281
1544
 
1282
1545
  rb_cVernierCollector = rb_define_class_under(rb_mVernier, "Collector", rb_cObject);
@@ -1287,7 +1550,7 @@ Init_vernier(void)
1287
1550
  rb_define_private_method(rb_cVernierCollector, "finish", collector_stop, 0);
1288
1551
  rb_define_private_method(rb_cVernierCollector, "markers", markers, 0);
1289
1552
 
1290
- Init_consts();
1553
+ Init_consts(rb_mVernierMarkerPhase);
1291
1554
 
1292
1555
  //static VALUE gc_hook = Data_Wrap_Struct(rb_cObject, collector_mark, NULL, &_collector);
1293
1556
  //rb_global_variable(&gc_hook);