vernier 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -1
- data/ext/vernier/extconf.rb +7 -0
- data/ext/vernier/vernier.cc +194 -78
- data/lib/vernier/collector.rb +2 -2
- data/lib/vernier/output/firefox.rb +26 -5
- data/lib/vernier/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f552d6fe2d529de743412cbc9975bfbd4ac87b894777a44ce26ef07dddc8e032
|
|
4
|
+
data.tar.gz: b9ddcd3e4ce0acb5ac53363e041d1bd9e3a2d4c5bf21b2bbe56c48ed5fef4cdd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1b0808ee6ae8e64866e81e7ba8ed4847788421a00517c00e7aacb54c2fdff16287f92eb10132b5802fcb93955c3e4cf1a8fe4cfc97f4a9742a8130341bea75f7
|
|
7
|
+
data.tar.gz: 686a7397043be44451cccf9380473cda1e350ee342878ca2428d8bcd6c69aeea36244d15c92e3f998d4b6245c46c37d74ac3aac18ed58c783c288501d0cf7243
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Vernier
|
|
2
2
|
|
|
3
|
-
Next-generation Ruby sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
|
|
3
|
+
Next-generation Ruby 3.2.1+ sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
|
|
4
4
|
|
|
5
5
|
## Examples
|
|
6
6
|
|
|
@@ -20,6 +20,8 @@ Rails benchmark - lobste.rs (time)
|
|
|
20
20
|
|
|
21
21
|
## Installation
|
|
22
22
|
|
|
23
|
+
Vernier requires Ruby version 3.2.1 or greater
|
|
24
|
+
|
|
23
25
|
```ruby
|
|
24
26
|
gem 'vernier'
|
|
25
27
|
```
|
|
@@ -35,6 +37,9 @@ Vernier.trace(out: "time_profile.json") { some_slow_method }
|
|
|
35
37
|
|
|
36
38
|
The output can then be viewed in the Firefox Profiler (demo) or the [`profile-viewer` gem](https://github.com/tenderlove/profiler/tree/ruby) (a Ruby-customized version of the Firefox Profiler).
|
|
37
39
|
|
|
40
|
+
- **Flame Graph**: Shows proportionally how much time is spent within particular stack frames. Frames are grouped together, which means that x-axis / left-to-right order is not meaningful.
|
|
41
|
+
- **Stack Chart**: Shows the stack at each sample with the x-axis representing time and can be read left-to-right.
|
|
42
|
+
|
|
38
43
|
### Retained memory
|
|
39
44
|
|
|
40
45
|
Record a flamegraph of all **retained** allocations from loading `irb`.
|
|
@@ -43,6 +48,7 @@ Record a flamegraph of all **retained** allocations from loading `irb`.
|
|
|
43
48
|
ruby -r vernier -e 'Vernier.trace_retained(out: "irb_profile.json") { require "irb" }'
|
|
44
49
|
```
|
|
45
50
|
|
|
51
|
+
Retained-memory flamegraphs must be interpreted a little differently than a typical profiling flamegraph. In a retained-memory flamegraph, the x-axis represents a proportion of memory in bytes, _not time or samples_. The topmost boxes on the y-axis represent the retained objects, with their stacktrace below; their width represents the percentage of overall retained memory each object occupies.
|
|
46
52
|
|
|
47
53
|
## Development
|
|
48
54
|
|
data/ext/vernier/extconf.rb
CHANGED
|
@@ -5,4 +5,11 @@ require "mkmf"
|
|
|
5
5
|
$CXXFLAGS += " -std=c++14 "
|
|
6
6
|
$CXXFLAGS += " -ggdb3 -Og "
|
|
7
7
|
|
|
8
|
+
have_header("ruby/thread.h")
|
|
9
|
+
have_struct_member("rb_internal_thread_event_data_t", "thread", ["ruby/thread.h"])
|
|
10
|
+
|
|
11
|
+
have_func("rb_profile_thread_frames", "ruby/debug.h")
|
|
12
|
+
|
|
13
|
+
have_func("pthread_setname_np")
|
|
14
|
+
|
|
8
15
|
create_makefile("vernier/vernier")
|
data/ext/vernier/vernier.cc
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
// vim: expandtab:ts=4:sw=4
|
|
2
|
+
|
|
1
3
|
#include <iostream>
|
|
2
4
|
#include <iomanip>
|
|
3
5
|
#include <vector>
|
|
@@ -27,6 +29,9 @@
|
|
|
27
29
|
#include "ruby/debug.h"
|
|
28
30
|
#include "ruby/thread.h"
|
|
29
31
|
|
|
32
|
+
#undef assert
|
|
33
|
+
#define assert RUBY_ASSERT_ALWAYS
|
|
34
|
+
|
|
30
35
|
# define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
|
|
31
36
|
|
|
32
37
|
// Internal TracePoint events we'll monitor during profiling
|
|
@@ -53,6 +58,22 @@ static VALUE rb_cVernierResult;
|
|
|
53
58
|
static VALUE rb_mVernierMarkerType;
|
|
54
59
|
static VALUE rb_cVernierCollector;
|
|
55
60
|
|
|
61
|
+
static const char *gvl_event_name(rb_event_flag_t event) {
|
|
62
|
+
switch (event) {
|
|
63
|
+
case RUBY_INTERNAL_THREAD_EVENT_STARTED:
|
|
64
|
+
return "started";
|
|
65
|
+
case RUBY_INTERNAL_THREAD_EVENT_READY:
|
|
66
|
+
return "ready";
|
|
67
|
+
case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
|
|
68
|
+
return "resumed";
|
|
69
|
+
case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
|
|
70
|
+
return "suspended";
|
|
71
|
+
case RUBY_INTERNAL_THREAD_EVENT_EXITED:
|
|
72
|
+
return "exited";
|
|
73
|
+
}
|
|
74
|
+
return "no-event";
|
|
75
|
+
}
|
|
76
|
+
|
|
56
77
|
class TimeStamp {
|
|
57
78
|
static const uint64_t nanoseconds_per_second = 1000000000;
|
|
58
79
|
uint64_t value_ns;
|
|
@@ -85,8 +106,16 @@ class TimeStamp {
|
|
|
85
106
|
} while (target_time > TimeStamp::Now());
|
|
86
107
|
}
|
|
87
108
|
|
|
109
|
+
static TimeStamp from_seconds(uint64_t s) {
|
|
110
|
+
return TimeStamp::from_milliseconds(s * 1000);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
static TimeStamp from_milliseconds(uint64_t ms) {
|
|
114
|
+
return TimeStamp::from_microseconds(ms * 1000);
|
|
115
|
+
}
|
|
116
|
+
|
|
88
117
|
static TimeStamp from_microseconds(uint64_t us) {
|
|
89
|
-
return TimeStamp(us * 1000);
|
|
118
|
+
return TimeStamp::from_nanoseconds(us * 1000);
|
|
90
119
|
}
|
|
91
120
|
|
|
92
121
|
static TimeStamp from_nanoseconds(uint64_t ns) {
|
|
@@ -266,6 +295,10 @@ class SamplerSemaphore {
|
|
|
266
295
|
#ifdef __APPLE__
|
|
267
296
|
dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
|
|
268
297
|
#else
|
|
298
|
+
// Use sem_timedwait so that we get a crash instead of a deadlock for
|
|
299
|
+
// easier debugging
|
|
300
|
+
auto ts = (TimeStamp::Now() + TimeStamp::from_seconds(5)).timespec();
|
|
301
|
+
|
|
269
302
|
int ret;
|
|
270
303
|
do {
|
|
271
304
|
ret = sem_wait(&sem);
|
|
@@ -304,16 +337,15 @@ struct RawSample {
|
|
|
304
337
|
}
|
|
305
338
|
|
|
306
339
|
void sample() {
|
|
340
|
+
clear();
|
|
341
|
+
|
|
307
342
|
if (!ruby_native_thread_p()) {
|
|
308
|
-
clear();
|
|
309
343
|
return;
|
|
310
344
|
}
|
|
311
345
|
|
|
312
346
|
if (rb_during_gc()) {
|
|
313
347
|
gc = true;
|
|
314
|
-
len = 0;
|
|
315
348
|
} else {
|
|
316
|
-
gc = false;
|
|
317
349
|
len = rb_profile_frames(0, MAX_LEN, frames, lines);
|
|
318
350
|
}
|
|
319
351
|
}
|
|
@@ -602,12 +634,13 @@ class Marker {
|
|
|
602
634
|
Phase phase;
|
|
603
635
|
TimeStamp timestamp;
|
|
604
636
|
TimeStamp finish;
|
|
605
|
-
|
|
637
|
+
// VALUE ruby_thread_id;
|
|
638
|
+
//native_thread_id_t thread_id;
|
|
606
639
|
int stack_index = -1;
|
|
607
640
|
|
|
608
641
|
VALUE to_array() {
|
|
609
642
|
VALUE record[6] = {0};
|
|
610
|
-
record[0] =
|
|
643
|
+
record[0] = Qnil; // FIXME
|
|
611
644
|
record[1] = INT2NUM(type);
|
|
612
645
|
record[2] = INT2NUM(phase);
|
|
613
646
|
record[3] = ULL2NUM(timestamp.nanoseconds());
|
|
@@ -625,30 +658,33 @@ class Marker {
|
|
|
625
658
|
};
|
|
626
659
|
|
|
627
660
|
class MarkerTable {
|
|
628
|
-
TimeStamp last_gc_entry;
|
|
629
|
-
|
|
630
661
|
public:
|
|
631
662
|
std::vector<Marker> list;
|
|
632
663
|
std::mutex mutex;
|
|
633
664
|
|
|
634
|
-
void record_gc_entered() {
|
|
635
|
-
last_gc_entry = TimeStamp::Now();
|
|
636
|
-
}
|
|
637
|
-
|
|
638
|
-
void record_gc_leave() {
|
|
639
|
-
list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), get_native_thread_id(), -1 });
|
|
640
|
-
}
|
|
641
|
-
|
|
642
665
|
void record_interval(Marker::Type type, TimeStamp from, TimeStamp to, int stack_index = -1) {
|
|
643
666
|
const std::lock_guard<std::mutex> lock(mutex);
|
|
644
667
|
|
|
645
|
-
list.push_back({ type, Marker::INTERVAL, from, to,
|
|
668
|
+
list.push_back({ type, Marker::INTERVAL, from, to, stack_index });
|
|
646
669
|
}
|
|
647
670
|
|
|
648
671
|
void record(Marker::Type type, int stack_index = -1) {
|
|
649
672
|
const std::lock_guard<std::mutex> lock(mutex);
|
|
650
673
|
|
|
651
|
-
list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(),
|
|
674
|
+
list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), stack_index });
|
|
675
|
+
}
|
|
676
|
+
};
|
|
677
|
+
|
|
678
|
+
class GCMarkerTable: public MarkerTable {
|
|
679
|
+
TimeStamp last_gc_entry;
|
|
680
|
+
|
|
681
|
+
public:
|
|
682
|
+
void record_gc_entered() {
|
|
683
|
+
last_gc_entry = TimeStamp::Now();
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
void record_gc_leave() {
|
|
687
|
+
list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), -1 });
|
|
652
688
|
}
|
|
653
689
|
};
|
|
654
690
|
|
|
@@ -731,6 +767,8 @@ class Thread {
|
|
|
731
767
|
STOPPED
|
|
732
768
|
};
|
|
733
769
|
|
|
770
|
+
VALUE ruby_thread;
|
|
771
|
+
VALUE ruby_thread_id;
|
|
734
772
|
pthread_t pthread_id;
|
|
735
773
|
native_thread_id_t native_tid;
|
|
736
774
|
State state;
|
|
@@ -742,18 +780,33 @@ class Thread {
|
|
|
742
780
|
int stack_on_suspend_idx;
|
|
743
781
|
SampleTranslator translator;
|
|
744
782
|
|
|
745
|
-
|
|
783
|
+
MarkerTable *markers;
|
|
784
|
+
|
|
785
|
+
std::string name;
|
|
746
786
|
|
|
747
|
-
|
|
748
|
-
|
|
787
|
+
// FIXME: don't use pthread at start
|
|
788
|
+
Thread(State state, pthread_t pthread_id, VALUE ruby_thread) : pthread_id(pthread_id), ruby_thread(ruby_thread), state(state), stack_on_suspend_idx(-1) {
|
|
789
|
+
name = Qnil;
|
|
790
|
+
ruby_thread_id = rb_obj_id(ruby_thread);
|
|
791
|
+
//ruby_thread_id = ULL2NUM(ruby_thread);
|
|
749
792
|
native_tid = get_native_thread_id();
|
|
750
793
|
started_at = state_changed_at = TimeStamp::Now();
|
|
794
|
+
name = "";
|
|
795
|
+
markers = new MarkerTable();
|
|
796
|
+
|
|
797
|
+
if (state == State::STARTED) {
|
|
798
|
+
markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
|
|
799
|
+
}
|
|
751
800
|
}
|
|
752
801
|
|
|
753
|
-
void set_state(State new_state
|
|
802
|
+
void set_state(State new_state) {
|
|
754
803
|
if (state == Thread::State::STOPPED) {
|
|
755
804
|
return;
|
|
756
805
|
}
|
|
806
|
+
if (new_state == Thread::State::SUSPENDED && state == new_state) {
|
|
807
|
+
// on Ruby 3.2 (only?) we may see duplicate suspended states
|
|
808
|
+
return;
|
|
809
|
+
}
|
|
757
810
|
|
|
758
811
|
TimeStamp from = state_changed_at;
|
|
759
812
|
auto now = TimeStamp::Now();
|
|
@@ -764,10 +817,13 @@ class Thread {
|
|
|
764
817
|
|
|
765
818
|
switch (new_state) {
|
|
766
819
|
case State::STARTED:
|
|
767
|
-
|
|
820
|
+
markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
|
|
821
|
+
return; // no mutation of current state
|
|
768
822
|
break;
|
|
769
823
|
case State::RUNNING:
|
|
770
|
-
assert(state == State::READY);
|
|
824
|
+
assert(state == State::READY || state == State::RUNNING);
|
|
825
|
+
pthread_id = pthread_self();
|
|
826
|
+
native_tid = get_native_thread_id();
|
|
771
827
|
|
|
772
828
|
// If the GVL is immediately ready, and we measure no times
|
|
773
829
|
// stalled, skip emitting the interval.
|
|
@@ -783,25 +839,26 @@ class Thread {
|
|
|
783
839
|
// Threads can be preempted, which means they will have been in "Running"
|
|
784
840
|
// state, and then the VM was like "no I need to stop you from working,
|
|
785
841
|
// so I'll put you in the 'ready' (or stalled) state"
|
|
786
|
-
assert(state == State::SUSPENDED || state == State::RUNNING);
|
|
842
|
+
assert(state == State::STARTED || state == State::SUSPENDED || state == State::RUNNING);
|
|
787
843
|
if (state == State::SUSPENDED) {
|
|
788
844
|
markers->record_interval(Marker::Type::MARKER_THREAD_SUSPENDED, from, now, stack_on_suspend_idx);
|
|
789
845
|
}
|
|
790
|
-
else {
|
|
846
|
+
else if (state == State::RUNNING) {
|
|
791
847
|
markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
|
|
792
848
|
}
|
|
793
849
|
break;
|
|
794
850
|
case State::SUSPENDED:
|
|
795
851
|
// We can go from RUNNING or STARTED to SUSPENDED
|
|
796
|
-
assert(state == State::RUNNING || state == State::STARTED);
|
|
852
|
+
assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
|
|
797
853
|
markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
|
|
798
854
|
break;
|
|
799
855
|
case State::STOPPED:
|
|
800
|
-
// We can go from RUNNING or STARTED to STOPPED
|
|
801
|
-
assert(state == State::RUNNING || state == State::STARTED);
|
|
856
|
+
// We can go from RUNNING or STARTED or SUSPENDED to STOPPED
|
|
857
|
+
assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
|
|
802
858
|
markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
|
|
859
|
+
markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
|
|
860
|
+
|
|
803
861
|
stopped_at = now;
|
|
804
|
-
capture_name();
|
|
805
862
|
|
|
806
863
|
break;
|
|
807
864
|
}
|
|
@@ -814,11 +871,7 @@ class Thread {
|
|
|
814
871
|
return state != State::STOPPED;
|
|
815
872
|
}
|
|
816
873
|
|
|
817
|
-
void
|
|
818
|
-
char buf[128];
|
|
819
|
-
int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
|
|
820
|
-
if (rc == 0)
|
|
821
|
-
name = std::string(buf);
|
|
874
|
+
void mark() {
|
|
822
875
|
}
|
|
823
876
|
};
|
|
824
877
|
|
|
@@ -832,40 +885,46 @@ class ThreadTable {
|
|
|
832
885
|
ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
|
|
833
886
|
}
|
|
834
887
|
|
|
835
|
-
void
|
|
836
|
-
|
|
888
|
+
void mark() {
|
|
889
|
+
for (auto &thread : list) {
|
|
890
|
+
thread.mark();
|
|
891
|
+
}
|
|
892
|
+
}
|
|
837
893
|
|
|
894
|
+
void started(VALUE th) {
|
|
838
895
|
//list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
|
|
839
|
-
|
|
840
|
-
set_state(Thread::State::STARTED, markers);
|
|
896
|
+
set_state(Thread::State::STARTED, th);
|
|
841
897
|
}
|
|
842
898
|
|
|
843
|
-
void ready(
|
|
844
|
-
set_state(Thread::State::READY,
|
|
899
|
+
void ready(VALUE th) {
|
|
900
|
+
set_state(Thread::State::READY, th);
|
|
845
901
|
}
|
|
846
902
|
|
|
847
|
-
void resumed(
|
|
848
|
-
set_state(Thread::State::RUNNING,
|
|
903
|
+
void resumed(VALUE th) {
|
|
904
|
+
set_state(Thread::State::RUNNING, th);
|
|
849
905
|
}
|
|
850
906
|
|
|
851
|
-
void suspended(
|
|
852
|
-
set_state(Thread::State::SUSPENDED,
|
|
907
|
+
void suspended(VALUE th) {
|
|
908
|
+
set_state(Thread::State::SUSPENDED, th);
|
|
853
909
|
}
|
|
854
910
|
|
|
855
|
-
void stopped(
|
|
856
|
-
|
|
857
|
-
set_state(Thread::State::STOPPED, markers);
|
|
911
|
+
void stopped(VALUE th) {
|
|
912
|
+
set_state(Thread::State::STOPPED, th);
|
|
858
913
|
}
|
|
859
914
|
|
|
860
915
|
private:
|
|
861
|
-
void set_state(Thread::State new_state,
|
|
916
|
+
void set_state(Thread::State new_state, VALUE th) {
|
|
862
917
|
const std::lock_guard<std::mutex> lock(mutex);
|
|
863
918
|
|
|
864
|
-
pthread_t current_thread = pthread_self();
|
|
865
919
|
//cerr << "set state=" << new_state << " thread=" << gettid() << endl;
|
|
866
920
|
|
|
921
|
+
pid_t native_tid = get_native_thread_id();
|
|
922
|
+
pthread_t pthread_id = pthread_self();
|
|
923
|
+
|
|
924
|
+
//fprintf(stderr, "th %p (tid: %i) from %s to %s\n", (void *)th, native_tid, gvl_event_name(state), gvl_event_name(new_state));
|
|
925
|
+
|
|
867
926
|
for (auto &thread : list) {
|
|
868
|
-
if (
|
|
927
|
+
if (thread_equal(th, thread.ruby_thread)) {
|
|
869
928
|
if (new_state == Thread::State::SUSPENDED) {
|
|
870
929
|
|
|
871
930
|
RawSample sample;
|
|
@@ -875,14 +934,27 @@ class ThreadTable {
|
|
|
875
934
|
//cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
|
|
876
935
|
}
|
|
877
936
|
|
|
878
|
-
thread.set_state(new_state
|
|
937
|
+
thread.set_state(new_state);
|
|
938
|
+
|
|
939
|
+
if (thread.state == Thread::State::RUNNING) {
|
|
940
|
+
thread.pthread_id = pthread_self();
|
|
941
|
+
thread.native_tid = get_native_thread_id();
|
|
942
|
+
} else {
|
|
943
|
+
thread.pthread_id = 0;
|
|
944
|
+
thread.native_tid = 0;
|
|
945
|
+
}
|
|
946
|
+
|
|
879
947
|
|
|
880
948
|
return;
|
|
881
949
|
}
|
|
882
950
|
}
|
|
883
951
|
|
|
884
|
-
|
|
885
|
-
list.emplace_back(new_state);
|
|
952
|
+
//fprintf(stderr, "NEW THREAD: th: %p, state: %i\n", th, new_state);
|
|
953
|
+
list.emplace_back(new_state, pthread_self(), th);
|
|
954
|
+
}
|
|
955
|
+
|
|
956
|
+
bool thread_equal(VALUE a, VALUE b) {
|
|
957
|
+
return a == b;
|
|
886
958
|
}
|
|
887
959
|
};
|
|
888
960
|
|
|
@@ -1002,6 +1074,12 @@ class RetainedCollector : public BaseCollector {
|
|
|
1002
1074
|
void record(VALUE obj) {
|
|
1003
1075
|
RawSample sample;
|
|
1004
1076
|
sample.sample();
|
|
1077
|
+
if (sample.empty()) {
|
|
1078
|
+
// During thread allocation we allocate one object without a frame
|
|
1079
|
+
// (as of Ruby 3.3)
|
|
1080
|
+
// Ideally we'd allow empty samples to be represented
|
|
1081
|
+
return;
|
|
1082
|
+
}
|
|
1005
1083
|
int stack_index = frame_list.stack_index(sample);
|
|
1006
1084
|
|
|
1007
1085
|
object_list.push_back(obj);
|
|
@@ -1151,6 +1229,8 @@ class GlobalSignalHandler {
|
|
|
1151
1229
|
void record_sample(LiveSample &sample, pthread_t pthread_id) {
|
|
1152
1230
|
const std::lock_guard<std::mutex> lock(mutex);
|
|
1153
1231
|
|
|
1232
|
+
assert(pthread_id);
|
|
1233
|
+
|
|
1154
1234
|
live_sample = &sample;
|
|
1155
1235
|
if (pthread_kill(pthread_id, SIGPROF)) {
|
|
1156
1236
|
rb_bug("pthread_kill failed");
|
|
@@ -1187,7 +1267,7 @@ class GlobalSignalHandler {
|
|
|
1187
1267
|
LiveSample *GlobalSignalHandler::live_sample;
|
|
1188
1268
|
|
|
1189
1269
|
class TimeCollector : public BaseCollector {
|
|
1190
|
-
|
|
1270
|
+
GCMarkerTable gc_markers;
|
|
1191
1271
|
ThreadTable threads;
|
|
1192
1272
|
|
|
1193
1273
|
pthread_t sample_thread;
|
|
@@ -1216,10 +1296,22 @@ class TimeCollector : public BaseCollector {
|
|
|
1216
1296
|
}
|
|
1217
1297
|
|
|
1218
1298
|
VALUE get_markers() {
|
|
1219
|
-
VALUE list =
|
|
1299
|
+
VALUE list = rb_ary_new();
|
|
1300
|
+
VALUE main_thread = rb_thread_main();
|
|
1301
|
+
VALUE main_thread_id = rb_obj_id(main_thread);
|
|
1302
|
+
|
|
1303
|
+
for (auto& marker: this->gc_markers.list) {
|
|
1304
|
+
VALUE ary = marker.to_array();
|
|
1220
1305
|
|
|
1221
|
-
|
|
1222
|
-
rb_ary_push(list,
|
|
1306
|
+
RARRAY_ASET(ary, 0, main_thread_id);
|
|
1307
|
+
rb_ary_push(list, ary);
|
|
1308
|
+
}
|
|
1309
|
+
for (auto &thread : threads.list) {
|
|
1310
|
+
for (auto& marker: thread.markers->list) {
|
|
1311
|
+
VALUE ary = marker.to_array();
|
|
1312
|
+
RARRAY_ASET(ary, 0, thread.ruby_thread_id);
|
|
1313
|
+
rb_ary_push(list, ary);
|
|
1314
|
+
}
|
|
1223
1315
|
}
|
|
1224
1316
|
|
|
1225
1317
|
return list;
|
|
@@ -1235,7 +1327,9 @@ class TimeCollector : public BaseCollector {
|
|
|
1235
1327
|
threads.mutex.lock();
|
|
1236
1328
|
for (auto &thread : threads.list) {
|
|
1237
1329
|
//if (thread.state == Thread::State::RUNNING) {
|
|
1238
|
-
if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
|
|
1330
|
+
//if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
|
|
1331
|
+
if (thread.state == Thread::State::RUNNING) {
|
|
1332
|
+
//fprintf(stderr, "sampling %p on tid:%i\n", thread.ruby_thread, thread.native_tid);
|
|
1239
1333
|
GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
|
|
1240
1334
|
|
|
1241
1335
|
if (sample.sample.gc) {
|
|
@@ -1271,6 +1365,13 @@ class TimeCollector : public BaseCollector {
|
|
|
1271
1365
|
}
|
|
1272
1366
|
|
|
1273
1367
|
static void *sample_thread_entry(void *arg) {
|
|
1368
|
+
#if HAVE_PTHREAD_SETNAME_NP
|
|
1369
|
+
#ifdef __APPLE__
|
|
1370
|
+
pthread_setname_np("Vernier profiler");
|
|
1371
|
+
#else
|
|
1372
|
+
pthread_setname_np(pthread_self(), "Vernier profiler");
|
|
1373
|
+
#endif
|
|
1374
|
+
#endif
|
|
1274
1375
|
TimeCollector *collector = static_cast<TimeCollector *>(arg);
|
|
1275
1376
|
collector->sample_thread_run();
|
|
1276
1377
|
return NULL;
|
|
@@ -1281,10 +1382,10 @@ class TimeCollector : public BaseCollector {
|
|
|
1281
1382
|
|
|
1282
1383
|
switch (event) {
|
|
1283
1384
|
case RUBY_EVENT_THREAD_BEGIN:
|
|
1284
|
-
collector->threads.started(
|
|
1385
|
+
collector->threads.started(self);
|
|
1285
1386
|
break;
|
|
1286
1387
|
case RUBY_EVENT_THREAD_END:
|
|
1287
|
-
collector->threads.stopped(
|
|
1388
|
+
collector->threads.stopped(self);
|
|
1288
1389
|
break;
|
|
1289
1390
|
}
|
|
1290
1391
|
}
|
|
@@ -1294,36 +1395,57 @@ class TimeCollector : public BaseCollector {
|
|
|
1294
1395
|
|
|
1295
1396
|
switch (event) {
|
|
1296
1397
|
case RUBY_INTERNAL_EVENT_GC_START:
|
|
1297
|
-
collector->
|
|
1398
|
+
collector->gc_markers.record(Marker::Type::MARKER_GC_START);
|
|
1298
1399
|
break;
|
|
1299
1400
|
case RUBY_INTERNAL_EVENT_GC_END_MARK:
|
|
1300
|
-
collector->
|
|
1401
|
+
collector->gc_markers.record(Marker::Type::MARKER_GC_END_MARK);
|
|
1301
1402
|
break;
|
|
1302
1403
|
case RUBY_INTERNAL_EVENT_GC_END_SWEEP:
|
|
1303
|
-
collector->
|
|
1404
|
+
collector->gc_markers.record(Marker::Type::MARKER_GC_END_SWEEP);
|
|
1304
1405
|
break;
|
|
1305
1406
|
case RUBY_INTERNAL_EVENT_GC_ENTER:
|
|
1306
|
-
collector->
|
|
1407
|
+
collector->gc_markers.record_gc_entered();
|
|
1307
1408
|
break;
|
|
1308
1409
|
case RUBY_INTERNAL_EVENT_GC_EXIT:
|
|
1309
|
-
collector->
|
|
1410
|
+
collector->gc_markers.record_gc_leave();
|
|
1310
1411
|
break;
|
|
1311
1412
|
}
|
|
1312
1413
|
}
|
|
1313
1414
|
|
|
1314
1415
|
static void internal_thread_event_cb(rb_event_flag_t event, const rb_internal_thread_event_data_t *event_data, void *data) {
|
|
1315
1416
|
TimeCollector *collector = static_cast<TimeCollector *>(data);
|
|
1417
|
+
VALUE thread = Qnil;
|
|
1418
|
+
|
|
1419
|
+
#if HAVE_RB_INTERNAL_THREAD_EVENT_DATA_T_THREAD
|
|
1420
|
+
thread = event_data->thread;
|
|
1421
|
+
#else
|
|
1422
|
+
// We may arrive here when starting a thread with
|
|
1423
|
+
// RUBY_INTERNAL_THREAD_EVENT_READY before the thread is actually set up.
|
|
1424
|
+
if (!ruby_native_thread_p()) return;
|
|
1425
|
+
|
|
1426
|
+
thread = rb_thread_current();
|
|
1427
|
+
#endif
|
|
1428
|
+
|
|
1429
|
+
auto native_tid = get_native_thread_id();
|
|
1316
1430
|
//cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
|
|
1431
|
+
//fprintf(stderr, "(%i) th %p to %s\n", native_tid, (void *)thread, gvl_event_name(event));
|
|
1432
|
+
|
|
1317
1433
|
|
|
1318
1434
|
switch (event) {
|
|
1435
|
+
case RUBY_INTERNAL_THREAD_EVENT_STARTED:
|
|
1436
|
+
collector->threads.started(thread);
|
|
1437
|
+
break;
|
|
1438
|
+
case RUBY_INTERNAL_THREAD_EVENT_EXITED:
|
|
1439
|
+
collector->threads.stopped(thread);
|
|
1440
|
+
break;
|
|
1319
1441
|
case RUBY_INTERNAL_THREAD_EVENT_READY:
|
|
1320
|
-
collector->threads.ready(
|
|
1442
|
+
collector->threads.ready(thread);
|
|
1321
1443
|
break;
|
|
1322
1444
|
case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
|
|
1323
|
-
collector->threads.resumed(
|
|
1445
|
+
collector->threads.resumed(thread);
|
|
1324
1446
|
break;
|
|
1325
1447
|
case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
|
|
1326
|
-
collector->threads.suspended(
|
|
1448
|
+
collector->threads.suspended(thread);
|
|
1327
1449
|
break;
|
|
1328
1450
|
|
|
1329
1451
|
}
|
|
@@ -1351,7 +1473,7 @@ class TimeCollector : public BaseCollector {
|
|
|
1351
1473
|
// have at least one thread in our thread list because it's possible
|
|
1352
1474
|
// that the profile might be such that we don't get any thread switch
|
|
1353
1475
|
// events and we need at least one
|
|
1354
|
-
this->threads.resumed(
|
|
1476
|
+
this->threads.resumed(rb_thread_current());
|
|
1355
1477
|
|
|
1356
1478
|
thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
|
|
1357
1479
|
rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
|
|
@@ -1372,13 +1494,6 @@ class TimeCollector : public BaseCollector {
|
|
|
1372
1494
|
rb_remove_event_hook(internal_gc_event_cb);
|
|
1373
1495
|
rb_remove_event_hook(internal_thread_event_cb);
|
|
1374
1496
|
|
|
1375
|
-
// capture thread names
|
|
1376
|
-
for (auto& thread: this->threads.list) {
|
|
1377
|
-
if (thread.running()) {
|
|
1378
|
-
thread.capture_name();
|
|
1379
|
-
}
|
|
1380
|
-
}
|
|
1381
|
-
|
|
1382
1497
|
frame_list.finalize();
|
|
1383
1498
|
|
|
1384
1499
|
VALUE result = build_collector_result();
|
|
@@ -1398,7 +1513,7 @@ class TimeCollector : public BaseCollector {
|
|
|
1398
1513
|
VALUE hash = rb_hash_new();
|
|
1399
1514
|
thread.samples.write_result(hash);
|
|
1400
1515
|
|
|
1401
|
-
rb_hash_aset(threads,
|
|
1516
|
+
rb_hash_aset(threads, thread.ruby_thread_id, hash);
|
|
1402
1517
|
rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
|
|
1403
1518
|
rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
|
|
1404
1519
|
if (!thread.stopped_at.zero()) {
|
|
@@ -1415,6 +1530,7 @@ class TimeCollector : public BaseCollector {
|
|
|
1415
1530
|
|
|
1416
1531
|
void mark() {
|
|
1417
1532
|
frame_list.mark_frames();
|
|
1533
|
+
threads.mark();
|
|
1418
1534
|
|
|
1419
1535
|
//for (int i = 0; i < queued_length; i++) {
|
|
1420
1536
|
// rb_gc_mark(queued_frames[i]);
|
data/lib/vernier/collector.rb
CHANGED
|
@@ -19,7 +19,7 @@ module Vernier
|
|
|
19
19
|
Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
|
|
20
20
|
end
|
|
21
21
|
|
|
22
|
-
def add_marker(name:, start:, finish:, thread: Thread.current.
|
|
22
|
+
def add_marker(name:, start:, finish:, thread: Thread.current.object_id, phase: Marker::Phase::INTERVAL, data: nil)
|
|
23
23
|
@markers << [thread,
|
|
24
24
|
name,
|
|
25
25
|
start,
|
|
@@ -39,7 +39,7 @@ module Vernier
|
|
|
39
39
|
start:,
|
|
40
40
|
finish: current_time,
|
|
41
41
|
phase: Marker::Phase::INTERVAL,
|
|
42
|
-
thread: Thread.current.
|
|
42
|
+
thread: Thread.current.object_id,
|
|
43
43
|
data: { :type => 'UserTiming', :entryType => 'measure', :name => name }
|
|
44
44
|
)
|
|
45
45
|
end
|
|
@@ -99,9 +99,10 @@ module Vernier
|
|
|
99
99
|
def data
|
|
100
100
|
markers_by_thread = profile.markers.group_by { |marker| marker[0] }
|
|
101
101
|
|
|
102
|
-
thread_data = profile.threads.map do |
|
|
103
|
-
markers = markers_by_thread[
|
|
102
|
+
thread_data = profile.threads.map do |ruby_thread_id, thread_info|
|
|
103
|
+
markers = markers_by_thread[ruby_thread_id] || []
|
|
104
104
|
Thread.new(
|
|
105
|
+
ruby_thread_id,
|
|
105
106
|
profile,
|
|
106
107
|
@categorizer,
|
|
107
108
|
markers: markers,
|
|
@@ -157,11 +158,12 @@ module Vernier
|
|
|
157
158
|
class Thread
|
|
158
159
|
attr_reader :profile
|
|
159
160
|
|
|
160
|
-
def initialize(profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
|
|
161
|
+
def initialize(ruby_thread_id, profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
|
|
162
|
+
@ruby_thread_id = ruby_thread_id
|
|
161
163
|
@profile = profile
|
|
162
164
|
@categorizer = categorizer
|
|
163
165
|
@tid = tid
|
|
164
|
-
@name = name
|
|
166
|
+
@name = pretty_name(name)
|
|
165
167
|
|
|
166
168
|
timestamps ||= [0] * samples.size
|
|
167
169
|
@samples, @weights, @timestamps = samples, weights, timestamps
|
|
@@ -212,7 +214,7 @@ module Vernier
|
|
|
212
214
|
def data
|
|
213
215
|
{
|
|
214
216
|
name: @name,
|
|
215
|
-
isMainThread:
|
|
217
|
+
isMainThread: @ruby_thread_id == ::Thread.main.object_id || (profile.threads.size == 1),
|
|
216
218
|
processStartupTime: 0, # FIXME
|
|
217
219
|
processShutdownTime: nil, # FIXME
|
|
218
220
|
registerTime: (@started_at - 0) / 1_000_000.0,
|
|
@@ -383,6 +385,25 @@ module Vernier
|
|
|
383
385
|
|
|
384
386
|
private
|
|
385
387
|
|
|
388
|
+
def pretty_name(name)
|
|
389
|
+
if name.empty?
|
|
390
|
+
begin
|
|
391
|
+
tr = ObjectSpace._id2ref(@ruby_thread_id)
|
|
392
|
+
name = tr.inspect if tr
|
|
393
|
+
rescue RangeError
|
|
394
|
+
# Thread was already GC'd
|
|
395
|
+
end
|
|
396
|
+
end
|
|
397
|
+
return name unless name.start_with?("#<Thread")
|
|
398
|
+
pretty = []
|
|
399
|
+
obj_address = name[/Thread:(0x\w+)/,1]
|
|
400
|
+
best_id = name[/\#<Thread:0x\w+@?\s?(.*)\s+\S+>/,1] || ""
|
|
401
|
+
Gem.path.each { |gem_dir| best_id = best_id.gsub(gem_dir, "...") }
|
|
402
|
+
pretty << best_id unless best_id.empty?
|
|
403
|
+
pretty << "(#{obj_address})"
|
|
404
|
+
pretty.join(' ')
|
|
405
|
+
end
|
|
406
|
+
|
|
386
407
|
def gc_category
|
|
387
408
|
@categorizer.get_category("GC")
|
|
388
409
|
end
|
data/lib/vernier/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: vernier
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- John Hawthorn
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2024-02-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: An experimental profiler
|
|
14
14
|
email:
|