vernier 0.3.1 → 0.5.0
- checksums.yaml +4 -4
- data/README.md +7 -1
- data/ext/vernier/extconf.rb +7 -0
- data/ext/vernier/vernier.cc +194 -78
- data/lib/vernier/collector.rb +2 -2
- data/lib/vernier/output/firefox.rb +26 -5
- data/lib/vernier/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f552d6fe2d529de743412cbc9975bfbd4ac87b894777a44ce26ef07dddc8e032
+  data.tar.gz: b9ddcd3e4ce0acb5ac53363e041d1bd9e3a2d4c5bf21b2bbe56c48ed5fef4cdd
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1b0808ee6ae8e64866e81e7ba8ed4847788421a00517c00e7aacb54c2fdff16287f92eb10132b5802fcb93955c3e4cf1a8fe4cfc97f4a9742a8130341bea75f7
+  data.tar.gz: 686a7397043be44451cccf9380473cda1e350ee342878ca2428d8bcd6c69aeea36244d15c92e3f998d4b6245c46c37d74ac3aac18ed58c783c288501d0cf7243
data/README.md
CHANGED
@@ -1,6 +1,6 @@
 # Vernier
 
-Next-generation Ruby sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
+Next-generation Ruby 3.2.1+ sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
 
 ## Examples
 
@@ -20,6 +20,8 @@ Rails benchmark - lobste.rs (time)
 
 ## Installation
 
+Vernier requires Ruby version 3.2.1 or greater
+
 ```ruby
 gem 'vernier'
 ```
@@ -35,6 +37,9 @@ Vernier.trace(out: "time_profile.json") { some_slow_method }
 
 The output can then be viewed in the Firefox Profiler (demo) or the [`profile-viewer` gem](https://github.com/tenderlove/profiler/tree/ruby) (a Ruby-customized version of the firefox profiler.
 
+- **Flame Graph**: Shows proportionally how much time is spent within particular stack frames. Frames are grouped together, which means that x-axis / left-to-right order is not meaningful.
+- **Stack Chart**: Shows the stack at each sample with the x-axis representing time and can be read left-to-right.
+
 ### Retained memory
 
 Record a flamegraph of all **retained** allocations from loading `irb`.
@@ -43,6 +48,7 @@ Record a flamegraph of all **retained** allocations from loading `irb`.
 ruby -r vernier -e 'Vernier.trace_retained(out: "irb_profile.json") { require "irb" }'
 ```
 
+Retained-memory flamegraphs must be interpreted a little differently than a typical profiling flamegraph. In a retained-memory flamegraph, the x-axis represents a proportion of memory in bytes, _not time or samples_ The topmost boxes on the y-axis represent the retained objects, with their stacktrace below; their width represents the percentage of overall retained memory each object occupies.
 
 ## Development
 
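The README additions above describe multi-thread tracking plus the Flame Graph and Stack Chart views. A minimal sketch of a trace that exercises them; only `Vernier.trace(out:)` comes from the README, while the worker code, thread names, and file name are illustrative:

```ruby
require "vernier"

# Produces time_profile.json for the Firefox Profiler / profile-viewer.
# Each Ruby thread gets its own track; CPU work, GVL stalls, idle time
# and GC pauses show up as samples and markers.
Vernier.trace(out: "time_profile.json") do
  workers = 4.times.map do |i|
    Thread.new do
      Thread.current.name = "worker-#{i}" # readable label in the profile
      100_000.times { |n| Math.sqrt(n) }  # on-CPU work
      sleep 0.05                          # idle time (suspended)
    end
  end
  workers.each(&:join)
end
```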
data/ext/vernier/extconf.rb
CHANGED
@@ -5,4 +5,11 @@ require "mkmf"
 $CXXFLAGS += " -std=c++14 "
 $CXXFLAGS += " -ggdb3 -Og "
 
+have_header("ruby/thread.h")
+have_struct_member("rb_internal_thread_event_data_t", "thread", ["ruby/thread.h"])
+
+have_func("rb_profile_thread_frames", "ruby/debug.h")
+
+have_func("pthread_setname_np")
+
 create_makefile("vernier/vernier")
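Each mkmf check added above defines a `HAVE_*` macro when it succeeds (for example `HAVE_PTHREAD_SETNAME_NP` and `HAVE_RB_INTERNAL_THREAD_EVENT_DATA_T_THREAD`), which the vernier.cc hunks below guard on. A hedged sketch of the same pattern in isolation; the feature names are taken from the diff, the `warn` message is illustrative:

```ruby
require "mkmf"

# have_* helpers return true/false and, when a check passes, add a
# -DHAVE_... define to the build, so the C++ source can #if on it.
have_header("ruby/thread.h")

unless have_struct_member("rb_internal_thread_event_data_t", "thread", ["ruby/thread.h"])
  # Without event_data->thread, vernier.cc falls back to
  # rb_thread_current() inside the GVL event callback (see the hunks below).
  warn "rb_internal_thread_event_data_t.thread unavailable; using fallback"
end

have_func("rb_profile_thread_frames", "ruby/debug.h")
have_func("pthread_setname_np") # guards naming of the sampler thread
```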
data/ext/vernier/vernier.cc
CHANGED
@@ -1,3 +1,5 @@
+// vim: expandtab:ts=4:sw=4
+
 #include <iostream>
 #include <iomanip>
 #include <vector>
@@ -27,6 +29,9 @@
 #include "ruby/debug.h"
 #include "ruby/thread.h"
 
+#undef assert
+#define assert RUBY_ASSERT_ALWAYS
+
 # define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
 
 // Internal TracePoint events we'll monitor during profiling
@@ -53,6 +58,22 @@ static VALUE rb_cVernierResult;
 static VALUE rb_mVernierMarkerType;
 static VALUE rb_cVernierCollector;
 
+static const char *gvl_event_name(rb_event_flag_t event) {
+    switch (event) {
+      case RUBY_INTERNAL_THREAD_EVENT_STARTED:
+        return "started";
+      case RUBY_INTERNAL_THREAD_EVENT_READY:
+        return "ready";
+      case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
+        return "resumed";
+      case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
+        return "suspended";
+      case RUBY_INTERNAL_THREAD_EVENT_EXITED:
+        return "exited";
+    }
+    return "no-event";
+}
+
 class TimeStamp {
     static const uint64_t nanoseconds_per_second = 1000000000;
     uint64_t value_ns;
@@ -85,8 +106,16 @@ class TimeStamp {
         } while (target_time > TimeStamp::Now());
     }
 
+    static TimeStamp from_seconds(uint64_t s) {
+        return TimeStamp::from_milliseconds(s * 1000);
+    }
+
+    static TimeStamp from_milliseconds(uint64_t ms) {
+        return TimeStamp::from_microseconds(ms * 1000);
+    }
+
     static TimeStamp from_microseconds(uint64_t us) {
-        return TimeStamp(us * 1000);
+        return TimeStamp::from_nanoseconds(us * 1000);
    }
 
     static TimeStamp from_nanoseconds(uint64_t ns) {
@@ -266,6 +295,10 @@ class SamplerSemaphore {
 #ifdef __APPLE__
         dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
 #else
+        // Use sem_timedwait so that we get a crash instead of a deadlock for
+        // easier debugging
+        auto ts = (TimeStamp::Now() + TimeStamp::from_seconds(5)).timespec();
+
         int ret;
         do {
             ret = sem_wait(&sem);
@@ -304,16 +337,15 @@ struct RawSample {
     }
 
     void sample() {
+        clear();
+
         if (!ruby_native_thread_p()) {
-            clear();
             return;
         }
 
         if (rb_during_gc()) {
             gc = true;
-            len = 0;
         } else {
-            gc = false;
             len = rb_profile_frames(0, MAX_LEN, frames, lines);
         }
     }
@@ -602,12 +634,13 @@ class Marker {
     Phase phase;
     TimeStamp timestamp;
     TimeStamp finish;
-
+    // VALUE ruby_thread_id;
+    //native_thread_id_t thread_id;
     int stack_index = -1;
 
     VALUE to_array() {
         VALUE record[6] = {0};
-        record[0] =
+        record[0] = Qnil; // FIXME
         record[1] = INT2NUM(type);
         record[2] = INT2NUM(phase);
         record[3] = ULL2NUM(timestamp.nanoseconds());
@@ -625,30 +658,33 @@ class Marker {
 };
 
 class MarkerTable {
-    TimeStamp last_gc_entry;
-
   public:
     std::vector<Marker> list;
    std::mutex mutex;
 
-    void record_gc_entered() {
-        last_gc_entry = TimeStamp::Now();
-    }
-
-    void record_gc_leave() {
-        list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), get_native_thread_id(), -1 });
-    }
-
     void record_interval(Marker::Type type, TimeStamp from, TimeStamp to, int stack_index = -1) {
         const std::lock_guard<std::mutex> lock(mutex);
 
-        list.push_back({ type, Marker::INTERVAL, from, to,
+        list.push_back({ type, Marker::INTERVAL, from, to, stack_index });
     }
 
     void record(Marker::Type type, int stack_index = -1) {
         const std::lock_guard<std::mutex> lock(mutex);
 
-        list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(),
+        list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), stack_index });
+    }
+};
+
+class GCMarkerTable: public MarkerTable {
+    TimeStamp last_gc_entry;
+
+  public:
+    void record_gc_entered() {
+        last_gc_entry = TimeStamp::Now();
+    }
+
+    void record_gc_leave() {
+        list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), -1 });
     }
 };
 
@@ -731,6 +767,8 @@ class Thread {
         STOPPED
     };
 
+    VALUE ruby_thread;
+    VALUE ruby_thread_id;
     pthread_t pthread_id;
     native_thread_id_t native_tid;
     State state;
@@ -742,18 +780,33 @@ class Thread {
     int stack_on_suspend_idx;
     SampleTranslator translator;
 
-
+    MarkerTable *markers;
+
+    std::string name;
 
-
-
+    // FIXME: don't use pthread at start
+    Thread(State state, pthread_t pthread_id, VALUE ruby_thread) : pthread_id(pthread_id), ruby_thread(ruby_thread), state(state), stack_on_suspend_idx(-1) {
+        name = Qnil;
+        ruby_thread_id = rb_obj_id(ruby_thread);
+        //ruby_thread_id = ULL2NUM(ruby_thread);
         native_tid = get_native_thread_id();
         started_at = state_changed_at = TimeStamp::Now();
+        name = "";
+        markers = new MarkerTable();
+
+        if (state == State::STARTED) {
+            markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
+        }
     }
 
-    void set_state(State new_state
+    void set_state(State new_state) {
         if (state == Thread::State::STOPPED) {
             return;
         }
+        if (new_state == Thread::State::SUSPENDED && state == new_state) {
+            // on Ruby 3.2 (only?) we may see duplicate suspended states
+            return;
+        }
 
         TimeStamp from = state_changed_at;
         auto now = TimeStamp::Now();
@@ -764,10 +817,13 @@ class Thread {
 
         switch (new_state) {
           case State::STARTED:
-
+            markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
+            return; // no mutation of current state
            break;
          case State::RUNNING:
-            assert(state == State::READY);
+            assert(state == State::READY || state == State::RUNNING);
+            pthread_id = pthread_self();
+            native_tid = get_native_thread_id();
 
            // If the GVL is immediately ready, and we measure no times
            // stalled, skip emitting the interval.
@@ -783,25 +839,26 @@ class Thread {
            // Threads can be preempted, which means they will have been in "Running"
            // state, and then the VM was like "no I need to stop you from working,
            // so I'll put you in the 'ready' (or stalled) state"
-            assert(state == State::SUSPENDED || state == State::RUNNING);
+            assert(state == State::STARTED || state == State::SUSPENDED || state == State::RUNNING);
            if (state == State::SUSPENDED) {
                markers->record_interval(Marker::Type::MARKER_THREAD_SUSPENDED, from, now, stack_on_suspend_idx);
            }
-            else {
+            else if (state == State::RUNNING) {
                markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
            }
            break;
          case State::SUSPENDED:
            // We can go from RUNNING or STARTED to SUSPENDED
-            assert(state == State::RUNNING || state == State::STARTED);
+            assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
            markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
            break;
          case State::STOPPED:
-            // We can go from RUNNING or STARTED to STOPPED
-            assert(state == State::RUNNING || state == State::STARTED);
+            // We can go from RUNNING or STARTED or SUSPENDED to STOPPED
+            assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
            markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
+            markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
+
            stopped_at = now;
-            capture_name();
 
            break;
        }
@@ -814,11 +871,7 @@ class Thread {
         return state != State::STOPPED;
     }
 
-    void
-        char buf[128];
-        int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
-        if (rc == 0)
-            name = std::string(buf);
+    void mark() {
     }
 };
 
@@ -832,40 +885,46 @@ class ThreadTable {
     ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
     }
 
-    void
-
+    void mark() {
+        for (auto &thread : list) {
+            thread.mark();
+        }
+    }
 
+    void started(VALUE th) {
         //list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
-
-        set_state(Thread::State::STARTED, markers);
+        set_state(Thread::State::STARTED, th);
     }
 
-    void ready(
-        set_state(Thread::State::READY,
+    void ready(VALUE th) {
+        set_state(Thread::State::READY, th);
     }
 
-    void resumed(
-        set_state(Thread::State::RUNNING,
+    void resumed(VALUE th) {
+        set_state(Thread::State::RUNNING, th);
    }
 
-    void suspended(
-        set_state(Thread::State::SUSPENDED,
+    void suspended(VALUE th) {
+        set_state(Thread::State::SUSPENDED, th);
    }
 
-    void stopped(
-
-        set_state(Thread::State::STOPPED, markers);
+    void stopped(VALUE th) {
+        set_state(Thread::State::STOPPED, th);
    }
 
   private:
-    void set_state(Thread::State new_state,
+    void set_state(Thread::State new_state, VALUE th) {
         const std::lock_guard<std::mutex> lock(mutex);
 
-        pthread_t current_thread = pthread_self();
         //cerr << "set state=" << new_state << " thread=" << gettid() << endl;
 
+        pid_t native_tid = get_native_thread_id();
+        pthread_t pthread_id = pthread_self();
+
+        //fprintf(stderr, "th %p (tid: %i) from %s to %s\n", (void *)th, native_tid, gvl_event_name(state), gvl_event_name(new_state));
+
         for (auto &thread : list) {
-            if (
+            if (thread_equal(th, thread.ruby_thread)) {
                if (new_state == Thread::State::SUSPENDED) {
 
                    RawSample sample;
@@ -875,14 +934,27 @@ class ThreadTable {
                     //cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
                 }
 
-                thread.set_state(new_state
+                thread.set_state(new_state);
+
+                if (thread.state == Thread::State::RUNNING) {
+                    thread.pthread_id = pthread_self();
+                    thread.native_tid = get_native_thread_id();
+                } else {
+                    thread.pthread_id = 0;
+                    thread.native_tid = 0;
+                }
+
 
                return;
            }
        }
 
-
-        list.emplace_back(new_state);
+        //fprintf(stderr, "NEW THREAD: th: %p, state: %i\n", th, new_state);
+        list.emplace_back(new_state, pthread_self(), th);
+    }
+
+    bool thread_equal(VALUE a, VALUE b) {
+        return a == b;
    }
 };
 
@@ -1002,6 +1074,12 @@ class RetainedCollector : public BaseCollector {
     void record(VALUE obj) {
         RawSample sample;
         sample.sample();
+        if (sample.empty()) {
+            // During thread allocation we allocate one object without a frame
+            // (as of Ruby 3.3)
+            // Ideally we'd allow empty samples to be represented
+            return;
+        }
         int stack_index = frame_list.stack_index(sample);
 
         object_list.push_back(obj);
@@ -1151,6 +1229,8 @@ class GlobalSignalHandler {
     void record_sample(LiveSample &sample, pthread_t pthread_id) {
         const std::lock_guard<std::mutex> lock(mutex);
 
+        assert(pthread_id);
+
         live_sample = &sample;
         if (pthread_kill(pthread_id, SIGPROF)) {
             rb_bug("pthread_kill failed");
@@ -1187,7 +1267,7 @@ class GlobalSignalHandler {
 LiveSample *GlobalSignalHandler::live_sample;
 
 class TimeCollector : public BaseCollector {
-
+    GCMarkerTable gc_markers;
     ThreadTable threads;
 
     pthread_t sample_thread;
@@ -1216,10 +1296,22 @@ class TimeCollector : public BaseCollector {
     }
 
     VALUE get_markers() {
-        VALUE list =
+        VALUE list = rb_ary_new();
+        VALUE main_thread = rb_thread_main();
+        VALUE main_thread_id = rb_obj_id(main_thread);
+
+        for (auto& marker: this->gc_markers.list) {
+            VALUE ary = marker.to_array();
 
-
-        rb_ary_push(list,
+            RARRAY_ASET(ary, 0, main_thread_id);
+            rb_ary_push(list, ary);
+        }
+        for (auto &thread : threads.list) {
+            for (auto& marker: thread.markers->list) {
+                VALUE ary = marker.to_array();
+                RARRAY_ASET(ary, 0, thread.ruby_thread_id);
+                rb_ary_push(list, ary);
+            }
        }
 
        return list;
@@ -1235,7 +1327,9 @@ class TimeCollector : public BaseCollector {
         threads.mutex.lock();
         for (auto &thread : threads.list) {
             //if (thread.state == Thread::State::RUNNING) {
-            if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
+            //if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
+            if (thread.state == Thread::State::RUNNING) {
+                //fprintf(stderr, "sampling %p on tid:%i\n", thread.ruby_thread, thread.native_tid);
                GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
 
                if (sample.sample.gc) {
@@ -1271,6 +1365,13 @@ class TimeCollector : public BaseCollector {
     }
 
     static void *sample_thread_entry(void *arg) {
+#if HAVE_PTHREAD_SETNAME_NP
+#ifdef __APPLE__
+        pthread_setname_np("Vernier profiler");
+#else
+        pthread_setname_np(pthread_self(), "Vernier profiler");
+#endif
+#endif
         TimeCollector *collector = static_cast<TimeCollector *>(arg);
         collector->sample_thread_run();
         return NULL;
@@ -1281,10 +1382,10 @@ class TimeCollector : public BaseCollector {
 
         switch (event) {
           case RUBY_EVENT_THREAD_BEGIN:
-            collector->threads.started(
+            collector->threads.started(self);
            break;
          case RUBY_EVENT_THREAD_END:
-            collector->threads.stopped(
+            collector->threads.stopped(self);
            break;
        }
    }
@@ -1294,36 +1395,57 @@ class TimeCollector : public BaseCollector {
 
         switch (event) {
           case RUBY_INTERNAL_EVENT_GC_START:
-            collector->
+            collector->gc_markers.record(Marker::Type::MARKER_GC_START);
            break;
          case RUBY_INTERNAL_EVENT_GC_END_MARK:
-            collector->
+            collector->gc_markers.record(Marker::Type::MARKER_GC_END_MARK);
            break;
          case RUBY_INTERNAL_EVENT_GC_END_SWEEP:
-            collector->
+            collector->gc_markers.record(Marker::Type::MARKER_GC_END_SWEEP);
            break;
          case RUBY_INTERNAL_EVENT_GC_ENTER:
-            collector->
+            collector->gc_markers.record_gc_entered();
            break;
          case RUBY_INTERNAL_EVENT_GC_EXIT:
-            collector->
+            collector->gc_markers.record_gc_leave();
            break;
        }
    }
 
    static void internal_thread_event_cb(rb_event_flag_t event, const rb_internal_thread_event_data_t *event_data, void *data) {
        TimeCollector *collector = static_cast<TimeCollector *>(data);
+        VALUE thread = Qnil;
+
+#if HAVE_RB_INTERNAL_THREAD_EVENT_DATA_T_THREAD
+        thread = event_data->thread;
+#else
+        // We may arrive here when starting a thread with
+        // RUBY_INTERNAL_THREAD_EVENT_READY before the thread is actually set up.
+        if (!ruby_native_thread_p()) return;
+
+        thread = rb_thread_current();
+#endif
+
+        auto native_tid = get_native_thread_id();
        //cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
+        //fprintf(stderr, "(%i) th %p to %s\n", native_tid, (void *)thread, gvl_event_name(event));
+
 
        switch (event) {
+          case RUBY_INTERNAL_THREAD_EVENT_STARTED:
+            collector->threads.started(thread);
+            break;
+          case RUBY_INTERNAL_THREAD_EVENT_EXITED:
+            collector->threads.stopped(thread);
+            break;
          case RUBY_INTERNAL_THREAD_EVENT_READY:
-            collector->threads.ready(
+            collector->threads.ready(thread);
            break;
          case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
-            collector->threads.resumed(
+            collector->threads.resumed(thread);
            break;
          case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
-            collector->threads.suspended(
+            collector->threads.suspended(thread);
            break;
 
        }
@@ -1351,7 +1473,7 @@ class TimeCollector : public BaseCollector {
         // have at least one thread in our thread list because it's possible
         // that the profile might be such that we don't get any thread switch
         // events and we need at least one
-        this->threads.resumed(
+        this->threads.resumed(rb_thread_current());
 
         thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
         rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
@@ -1372,13 +1494,6 @@ class TimeCollector : public BaseCollector {
         rb_remove_event_hook(internal_gc_event_cb);
         rb_remove_event_hook(internal_thread_event_cb);
 
-        // capture thread names
-        for (auto& thread: this->threads.list) {
-            if (thread.running()) {
-                thread.capture_name();
-            }
-        }
-
         frame_list.finalize();
 
         VALUE result = build_collector_result();
@@ -1398,7 +1513,7 @@ class TimeCollector : public BaseCollector {
             VALUE hash = rb_hash_new();
             thread.samples.write_result(hash);
 
-            rb_hash_aset(threads,
+            rb_hash_aset(threads, thread.ruby_thread_id, hash);
             rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
             rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
             if (!thread.stopped_at.zero()) {
@@ -1415,6 +1530,7 @@ class TimeCollector : public BaseCollector {
 
     void mark() {
         frame_list.mark_frames();
+        threads.mark();
 
         //for (int i = 0; i < queued_length; i++) {
         //  rb_gc_mark(queued_frames[i]);
data/lib/vernier/collector.rb
CHANGED
@@ -19,7 +19,7 @@ module Vernier
       Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
     end
 
-    def add_marker(name:, start:, finish:, thread: Thread.current.
+    def add_marker(name:, start:, finish:, thread: Thread.current.object_id, phase: Marker::Phase::INTERVAL, data: nil)
       @markers << [thread,
                    name,
                    start,
@@ -39,7 +39,7 @@ module Vernier
         start:,
         finish: current_time,
         phase: Marker::Phase::INTERVAL,
-        thread: Thread.current.
+        thread: Thread.current.object_id,
         data: { :type => 'UserTiming', :entryType => 'measure', :name => name }
       )
     end
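With the change above, markers are keyed by `Thread.current.object_id` rather than a native id. A hedged sketch of calling `add_marker` under that convention; the keyword signature is the one from the hunk, while `collector` and `do_work` are illustrative stand-ins:

```ruby
def do_work = sleep(0.01) # placeholder workload

start = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
do_work
finish = Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)

# `collector` stands in for a Vernier collector instance; obtaining one is
# outside these hunks.
collector.add_marker(
  name: "do_work",
  start: start,
  finish: finish,
  thread: Thread.current.object_id, # matches the new default
  data: { type: "UserTiming", entryType: "measure", name: "do_work" }
)
```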
data/lib/vernier/output/firefox.rb
CHANGED
@@ -99,9 +99,10 @@ module Vernier
       def data
         markers_by_thread = profile.markers.group_by { |marker| marker[0] }
 
-        thread_data = profile.threads.map do |
-          markers = markers_by_thread[
+        thread_data = profile.threads.map do |ruby_thread_id, thread_info|
+          markers = markers_by_thread[ruby_thread_id] || []
           Thread.new(
+            ruby_thread_id,
             profile,
             @categorizer,
             markers: markers,
@@ -157,11 +158,12 @@ module Vernier
     class Thread
       attr_reader :profile
 
-      def initialize(profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
+      def initialize(ruby_thread_id, profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
+        @ruby_thread_id = ruby_thread_id
         @profile = profile
         @categorizer = categorizer
         @tid = tid
-        @name = name
+        @name = pretty_name(name)
 
         timestamps ||= [0] * samples.size
         @samples, @weights, @timestamps = samples, weights, timestamps
@@ -212,7 +214,7 @@ module Vernier
       def data
         {
           name: @name,
-          isMainThread:
+          isMainThread: @ruby_thread_id == ::Thread.main.object_id || (profile.threads.size == 1),
           processStartupTime: 0, # FIXME
           processShutdownTime: nil, # FIXME
           registerTime: (@started_at - 0) / 1_000_000.0,
@@ -383,6 +385,25 @@ module Vernier
 
       private
 
+      def pretty_name(name)
+        if name.empty?
+          begin
+            tr = ObjectSpace._id2ref(@ruby_thread_id)
+            name = tr.inspect if tr
+          rescue RangeError
+            # Thread was already GC'd
+          end
+        end
+        return name unless name.start_with?("#<Thread")
+        pretty = []
+        obj_address = name[/Thread:(0x\w+)/,1]
+        best_id = name[/\#<Thread:0x\w+@?\s?(.*)\s+\S+>/,1] || ""
+        Gem.path.each { |gem_dir| best_id = best_id.gsub(gem_dir, "...") }
+        pretty << best_id unless best_id.empty?
+        pretty << "(#{obj_address})"
+        pretty.join(' ')
+      end
+
       def gc_category
         @categorizer.get_category("GC")
       end
data/lib/vernier/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: vernier
 version: !ruby/object:Gem::Version
-  version: 0.3.1
+  version: 0.5.0
 platform: ruby
 authors:
 - John Hawthorn
 autorequire:
 bindir: exe
 cert_chain: []
-date:
+date: 2024-02-27 00:00:00.000000000 Z
 dependencies: []
 description: An experimental profiler
 email: