vernier 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +30 -8
- data/ext/vernier/extconf.rb +7 -0
- data/ext/vernier/vernier.cc +205 -74
- data/lib/vernier/collector.rb +2 -2
- data/lib/vernier/output/firefox.rb +8 -8
- data/lib/vernier/version.rb +1 -1
- data/lib/vernier.rb +3 -3
- data/vernier.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3190b81748262d9620de74e12f6adca9b2c4126741781f9860076d96c96a123
|
4
|
+
data.tar.gz: b54848781f0b17c16074fd630d0aae6b6ea206e47679713ce858e6e3f08525a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de91010589471c0b4a7cfddb37bab92262392ef33da2af44930cdab848a6fd290468abe76c147485ba032d89cdac33b631937eea01bd15af8e89b03a5200e69e
|
7
|
+
data.tar.gz: 82b40e4d93685ab8c560a995df31421c06ff00f8c27f33d6b37296bed96ca5b37cacda3be60e64464f535dd52d273f80d4885705f591e7246d7b3fb2269c151f
|
data/README.md
CHANGED
@@ -1,33 +1,55 @@
|
|
1
1
|
# Vernier
|
2
2
|
|
3
|
-
|
3
|
+
Next-generation Ruby 3.2.1+ sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
|
4
|
+
|
5
|
+
## Examples
|
6
|
+
|
7
|
+
[Livestreamed demo: Pairin' with Aaron (YouTube)](https://www.youtube.com/watch?v=9nvX3OHykGQ#t=27m43)
|
8
|
+
|
9
|
+
Sidekiq jobs from Mastodon (time, threaded)
|
10
|
+
: https://share.firefox.dev/44jZRf3
|
11
|
+
|
12
|
+
Puma web requests from Mastodon (time, threaded)
|
13
|
+
: https://share.firefox.dev/48FOTnF
|
14
|
+
|
15
|
+
Rails benchmark - lobste.rs (time)
|
16
|
+
: https://share.firefox.dev/3Ld89id
|
17
|
+
|
18
|
+
`require "irb"` (retained memory)
|
19
|
+
: https://share.firefox.dev/3DhLsFa
|
4
20
|
|
5
21
|
## Installation
|
6
22
|
|
23
|
+
Vernier requires Ruby version 3.2.1 or greater.
|
24
|
+
|
7
25
|
```ruby
|
8
26
|
gem 'vernier'
|
9
27
|
```
|
10
28
|
|
11
29
|
## Usage
|
12
30
|
|
13
|
-
### Retained memory
|
14
31
|
|
15
|
-
|
32
|
+
### Time
|
16
33
|
|
17
34
|
```
|
18
|
-
|
35
|
+
Vernier.trace(out: "time_profile.json") { some_slow_method }
|
19
36
|
```
|
20
37
|
|
21
|
-
The output can then be viewed in the
|
38
|
+
The output can then be viewed in the Firefox Profiler (demo) or the [`profile-viewer` gem](https://github.com/tenderlove/profiler/tree/ruby) (a Ruby-customized version of the Firefox Profiler).
|
22
39
|
|
23
|
-
|
40
|
+
- **Flame Graph**: Shows proportionally how much time is spent within particular stack frames. Frames are grouped together, which means that x-axis / left-to-right order is not meaningful.
|
41
|
+
- **Stack Chart**: Shows the stack at each sample with the x-axis representing time and can be read left-to-right.
|
24
42
|
|
25
|
-
###
|
43
|
+
### Retained memory
|
44
|
+
|
45
|
+
Record a flamegraph of all **retained** allocations from loading `irb`.
|
26
46
|
|
27
47
|
```
|
28
|
-
Vernier.
|
48
|
+
ruby -r vernier -e 'Vernier.trace_retained(out: "irb_profile.json") { require "irb" }'
|
29
49
|
```
|
30
50
|
|
51
|
+
Retained-memory flamegraphs must be interpreted a little differently than a typical profiling flamegraph. In a retained-memory flamegraph, the x-axis represents a proportion of memory in bytes, _not time or samples_. The topmost boxes on the y-axis represent the retained objects, with their stacktrace below; their width represents the percentage of overall retained memory each object occupies.
|
52
|
+
|
31
53
|
## Development
|
32
54
|
|
33
55
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/ext/vernier/extconf.rb
CHANGED
@@ -5,4 +5,11 @@ require "mkmf"
|
|
5
5
|
$CXXFLAGS += " -std=c++14 "
|
6
6
|
$CXXFLAGS += " -ggdb3 -Og "
|
7
7
|
|
8
|
+
have_header("ruby/thread.h")
|
9
|
+
have_struct_member("rb_internal_thread_event_data_t", "thread", ["ruby/thread.h"])
|
10
|
+
|
11
|
+
have_func("rb_profile_thread_frames", "ruby/debug.h")
|
12
|
+
|
13
|
+
have_func("pthread_setname_np")
|
14
|
+
|
8
15
|
create_makefile("vernier/vernier")
|
data/ext/vernier/vernier.cc
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
// vim: expandtab:ts=4:sw=4
|
2
|
+
|
1
3
|
#include <iostream>
|
2
4
|
#include <iomanip>
|
3
5
|
#include <vector>
|
@@ -27,6 +29,9 @@
|
|
27
29
|
#include "ruby/debug.h"
|
28
30
|
#include "ruby/thread.h"
|
29
31
|
|
32
|
+
#undef assert
|
33
|
+
#define assert RUBY_ASSERT_ALWAYS
|
34
|
+
|
30
35
|
# define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
|
31
36
|
|
32
37
|
// Internal TracePoint events we'll monitor during profiling
|
@@ -53,6 +58,22 @@ static VALUE rb_cVernierResult;
|
|
53
58
|
static VALUE rb_mVernierMarkerType;
|
54
59
|
static VALUE rb_cVernierCollector;
|
55
60
|
|
61
|
+
static const char *gvl_event_name(rb_event_flag_t event) {
|
62
|
+
switch (event) {
|
63
|
+
case RUBY_INTERNAL_THREAD_EVENT_STARTED:
|
64
|
+
return "started";
|
65
|
+
case RUBY_INTERNAL_THREAD_EVENT_READY:
|
66
|
+
return "ready";
|
67
|
+
case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
|
68
|
+
return "resumed";
|
69
|
+
case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
|
70
|
+
return "suspended";
|
71
|
+
case RUBY_INTERNAL_THREAD_EVENT_EXITED:
|
72
|
+
return "exited";
|
73
|
+
}
|
74
|
+
return "no-event";
|
75
|
+
}
|
76
|
+
|
56
77
|
class TimeStamp {
|
57
78
|
static const uint64_t nanoseconds_per_second = 1000000000;
|
58
79
|
uint64_t value_ns;
|
@@ -72,17 +93,29 @@ class TimeStamp {
|
|
72
93
|
return TimeStamp(0);
|
73
94
|
}
|
74
95
|
|
75
|
-
|
76
|
-
|
96
|
+
// SleepUntil a specified timestamp
|
97
|
+
// Highly accurate manual sleep time
|
98
|
+
static void SleepUntil(const TimeStamp &target_time) {
|
99
|
+
if (target_time.zero()) return;
|
100
|
+
struct timespec ts = target_time.timespec();
|
77
101
|
|
78
102
|
int res;
|
79
103
|
do {
|
80
|
-
|
81
|
-
|
104
|
+
// do nothing until it's time :)
|
105
|
+
sleep(0);
|
106
|
+
} while (target_time > TimeStamp::Now());
|
107
|
+
}
|
108
|
+
|
109
|
+
static TimeStamp from_seconds(uint64_t s) {
|
110
|
+
return TimeStamp::from_milliseconds(s * 1000);
|
111
|
+
}
|
112
|
+
|
113
|
+
static TimeStamp from_milliseconds(uint64_t ms) {
|
114
|
+
return TimeStamp::from_microseconds(ms * 1000);
|
82
115
|
}
|
83
116
|
|
84
117
|
static TimeStamp from_microseconds(uint64_t us) {
|
85
|
-
return TimeStamp(us * 1000);
|
118
|
+
return TimeStamp::from_nanoseconds(us * 1000);
|
86
119
|
}
|
87
120
|
|
88
121
|
static TimeStamp from_nanoseconds(uint64_t ns) {
|
@@ -262,6 +295,10 @@ class SamplerSemaphore {
|
|
262
295
|
#ifdef __APPLE__
|
263
296
|
dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
|
264
297
|
#else
|
298
|
+
// Use sem_timedwait so that we get a crash instead of a deadlock for
|
299
|
+
// easier debugging
|
300
|
+
auto ts = (TimeStamp::Now() + TimeStamp::from_seconds(5)).timespec();
|
301
|
+
|
265
302
|
int ret;
|
266
303
|
do {
|
267
304
|
ret = sem_wait(&sem);
|
@@ -300,16 +337,15 @@ struct RawSample {
|
|
300
337
|
}
|
301
338
|
|
302
339
|
void sample() {
|
340
|
+
clear();
|
341
|
+
|
303
342
|
if (!ruby_native_thread_p()) {
|
304
|
-
clear();
|
305
343
|
return;
|
306
344
|
}
|
307
345
|
|
308
346
|
if (rb_during_gc()) {
|
309
347
|
gc = true;
|
310
|
-
len = 0;
|
311
348
|
} else {
|
312
|
-
gc = false;
|
313
349
|
len = rb_profile_frames(0, MAX_LEN, frames, lines);
|
314
350
|
}
|
315
351
|
}
|
@@ -598,12 +634,13 @@ class Marker {
|
|
598
634
|
Phase phase;
|
599
635
|
TimeStamp timestamp;
|
600
636
|
TimeStamp finish;
|
601
|
-
|
637
|
+
// VALUE ruby_thread_id;
|
638
|
+
//native_thread_id_t thread_id;
|
602
639
|
int stack_index = -1;
|
603
640
|
|
604
641
|
VALUE to_array() {
|
605
642
|
VALUE record[6] = {0};
|
606
|
-
record[0] =
|
643
|
+
record[0] = Qnil; // FIXME
|
607
644
|
record[1] = INT2NUM(type);
|
608
645
|
record[2] = INT2NUM(phase);
|
609
646
|
record[3] = ULL2NUM(timestamp.nanoseconds());
|
@@ -621,30 +658,33 @@ class Marker {
|
|
621
658
|
};
|
622
659
|
|
623
660
|
class MarkerTable {
|
624
|
-
TimeStamp last_gc_entry;
|
625
|
-
|
626
661
|
public:
|
627
662
|
std::vector<Marker> list;
|
628
663
|
std::mutex mutex;
|
629
664
|
|
630
|
-
void record_gc_entered() {
|
631
|
-
last_gc_entry = TimeStamp::Now();
|
632
|
-
}
|
633
|
-
|
634
|
-
void record_gc_leave() {
|
635
|
-
list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), get_native_thread_id(), -1 });
|
636
|
-
}
|
637
|
-
|
638
665
|
void record_interval(Marker::Type type, TimeStamp from, TimeStamp to, int stack_index = -1) {
|
639
666
|
const std::lock_guard<std::mutex> lock(mutex);
|
640
667
|
|
641
|
-
list.push_back({ type, Marker::INTERVAL, from, to,
|
668
|
+
list.push_back({ type, Marker::INTERVAL, from, to, stack_index });
|
642
669
|
}
|
643
670
|
|
644
671
|
void record(Marker::Type type, int stack_index = -1) {
|
645
672
|
const std::lock_guard<std::mutex> lock(mutex);
|
646
673
|
|
647
|
-
list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(),
|
674
|
+
list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), stack_index });
|
675
|
+
}
|
676
|
+
};
|
677
|
+
|
678
|
+
class GCMarkerTable: public MarkerTable {
|
679
|
+
TimeStamp last_gc_entry;
|
680
|
+
|
681
|
+
public:
|
682
|
+
void record_gc_entered() {
|
683
|
+
last_gc_entry = TimeStamp::Now();
|
684
|
+
}
|
685
|
+
|
686
|
+
void record_gc_leave() {
|
687
|
+
list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), -1 });
|
648
688
|
}
|
649
689
|
};
|
650
690
|
|
@@ -727,6 +767,8 @@ class Thread {
|
|
727
767
|
STOPPED
|
728
768
|
};
|
729
769
|
|
770
|
+
VALUE ruby_thread;
|
771
|
+
VALUE ruby_thread_id;
|
730
772
|
pthread_t pthread_id;
|
731
773
|
native_thread_id_t native_tid;
|
732
774
|
State state;
|
@@ -738,18 +780,33 @@ class Thread {
|
|
738
780
|
int stack_on_suspend_idx;
|
739
781
|
SampleTranslator translator;
|
740
782
|
|
741
|
-
|
783
|
+
MarkerTable *markers;
|
784
|
+
|
785
|
+
std::string name;
|
742
786
|
|
743
|
-
|
744
|
-
|
787
|
+
// FIXME: don't use pthread at start
|
788
|
+
Thread(State state, pthread_t pthread_id, VALUE ruby_thread) : pthread_id(pthread_id), ruby_thread(ruby_thread), state(state), stack_on_suspend_idx(-1) {
|
789
|
+
name = Qnil;
|
790
|
+
ruby_thread_id = rb_obj_id(ruby_thread);
|
791
|
+
//ruby_thread_id = ULL2NUM(ruby_thread);
|
745
792
|
native_tid = get_native_thread_id();
|
746
793
|
started_at = state_changed_at = TimeStamp::Now();
|
794
|
+
name = "";
|
795
|
+
markers = new MarkerTable();
|
796
|
+
|
797
|
+
if (state == State::STARTED) {
|
798
|
+
markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
|
799
|
+
}
|
747
800
|
}
|
748
801
|
|
749
|
-
void set_state(State new_state
|
802
|
+
void set_state(State new_state) {
|
750
803
|
if (state == Thread::State::STOPPED) {
|
751
804
|
return;
|
752
805
|
}
|
806
|
+
if (new_state == Thread::State::SUSPENDED && state == new_state) {
|
807
|
+
// on Ruby 3.2 (only?) we may see duplicate suspended states
|
808
|
+
return;
|
809
|
+
}
|
753
810
|
|
754
811
|
TimeStamp from = state_changed_at;
|
755
812
|
auto now = TimeStamp::Now();
|
@@ -760,10 +817,13 @@ class Thread {
|
|
760
817
|
|
761
818
|
switch (new_state) {
|
762
819
|
case State::STARTED:
|
763
|
-
|
820
|
+
markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
|
821
|
+
return; // no mutation of current state
|
764
822
|
break;
|
765
823
|
case State::RUNNING:
|
766
|
-
assert(state == State::READY);
|
824
|
+
assert(state == State::READY || state == State::RUNNING);
|
825
|
+
pthread_id = pthread_self();
|
826
|
+
native_tid = get_native_thread_id();
|
767
827
|
|
768
828
|
// If the GVL is immediately ready, and we measure no times
|
769
829
|
// stalled, skip emitting the interval.
|
@@ -779,23 +839,25 @@ class Thread {
|
|
779
839
|
// Threads can be preempted, which means they will have been in "Running"
|
780
840
|
// state, and then the VM was like "no I need to stop you from working,
|
781
841
|
// so I'll put you in the 'ready' (or stalled) state"
|
782
|
-
assert(state == State::SUSPENDED || state == State::RUNNING);
|
842
|
+
assert(state == State::STARTED || state == State::SUSPENDED || state == State::RUNNING);
|
783
843
|
if (state == State::SUSPENDED) {
|
784
844
|
markers->record_interval(Marker::Type::MARKER_THREAD_SUSPENDED, from, now, stack_on_suspend_idx);
|
785
845
|
}
|
786
|
-
else {
|
846
|
+
else if (state == State::RUNNING) {
|
787
847
|
markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
|
788
848
|
}
|
789
849
|
break;
|
790
850
|
case State::SUSPENDED:
|
791
851
|
// We can go from RUNNING or STARTED to SUSPENDED
|
792
|
-
assert(state == State::RUNNING || state == State::STARTED);
|
852
|
+
assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
|
793
853
|
markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
|
794
854
|
break;
|
795
855
|
case State::STOPPED:
|
796
856
|
// We can go from RUNNING or STARTED to STOPPED
|
797
857
|
assert(state == State::RUNNING || state == State::STARTED);
|
798
858
|
markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
|
859
|
+
markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
|
860
|
+
|
799
861
|
stopped_at = now;
|
800
862
|
capture_name();
|
801
863
|
|
@@ -811,10 +873,13 @@ class Thread {
|
|
811
873
|
}
|
812
874
|
|
813
875
|
void capture_name() {
|
814
|
-
char buf[128];
|
815
|
-
int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
|
816
|
-
if (rc == 0)
|
817
|
-
|
876
|
+
//char buf[128];
|
877
|
+
//int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
|
878
|
+
//if (rc == 0)
|
879
|
+
// name = std::string(buf);
|
880
|
+
}
|
881
|
+
|
882
|
+
void mark() {
|
818
883
|
}
|
819
884
|
};
|
820
885
|
|
@@ -828,40 +893,46 @@ class ThreadTable {
|
|
828
893
|
ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
|
829
894
|
}
|
830
895
|
|
831
|
-
void
|
832
|
-
|
896
|
+
void mark() {
|
897
|
+
for (auto &thread : list) {
|
898
|
+
thread.mark();
|
899
|
+
}
|
900
|
+
}
|
833
901
|
|
902
|
+
void started(VALUE th) {
|
834
903
|
//list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
|
835
|
-
|
836
|
-
set_state(Thread::State::STARTED, markers);
|
904
|
+
set_state(Thread::State::STARTED, th);
|
837
905
|
}
|
838
906
|
|
839
|
-
void ready(
|
840
|
-
set_state(Thread::State::READY,
|
907
|
+
void ready(VALUE th) {
|
908
|
+
set_state(Thread::State::READY, th);
|
841
909
|
}
|
842
910
|
|
843
|
-
void resumed(
|
844
|
-
set_state(Thread::State::RUNNING,
|
911
|
+
void resumed(VALUE th) {
|
912
|
+
set_state(Thread::State::RUNNING, th);
|
845
913
|
}
|
846
914
|
|
847
|
-
void suspended(
|
848
|
-
set_state(Thread::State::SUSPENDED,
|
915
|
+
void suspended(VALUE th) {
|
916
|
+
set_state(Thread::State::SUSPENDED, th);
|
849
917
|
}
|
850
918
|
|
851
|
-
void stopped(
|
852
|
-
|
853
|
-
set_state(Thread::State::STOPPED, markers);
|
919
|
+
void stopped(VALUE th) {
|
920
|
+
set_state(Thread::State::STOPPED, th);
|
854
921
|
}
|
855
922
|
|
856
923
|
private:
|
857
|
-
void set_state(Thread::State new_state,
|
924
|
+
void set_state(Thread::State new_state, VALUE th) {
|
858
925
|
const std::lock_guard<std::mutex> lock(mutex);
|
859
926
|
|
860
|
-
pthread_t current_thread = pthread_self();
|
861
927
|
//cerr << "set state=" << new_state << " thread=" << gettid() << endl;
|
862
928
|
|
929
|
+
pid_t native_tid = get_native_thread_id();
|
930
|
+
pthread_t pthread_id = pthread_self();
|
931
|
+
|
932
|
+
//fprintf(stderr, "th %p (tid: %i) from %s to %s\n", (void *)th, native_tid, gvl_event_name(state), gvl_event_name(new_state));
|
933
|
+
|
863
934
|
for (auto &thread : list) {
|
864
|
-
if (
|
935
|
+
if (thread_equal(th, thread.ruby_thread)) {
|
865
936
|
if (new_state == Thread::State::SUSPENDED) {
|
866
937
|
|
867
938
|
RawSample sample;
|
@@ -871,14 +942,27 @@ class ThreadTable {
|
|
871
942
|
//cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
|
872
943
|
}
|
873
944
|
|
874
|
-
thread.set_state(new_state
|
945
|
+
thread.set_state(new_state);
|
946
|
+
|
947
|
+
if (thread.state == Thread::State::RUNNING) {
|
948
|
+
thread.pthread_id = pthread_self();
|
949
|
+
thread.native_tid = get_native_thread_id();
|
950
|
+
} else {
|
951
|
+
thread.pthread_id = 0;
|
952
|
+
thread.native_tid = 0;
|
953
|
+
}
|
954
|
+
|
875
955
|
|
876
956
|
return;
|
877
957
|
}
|
878
958
|
}
|
879
959
|
|
880
|
-
|
881
|
-
list.emplace_back(new_state);
|
960
|
+
//fprintf(stderr, "NEW THREAD: th: %p, state: %i\n", th, new_state);
|
961
|
+
list.emplace_back(new_state, pthread_self(), th);
|
962
|
+
}
|
963
|
+
|
964
|
+
bool thread_equal(VALUE a, VALUE b) {
|
965
|
+
return a == b;
|
882
966
|
}
|
883
967
|
};
|
884
968
|
|
@@ -1091,6 +1175,9 @@ class RetainedCollector : public BaseCollector {
|
|
1091
1175
|
VALUE weights = rb_ary_new();
|
1092
1176
|
rb_hash_aset(thread_hash, sym("weights"), weights);
|
1093
1177
|
|
1178
|
+
rb_hash_aset(thread_hash, sym("name"), rb_str_new_cstr("retained memory"));
|
1179
|
+
rb_hash_aset(thread_hash, sym("started_at"), ULL2NUM(collector->started_at.nanoseconds()));
|
1180
|
+
|
1094
1181
|
for (auto& obj: collector->object_list) {
|
1095
1182
|
const auto search = collector->object_frames.find(obj);
|
1096
1183
|
if (search != collector->object_frames.end()) {
|
@@ -1144,6 +1231,8 @@ class GlobalSignalHandler {
|
|
1144
1231
|
void record_sample(LiveSample &sample, pthread_t pthread_id) {
|
1145
1232
|
const std::lock_guard<std::mutex> lock(mutex);
|
1146
1233
|
|
1234
|
+
assert(pthread_id);
|
1235
|
+
|
1147
1236
|
live_sample = &sample;
|
1148
1237
|
if (pthread_kill(pthread_id, SIGPROF)) {
|
1149
1238
|
rb_bug("pthread_kill failed");
|
@@ -1180,7 +1269,7 @@ class GlobalSignalHandler {
|
|
1180
1269
|
LiveSample *GlobalSignalHandler::live_sample;
|
1181
1270
|
|
1182
1271
|
class TimeCollector : public BaseCollector {
|
1183
|
-
|
1272
|
+
GCMarkerTable gc_markers;
|
1184
1273
|
ThreadTable threads;
|
1185
1274
|
|
1186
1275
|
pthread_t sample_thread;
|
@@ -1209,10 +1298,22 @@ class TimeCollector : public BaseCollector {
|
|
1209
1298
|
}
|
1210
1299
|
|
1211
1300
|
VALUE get_markers() {
|
1212
|
-
VALUE list =
|
1301
|
+
VALUE list = rb_ary_new();
|
1302
|
+
VALUE main_thread = rb_thread_main();
|
1303
|
+
VALUE main_thread_id = rb_obj_id(main_thread);
|
1304
|
+
|
1305
|
+
for (auto& marker: this->gc_markers.list) {
|
1306
|
+
VALUE ary = marker.to_array();
|
1213
1307
|
|
1214
|
-
|
1215
|
-
rb_ary_push(list,
|
1308
|
+
RARRAY_ASET(ary, 0, main_thread_id);
|
1309
|
+
rb_ary_push(list, ary);
|
1310
|
+
}
|
1311
|
+
for (auto &thread : threads.list) {
|
1312
|
+
for (auto& marker: thread.markers->list) {
|
1313
|
+
VALUE ary = marker.to_array();
|
1314
|
+
RARRAY_ASET(ary, 0, thread.ruby_thread_id);
|
1315
|
+
rb_ary_push(list, ary);
|
1316
|
+
}
|
1216
1317
|
}
|
1217
1318
|
|
1218
1319
|
return list;
|
@@ -1228,7 +1329,9 @@ class TimeCollector : public BaseCollector {
|
|
1228
1329
|
threads.mutex.lock();
|
1229
1330
|
for (auto &thread : threads.list) {
|
1230
1331
|
//if (thread.state == Thread::State::RUNNING) {
|
1231
|
-
if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
|
1332
|
+
//if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
|
1333
|
+
if (thread.state == Thread::State::RUNNING) {
|
1334
|
+
//fprintf(stderr, "sampling %p on tid:%i\n", thread.ruby_thread, thread.native_tid);
|
1232
1335
|
GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
|
1233
1336
|
|
1234
1337
|
if (sample.sample.gc) {
|
@@ -1252,19 +1355,25 @@ class TimeCollector : public BaseCollector {
|
|
1252
1355
|
|
1253
1356
|
next_sample_schedule += interval;
|
1254
1357
|
|
1358
|
+
// If sampling falls behind, restart, and check in another interval
|
1255
1359
|
if (next_sample_schedule < sample_complete) {
|
1256
|
-
//fprintf(stderr, "fell behind by %ius\n", (sample_complete - next_sample_schedule).microseconds());
|
1257
1360
|
next_sample_schedule = sample_complete + interval;
|
1258
1361
|
}
|
1259
1362
|
|
1260
|
-
TimeStamp
|
1261
|
-
TimeStamp::Sleep(sleep_time);
|
1363
|
+
TimeStamp::SleepUntil(next_sample_schedule);
|
1262
1364
|
}
|
1263
1365
|
|
1264
1366
|
thread_stopped.post();
|
1265
1367
|
}
|
1266
1368
|
|
1267
1369
|
static void *sample_thread_entry(void *arg) {
|
1370
|
+
#if HAVE_PTHREAD_SETNAME_NP
|
1371
|
+
#ifdef __APPLE__
|
1372
|
+
pthread_setname_np("Vernier profiler");
|
1373
|
+
#else
|
1374
|
+
pthread_setname_np(pthread_self(), "Vernier profiler");
|
1375
|
+
#endif
|
1376
|
+
#endif
|
1268
1377
|
TimeCollector *collector = static_cast<TimeCollector *>(arg);
|
1269
1378
|
collector->sample_thread_run();
|
1270
1379
|
return NULL;
|
@@ -1275,10 +1384,10 @@ class TimeCollector : public BaseCollector {
|
|
1275
1384
|
|
1276
1385
|
switch (event) {
|
1277
1386
|
case RUBY_EVENT_THREAD_BEGIN:
|
1278
|
-
collector->threads.started(
|
1387
|
+
collector->threads.started(self);
|
1279
1388
|
break;
|
1280
1389
|
case RUBY_EVENT_THREAD_END:
|
1281
|
-
collector->threads.stopped(
|
1390
|
+
collector->threads.stopped(self);
|
1282
1391
|
break;
|
1283
1392
|
}
|
1284
1393
|
}
|
@@ -1288,36 +1397,57 @@ class TimeCollector : public BaseCollector {
|
|
1288
1397
|
|
1289
1398
|
switch (event) {
|
1290
1399
|
case RUBY_INTERNAL_EVENT_GC_START:
|
1291
|
-
collector->
|
1400
|
+
collector->gc_markers.record(Marker::Type::MARKER_GC_START);
|
1292
1401
|
break;
|
1293
1402
|
case RUBY_INTERNAL_EVENT_GC_END_MARK:
|
1294
|
-
collector->
|
1403
|
+
collector->gc_markers.record(Marker::Type::MARKER_GC_END_MARK);
|
1295
1404
|
break;
|
1296
1405
|
case RUBY_INTERNAL_EVENT_GC_END_SWEEP:
|
1297
|
-
collector->
|
1406
|
+
collector->gc_markers.record(Marker::Type::MARKER_GC_END_SWEEP);
|
1298
1407
|
break;
|
1299
1408
|
case RUBY_INTERNAL_EVENT_GC_ENTER:
|
1300
|
-
collector->
|
1409
|
+
collector->gc_markers.record_gc_entered();
|
1301
1410
|
break;
|
1302
1411
|
case RUBY_INTERNAL_EVENT_GC_EXIT:
|
1303
|
-
collector->
|
1412
|
+
collector->gc_markers.record_gc_leave();
|
1304
1413
|
break;
|
1305
1414
|
}
|
1306
1415
|
}
|
1307
1416
|
|
1308
1417
|
static void internal_thread_event_cb(rb_event_flag_t event, const rb_internal_thread_event_data_t *event_data, void *data) {
|
1309
1418
|
TimeCollector *collector = static_cast<TimeCollector *>(data);
|
1419
|
+
VALUE thread = Qnil;
|
1420
|
+
|
1421
|
+
#if HAVE_RB_INTERNAL_THREAD_EVENT_DATA_T_THREAD
|
1422
|
+
thread = event_data->thread;
|
1423
|
+
#else
|
1424
|
+
// We may arrive here when starting a thread with
|
1425
|
+
// RUBY_INTERNAL_THREAD_EVENT_READY before the thread is actually set up.
|
1426
|
+
if (!ruby_native_thread_p()) return;
|
1427
|
+
|
1428
|
+
thread = rb_thread_current();
|
1429
|
+
#endif
|
1430
|
+
|
1431
|
+
auto native_tid = get_native_thread_id();
|
1310
1432
|
//cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
|
1433
|
+
//fprintf(stderr, "(%i) th %p to %s\n", native_tid, (void *)thread, gvl_event_name(event));
|
1434
|
+
|
1311
1435
|
|
1312
1436
|
switch (event) {
|
1437
|
+
case RUBY_INTERNAL_THREAD_EVENT_STARTED:
|
1438
|
+
collector->threads.started(thread);
|
1439
|
+
break;
|
1440
|
+
case RUBY_INTERNAL_THREAD_EVENT_EXITED:
|
1441
|
+
collector->threads.stopped(thread);
|
1442
|
+
break;
|
1313
1443
|
case RUBY_INTERNAL_THREAD_EVENT_READY:
|
1314
|
-
collector->threads.ready(
|
1444
|
+
collector->threads.ready(thread);
|
1315
1445
|
break;
|
1316
1446
|
case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
|
1317
|
-
collector->threads.resumed(
|
1447
|
+
collector->threads.resumed(thread);
|
1318
1448
|
break;
|
1319
1449
|
case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
|
1320
|
-
collector->threads.suspended(
|
1450
|
+
collector->threads.suspended(thread);
|
1321
1451
|
break;
|
1322
1452
|
|
1323
1453
|
}
|
@@ -1345,7 +1475,7 @@ class TimeCollector : public BaseCollector {
|
|
1345
1475
|
// have at least one thread in our thread list because it's possible
|
1346
1476
|
// that the profile might be such that we don't get any thread switch
|
1347
1477
|
// events and we need at least one
|
1348
|
-
this->threads.resumed(
|
1478
|
+
this->threads.resumed(rb_thread_current());
|
1349
1479
|
|
1350
1480
|
thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
|
1351
1481
|
rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
|
@@ -1392,7 +1522,7 @@ class TimeCollector : public BaseCollector {
|
|
1392
1522
|
VALUE hash = rb_hash_new();
|
1393
1523
|
thread.samples.write_result(hash);
|
1394
1524
|
|
1395
|
-
rb_hash_aset(threads,
|
1525
|
+
rb_hash_aset(threads, thread.ruby_thread_id, hash);
|
1396
1526
|
rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
|
1397
1527
|
rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
|
1398
1528
|
if (!thread.stopped_at.zero()) {
|
@@ -1409,6 +1539,7 @@ class TimeCollector : public BaseCollector {
|
|
1409
1539
|
|
1410
1540
|
void mark() {
|
1411
1541
|
frame_list.mark_frames();
|
1542
|
+
threads.mark();
|
1412
1543
|
|
1413
1544
|
//for (int i = 0; i < queued_length; i++) {
|
1414
1545
|
// rb_gc_mark(queued_frames[i]);
|
data/lib/vernier/collector.rb
CHANGED
@@ -19,7 +19,7 @@ module Vernier
|
|
19
19
|
Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
|
20
20
|
end
|
21
21
|
|
22
|
-
def add_marker(name:, start:, finish:, thread: Thread.current.
|
22
|
+
def add_marker(name:, start:, finish:, thread: Thread.current.object_id, phase: Marker::Phase::INTERVAL, data: nil)
|
23
23
|
@markers << [thread,
|
24
24
|
name,
|
25
25
|
start,
|
@@ -39,7 +39,7 @@ module Vernier
|
|
39
39
|
start:,
|
40
40
|
finish: current_time,
|
41
41
|
phase: Marker::Phase::INTERVAL,
|
42
|
-
thread: Thread.current.
|
42
|
+
thread: Thread.current.object_id,
|
43
43
|
data: { :type => 'UserTiming', :entryType => 'measure', :name => name }
|
44
44
|
)
|
45
45
|
end
|
@@ -99,9 +99,10 @@ module Vernier
|
|
99
99
|
def data
|
100
100
|
markers_by_thread = profile.markers.group_by { |marker| marker[0] }
|
101
101
|
|
102
|
-
thread_data = profile.threads.map do |
|
103
|
-
markers = markers_by_thread[
|
102
|
+
thread_data = profile.threads.map do |ruby_thread_id, thread_info|
|
103
|
+
markers = markers_by_thread[ruby_thread_id] || []
|
104
104
|
Thread.new(
|
105
|
+
ruby_thread_id,
|
105
106
|
profile,
|
106
107
|
@categorizer,
|
107
108
|
markers: markers,
|
@@ -157,14 +158,16 @@ module Vernier
|
|
157
158
|
class Thread
|
158
159
|
attr_reader :profile
|
159
160
|
|
160
|
-
def initialize(profile, categorizer, name:, tid:, samples:, weights:, timestamps
|
161
|
+
def initialize(ruby_thread_id, profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
|
162
|
+
@ruby_thread_id = ruby_thread_id
|
161
163
|
@profile = profile
|
162
164
|
@categorizer = categorizer
|
163
165
|
@tid = tid
|
164
166
|
@name = name
|
165
167
|
|
168
|
+
timestamps ||= [0] * samples.size
|
166
169
|
@samples, @weights, @timestamps = samples, weights, timestamps
|
167
|
-
@sample_categories = sample_categories
|
170
|
+
@sample_categories = sample_categories || ([0] * samples.size)
|
168
171
|
@markers = markers
|
169
172
|
|
170
173
|
@started_at, @stopped_at = started_at, stopped_at
|
@@ -211,7 +214,7 @@ module Vernier
|
|
211
214
|
def data
|
212
215
|
{
|
213
216
|
name: @name,
|
214
|
-
isMainThread: @
|
217
|
+
isMainThread: @ruby_thread_id == ::Thread.main.object_id || (profile.threads.size == 1),
|
215
218
|
processStartupTime: 0, # FIXME
|
216
219
|
processShutdownTime: nil, # FIXME
|
217
220
|
registerTime: (@started_at - 0) / 1_000_000.0,
|
@@ -237,8 +240,6 @@ module Vernier
|
|
237
240
|
end
|
238
241
|
|
239
242
|
def markers_table
|
240
|
-
size = @markers.size
|
241
|
-
|
242
243
|
string_indexes = []
|
243
244
|
start_times = []
|
244
245
|
end_times = []
|
@@ -292,7 +293,6 @@ module Vernier
|
|
292
293
|
times = (0...size).to_a
|
293
294
|
end
|
294
295
|
|
295
|
-
raise unless samples.size == size
|
296
296
|
raise unless weights.size == size
|
297
297
|
raise unless times.size == size
|
298
298
|
|
data/lib/vernier/version.rb
CHANGED
data/lib/vernier.rb
CHANGED
@@ -19,11 +19,11 @@ module Vernier
|
|
19
19
|
yield collector
|
20
20
|
ensure
|
21
21
|
result = collector.stop
|
22
|
+
if out
|
23
|
+
File.write(out, Output::Firefox.new(result).output)
|
24
|
+
end
|
22
25
|
end
|
23
26
|
|
24
|
-
if out
|
25
|
-
File.write(out, Output::Firefox.new(result).output)
|
26
|
-
end
|
27
27
|
result
|
28
28
|
end
|
29
29
|
|
data/vernier.gemspec
CHANGED
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.description = spec.summary
|
13
13
|
spec.homepage = "https://github.com/jhawthorn/vernier"
|
14
14
|
spec.license = "MIT"
|
15
|
-
spec.required_ruby_version = ">= 3.2.
|
15
|
+
spec.required_ruby_version = ">= 3.2.1"
|
16
16
|
|
17
17
|
spec.metadata["homepage_uri"] = spec.homepage
|
18
18
|
spec.metadata["source_code_uri"] = spec.homepage
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vernier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Hawthorn
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: An experimental profiler
|
14
14
|
email:
|
@@ -60,7 +60,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
60
60
|
requirements:
|
61
61
|
- - ">="
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: 3.2.
|
63
|
+
version: 3.2.1
|
64
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - ">="
|