vernier 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +30 -8
- data/ext/vernier/extconf.rb +7 -0
- data/ext/vernier/vernier.cc +205 -74
- data/lib/vernier/collector.rb +2 -2
- data/lib/vernier/output/firefox.rb +8 -8
- data/lib/vernier/version.rb +1 -1
- data/lib/vernier.rb +3 -3
- data/vernier.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c3190b81748262d9620de74e12f6adca9b2c4126741781f9860076d96c96a123
|
4
|
+
data.tar.gz: b54848781f0b17c16074fd630d0aae6b6ea206e47679713ce858e6e3f08525a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de91010589471c0b4a7cfddb37bab92262392ef33da2af44930cdab848a6fd290468abe76c147485ba032d89cdac33b631937eea01bd15af8e89b03a5200e69e
|
7
|
+
data.tar.gz: 82b40e4d93685ab8c560a995df31421c06ff00f8c27f33d6b37296bed96ca5b37cacda3be60e64464f535dd52d273f80d4885705f591e7246d7b3fb2269c151f
|
data/README.md
CHANGED
@@ -1,33 +1,55 @@
|
|
1
1
|
# Vernier
|
2
2
|
|
3
|
-
|
3
|
+
Next-generation Ruby 3.2.1+ sampling profiler. Tracks multiple threads, GVL activity, GC pauses, idle time, and more.
|
4
|
+
|
5
|
+
## Examples
|
6
|
+
|
7
|
+
[Livestreamed demo: Pairin' with Aaron (YouTube)](https://www.youtube.com/watch?v=9nvX3OHykGQ#t=27m43)
|
8
|
+
|
9
|
+
Sidekiq jobs from Mastodon (time, threaded)
|
10
|
+
: https://share.firefox.dev/44jZRf3
|
11
|
+
|
12
|
+
Puma web requests from Mastodon (time, threaded)
|
13
|
+
: https://share.firefox.dev/48FOTnF
|
14
|
+
|
15
|
+
Rails benchmark - lobste.rs (time)
|
16
|
+
: https://share.firefox.dev/3Ld89id
|
17
|
+
|
18
|
+
`require "irb"` (retained memory)
|
19
|
+
: https://share.firefox.dev/3DhLsFa
|
4
20
|
|
5
21
|
## Installation
|
6
22
|
|
23
|
+
Vernier requires Ruby version 3.2.1 or greater.
|
24
|
+
|
7
25
|
```ruby
|
8
26
|
gem 'vernier'
|
9
27
|
```
|
10
28
|
|
11
29
|
## Usage
|
12
30
|
|
13
|
-
### Retained memory
|
14
31
|
|
15
|
-
|
32
|
+
### Time
|
16
33
|
|
17
34
|
```
|
18
|
-
|
35
|
+
Vernier.trace(out: "time_profile.json") { some_slow_method }
|
19
36
|
```
|
20
37
|
|
21
|
-
The output can then be viewed in the
|
38
|
+
The output can then be viewed in the Firefox Profiler (demo) or the [`profile-viewer` gem](https://github.com/tenderlove/profiler/tree/ruby) (a Ruby-customized version of the Firefox Profiler).
|
22
39
|
|
23
|
-
|
40
|
+
- **Flame Graph**: Shows proportionally how much time is spent within particular stack frames. Frames are grouped together, which means that x-axis / left-to-right order is not meaningful.
|
41
|
+
- **Stack Chart**: Shows the stack at each sample with the x-axis representing time and can be read left-to-right.
|
24
42
|
|
25
|
-
###
|
43
|
+
### Retained memory
|
44
|
+
|
45
|
+
Record a flamegraph of all **retained** allocations from loading `irb`.
|
26
46
|
|
27
47
|
```
|
28
|
-
Vernier.
|
48
|
+
ruby -r vernier -e 'Vernier.trace_retained(out: "irb_profile.json") { require "irb" }'
|
29
49
|
```
|
30
50
|
|
51
|
+
Retained-memory flamegraphs must be interpreted a little differently than a typical profiling flamegraph. In a retained-memory flamegraph, the x-axis represents a proportion of memory in bytes, _not time or samples_. The topmost boxes on the y-axis represent the retained objects, with their stacktrace below; their width represents the percentage of overall retained memory each object occupies.
|
52
|
+
|
31
53
|
## Development
|
32
54
|
|
33
55
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/ext/vernier/extconf.rb
CHANGED
@@ -5,4 +5,11 @@ require "mkmf"
|
|
5
5
|
$CXXFLAGS += " -std=c++14 "
|
6
6
|
$CXXFLAGS += " -ggdb3 -Og "
|
7
7
|
|
8
|
+
have_header("ruby/thread.h")
|
9
|
+
have_struct_member("rb_internal_thread_event_data_t", "thread", ["ruby/thread.h"])
|
10
|
+
|
11
|
+
have_func("rb_profile_thread_frames", "ruby/debug.h")
|
12
|
+
|
13
|
+
have_func("pthread_setname_np")
|
14
|
+
|
8
15
|
create_makefile("vernier/vernier")
|
data/ext/vernier/vernier.cc
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
// vim: expandtab:ts=4:sw=4
|
2
|
+
|
1
3
|
#include <iostream>
|
2
4
|
#include <iomanip>
|
3
5
|
#include <vector>
|
@@ -27,6 +29,9 @@
|
|
27
29
|
#include "ruby/debug.h"
|
28
30
|
#include "ruby/thread.h"
|
29
31
|
|
32
|
+
#undef assert
|
33
|
+
#define assert RUBY_ASSERT_ALWAYS
|
34
|
+
|
30
35
|
# define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))
|
31
36
|
|
32
37
|
// Internal TracePoint events we'll monitor during profiling
|
@@ -53,6 +58,22 @@ static VALUE rb_cVernierResult;
|
|
53
58
|
static VALUE rb_mVernierMarkerType;
|
54
59
|
static VALUE rb_cVernierCollector;
|
55
60
|
|
61
|
+
static const char *gvl_event_name(rb_event_flag_t event) {
|
62
|
+
switch (event) {
|
63
|
+
case RUBY_INTERNAL_THREAD_EVENT_STARTED:
|
64
|
+
return "started";
|
65
|
+
case RUBY_INTERNAL_THREAD_EVENT_READY:
|
66
|
+
return "ready";
|
67
|
+
case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
|
68
|
+
return "resumed";
|
69
|
+
case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
|
70
|
+
return "suspended";
|
71
|
+
case RUBY_INTERNAL_THREAD_EVENT_EXITED:
|
72
|
+
return "exited";
|
73
|
+
}
|
74
|
+
return "no-event";
|
75
|
+
}
|
76
|
+
|
56
77
|
class TimeStamp {
|
57
78
|
static const uint64_t nanoseconds_per_second = 1000000000;
|
58
79
|
uint64_t value_ns;
|
@@ -72,17 +93,29 @@ class TimeStamp {
|
|
72
93
|
return TimeStamp(0);
|
73
94
|
}
|
74
95
|
|
75
|
-
|
76
|
-
|
96
|
+
// SleepUntil a specified timestamp
|
97
|
+
// Highly accurate manual sleep time
|
98
|
+
static void SleepUntil(const TimeStamp &target_time) {
|
99
|
+
if (target_time.zero()) return;
|
100
|
+
struct timespec ts = target_time.timespec();
|
77
101
|
|
78
102
|
int res;
|
79
103
|
do {
|
80
|
-
|
81
|
-
|
104
|
+
// do nothing until it's time :)
|
105
|
+
sleep(0);
|
106
|
+
} while (target_time > TimeStamp::Now());
|
107
|
+
}
|
108
|
+
|
109
|
+
static TimeStamp from_seconds(uint64_t s) {
|
110
|
+
return TimeStamp::from_milliseconds(s * 1000);
|
111
|
+
}
|
112
|
+
|
113
|
+
static TimeStamp from_milliseconds(uint64_t ms) {
|
114
|
+
return TimeStamp::from_microseconds(ms * 1000);
|
82
115
|
}
|
83
116
|
|
84
117
|
static TimeStamp from_microseconds(uint64_t us) {
|
85
|
-
return TimeStamp(us * 1000);
|
118
|
+
return TimeStamp::from_nanoseconds(us * 1000);
|
86
119
|
}
|
87
120
|
|
88
121
|
static TimeStamp from_nanoseconds(uint64_t ns) {
|
@@ -262,6 +295,10 @@ class SamplerSemaphore {
|
|
262
295
|
#ifdef __APPLE__
|
263
296
|
dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
|
264
297
|
#else
|
298
|
+
// Use sem_timedwait so that we get a crash instead of a deadlock for
|
299
|
+
// easier debugging
|
300
|
+
auto ts = (TimeStamp::Now() + TimeStamp::from_seconds(5)).timespec();
|
301
|
+
|
265
302
|
int ret;
|
266
303
|
do {
|
267
304
|
ret = sem_wait(&sem);
|
@@ -300,16 +337,15 @@ struct RawSample {
|
|
300
337
|
}
|
301
338
|
|
302
339
|
void sample() {
|
340
|
+
clear();
|
341
|
+
|
303
342
|
if (!ruby_native_thread_p()) {
|
304
|
-
clear();
|
305
343
|
return;
|
306
344
|
}
|
307
345
|
|
308
346
|
if (rb_during_gc()) {
|
309
347
|
gc = true;
|
310
|
-
len = 0;
|
311
348
|
} else {
|
312
|
-
gc = false;
|
313
349
|
len = rb_profile_frames(0, MAX_LEN, frames, lines);
|
314
350
|
}
|
315
351
|
}
|
@@ -598,12 +634,13 @@ class Marker {
|
|
598
634
|
Phase phase;
|
599
635
|
TimeStamp timestamp;
|
600
636
|
TimeStamp finish;
|
601
|
-
|
637
|
+
// VALUE ruby_thread_id;
|
638
|
+
//native_thread_id_t thread_id;
|
602
639
|
int stack_index = -1;
|
603
640
|
|
604
641
|
VALUE to_array() {
|
605
642
|
VALUE record[6] = {0};
|
606
|
-
record[0] =
|
643
|
+
record[0] = Qnil; // FIXME
|
607
644
|
record[1] = INT2NUM(type);
|
608
645
|
record[2] = INT2NUM(phase);
|
609
646
|
record[3] = ULL2NUM(timestamp.nanoseconds());
|
@@ -621,30 +658,33 @@ class Marker {
|
|
621
658
|
};
|
622
659
|
|
623
660
|
class MarkerTable {
|
624
|
-
TimeStamp last_gc_entry;
|
625
|
-
|
626
661
|
public:
|
627
662
|
std::vector<Marker> list;
|
628
663
|
std::mutex mutex;
|
629
664
|
|
630
|
-
void record_gc_entered() {
|
631
|
-
last_gc_entry = TimeStamp::Now();
|
632
|
-
}
|
633
|
-
|
634
|
-
void record_gc_leave() {
|
635
|
-
list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), get_native_thread_id(), -1 });
|
636
|
-
}
|
637
|
-
|
638
665
|
void record_interval(Marker::Type type, TimeStamp from, TimeStamp to, int stack_index = -1) {
|
639
666
|
const std::lock_guard<std::mutex> lock(mutex);
|
640
667
|
|
641
|
-
list.push_back({ type, Marker::INTERVAL, from, to,
|
668
|
+
list.push_back({ type, Marker::INTERVAL, from, to, stack_index });
|
642
669
|
}
|
643
670
|
|
644
671
|
void record(Marker::Type type, int stack_index = -1) {
|
645
672
|
const std::lock_guard<std::mutex> lock(mutex);
|
646
673
|
|
647
|
-
list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(),
|
674
|
+
list.push_back({ type, Marker::INSTANT, TimeStamp::Now(), TimeStamp(), stack_index });
|
675
|
+
}
|
676
|
+
};
|
677
|
+
|
678
|
+
class GCMarkerTable: public MarkerTable {
|
679
|
+
TimeStamp last_gc_entry;
|
680
|
+
|
681
|
+
public:
|
682
|
+
void record_gc_entered() {
|
683
|
+
last_gc_entry = TimeStamp::Now();
|
684
|
+
}
|
685
|
+
|
686
|
+
void record_gc_leave() {
|
687
|
+
list.push_back({ Marker::MARKER_GC_PAUSE, Marker::INTERVAL, last_gc_entry, TimeStamp::Now(), -1 });
|
648
688
|
}
|
649
689
|
};
|
650
690
|
|
@@ -727,6 +767,8 @@ class Thread {
|
|
727
767
|
STOPPED
|
728
768
|
};
|
729
769
|
|
770
|
+
VALUE ruby_thread;
|
771
|
+
VALUE ruby_thread_id;
|
730
772
|
pthread_t pthread_id;
|
731
773
|
native_thread_id_t native_tid;
|
732
774
|
State state;
|
@@ -738,18 +780,33 @@ class Thread {
|
|
738
780
|
int stack_on_suspend_idx;
|
739
781
|
SampleTranslator translator;
|
740
782
|
|
741
|
-
|
783
|
+
MarkerTable *markers;
|
784
|
+
|
785
|
+
std::string name;
|
742
786
|
|
743
|
-
|
744
|
-
|
787
|
+
// FIXME: don't use pthread at start
|
788
|
+
Thread(State state, pthread_t pthread_id, VALUE ruby_thread) : pthread_id(pthread_id), ruby_thread(ruby_thread), state(state), stack_on_suspend_idx(-1) {
|
789
|
+
name = Qnil;
|
790
|
+
ruby_thread_id = rb_obj_id(ruby_thread);
|
791
|
+
//ruby_thread_id = ULL2NUM(ruby_thread);
|
745
792
|
native_tid = get_native_thread_id();
|
746
793
|
started_at = state_changed_at = TimeStamp::Now();
|
794
|
+
name = "";
|
795
|
+
markers = new MarkerTable();
|
796
|
+
|
797
|
+
if (state == State::STARTED) {
|
798
|
+
markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
|
799
|
+
}
|
747
800
|
}
|
748
801
|
|
749
|
-
void set_state(State new_state
|
802
|
+
void set_state(State new_state) {
|
750
803
|
if (state == Thread::State::STOPPED) {
|
751
804
|
return;
|
752
805
|
}
|
806
|
+
if (new_state == Thread::State::SUSPENDED && state == new_state) {
|
807
|
+
// on Ruby 3.2 (only?) we may see duplicate suspended states
|
808
|
+
return;
|
809
|
+
}
|
753
810
|
|
754
811
|
TimeStamp from = state_changed_at;
|
755
812
|
auto now = TimeStamp::Now();
|
@@ -760,10 +817,13 @@ class Thread {
|
|
760
817
|
|
761
818
|
switch (new_state) {
|
762
819
|
case State::STARTED:
|
763
|
-
|
820
|
+
markers->record(Marker::Type::MARKER_GVL_THREAD_STARTED);
|
821
|
+
return; // no mutation of current state
|
764
822
|
break;
|
765
823
|
case State::RUNNING:
|
766
|
-
assert(state == State::READY);
|
824
|
+
assert(state == State::READY || state == State::RUNNING);
|
825
|
+
pthread_id = pthread_self();
|
826
|
+
native_tid = get_native_thread_id();
|
767
827
|
|
768
828
|
// If the GVL is immediately ready, and we measure no times
|
769
829
|
// stalled, skip emitting the interval.
|
@@ -779,23 +839,25 @@ class Thread {
|
|
779
839
|
// Threads can be preempted, which means they will have been in "Running"
|
780
840
|
// state, and then the VM was like "no I need to stop you from working,
|
781
841
|
// so I'll put you in the 'ready' (or stalled) state"
|
782
|
-
assert(state == State::SUSPENDED || state == State::RUNNING);
|
842
|
+
assert(state == State::STARTED || state == State::SUSPENDED || state == State::RUNNING);
|
783
843
|
if (state == State::SUSPENDED) {
|
784
844
|
markers->record_interval(Marker::Type::MARKER_THREAD_SUSPENDED, from, now, stack_on_suspend_idx);
|
785
845
|
}
|
786
|
-
else {
|
846
|
+
else if (state == State::RUNNING) {
|
787
847
|
markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
|
788
848
|
}
|
789
849
|
break;
|
790
850
|
case State::SUSPENDED:
|
791
851
|
// We can go from RUNNING or STARTED to SUSPENDED
|
792
|
-
assert(state == State::RUNNING || state == State::STARTED);
|
852
|
+
assert(state == State::RUNNING || state == State::STARTED || state == State::SUSPENDED);
|
793
853
|
markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
|
794
854
|
break;
|
795
855
|
case State::STOPPED:
|
796
856
|
// We can go from RUNNING or STARTED to STOPPED
|
797
857
|
assert(state == State::RUNNING || state == State::STARTED);
|
798
858
|
markers->record_interval(Marker::Type::MARKER_THREAD_RUNNING, from, now);
|
859
|
+
markers->record(Marker::Type::MARKER_GVL_THREAD_EXITED);
|
860
|
+
|
799
861
|
stopped_at = now;
|
800
862
|
capture_name();
|
801
863
|
|
@@ -811,10 +873,13 @@ class Thread {
|
|
811
873
|
}
|
812
874
|
|
813
875
|
void capture_name() {
|
814
|
-
char buf[128];
|
815
|
-
int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
|
816
|
-
if (rc == 0)
|
817
|
-
|
876
|
+
//char buf[128];
|
877
|
+
//int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
|
878
|
+
//if (rc == 0)
|
879
|
+
// name = std::string(buf);
|
880
|
+
}
|
881
|
+
|
882
|
+
void mark() {
|
818
883
|
}
|
819
884
|
};
|
820
885
|
|
@@ -828,40 +893,46 @@ class ThreadTable {
|
|
828
893
|
ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
|
829
894
|
}
|
830
895
|
|
831
|
-
void
|
832
|
-
|
896
|
+
void mark() {
|
897
|
+
for (auto &thread : list) {
|
898
|
+
thread.mark();
|
899
|
+
}
|
900
|
+
}
|
833
901
|
|
902
|
+
void started(VALUE th) {
|
834
903
|
//list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
|
835
|
-
|
836
|
-
set_state(Thread::State::STARTED, markers);
|
904
|
+
set_state(Thread::State::STARTED, th);
|
837
905
|
}
|
838
906
|
|
839
|
-
void ready(
|
840
|
-
set_state(Thread::State::READY,
|
907
|
+
void ready(VALUE th) {
|
908
|
+
set_state(Thread::State::READY, th);
|
841
909
|
}
|
842
910
|
|
843
|
-
void resumed(
|
844
|
-
set_state(Thread::State::RUNNING,
|
911
|
+
void resumed(VALUE th) {
|
912
|
+
set_state(Thread::State::RUNNING, th);
|
845
913
|
}
|
846
914
|
|
847
|
-
void suspended(
|
848
|
-
set_state(Thread::State::SUSPENDED,
|
915
|
+
void suspended(VALUE th) {
|
916
|
+
set_state(Thread::State::SUSPENDED, th);
|
849
917
|
}
|
850
918
|
|
851
|
-
void stopped(
|
852
|
-
|
853
|
-
set_state(Thread::State::STOPPED, markers);
|
919
|
+
void stopped(VALUE th) {
|
920
|
+
set_state(Thread::State::STOPPED, th);
|
854
921
|
}
|
855
922
|
|
856
923
|
private:
|
857
|
-
void set_state(Thread::State new_state,
|
924
|
+
void set_state(Thread::State new_state, VALUE th) {
|
858
925
|
const std::lock_guard<std::mutex> lock(mutex);
|
859
926
|
|
860
|
-
pthread_t current_thread = pthread_self();
|
861
927
|
//cerr << "set state=" << new_state << " thread=" << gettid() << endl;
|
862
928
|
|
929
|
+
pid_t native_tid = get_native_thread_id();
|
930
|
+
pthread_t pthread_id = pthread_self();
|
931
|
+
|
932
|
+
//fprintf(stderr, "th %p (tid: %i) from %s to %s\n", (void *)th, native_tid, gvl_event_name(state), gvl_event_name(new_state));
|
933
|
+
|
863
934
|
for (auto &thread : list) {
|
864
|
-
if (
|
935
|
+
if (thread_equal(th, thread.ruby_thread)) {
|
865
936
|
if (new_state == Thread::State::SUSPENDED) {
|
866
937
|
|
867
938
|
RawSample sample;
|
@@ -871,14 +942,27 @@ class ThreadTable {
|
|
871
942
|
//cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
|
872
943
|
}
|
873
944
|
|
874
|
-
thread.set_state(new_state
|
945
|
+
thread.set_state(new_state);
|
946
|
+
|
947
|
+
if (thread.state == Thread::State::RUNNING) {
|
948
|
+
thread.pthread_id = pthread_self();
|
949
|
+
thread.native_tid = get_native_thread_id();
|
950
|
+
} else {
|
951
|
+
thread.pthread_id = 0;
|
952
|
+
thread.native_tid = 0;
|
953
|
+
}
|
954
|
+
|
875
955
|
|
876
956
|
return;
|
877
957
|
}
|
878
958
|
}
|
879
959
|
|
880
|
-
|
881
|
-
list.emplace_back(new_state);
|
960
|
+
//fprintf(stderr, "NEW THREAD: th: %p, state: %i\n", th, new_state);
|
961
|
+
list.emplace_back(new_state, pthread_self(), th);
|
962
|
+
}
|
963
|
+
|
964
|
+
bool thread_equal(VALUE a, VALUE b) {
|
965
|
+
return a == b;
|
882
966
|
}
|
883
967
|
};
|
884
968
|
|
@@ -1091,6 +1175,9 @@ class RetainedCollector : public BaseCollector {
|
|
1091
1175
|
VALUE weights = rb_ary_new();
|
1092
1176
|
rb_hash_aset(thread_hash, sym("weights"), weights);
|
1093
1177
|
|
1178
|
+
rb_hash_aset(thread_hash, sym("name"), rb_str_new_cstr("retained memory"));
|
1179
|
+
rb_hash_aset(thread_hash, sym("started_at"), ULL2NUM(collector->started_at.nanoseconds()));
|
1180
|
+
|
1094
1181
|
for (auto& obj: collector->object_list) {
|
1095
1182
|
const auto search = collector->object_frames.find(obj);
|
1096
1183
|
if (search != collector->object_frames.end()) {
|
@@ -1144,6 +1231,8 @@ class GlobalSignalHandler {
|
|
1144
1231
|
void record_sample(LiveSample &sample, pthread_t pthread_id) {
|
1145
1232
|
const std::lock_guard<std::mutex> lock(mutex);
|
1146
1233
|
|
1234
|
+
assert(pthread_id);
|
1235
|
+
|
1147
1236
|
live_sample = &sample;
|
1148
1237
|
if (pthread_kill(pthread_id, SIGPROF)) {
|
1149
1238
|
rb_bug("pthread_kill failed");
|
@@ -1180,7 +1269,7 @@ class GlobalSignalHandler {
|
|
1180
1269
|
LiveSample *GlobalSignalHandler::live_sample;
|
1181
1270
|
|
1182
1271
|
class TimeCollector : public BaseCollector {
|
1183
|
-
|
1272
|
+
GCMarkerTable gc_markers;
|
1184
1273
|
ThreadTable threads;
|
1185
1274
|
|
1186
1275
|
pthread_t sample_thread;
|
@@ -1209,10 +1298,22 @@ class TimeCollector : public BaseCollector {
|
|
1209
1298
|
}
|
1210
1299
|
|
1211
1300
|
VALUE get_markers() {
|
1212
|
-
VALUE list =
|
1301
|
+
VALUE list = rb_ary_new();
|
1302
|
+
VALUE main_thread = rb_thread_main();
|
1303
|
+
VALUE main_thread_id = rb_obj_id(main_thread);
|
1304
|
+
|
1305
|
+
for (auto& marker: this->gc_markers.list) {
|
1306
|
+
VALUE ary = marker.to_array();
|
1213
1307
|
|
1214
|
-
|
1215
|
-
rb_ary_push(list,
|
1308
|
+
RARRAY_ASET(ary, 0, main_thread_id);
|
1309
|
+
rb_ary_push(list, ary);
|
1310
|
+
}
|
1311
|
+
for (auto &thread : threads.list) {
|
1312
|
+
for (auto& marker: thread.markers->list) {
|
1313
|
+
VALUE ary = marker.to_array();
|
1314
|
+
RARRAY_ASET(ary, 0, thread.ruby_thread_id);
|
1315
|
+
rb_ary_push(list, ary);
|
1316
|
+
}
|
1216
1317
|
}
|
1217
1318
|
|
1218
1319
|
return list;
|
@@ -1228,7 +1329,9 @@ class TimeCollector : public BaseCollector {
|
|
1228
1329
|
threads.mutex.lock();
|
1229
1330
|
for (auto &thread : threads.list) {
|
1230
1331
|
//if (thread.state == Thread::State::RUNNING) {
|
1231
|
-
if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
|
1332
|
+
//if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend_idx < 0)) {
|
1333
|
+
if (thread.state == Thread::State::RUNNING) {
|
1334
|
+
//fprintf(stderr, "sampling %p on tid:%i\n", thread.ruby_thread, thread.native_tid);
|
1232
1335
|
GlobalSignalHandler::get_instance()->record_sample(sample, thread.pthread_id);
|
1233
1336
|
|
1234
1337
|
if (sample.sample.gc) {
|
@@ -1252,19 +1355,25 @@ class TimeCollector : public BaseCollector {
|
|
1252
1355
|
|
1253
1356
|
next_sample_schedule += interval;
|
1254
1357
|
|
1358
|
+
// If sampling falls behind, restart, and check in another interval
|
1255
1359
|
if (next_sample_schedule < sample_complete) {
|
1256
|
-
//fprintf(stderr, "fell behind by %ius\n", (sample_complete - next_sample_schedule).microseconds());
|
1257
1360
|
next_sample_schedule = sample_complete + interval;
|
1258
1361
|
}
|
1259
1362
|
|
1260
|
-
TimeStamp
|
1261
|
-
TimeStamp::Sleep(sleep_time);
|
1363
|
+
TimeStamp::SleepUntil(next_sample_schedule);
|
1262
1364
|
}
|
1263
1365
|
|
1264
1366
|
thread_stopped.post();
|
1265
1367
|
}
|
1266
1368
|
|
1267
1369
|
static void *sample_thread_entry(void *arg) {
|
1370
|
+
#if HAVE_PTHREAD_SETNAME_NP
|
1371
|
+
#ifdef __APPLE__
|
1372
|
+
pthread_setname_np("Vernier profiler");
|
1373
|
+
#else
|
1374
|
+
pthread_setname_np(pthread_self(), "Vernier profiler");
|
1375
|
+
#endif
|
1376
|
+
#endif
|
1268
1377
|
TimeCollector *collector = static_cast<TimeCollector *>(arg);
|
1269
1378
|
collector->sample_thread_run();
|
1270
1379
|
return NULL;
|
@@ -1275,10 +1384,10 @@ class TimeCollector : public BaseCollector {
|
|
1275
1384
|
|
1276
1385
|
switch (event) {
|
1277
1386
|
case RUBY_EVENT_THREAD_BEGIN:
|
1278
|
-
collector->threads.started(
|
1387
|
+
collector->threads.started(self);
|
1279
1388
|
break;
|
1280
1389
|
case RUBY_EVENT_THREAD_END:
|
1281
|
-
collector->threads.stopped(
|
1390
|
+
collector->threads.stopped(self);
|
1282
1391
|
break;
|
1283
1392
|
}
|
1284
1393
|
}
|
@@ -1288,36 +1397,57 @@ class TimeCollector : public BaseCollector {
|
|
1288
1397
|
|
1289
1398
|
switch (event) {
|
1290
1399
|
case RUBY_INTERNAL_EVENT_GC_START:
|
1291
|
-
collector->
|
1400
|
+
collector->gc_markers.record(Marker::Type::MARKER_GC_START);
|
1292
1401
|
break;
|
1293
1402
|
case RUBY_INTERNAL_EVENT_GC_END_MARK:
|
1294
|
-
collector->
|
1403
|
+
collector->gc_markers.record(Marker::Type::MARKER_GC_END_MARK);
|
1295
1404
|
break;
|
1296
1405
|
case RUBY_INTERNAL_EVENT_GC_END_SWEEP:
|
1297
|
-
collector->
|
1406
|
+
collector->gc_markers.record(Marker::Type::MARKER_GC_END_SWEEP);
|
1298
1407
|
break;
|
1299
1408
|
case RUBY_INTERNAL_EVENT_GC_ENTER:
|
1300
|
-
collector->
|
1409
|
+
collector->gc_markers.record_gc_entered();
|
1301
1410
|
break;
|
1302
1411
|
case RUBY_INTERNAL_EVENT_GC_EXIT:
|
1303
|
-
collector->
|
1412
|
+
collector->gc_markers.record_gc_leave();
|
1304
1413
|
break;
|
1305
1414
|
}
|
1306
1415
|
}
|
1307
1416
|
|
1308
1417
|
static void internal_thread_event_cb(rb_event_flag_t event, const rb_internal_thread_event_data_t *event_data, void *data) {
|
1309
1418
|
TimeCollector *collector = static_cast<TimeCollector *>(data);
|
1419
|
+
VALUE thread = Qnil;
|
1420
|
+
|
1421
|
+
#if HAVE_RB_INTERNAL_THREAD_EVENT_DATA_T_THREAD
|
1422
|
+
thread = event_data->thread;
|
1423
|
+
#else
|
1424
|
+
// We may arrive here when starting a thread with
|
1425
|
+
// RUBY_INTERNAL_THREAD_EVENT_READY before the thread is actually set up.
|
1426
|
+
if (!ruby_native_thread_p()) return;
|
1427
|
+
|
1428
|
+
thread = rb_thread_current();
|
1429
|
+
#endif
|
1430
|
+
|
1431
|
+
auto native_tid = get_native_thread_id();
|
1310
1432
|
//cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
|
1433
|
+
//fprintf(stderr, "(%i) th %p to %s\n", native_tid, (void *)thread, gvl_event_name(event));
|
1434
|
+
|
1311
1435
|
|
1312
1436
|
switch (event) {
|
1437
|
+
case RUBY_INTERNAL_THREAD_EVENT_STARTED:
|
1438
|
+
collector->threads.started(thread);
|
1439
|
+
break;
|
1440
|
+
case RUBY_INTERNAL_THREAD_EVENT_EXITED:
|
1441
|
+
collector->threads.stopped(thread);
|
1442
|
+
break;
|
1313
1443
|
case RUBY_INTERNAL_THREAD_EVENT_READY:
|
1314
|
-
collector->threads.ready(
|
1444
|
+
collector->threads.ready(thread);
|
1315
1445
|
break;
|
1316
1446
|
case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
|
1317
|
-
collector->threads.resumed(
|
1447
|
+
collector->threads.resumed(thread);
|
1318
1448
|
break;
|
1319
1449
|
case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
|
1320
|
-
collector->threads.suspended(
|
1450
|
+
collector->threads.suspended(thread);
|
1321
1451
|
break;
|
1322
1452
|
|
1323
1453
|
}
|
@@ -1345,7 +1475,7 @@ class TimeCollector : public BaseCollector {
|
|
1345
1475
|
// have at least one thread in our thread list because it's possible
|
1346
1476
|
// that the profile might be such that we don't get any thread switch
|
1347
1477
|
// events and we need at least one
|
1348
|
-
this->threads.resumed(
|
1478
|
+
this->threads.resumed(rb_thread_current());
|
1349
1479
|
|
1350
1480
|
thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
|
1351
1481
|
rb_add_event_hook(internal_gc_event_cb, RUBY_INTERNAL_EVENTS, PTR2NUM((void *)this));
|
@@ -1392,7 +1522,7 @@ class TimeCollector : public BaseCollector {
|
|
1392
1522
|
VALUE hash = rb_hash_new();
|
1393
1523
|
thread.samples.write_result(hash);
|
1394
1524
|
|
1395
|
-
rb_hash_aset(threads,
|
1525
|
+
rb_hash_aset(threads, thread.ruby_thread_id, hash);
|
1396
1526
|
rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
|
1397
1527
|
rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
|
1398
1528
|
if (!thread.stopped_at.zero()) {
|
@@ -1409,6 +1539,7 @@ class TimeCollector : public BaseCollector {
|
|
1409
1539
|
|
1410
1540
|
void mark() {
|
1411
1541
|
frame_list.mark_frames();
|
1542
|
+
threads.mark();
|
1412
1543
|
|
1413
1544
|
//for (int i = 0; i < queued_length; i++) {
|
1414
1545
|
// rb_gc_mark(queued_frames[i]);
|
data/lib/vernier/collector.rb
CHANGED
@@ -19,7 +19,7 @@ module Vernier
|
|
19
19
|
Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
|
20
20
|
end
|
21
21
|
|
22
|
-
def add_marker(name:, start:, finish:, thread: Thread.current.
|
22
|
+
def add_marker(name:, start:, finish:, thread: Thread.current.object_id, phase: Marker::Phase::INTERVAL, data: nil)
|
23
23
|
@markers << [thread,
|
24
24
|
name,
|
25
25
|
start,
|
@@ -39,7 +39,7 @@ module Vernier
|
|
39
39
|
start:,
|
40
40
|
finish: current_time,
|
41
41
|
phase: Marker::Phase::INTERVAL,
|
42
|
-
thread: Thread.current.
|
42
|
+
thread: Thread.current.object_id,
|
43
43
|
data: { :type => 'UserTiming', :entryType => 'measure', :name => name }
|
44
44
|
)
|
45
45
|
end
|
@@ -99,9 +99,10 @@ module Vernier
|
|
99
99
|
def data
|
100
100
|
markers_by_thread = profile.markers.group_by { |marker| marker[0] }
|
101
101
|
|
102
|
-
thread_data = profile.threads.map do |
|
103
|
-
markers = markers_by_thread[
|
102
|
+
thread_data = profile.threads.map do |ruby_thread_id, thread_info|
|
103
|
+
markers = markers_by_thread[ruby_thread_id] || []
|
104
104
|
Thread.new(
|
105
|
+
ruby_thread_id,
|
105
106
|
profile,
|
106
107
|
@categorizer,
|
107
108
|
markers: markers,
|
@@ -157,14 +158,16 @@ module Vernier
|
|
157
158
|
class Thread
|
158
159
|
attr_reader :profile
|
159
160
|
|
160
|
-
def initialize(profile, categorizer, name:, tid:, samples:, weights:, timestamps
|
161
|
+
def initialize(ruby_thread_id, profile, categorizer, name:, tid:, samples:, weights:, timestamps: nil, sample_categories: nil, markers:, started_at:, stopped_at: nil)
|
162
|
+
@ruby_thread_id = ruby_thread_id
|
161
163
|
@profile = profile
|
162
164
|
@categorizer = categorizer
|
163
165
|
@tid = tid
|
164
166
|
@name = name
|
165
167
|
|
168
|
+
timestamps ||= [0] * samples.size
|
166
169
|
@samples, @weights, @timestamps = samples, weights, timestamps
|
167
|
-
@sample_categories = sample_categories
|
170
|
+
@sample_categories = sample_categories || ([0] * samples.size)
|
168
171
|
@markers = markers
|
169
172
|
|
170
173
|
@started_at, @stopped_at = started_at, stopped_at
|
@@ -211,7 +214,7 @@ module Vernier
|
|
211
214
|
def data
|
212
215
|
{
|
213
216
|
name: @name,
|
214
|
-
isMainThread: @
|
217
|
+
isMainThread: @ruby_thread_id == ::Thread.main.object_id || (profile.threads.size == 1),
|
215
218
|
processStartupTime: 0, # FIXME
|
216
219
|
processShutdownTime: nil, # FIXME
|
217
220
|
registerTime: (@started_at - 0) / 1_000_000.0,
|
@@ -237,8 +240,6 @@ module Vernier
|
|
237
240
|
end
|
238
241
|
|
239
242
|
def markers_table
|
240
|
-
size = @markers.size
|
241
|
-
|
242
243
|
string_indexes = []
|
243
244
|
start_times = []
|
244
245
|
end_times = []
|
@@ -292,7 +293,6 @@ module Vernier
|
|
292
293
|
times = (0...size).to_a
|
293
294
|
end
|
294
295
|
|
295
|
-
raise unless samples.size == size
|
296
296
|
raise unless weights.size == size
|
297
297
|
raise unless times.size == size
|
298
298
|
|
data/lib/vernier/version.rb
CHANGED
data/lib/vernier.rb
CHANGED
@@ -19,11 +19,11 @@ module Vernier
|
|
19
19
|
yield collector
|
20
20
|
ensure
|
21
21
|
result = collector.stop
|
22
|
+
if out
|
23
|
+
File.write(out, Output::Firefox.new(result).output)
|
24
|
+
end
|
22
25
|
end
|
23
26
|
|
24
|
-
if out
|
25
|
-
File.write(out, Output::Firefox.new(result).output)
|
26
|
-
end
|
27
27
|
result
|
28
28
|
end
|
29
29
|
|
data/vernier.gemspec
CHANGED
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.description = spec.summary
|
13
13
|
spec.homepage = "https://github.com/jhawthorn/vernier"
|
14
14
|
spec.license = "MIT"
|
15
|
-
spec.required_ruby_version = ">= 3.2.
|
15
|
+
spec.required_ruby_version = ">= 3.2.1"
|
16
16
|
|
17
17
|
spec.metadata["homepage_uri"] = spec.homepage
|
18
18
|
spec.metadata["source_code_uri"] = spec.homepage
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vernier
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Hawthorn
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: An experimental profiler
|
14
14
|
email:
|
@@ -60,7 +60,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
60
60
|
requirements:
|
61
61
|
- - ">="
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: 3.2.
|
63
|
+
version: 3.2.1
|
64
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - ">="
|