stackprof 0.2.15 → 0.2.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +43 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +12 -1
- data/README.md +57 -51
- data/Rakefile +21 -25
- data/bin/stackprof +1 -1
- data/ext/stackprof/extconf.rb +6 -0
- data/ext/stackprof/stackprof.c +210 -71
- data/lib/stackprof/report.rb +65 -26
- data/lib/stackprof/truffleruby.rb +37 -0
- data/lib/stackprof.rb +10 -2
- data/stackprof.gemspec +8 -1
- data/test/fixtures/profile.dump +1 -0
- data/test/fixtures/profile.json +1 -0
- data/test/test_report.rb +24 -0
- data/test/test_stackprof.rb +75 -12
- data/test/test_truffleruby.rb +18 -0
- data/vendor/FlameGraph/flamegraph.pl +751 -85
- metadata +16 -10
- data/.travis.yml +0 -21
- data/Dockerfile +0 -21
- data/Gemfile.lock +0 -27
data/ext/stackprof/stackprof.c
CHANGED
@@ -7,15 +7,19 @@
|
|
7
7
|
**********************************************************************/
|
8
8
|
|
9
9
|
#include <ruby/ruby.h>
|
10
|
+
#include <ruby/version.h>
|
10
11
|
#include <ruby/debug.h>
|
11
12
|
#include <ruby/st.h>
|
12
13
|
#include <ruby/io.h>
|
13
14
|
#include <ruby/intern.h>
|
14
15
|
#include <signal.h>
|
15
16
|
#include <sys/time.h>
|
17
|
+
#include <time.h>
|
16
18
|
#include <pthread.h>
|
17
19
|
|
18
20
|
#define BUF_SIZE 2048
|
21
|
+
#define MICROSECONDS_IN_SECOND 1000000
|
22
|
+
#define NANOSECONDS_IN_SECOND 1000000000
|
19
23
|
|
20
24
|
#define FAKE_FRAME_GC INT2FIX(0)
|
21
25
|
#define FAKE_FRAME_MARK INT2FIX(1)
|
@@ -27,8 +31,51 @@ static const char *fake_frame_cstrs[] = {
|
|
27
31
|
"(sweeping)",
|
28
32
|
};
|
29
33
|
|
34
|
+
static int stackprof_use_postponed_job = 1;
|
35
|
+
|
30
36
|
#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
|
31
37
|
|
38
|
+
#ifdef _POSIX_MONOTONIC_CLOCK
|
39
|
+
#define timestamp_t timespec
|
40
|
+
typedef struct timestamp_t timestamp_t;
|
41
|
+
|
42
|
+
static void capture_timestamp(timestamp_t *ts) {
|
43
|
+
clock_gettime(CLOCK_MONOTONIC, ts);
|
44
|
+
}
|
45
|
+
|
46
|
+
static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
|
47
|
+
int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
|
48
|
+
if (end->tv_nsec < start->tv_nsec) {
|
49
|
+
result -= MICROSECONDS_IN_SECOND;
|
50
|
+
result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
|
51
|
+
} else {
|
52
|
+
result += (end->tv_nsec - start->tv_nsec) / 1000;
|
53
|
+
}
|
54
|
+
return result;
|
55
|
+
}
|
56
|
+
|
57
|
+
static uint64_t timestamp_usec(timestamp_t *ts) {
|
58
|
+
return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000);
|
59
|
+
}
|
60
|
+
#else
|
61
|
+
#define timestamp_t timeval
|
62
|
+
typedef struct timestamp_t timestamp_t;
|
63
|
+
|
64
|
+
static void capture_timestamp(timestamp_t *ts) {
|
65
|
+
gettimeofday(ts, NULL);
|
66
|
+
}
|
67
|
+
|
68
|
+
static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
|
69
|
+
struct timeval diff;
|
70
|
+
timersub(end, start, &diff);
|
71
|
+
return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec;
|
72
|
+
}
|
73
|
+
|
74
|
+
static uint64_t timestamp_usec(timestamp_t *ts) { /* fallback variant: timestamp_t is struct timeval here; returns seconds scaled to microseconds plus tv_usec */
|
75
|
+
return (MICROSECONDS_IN_SECOND * ts->tv_sec) + ts->tv_usec;
|
76
|
+
}
|
77
|
+
#endif
|
78
|
+
|
32
79
|
typedef struct {
|
33
80
|
size_t total_samples;
|
34
81
|
size_t caller_samples;
|
@@ -37,6 +84,11 @@ typedef struct {
|
|
37
84
|
st_table *lines;
|
38
85
|
} frame_data_t;
|
39
86
|
|
87
|
+
typedef struct {
|
88
|
+
uint64_t timestamp_usec;
|
89
|
+
int64_t delta_usec;
|
90
|
+
} sample_time_t;
|
91
|
+
|
40
92
|
static struct {
|
41
93
|
int running;
|
42
94
|
int raw;
|
@@ -46,16 +98,17 @@ static struct {
|
|
46
98
|
VALUE interval;
|
47
99
|
VALUE out;
|
48
100
|
VALUE metadata;
|
101
|
+
int ignore_gc;
|
49
102
|
|
50
103
|
VALUE *raw_samples;
|
51
104
|
size_t raw_samples_len;
|
52
105
|
size_t raw_samples_capa;
|
53
106
|
size_t raw_sample_index;
|
54
107
|
|
55
|
-
struct
|
56
|
-
|
57
|
-
size_t
|
58
|
-
size_t
|
108
|
+
struct timestamp_t last_sample_at;
|
109
|
+
sample_time_t *raw_sample_times;
|
110
|
+
size_t raw_sample_times_len;
|
111
|
+
size_t raw_sample_times_capa;
|
59
112
|
|
60
113
|
size_t overall_signals;
|
61
114
|
size_t overall_samples;
|
@@ -67,14 +120,17 @@ static struct {
|
|
67
120
|
|
68
121
|
VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
|
69
122
|
VALUE empty_string;
|
123
|
+
|
124
|
+
int buffer_count;
|
125
|
+
sample_time_t buffer_time;
|
70
126
|
VALUE frames_buffer[BUF_SIZE];
|
71
127
|
int lines_buffer[BUF_SIZE];
|
72
128
|
} _stackprof;
|
73
129
|
|
74
130
|
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
|
75
131
|
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
|
76
|
-
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames,
|
77
|
-
static VALUE sym_state, sym_marking, sym_sweeping;
|
132
|
+
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
|
133
|
+
static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
|
78
134
|
static VALUE sym_gc_samples, objtracer;
|
79
135
|
static VALUE gc_hook;
|
80
136
|
static VALUE rb_mStackProf;
|
@@ -88,7 +144,9 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
88
144
|
struct sigaction sa;
|
89
145
|
struct itimerval timer;
|
90
146
|
VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
|
147
|
+
int ignore_gc = 0;
|
91
148
|
int raw = 0, aggregate = 1;
|
149
|
+
VALUE metadata_val;
|
92
150
|
|
93
151
|
if (_stackprof.running)
|
94
152
|
return Qfalse;
|
@@ -99,8 +157,11 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
99
157
|
mode = rb_hash_aref(opts, sym_mode);
|
100
158
|
interval = rb_hash_aref(opts, sym_interval);
|
101
159
|
out = rb_hash_aref(opts, sym_out);
|
160
|
+
if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
|
161
|
+
ignore_gc = 1;
|
162
|
+
}
|
102
163
|
|
103
|
-
|
164
|
+
metadata_val = rb_hash_aref(opts, sym_metadata);
|
104
165
|
if (RTEST(metadata_val)) {
|
105
166
|
if (!RB_TYPE_P(metadata_val, T_HASH))
|
106
167
|
rb_raise(rb_eArgError, "metadata should be a hash");
|
@@ -115,6 +176,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
115
176
|
}
|
116
177
|
if (!RTEST(mode)) mode = sym_wall;
|
117
178
|
|
179
|
+
if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
|
180
|
+
rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
|
181
|
+
}
|
182
|
+
|
118
183
|
if (!_stackprof.frames) {
|
119
184
|
_stackprof.frames = st_init_numtable();
|
120
185
|
_stackprof.overall_signals = 0;
|
@@ -151,11 +216,12 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
151
216
|
_stackprof.aggregate = aggregate;
|
152
217
|
_stackprof.mode = mode;
|
153
218
|
_stackprof.interval = interval;
|
219
|
+
_stackprof.ignore_gc = ignore_gc;
|
154
220
|
_stackprof.metadata = metadata;
|
155
221
|
_stackprof.out = out;
|
156
222
|
|
157
223
|
if (raw) {
|
158
|
-
|
224
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
159
225
|
}
|
160
226
|
|
161
227
|
return Qtrue;
|
@@ -190,13 +256,19 @@ stackprof_stop(VALUE self)
|
|
190
256
|
return Qtrue;
|
191
257
|
}
|
192
258
|
|
259
|
+
#if SIZEOF_VOIDP == SIZEOF_LONG
|
260
|
+
# define PTR2NUM(x) (LONG2NUM((long)(x)))
|
261
|
+
#else
|
262
|
+
# define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
|
263
|
+
#endif
|
264
|
+
|
193
265
|
static int
|
194
266
|
frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
|
195
267
|
{
|
196
268
|
VALUE edges = (VALUE)arg;
|
197
269
|
|
198
270
|
intptr_t weight = (intptr_t)val;
|
199
|
-
rb_hash_aset(edges,
|
271
|
+
rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
|
200
272
|
return ST_CONTINUE;
|
201
273
|
}
|
202
274
|
|
@@ -223,7 +295,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
223
295
|
VALUE name, file, edges, lines;
|
224
296
|
VALUE line;
|
225
297
|
|
226
|
-
rb_hash_aset(results,
|
298
|
+
rb_hash_aset(results, PTR2NUM(frame), details);
|
227
299
|
|
228
300
|
if (FIXNUM_P(frame)) {
|
229
301
|
name = _stackprof.fake_frame_names[FIX2INT(frame)];
|
@@ -295,7 +367,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
295
367
|
|
296
368
|
if (_stackprof.raw && _stackprof.raw_samples_len) {
|
297
369
|
size_t len, n, o;
|
298
|
-
VALUE raw_timestamp_deltas;
|
370
|
+
VALUE raw_sample_timestamps, raw_timestamp_deltas;
|
299
371
|
VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
|
300
372
|
|
301
373
|
for (n = 0; n < _stackprof.raw_samples_len; n++) {
|
@@ -303,7 +375,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
303
375
|
rb_ary_push(raw_samples, SIZET2NUM(len));
|
304
376
|
|
305
377
|
for (o = 0, n++; o < len; n++, o++)
|
306
|
-
rb_ary_push(raw_samples,
|
378
|
+
rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n]));
|
307
379
|
rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
|
308
380
|
}
|
309
381
|
|
@@ -315,17 +387,20 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
315
387
|
|
316
388
|
rb_hash_aset(results, sym_raw, raw_samples);
|
317
389
|
|
318
|
-
|
390
|
+
raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
391
|
+
raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
319
392
|
|
320
|
-
for (n = 0; n < _stackprof.
|
321
|
-
rb_ary_push(
|
393
|
+
for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
|
394
|
+
rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
|
395
|
+
rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
|
322
396
|
}
|
323
397
|
|
324
|
-
free(_stackprof.
|
325
|
-
_stackprof.
|
326
|
-
_stackprof.
|
327
|
-
_stackprof.
|
398
|
+
free(_stackprof.raw_sample_times);
|
399
|
+
_stackprof.raw_sample_times = NULL;
|
400
|
+
_stackprof.raw_sample_times_len = 0;
|
401
|
+
_stackprof.raw_sample_times_capa = 0;
|
328
402
|
|
403
|
+
rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
|
329
404
|
rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
|
330
405
|
|
331
406
|
_stackprof.raw = 0;
|
@@ -405,14 +480,14 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
|
|
405
480
|
}
|
406
481
|
|
407
482
|
void
|
408
|
-
stackprof_record_sample_for_stack(int num,
|
483
|
+
stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
|
409
484
|
{
|
410
485
|
int i, n;
|
411
486
|
VALUE prev_frame = Qnil;
|
412
487
|
|
413
488
|
_stackprof.overall_samples++;
|
414
489
|
|
415
|
-
if (_stackprof.raw) {
|
490
|
+
if (_stackprof.raw && num > 0) {
|
416
491
|
int found = 0;
|
417
492
|
|
418
493
|
/* If there's no sample buffer allocated, then allocate one. The buffer
|
@@ -464,20 +539,23 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
464
539
|
}
|
465
540
|
|
466
541
|
/* If there's no timestamp delta buffer, allocate one */
|
467
|
-
if (!_stackprof.
|
468
|
-
_stackprof.
|
469
|
-
_stackprof.
|
470
|
-
_stackprof.
|
542
|
+
if (!_stackprof.raw_sample_times) {
|
543
|
+
_stackprof.raw_sample_times_capa = 100;
|
544
|
+
_stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
545
|
+
_stackprof.raw_sample_times_len = 0;
|
471
546
|
}
|
472
547
|
|
473
548
|
/* Double the buffer size if it's too small */
|
474
|
-
while (_stackprof.
|
475
|
-
_stackprof.
|
476
|
-
_stackprof.
|
549
|
+
while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
|
550
|
+
_stackprof.raw_sample_times_capa *= 2;
|
551
|
+
_stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
477
552
|
}
|
478
553
|
|
479
|
-
/* Store the time delta (which is the amount of
|
480
|
-
_stackprof.
|
554
|
+
/* Store the sample's timestamp and its time delta (the number of microseconds between samples). */
|
555
|
+
_stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
|
556
|
+
.timestamp_usec = sample_timestamp,
|
557
|
+
.delta_usec = timestamp_delta,
|
558
|
+
};
|
481
559
|
}
|
482
560
|
|
483
561
|
for (i = 0; i < num; i++) {
|
@@ -510,48 +588,60 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
510
588
|
}
|
511
589
|
|
512
590
|
if (_stackprof.raw) {
|
513
|
-
|
591
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
514
592
|
}
|
515
593
|
}
|
516
594
|
|
595
|
+
// buffer the current profile frames
|
596
|
+
// This must be async-signal-safe
|
597
|
+
// Returns immediately if another set of frames is already in the buffer
|
517
598
|
void
|
518
|
-
|
599
|
+
stackprof_buffer_sample(void)
|
519
600
|
{
|
520
|
-
|
601
|
+
uint64_t start_timestamp = 0;
|
602
|
+
int64_t timestamp_delta = 0;
|
521
603
|
int num;
|
604
|
+
|
605
|
+
if (_stackprof.buffer_count > 0) {
|
606
|
+
// Another sample is already pending
|
607
|
+
return;
|
608
|
+
}
|
609
|
+
|
522
610
|
if (_stackprof.raw) {
|
523
|
-
struct
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
|
611
|
+
struct timestamp_t t;
|
612
|
+
capture_timestamp(&t);
|
613
|
+
start_timestamp = timestamp_usec(&t);
|
614
|
+
timestamp_delta = delta_usec(&_stackprof.last_sample_at, &t);
|
528
615
|
}
|
616
|
+
|
529
617
|
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
|
530
|
-
|
618
|
+
|
619
|
+
_stackprof.buffer_count = num;
|
620
|
+
_stackprof.buffer_time.timestamp_usec = start_timestamp;
|
621
|
+
_stackprof.buffer_time.delta_usec = timestamp_delta;
|
531
622
|
}
|
532
623
|
|
533
624
|
void
|
534
|
-
stackprof_record_gc_samples()
|
625
|
+
stackprof_record_gc_samples(void)
|
535
626
|
{
|
536
|
-
|
537
|
-
|
627
|
+
int64_t delta_to_first_unrecorded_gc_sample = 0;
|
628
|
+
uint64_t start_timestamp = 0;
|
629
|
+
size_t i;
|
538
630
|
if (_stackprof.raw) {
|
539
|
-
struct
|
540
|
-
|
541
|
-
|
542
|
-
timersub(&t, &_stackprof.last_sample_at, &diff);
|
631
|
+
struct timestamp_t t;
|
632
|
+
capture_timestamp(&t);
|
633
|
+
start_timestamp = timestamp_usec(&t);
|
543
634
|
|
544
635
|
// We don't know when the GC samples were actually marked, so let's
|
545
636
|
// assume that they were marked at a perfectly regular interval.
|
546
|
-
delta_to_first_unrecorded_gc_sample = (
|
637
|
+
delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
|
547
638
|
if (delta_to_first_unrecorded_gc_sample < 0) {
|
548
639
|
delta_to_first_unrecorded_gc_sample = 0;
|
549
640
|
}
|
550
641
|
}
|
551
642
|
|
552
|
-
|
553
643
|
for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
|
554
|
-
|
644
|
+
int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
|
555
645
|
|
556
646
|
if (_stackprof.unrecorded_gc_marking_samples) {
|
557
647
|
_stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
|
@@ -560,7 +650,7 @@ stackprof_record_gc_samples()
|
|
560
650
|
_stackprof.lines_buffer[1] = 0;
|
561
651
|
_stackprof.unrecorded_gc_marking_samples--;
|
562
652
|
|
563
|
-
stackprof_record_sample_for_stack(2, timestamp_delta);
|
653
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
564
654
|
} else if (_stackprof.unrecorded_gc_sweeping_samples) {
|
565
655
|
_stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
|
566
656
|
_stackprof.lines_buffer[0] = 0;
|
@@ -569,11 +659,11 @@ stackprof_record_gc_samples()
|
|
569
659
|
|
570
660
|
_stackprof.unrecorded_gc_sweeping_samples--;
|
571
661
|
|
572
|
-
stackprof_record_sample_for_stack(2, timestamp_delta);
|
662
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
573
663
|
} else {
|
574
664
|
_stackprof.frames_buffer[0] = FAKE_FRAME_GC;
|
575
665
|
_stackprof.lines_buffer[0] = 0;
|
576
|
-
stackprof_record_sample_for_stack(1, timestamp_delta);
|
666
|
+
stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
|
577
667
|
}
|
578
668
|
}
|
579
669
|
_stackprof.during_gc += _stackprof.unrecorded_gc_samples;
|
@@ -582,35 +672,59 @@ stackprof_record_gc_samples()
|
|
582
672
|
_stackprof.unrecorded_gc_sweeping_samples = 0;
|
583
673
|
}
|
584
674
|
|
675
|
+
// record the sample previously buffered by stackprof_buffer_sample
|
676
|
+
static void
|
677
|
+
stackprof_record_buffer(void)
|
678
|
+
{
|
679
|
+
stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
|
680
|
+
|
681
|
+
// reset the buffer
|
682
|
+
_stackprof.buffer_count = 0;
|
683
|
+
}
|
684
|
+
|
685
|
+
static void
|
686
|
+
stackprof_sample_and_record(void)
|
687
|
+
{
|
688
|
+
stackprof_buffer_sample();
|
689
|
+
stackprof_record_buffer();
|
690
|
+
}
|
691
|
+
|
585
692
|
static void
|
586
|
-
|
693
|
+
stackprof_job_record_gc(void *data)
|
587
694
|
{
|
588
|
-
static int in_signal_handler = 0;
|
589
|
-
if (in_signal_handler) return;
|
590
695
|
if (!_stackprof.running) return;
|
591
696
|
|
592
|
-
in_signal_handler++;
|
593
697
|
stackprof_record_gc_samples();
|
594
|
-
in_signal_handler--;
|
595
698
|
}
|
596
699
|
|
597
700
|
static void
|
598
|
-
|
701
|
+
stackprof_job_sample_and_record(void *data)
|
599
702
|
{
|
600
|
-
static int in_signal_handler = 0;
|
601
|
-
if (in_signal_handler) return;
|
602
703
|
if (!_stackprof.running) return;
|
603
704
|
|
604
|
-
|
605
|
-
|
606
|
-
|
705
|
+
stackprof_sample_and_record();
|
706
|
+
}
|
707
|
+
|
708
|
+
static void
|
709
|
+
stackprof_job_record_buffer(void *data)
|
710
|
+
{
|
711
|
+
if (!_stackprof.running) return;
|
712
|
+
|
713
|
+
stackprof_record_buffer();
|
607
714
|
}
|
608
715
|
|
609
716
|
static void
|
610
717
|
stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
611
718
|
{
|
719
|
+
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
720
|
+
|
612
721
|
_stackprof.overall_signals++;
|
613
|
-
|
722
|
+
|
723
|
+
if (!_stackprof.running) return;
|
724
|
+
if (!ruby_native_thread_p()) return;
|
725
|
+
if (pthread_mutex_trylock(&lock)) return;
|
726
|
+
|
727
|
+
if (!_stackprof.ignore_gc && rb_during_gc()) {
|
614
728
|
VALUE mode = rb_gc_latest_gc_info(sym_state);
|
615
729
|
if (mode == sym_marking) {
|
616
730
|
_stackprof.unrecorded_gc_marking_samples++;
|
@@ -618,10 +732,19 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
|
618
732
|
_stackprof.unrecorded_gc_sweeping_samples++;
|
619
733
|
}
|
620
734
|
_stackprof.unrecorded_gc_samples++;
|
621
|
-
rb_postponed_job_register_one(0,
|
735
|
+
rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
|
622
736
|
} else {
|
623
|
-
|
737
|
+
if (stackprof_use_postponed_job) {
|
738
|
+
rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
|
739
|
+
} else {
|
740
|
+
// Buffer a sample immediately; if a sample is already buffered, this will
|
741
|
+
// return immediately
|
742
|
+
stackprof_buffer_sample();
|
743
|
+
// Enqueue a job to record the sample
|
744
|
+
rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
|
745
|
+
}
|
624
746
|
}
|
747
|
+
pthread_mutex_unlock(&lock);
|
625
748
|
}
|
626
749
|
|
627
750
|
static void
|
@@ -630,7 +753,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
|
|
630
753
|
_stackprof.overall_signals++;
|
631
754
|
if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
|
632
755
|
return;
|
633
|
-
|
756
|
+
stackprof_sample_and_record();
|
634
757
|
}
|
635
758
|
|
636
759
|
static VALUE
|
@@ -640,7 +763,7 @@ stackprof_sample(VALUE self)
|
|
640
763
|
return Qfalse;
|
641
764
|
|
642
765
|
_stackprof.overall_signals++;
|
643
|
-
|
766
|
+
stackprof_sample_and_record();
|
644
767
|
return Qtrue;
|
645
768
|
}
|
646
769
|
|
@@ -697,10 +820,23 @@ stackprof_atfork_child(void)
|
|
697
820
|
stackprof_stop(rb_mStackProf);
|
698
821
|
}
|
699
822
|
|
823
|
+
static VALUE
|
824
|
+
stackprof_use_postponed_job_l(VALUE self)
|
825
|
+
{
|
826
|
+
stackprof_use_postponed_job = 1;
|
827
|
+
return Qnil;
|
828
|
+
}
|
829
|
+
|
700
830
|
void
|
701
831
|
Init_stackprof(void)
|
702
832
|
{
|
703
833
|
size_t i;
|
834
|
+
/*
|
835
|
+
* As of Ruby 3.0, it should be safe to read stack frames at any time, unless YJIT is enabled
|
836
|
+
* See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
|
837
|
+
*/
|
838
|
+
stackprof_use_postponed_job = RUBY_API_VERSION_MAJOR < 3;
|
839
|
+
|
704
840
|
#define S(name) sym_##name = ID2SYM(rb_intern(#name));
|
705
841
|
S(object);
|
706
842
|
S(custom);
|
@@ -719,9 +855,11 @@ Init_stackprof(void)
|
|
719
855
|
S(mode);
|
720
856
|
S(interval);
|
721
857
|
S(raw);
|
858
|
+
S(raw_sample_timestamps);
|
722
859
|
S(raw_timestamp_deltas);
|
723
860
|
S(out);
|
724
861
|
S(metadata);
|
862
|
+
S(ignore_gc);
|
725
863
|
S(frames);
|
726
864
|
S(aggregate);
|
727
865
|
S(state);
|
@@ -740,9 +878,9 @@ Init_stackprof(void)
|
|
740
878
|
_stackprof.raw_samples_capa = 0;
|
741
879
|
_stackprof.raw_sample_index = 0;
|
742
880
|
|
743
|
-
_stackprof.
|
744
|
-
_stackprof.
|
745
|
-
_stackprof.
|
881
|
+
_stackprof.raw_sample_times = NULL;
|
882
|
+
_stackprof.raw_sample_times_len = 0;
|
883
|
+
_stackprof.raw_sample_times_capa = 0;
|
746
884
|
|
747
885
|
_stackprof.empty_string = rb_str_new_cstr("");
|
748
886
|
rb_global_variable(&_stackprof.empty_string);
|
@@ -759,6 +897,7 @@ Init_stackprof(void)
|
|
759
897
|
rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
|
760
898
|
rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
|
761
899
|
rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
|
900
|
+
rb_define_singleton_method(rb_mStackProf, "use_postponed_job!", stackprof_use_postponed_job_l, 0);
|
762
901
|
|
763
902
|
pthread_atfork(stackprof_atfork_prepare, stackprof_atfork_parent, stackprof_atfork_child);
|
764
903
|
}
|