stackprof 0.2.17 → 0.2.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +1 -1
- data/bin/stackprof +1 -1
- data/ext/stackprof/stackprof.c +187 -72
- data/lib/stackprof/report.rb +55 -52
- data/lib/stackprof.rb +5 -1
- data/stackprof.gemspec +1 -1
- data/test/fixtures/profile.dump +1 -0
- data/test/fixtures/profile.json +1 -0
- data/test/test_report.rb +24 -0
- data/test/test_stackprof.rb +31 -4
- metadata +11 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e627bf7fbeca0cb94e8be863b0a1db9160534d5172c1409ba4306e0f16a57ae0
|
4
|
+
data.tar.gz: f881ec5ab3267f3b48f57794c3bce910b39f890db32817655b4e5253733fc466
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 514c6697e4465fbce7990fffc2a71ca5aa544e1f526b338fde343c14bc801bb5056534e371e4d8fd5642f1b73b92126b2a5e476eb5292dde23bd0da734d7e236
|
7
|
+
data.tar.gz: bed130e196f1004b3e74d2db466732fa8b23e582df3fcf944161112036053e95a5a05a3ed2ac175a6603e1f3b28b6062491aef89f4d30b764be3886aaf586b19
|
data/.github/workflows/ci.yml
CHANGED
data/bin/stackprof
CHANGED
@@ -42,7 +42,7 @@ reports = []
|
|
42
42
|
while ARGV.size > 0
|
43
43
|
begin
|
44
44
|
file = ARGV.pop
|
45
|
-
reports << StackProf::Report.
|
45
|
+
reports << StackProf::Report.from_file(file)
|
46
46
|
rescue TypeError => e
|
47
47
|
STDERR.puts "** error parsing #{file}: #{e.inspect}"
|
48
48
|
end
|
data/ext/stackprof/stackprof.c
CHANGED
@@ -7,37 +7,75 @@
|
|
7
7
|
**********************************************************************/
|
8
8
|
|
9
9
|
#include <ruby/ruby.h>
|
10
|
+
#include <ruby/version.h>
|
10
11
|
#include <ruby/debug.h>
|
11
12
|
#include <ruby/st.h>
|
12
13
|
#include <ruby/io.h>
|
13
14
|
#include <ruby/intern.h>
|
14
15
|
#include <signal.h>
|
15
16
|
#include <sys/time.h>
|
17
|
+
#include <time.h>
|
16
18
|
#include <pthread.h>
|
17
19
|
|
18
20
|
#define BUF_SIZE 2048
|
19
21
|
#define MICROSECONDS_IN_SECOND 1000000
|
22
|
+
#define NANOSECONDS_IN_SECOND 1000000000
|
20
23
|
|
21
24
|
#define FAKE_FRAME_GC INT2FIX(0)
|
22
25
|
#define FAKE_FRAME_MARK INT2FIX(1)
|
23
26
|
#define FAKE_FRAME_SWEEP INT2FIX(2)
|
24
27
|
|
25
|
-
/*
|
26
|
-
* As of Ruby 3.0, it should be safe to read stack frames at any time
|
27
|
-
* See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
|
28
|
-
*/
|
29
|
-
#if RUBY_API_VERSION_MAJOR < 3
|
30
|
-
#define USE_POSTPONED_JOB
|
31
|
-
#endif
|
32
|
-
|
33
28
|
static const char *fake_frame_cstrs[] = {
|
34
29
|
"(garbage collection)",
|
35
30
|
"(marking)",
|
36
31
|
"(sweeping)",
|
37
32
|
};
|
38
33
|
|
34
|
+
static int stackprof_use_postponed_job = 1;
|
35
|
+
|
39
36
|
#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
|
40
37
|
|
38
|
+
#ifdef _POSIX_MONOTONIC_CLOCK
|
39
|
+
#define timestamp_t timespec
|
40
|
+
typedef struct timestamp_t timestamp_t;
|
41
|
+
|
42
|
+
static void capture_timestamp(timestamp_t *ts) {
|
43
|
+
clock_gettime(CLOCK_MONOTONIC, ts);
|
44
|
+
}
|
45
|
+
|
46
|
+
static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
|
47
|
+
int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
|
48
|
+
if (end->tv_nsec < start->tv_nsec) {
|
49
|
+
result -= MICROSECONDS_IN_SECOND;
|
50
|
+
result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
|
51
|
+
} else {
|
52
|
+
result += (end->tv_nsec - start->tv_nsec) / 1000;
|
53
|
+
}
|
54
|
+
return result;
|
55
|
+
}
|
56
|
+
|
57
|
+
static uint64_t timestamp_usec(timestamp_t *ts) {
|
58
|
+
return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000);
|
59
|
+
}
|
60
|
+
#else
|
61
|
+
#define timestamp_t timeval
|
62
|
+
typedef struct timestamp_t timestamp_t;
|
63
|
+
|
64
|
+
static void capture_timestamp(timestamp_t *ts) {
|
65
|
+
gettimeofday(ts, NULL);
|
66
|
+
}
|
67
|
+
|
68
|
+
static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
|
69
|
+
struct timeval diff;
|
70
|
+
timersub(end, start, &diff);
|
71
|
+
return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec;
|
72
|
+
}
|
73
|
+
|
74
|
+
static uint64_t timestamp_usec(timestamp_t *ts) {
|
75
|
+
return (MICROSECONDS_IN_SECOND * ts.tv_sec) + diff.tv_usec
|
76
|
+
}
|
77
|
+
#endif
|
78
|
+
|
41
79
|
typedef struct {
|
42
80
|
size_t total_samples;
|
43
81
|
size_t caller_samples;
|
@@ -46,6 +84,11 @@ typedef struct {
|
|
46
84
|
st_table *lines;
|
47
85
|
} frame_data_t;
|
48
86
|
|
87
|
+
/* Timing recorded alongside one raw sample; both fields are in microseconds. */
typedef struct {
    uint64_t timestamp_usec; /* clock reading taken when the sample was captured */
    int64_t delta_usec;      /* time elapsed since the previous sample */
} sample_time_t;
|
91
|
+
|
49
92
|
static struct {
|
50
93
|
int running;
|
51
94
|
int raw;
|
@@ -62,10 +105,10 @@ static struct {
|
|
62
105
|
size_t raw_samples_capa;
|
63
106
|
size_t raw_sample_index;
|
64
107
|
|
65
|
-
struct
|
66
|
-
|
67
|
-
size_t
|
68
|
-
size_t
|
108
|
+
struct timestamp_t last_sample_at;
|
109
|
+
sample_time_t *raw_sample_times;
|
110
|
+
size_t raw_sample_times_len;
|
111
|
+
size_t raw_sample_times_capa;
|
69
112
|
|
70
113
|
size_t overall_signals;
|
71
114
|
size_t overall_samples;
|
@@ -77,6 +120,9 @@ static struct {
|
|
77
120
|
|
78
121
|
VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
|
79
122
|
VALUE empty_string;
|
123
|
+
|
124
|
+
int buffer_count;
|
125
|
+
sample_time_t buffer_time;
|
80
126
|
VALUE frames_buffer[BUF_SIZE];
|
81
127
|
int lines_buffer[BUF_SIZE];
|
82
128
|
} _stackprof;
|
@@ -84,7 +130,7 @@ static struct {
|
|
84
130
|
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
|
85
131
|
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
|
86
132
|
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
|
87
|
-
static VALUE sym_aggregate, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
|
133
|
+
static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
|
88
134
|
static VALUE sym_gc_samples, objtracer;
|
89
135
|
static VALUE gc_hook;
|
90
136
|
static VALUE rb_mStackProf;
|
@@ -174,7 +220,7 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
174
220
|
_stackprof.out = out;
|
175
221
|
|
176
222
|
if (raw) {
|
177
|
-
|
223
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
178
224
|
}
|
179
225
|
|
180
226
|
return Qtrue;
|
@@ -209,13 +255,19 @@ stackprof_stop(VALUE self)
|
|
209
255
|
return Qtrue;
|
210
256
|
}
|
211
257
|
|
258
|
+
/*
 * PTR2NUM(x): wrap a pointer-sized value as a Ruby Integer, using LL2NUM when
 * a pointer is not the same size as long and LONG2NUM when it is.
 */
#if SIZEOF_VOIDP != SIZEOF_LONG
# define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
#else
# define PTR2NUM(x) (LONG2NUM((long)(x)))
#endif
|
263
|
+
|
212
264
|
static int
|
213
265
|
frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
|
214
266
|
{
|
215
267
|
VALUE edges = (VALUE)arg;
|
216
268
|
|
217
269
|
intptr_t weight = (intptr_t)val;
|
218
|
-
rb_hash_aset(edges,
|
270
|
+
rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
|
219
271
|
return ST_CONTINUE;
|
220
272
|
}
|
221
273
|
|
@@ -242,7 +294,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
242
294
|
VALUE name, file, edges, lines;
|
243
295
|
VALUE line;
|
244
296
|
|
245
|
-
rb_hash_aset(results,
|
297
|
+
rb_hash_aset(results, PTR2NUM(frame), details);
|
246
298
|
|
247
299
|
if (FIXNUM_P(frame)) {
|
248
300
|
name = _stackprof.fake_frame_names[FIX2INT(frame)];
|
@@ -314,7 +366,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
314
366
|
|
315
367
|
if (_stackprof.raw && _stackprof.raw_samples_len) {
|
316
368
|
size_t len, n, o;
|
317
|
-
VALUE raw_timestamp_deltas;
|
369
|
+
VALUE raw_sample_timestamps, raw_timestamp_deltas;
|
318
370
|
VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
|
319
371
|
|
320
372
|
for (n = 0; n < _stackprof.raw_samples_len; n++) {
|
@@ -322,7 +374,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
322
374
|
rb_ary_push(raw_samples, SIZET2NUM(len));
|
323
375
|
|
324
376
|
for (o = 0, n++; o < len; n++, o++)
|
325
|
-
rb_ary_push(raw_samples,
|
377
|
+
rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n]));
|
326
378
|
rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
|
327
379
|
}
|
328
380
|
|
@@ -334,17 +386,20 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
334
386
|
|
335
387
|
rb_hash_aset(results, sym_raw, raw_samples);
|
336
388
|
|
337
|
-
|
389
|
+
raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
390
|
+
raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
338
391
|
|
339
|
-
for (n = 0; n < _stackprof.
|
340
|
-
rb_ary_push(
|
392
|
+
for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
|
393
|
+
rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
|
394
|
+
rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
|
341
395
|
}
|
342
396
|
|
343
|
-
free(_stackprof.
|
344
|
-
_stackprof.
|
345
|
-
_stackprof.
|
346
|
-
_stackprof.
|
397
|
+
free(_stackprof.raw_sample_times);
|
398
|
+
_stackprof.raw_sample_times = NULL;
|
399
|
+
_stackprof.raw_sample_times_len = 0;
|
400
|
+
_stackprof.raw_sample_times_capa = 0;
|
347
401
|
|
402
|
+
rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
|
348
403
|
rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
|
349
404
|
|
350
405
|
_stackprof.raw = 0;
|
@@ -424,14 +479,14 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
|
|
424
479
|
}
|
425
480
|
|
426
481
|
void
|
427
|
-
stackprof_record_sample_for_stack(int num,
|
482
|
+
stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
|
428
483
|
{
|
429
484
|
int i, n;
|
430
485
|
VALUE prev_frame = Qnil;
|
431
486
|
|
432
487
|
_stackprof.overall_samples++;
|
433
488
|
|
434
|
-
if (_stackprof.raw) {
|
489
|
+
if (_stackprof.raw && num > 0) {
|
435
490
|
int found = 0;
|
436
491
|
|
437
492
|
/* If there's no sample buffer allocated, then allocate one. The buffer
|
@@ -483,20 +538,23 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
483
538
|
}
|
484
539
|
|
485
540
|
/* If there's no timestamp delta buffer, allocate one */
|
486
|
-
if (!_stackprof.
|
487
|
-
_stackprof.
|
488
|
-
_stackprof.
|
489
|
-
_stackprof.
|
541
|
+
if (!_stackprof.raw_sample_times) {
|
542
|
+
_stackprof.raw_sample_times_capa = 100;
|
543
|
+
_stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
544
|
+
_stackprof.raw_sample_times_len = 0;
|
490
545
|
}
|
491
546
|
|
492
547
|
/* Double the buffer size if it's too small */
|
493
|
-
while (_stackprof.
|
494
|
-
_stackprof.
|
495
|
-
_stackprof.
|
548
|
+
while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
|
549
|
+
_stackprof.raw_sample_times_capa *= 2;
|
550
|
+
_stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
496
551
|
}
|
497
552
|
|
498
|
-
/* Store the time delta (which is the amount of
|
499
|
-
_stackprof.
|
553
|
+
/* Store the time delta (which is the amount of microseconds between samples). */
|
554
|
+
_stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
|
555
|
+
.timestamp_usec = sample_timestamp,
|
556
|
+
.delta_usec = timestamp_delta,
|
557
|
+
};
|
500
558
|
}
|
501
559
|
|
502
560
|
for (i = 0; i < num; i++) {
|
@@ -529,48 +587,59 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
529
587
|
}
|
530
588
|
|
531
589
|
if (_stackprof.raw) {
|
532
|
-
|
590
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
533
591
|
}
|
534
592
|
}
|
535
593
|
|
594
|
+
// buffer the current profile frames
|
595
|
+
// This must be async-signal-safe
|
596
|
+
// Returns immediately if another set of frames are already in the buffer
|
536
597
|
void
|
537
|
-
|
598
|
+
stackprof_buffer_sample(void)
|
538
599
|
{
|
539
|
-
|
600
|
+
if (_stackprof.buffer_count > 0) {
|
601
|
+
// Another sample is already pending
|
602
|
+
return;
|
603
|
+
}
|
604
|
+
|
605
|
+
uint64_t start_timestamp = 0;
|
606
|
+
int64_t timestamp_delta = 0;
|
540
607
|
int num;
|
541
608
|
if (_stackprof.raw) {
|
542
|
-
struct
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
|
609
|
+
struct timestamp_t t;
|
610
|
+
capture_timestamp(&t);
|
611
|
+
start_timestamp = timestamp_usec(&t);
|
612
|
+
timestamp_delta = delta_usec(&_stackprof.last_sample_at, &t);
|
547
613
|
}
|
614
|
+
|
548
615
|
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
|
549
|
-
|
616
|
+
|
617
|
+
_stackprof.buffer_count = num;
|
618
|
+
_stackprof.buffer_time.timestamp_usec = start_timestamp;
|
619
|
+
_stackprof.buffer_time.delta_usec = timestamp_delta;
|
550
620
|
}
|
551
621
|
|
552
622
|
void
|
553
|
-
stackprof_record_gc_samples()
|
623
|
+
stackprof_record_gc_samples(void)
|
554
624
|
{
|
555
|
-
|
556
|
-
|
625
|
+
int64_t delta_to_first_unrecorded_gc_sample = 0;
|
626
|
+
uint64_t start_timestamp = 0;
|
627
|
+
size_t i;
|
557
628
|
if (_stackprof.raw) {
|
558
|
-
struct
|
559
|
-
|
560
|
-
|
561
|
-
timersub(&t, &_stackprof.last_sample_at, &diff);
|
629
|
+
struct timestamp_t t;
|
630
|
+
capture_timestamp(&t);
|
631
|
+
start_timestamp = timestamp_usec(&t);
|
562
632
|
|
563
633
|
// We don't know when the GC samples were actually marked, so let's
|
564
634
|
// assume that they were marked at a perfectly regular interval.
|
565
|
-
delta_to_first_unrecorded_gc_sample = (
|
635
|
+
delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
|
566
636
|
if (delta_to_first_unrecorded_gc_sample < 0) {
|
567
637
|
delta_to_first_unrecorded_gc_sample = 0;
|
568
638
|
}
|
569
639
|
}
|
570
640
|
|
571
|
-
|
572
641
|
for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
|
573
|
-
|
642
|
+
int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
|
574
643
|
|
575
644
|
if (_stackprof.unrecorded_gc_marking_samples) {
|
576
645
|
_stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
|
@@ -579,7 +648,7 @@ stackprof_record_gc_samples()
|
|
579
648
|
_stackprof.lines_buffer[1] = 0;
|
580
649
|
_stackprof.unrecorded_gc_marking_samples--;
|
581
650
|
|
582
|
-
stackprof_record_sample_for_stack(2, timestamp_delta);
|
651
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
583
652
|
} else if (_stackprof.unrecorded_gc_sweeping_samples) {
|
584
653
|
_stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
|
585
654
|
_stackprof.lines_buffer[0] = 0;
|
@@ -588,11 +657,11 @@ stackprof_record_gc_samples()
|
|
588
657
|
|
589
658
|
_stackprof.unrecorded_gc_sweeping_samples--;
|
590
659
|
|
591
|
-
stackprof_record_sample_for_stack(2, timestamp_delta);
|
660
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
592
661
|
} else {
|
593
662
|
_stackprof.frames_buffer[0] = FAKE_FRAME_GC;
|
594
663
|
_stackprof.lines_buffer[0] = 0;
|
595
|
-
stackprof_record_sample_for_stack(1, timestamp_delta);
|
664
|
+
stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
|
596
665
|
}
|
597
666
|
}
|
598
667
|
_stackprof.during_gc += _stackprof.unrecorded_gc_samples;
|
@@ -601,8 +670,25 @@ stackprof_record_gc_samples()
|
|
601
670
|
_stackprof.unrecorded_gc_sweeping_samples = 0;
|
602
671
|
}
|
603
672
|
|
673
|
+
// record the sample previously buffered by stackprof_buffer_sample
|
674
|
+
static void
|
675
|
+
stackprof_record_buffer(void)
|
676
|
+
{
|
677
|
+
stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
|
678
|
+
|
679
|
+
// reset the buffer
|
680
|
+
_stackprof.buffer_count = 0;
|
681
|
+
}
|
682
|
+
|
604
683
|
/* Take a sample and record it right away: buffer the stack, then record the buffer. */
static void
stackprof_sample_and_record(void)
{
    stackprof_buffer_sample();  /* capture frames and timing */
    stackprof_record_buffer();  /* fold them into the profile and clear the buffer */
}
|
689
|
+
|
690
|
+
static void
|
691
|
+
stackprof_job_record_gc(void *data)
|
606
692
|
{
|
607
693
|
if (!_stackprof.running) return;
|
608
694
|
|
@@ -610,11 +696,19 @@ stackprof_gc_job_handler(void *data)
|
|
610
696
|
}
|
611
697
|
|
612
698
|
static void
|
613
|
-
|
699
|
+
stackprof_job_sample_and_record(void *data)
|
614
700
|
{
|
615
701
|
if (!_stackprof.running) return;
|
616
702
|
|
617
|
-
|
703
|
+
stackprof_sample_and_record();
|
704
|
+
}
|
705
|
+
|
706
|
+
static void
|
707
|
+
stackprof_job_record_buffer(void *data)
|
708
|
+
{
|
709
|
+
if (!_stackprof.running) return;
|
710
|
+
|
711
|
+
stackprof_record_buffer();
|
618
712
|
}
|
619
713
|
|
620
714
|
static void
|
@@ -636,13 +730,17 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
|
636
730
|
_stackprof.unrecorded_gc_sweeping_samples++;
|
637
731
|
}
|
638
732
|
_stackprof.unrecorded_gc_samples++;
|
639
|
-
rb_postponed_job_register_one(0,
|
733
|
+
rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
|
640
734
|
} else {
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
735
|
+
if (stackprof_use_postponed_job) {
|
736
|
+
rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
|
737
|
+
} else {
|
738
|
+
// Buffer a sample immediately, if an existing sample exists this will
|
739
|
+
// return immediately
|
740
|
+
stackprof_buffer_sample();
|
741
|
+
// Enqueue a job to record the sample
|
742
|
+
rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
|
743
|
+
}
|
646
744
|
}
|
647
745
|
pthread_mutex_unlock(&lock);
|
648
746
|
}
|
@@ -653,7 +751,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
|
|
653
751
|
_stackprof.overall_signals++;
|
654
752
|
if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
|
655
753
|
return;
|
656
|
-
|
754
|
+
stackprof_sample_and_record();
|
657
755
|
}
|
658
756
|
|
659
757
|
static VALUE
|
@@ -663,7 +761,7 @@ stackprof_sample(VALUE self)
|
|
663
761
|
return Qfalse;
|
664
762
|
|
665
763
|
_stackprof.overall_signals++;
|
666
|
-
|
764
|
+
stackprof_sample_and_record();
|
667
765
|
return Qtrue;
|
668
766
|
}
|
669
767
|
|
@@ -720,9 +818,24 @@ stackprof_atfork_child(void)
|
|
720
818
|
stackprof_stop(rb_mStackProf);
|
721
819
|
}
|
722
820
|
|
821
|
+
static VALUE
|
822
|
+
stackprof_use_postponed_job_l(VALUE self)
|
823
|
+
{
|
824
|
+
stackprof_use_postponed_job = 1;
|
825
|
+
return Qnil;
|
826
|
+
}
|
827
|
+
|
723
828
|
void
|
724
829
|
Init_stackprof(void)
|
725
830
|
{
|
831
|
+
/*
|
832
|
+
* As of Ruby 3.0, it should be safe to read stack frames at any time, unless YJIT is enabled
|
833
|
+
* See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
|
834
|
+
*/
|
835
|
+
#if RUBY_API_VERSION_MAJOR < 3
|
836
|
+
stackprof_use_postponed_job = 0;
|
837
|
+
#endif
|
838
|
+
|
726
839
|
size_t i;
|
727
840
|
#define S(name) sym_##name = ID2SYM(rb_intern(#name));
|
728
841
|
S(object);
|
@@ -742,6 +855,7 @@ Init_stackprof(void)
|
|
742
855
|
S(mode);
|
743
856
|
S(interval);
|
744
857
|
S(raw);
|
858
|
+
S(raw_sample_timestamps);
|
745
859
|
S(raw_timestamp_deltas);
|
746
860
|
S(out);
|
747
861
|
S(metadata);
|
@@ -764,9 +878,9 @@ Init_stackprof(void)
|
|
764
878
|
_stackprof.raw_samples_capa = 0;
|
765
879
|
_stackprof.raw_sample_index = 0;
|
766
880
|
|
767
|
-
_stackprof.
|
768
|
-
_stackprof.
|
769
|
-
_stackprof.
|
881
|
+
_stackprof.raw_sample_times = NULL;
|
882
|
+
_stackprof.raw_sample_times_len = 0;
|
883
|
+
_stackprof.raw_sample_times_capa = 0;
|
770
884
|
|
771
885
|
_stackprof.empty_string = rb_str_new_cstr("");
|
772
886
|
rb_global_variable(&_stackprof.empty_string);
|
@@ -783,6 +897,7 @@ Init_stackprof(void)
|
|
783
897
|
rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
|
784
898
|
rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
|
785
899
|
rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
|
900
|
+
rb_define_singleton_method(rb_mStackProf, "use_postponed_job!", stackprof_use_postponed_job_l, 0);
|
786
901
|
|
787
902
|
pthread_atfork(stackprof_atfork_prepare, stackprof_atfork_parent, stackprof_atfork_child);
|
788
903
|
}
|
data/lib/stackprof/report.rb
CHANGED
@@ -2,9 +2,43 @@
|
|
2
2
|
|
3
3
|
require 'pp'
|
4
4
|
require 'digest/md5'
|
5
|
+
require 'json'
|
5
6
|
|
6
7
|
module StackProf
|
7
8
|
class Report
|
9
|
+
MARSHAL_SIGNATURE = "\x04\x08"
|
10
|
+
|
11
|
+
class << self
  # Build a report from a profile stored on disk.
  #
  # The file is read in binary mode. Content starting with MARSHAL_SIGNATURE
  # is treated as a Marshal dump; anything else is parsed as JSON.
  #
  # NOTE(security): Marshal.load can instantiate arbitrary objects, so only
  # pass profile dumps that come from a trusted source.
  def from_file(file)
    if (content = IO.binread(file)).start_with?(MARSHAL_SIGNATURE)
      new(Marshal.load(content))
    else
      from_json(JSON.parse(content))
    end
  end

  # Build a report from an already-parsed JSON hash (string keys).
  def from_json(json)
    new(parse_json(json))
  end

  # Convert the keys of +json+ in place, recursing into nested hashes:
  # keys made only of digits become Integers, every other key becomes a Symbol.
  # Returns the same (mutated) hash.
  def parse_json(json)
    json.keys.each do |key|
      value = json.delete(key)
      # Recurse directly; no Report instance is needed for nested hashes.
      parse_json(value) if value.is_a?(Hash)

      new_key = case key
      when /\A[0-9]+\z/ # at least one digit, so an empty key is not turned into 0
        key.to_i
      else
        key.to_sym
      end

      json[new_key] = value
    end
    json
  end
end
|
41
|
+
|
8
42
|
def initialize(data)
|
9
43
|
@data = data
|
10
44
|
end
|
@@ -95,51 +129,10 @@ module StackProf
|
|
95
129
|
print_flamegraph(f, skip_common, true)
|
96
130
|
end
|
97
131
|
|
98
|
-
StackCursor = Struct.new(:raw, :idx, :length) do
|
99
|
-
def weight
|
100
|
-
@weight ||= raw[1 + idx + length]
|
101
|
-
end
|
102
|
-
|
103
|
-
def [](i)
|
104
|
-
if i >= length
|
105
|
-
nil
|
106
|
-
else
|
107
|
-
raw[1 + idx + i]
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
def <=>(other)
|
112
|
-
i = 0
|
113
|
-
while i < length && i < other.length
|
114
|
-
if self[i] != other[i]
|
115
|
-
return self[i] <=> other[i]
|
116
|
-
end
|
117
|
-
i += 1
|
118
|
-
end
|
119
|
-
|
120
|
-
return length <=> other.length
|
121
|
-
end
|
122
|
-
end
|
123
|
-
|
124
132
|
def print_flamegraph(f, skip_common, alphabetical=false)
|
125
133
|
raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
|
126
134
|
|
127
|
-
stacks =
|
128
|
-
max_x = 0
|
129
|
-
max_y = 0
|
130
|
-
|
131
|
-
idx = 0
|
132
|
-
loop do
|
133
|
-
len = raw[idx]
|
134
|
-
break unless len
|
135
|
-
max_y = len if len > max_y
|
136
|
-
|
137
|
-
stack = StackCursor.new(raw, idx, len)
|
138
|
-
stacks << stack
|
139
|
-
max_x += stack.weight
|
140
|
-
|
141
|
-
idx += len + 2
|
142
|
-
end
|
135
|
+
stacks, max_x, max_y = flamegraph_stacks(raw)
|
143
136
|
|
144
137
|
stacks.sort! if alphabetical
|
145
138
|
|
@@ -150,7 +143,7 @@ module StackProf
|
|
150
143
|
x = 0
|
151
144
|
|
152
145
|
stacks.each do |stack|
|
153
|
-
weight = stack.
|
146
|
+
weight = stack.last
|
154
147
|
cell = stack[y] unless y == stack.length-1
|
155
148
|
|
156
149
|
if cell.nil?
|
@@ -191,6 +184,24 @@ module StackProf
|
|
191
184
|
f.puts '])'
|
192
185
|
end
|
193
186
|
|
187
|
+
# Split the flat raw-sample array into one array per stack.
#
# +raw+ is laid out as repeated records of: length, <length> frame entries,
# weight. Each returned stack holds its frames followed by its weight.
# Returns [stacks, max_x, max_y], where max_x is the sum of all weights and
# max_y is the largest length seen. +raw+ itself is not modified.
def flamegraph_stacks(raw)
  collected = []
  total_weight = 0
  deepest = 0
  cursor = 0

  while (depth = raw[cursor])
    deepest = depth if depth > deepest

    # Frames plus the trailing weight start right after the length slot.
    entry = raw.slice(cursor + 1, depth + 1)
    cursor += depth + 2

    collected << entry
    total_weight += entry.last
  end

  [collected, total_weight, deepest]
end
|
204
|
+
|
194
205
|
def flamegraph_row(f, x, y, weight, addr)
|
195
206
|
frame = @data[:frames][addr]
|
196
207
|
f.print ',' if @rows_started
|
@@ -231,15 +242,7 @@ module StackProf
|
|
231
242
|
def print_d3_flamegraph(f=STDOUT, skip_common=true)
|
232
243
|
raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
|
233
244
|
|
234
|
-
stacks =
|
235
|
-
max_x = 0
|
236
|
-
max_y = 0
|
237
|
-
while len = raw.shift
|
238
|
-
max_y = len if len > max_y
|
239
|
-
stack = raw.slice!(0, len+1)
|
240
|
-
stacks << stack
|
241
|
-
max_x += stack.last
|
242
|
-
end
|
245
|
+
stacks, * = flamegraph_stacks(raw)
|
243
246
|
|
244
247
|
# d3-flame-graph supports only alphabetical flamegraphs
|
245
248
|
stacks.sort!
|
data/lib/stackprof.rb
CHANGED
data/stackprof.gemspec
CHANGED
@@ -0,0 +1 @@
|
|
1
|
+
{: modeI"cpu:ET
|
@@ -0,0 +1 @@
|
|
1
|
+
{ "mode": "cpu" }
|
data/test/test_report.rb
CHANGED
@@ -32,3 +32,27 @@ class ReportDumpTest < MiniTest::Test
|
|
32
32
|
assert_equal expected, Marshal.load(marshal_data)
|
33
33
|
end
|
34
34
|
end
|
35
|
+
|
36
|
+
# Checks that StackProf::Report.from_file loads both the JSON and the Marshal
# profile fixtures into the same data.
class ReportReadTest < MiniTest::Test
  require 'pathname'

  def test_from_file_read_json
    report = StackProf::Report.from_file(fixture("profile.json"))

    assert_equal({ mode: "cpu" }, report.data)
  end

  def test_from_file_read_marshal
    report = StackProf::Report.from_file(fixture("profile.dump"))

    assert_equal({ mode: "cpu" }, report.data)
  end

  private

  # Path of +name+ inside the fixtures directory next to this test file.
  def fixture(name)
    fixtures_dir = Pathname.new(__dir__).join("fixtures")
    fixtures_dir.join(name)
  end
end
|
data/test/test_stackprof.rb
CHANGED
@@ -5,6 +5,10 @@ require 'tempfile'
|
|
5
5
|
require 'pathname'
|
6
6
|
|
7
7
|
class StackProfTest < MiniTest::Test
|
8
|
+
# Runs before each test.
def setup
  # warm some caches to avoid flakiness
  Object.new
end
|
11
|
+
|
8
12
|
def test_info
|
9
13
|
profile = StackProf.run{}
|
10
14
|
assert_equal 1.2, profile[:version]
|
@@ -78,9 +82,14 @@ class StackProfTest < MiniTest::Test
|
|
78
82
|
end
|
79
83
|
|
80
84
|
assert_operator profile[:samples], :>=, 1
|
81
|
-
|
82
|
-
|
83
|
-
|
85
|
+
if RUBY_VERSION >= '3'
|
86
|
+
assert profile[:frames].values.take(2).map { |f|
|
87
|
+
f[:name].include? "StackProfTest#math"
|
88
|
+
}.any?
|
89
|
+
else
|
90
|
+
frame = profile[:frames].values.first
|
91
|
+
assert_includes frame[:name], "StackProfTest#math"
|
92
|
+
end
|
84
93
|
end
|
85
94
|
|
86
95
|
def test_walltime
|
@@ -121,19 +130,38 @@ class StackProfTest < MiniTest::Test
|
|
121
130
|
end
|
122
131
|
|
123
132
|
# Raw mode: ten manual samples must produce one aggregated raw stack plus
# ten timestamps and ten deltas that fit inside the measured wall window.
def test_raw
  clock_args = [Process::CLOCK_MONOTONIC, :microsecond]
  before_monotonic = Process.clock_gettime(*clock_args)

  profile = StackProf.run(mode: :custom, raw: true) do
    10.times do
      StackProf.sample
      sleep 0.0001
    end
  end

  after_monotonic = Process.clock_gettime(*clock_args)

  # The raw array is [length, frames..., weight]; all ten samples share one stack.
  raw = profile[:raw]
  assert_equal 10, raw[-1]
  assert_equal raw[0] + 2, raw.size

  offset = RUBY_VERSION >= '3' ? -3 : -2
  frame = profile[:frames][raw[offset]]
  assert_includes frame[:name], 'StackProfTest#test_raw'

  timestamps = profile[:raw_sample_timestamps]
  assert_equal 10, timestamps.size
  timestamps.each_cons(2) do |earlier, later|
    assert_operator earlier, :>, before_monotonic
    assert_operator later, :>=, earlier
    assert_operator later, :<, after_monotonic
  end

  deltas = profile[:raw_timestamp_deltas]
  assert_equal 10, deltas.size
  total_duration = after_monotonic - before_monotonic
  assert_operator deltas.inject(&:+), :<, total_duration

  deltas.each do |delta|
    assert_operator delta, :>, 0
  end
end
|
138
166
|
|
139
167
|
def test_metadata
|
@@ -205,7 +233,6 @@ class StackProfTest < MiniTest::Test
|
|
205
233
|
end
|
206
234
|
end
|
207
235
|
|
208
|
-
raw = profile[:raw]
|
209
236
|
gc_frame = profile[:frames].values.find{ |f| f[:name] == "(garbage collection)" }
|
210
237
|
marking_frame = profile[:frames].values.find{ |f| f[:name] == "(marking)" }
|
211
238
|
sweeping_frame = profile[:frames].values.find{ |f| f[:name] == "(sweeping)" }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stackprof
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.20
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aman Gupta
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-07-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake-compiler
|
@@ -82,6 +82,8 @@ files:
|
|
82
82
|
- lib/stackprof/report.rb
|
83
83
|
- sample.rb
|
84
84
|
- stackprof.gemspec
|
85
|
+
- test/fixtures/profile.dump
|
86
|
+
- test/fixtures/profile.json
|
85
87
|
- test/test_middleware.rb
|
86
88
|
- test/test_report.rb
|
87
89
|
- test/test_stackprof.rb
|
@@ -94,10 +96,10 @@ licenses:
|
|
94
96
|
- MIT
|
95
97
|
metadata:
|
96
98
|
bug_tracker_uri: https://github.com/tmm1/stackprof/issues
|
97
|
-
changelog_uri: https://github.com/tmm1/stackprof/blob/v0.2.
|
98
|
-
documentation_uri: https://www.rubydoc.info/gems/stackprof/0.2.
|
99
|
-
source_code_uri: https://github.com/tmm1/stackprof/tree/v0.2.
|
100
|
-
post_install_message:
|
99
|
+
changelog_uri: https://github.com/tmm1/stackprof/blob/v0.2.20/CHANGELOG.md
|
100
|
+
documentation_uri: https://www.rubydoc.info/gems/stackprof/0.2.20
|
101
|
+
source_code_uri: https://github.com/tmm1/stackprof/tree/v0.2.20
|
102
|
+
post_install_message:
|
101
103
|
rdoc_options: []
|
102
104
|
require_paths:
|
103
105
|
- lib
|
@@ -112,8 +114,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
114
|
- !ruby/object:Gem::Version
|
113
115
|
version: '0'
|
114
116
|
requirements: []
|
115
|
-
rubygems_version: 3.1
|
116
|
-
signing_key:
|
117
|
+
rubygems_version: 3.0.3.1
|
118
|
+
signing_key:
|
117
119
|
specification_version: 4
|
118
120
|
summary: sampling callstack-profiler for ruby 2.2+
|
119
121
|
test_files: []
|