stackprof 0.2.14 → 0.2.18
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +43 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +13 -6
- data/README.md +57 -51
- data/Rakefile +11 -25
- data/ext/stackprof/stackprof.c +206 -70
- data/lib/stackprof/report.rb +28 -23
- data/lib/stackprof.rb +1 -1
- data/stackprof.gemspec +8 -1
- data/test/test_stackprof.rb +65 -11
- data/vendor/FlameGraph/flamegraph.pl +751 -85
- metadata +9 -7
- data/.travis.yml +0 -21
- data/Dockerfile +0 -21
- data/Gemfile.lock +0 -27
data/ext/stackprof/stackprof.c
CHANGED
@@ -7,20 +7,32 @@
|
|
7
7
|
**********************************************************************/
|
8
8
|
|
9
9
|
#include <ruby/ruby.h>
|
10
|
+
#include <ruby/version.h>
|
10
11
|
#include <ruby/debug.h>
|
11
12
|
#include <ruby/st.h>
|
12
13
|
#include <ruby/io.h>
|
13
14
|
#include <ruby/intern.h>
|
14
15
|
#include <signal.h>
|
15
16
|
#include <sys/time.h>
|
17
|
+
#include <time.h>
|
16
18
|
#include <pthread.h>
|
17
19
|
|
18
20
|
#define BUF_SIZE 2048
|
21
|
+
#define MICROSECONDS_IN_SECOND 1000000
|
22
|
+
#define NANOSECONDS_IN_SECOND 1000000000
|
19
23
|
|
20
24
|
#define FAKE_FRAME_GC INT2FIX(0)
|
21
25
|
#define FAKE_FRAME_MARK INT2FIX(1)
|
22
26
|
#define FAKE_FRAME_SWEEP INT2FIX(2)
|
23
27
|
|
28
|
+
/*
|
29
|
+
* As of Ruby 3.0, it should be safe to read stack frames at any time
|
30
|
+
* See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
|
31
|
+
*/
|
32
|
+
#if RUBY_API_VERSION_MAJOR < 3
|
33
|
+
#define USE_POSTPONED_JOB
|
34
|
+
#endif
|
35
|
+
|
24
36
|
static const char *fake_frame_cstrs[] = {
|
25
37
|
"(garbage collection)",
|
26
38
|
"(marking)",
|
@@ -29,6 +41,47 @@ static const char *fake_frame_cstrs[] = {
|
|
29
41
|
|
30
42
|
#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
|
31
43
|
|
44
|
+
#ifdef _POSIX_MONOTONIC_CLOCK
|
45
|
+
#define timestamp_t timespec
|
46
|
+
typedef struct timestamp_t timestamp_t;
|
47
|
+
|
48
|
+
static void capture_timestamp(timestamp_t *ts) {
|
49
|
+
clock_gettime(CLOCK_MONOTONIC, ts);
|
50
|
+
}
|
51
|
+
|
52
|
+
static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
|
53
|
+
int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
|
54
|
+
if (end->tv_nsec < start->tv_nsec) {
|
55
|
+
result -= MICROSECONDS_IN_SECOND;
|
56
|
+
result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
|
57
|
+
} else {
|
58
|
+
result += (end->tv_nsec - start->tv_nsec) / 1000;
|
59
|
+
}
|
60
|
+
return result;
|
61
|
+
}
|
62
|
+
|
63
|
+
/*
 * Convert a timestamp captured by capture_timestamp() (a struct timespec in
 * this branch) into a count of whole microseconds.
 *
 * ts: timestamp to convert; must not be NULL.
 * Returns the seconds field scaled to microseconds plus the nanoseconds field
 * truncated to microseconds.
 */
static uint64_t timestamp_usec(timestamp_t *ts) {
    /* Widen before multiplying so the product is computed in 64 bits even on
     * platforms where time_t is a 32-bit type. */
    return ((uint64_t)MICROSECONDS_IN_SECOND * (uint64_t)ts->tv_sec) + (uint64_t)(ts->tv_nsec / 1000);
}
|
66
|
+
#else
|
67
|
+
#define timestamp_t timeval
|
68
|
+
typedef struct timestamp_t timestamp_t;
|
69
|
+
|
70
|
+
static void capture_timestamp(timestamp_t *ts) {
|
71
|
+
gettimeofday(ts, NULL);
|
72
|
+
}
|
73
|
+
|
74
|
+
static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
|
75
|
+
struct timeval diff;
|
76
|
+
timersub(end, start, &diff);
|
77
|
+
return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec;
|
78
|
+
}
|
79
|
+
|
80
|
+
/*
 * Convert a timestamp captured by capture_timestamp() (a struct timeval in
 * this fallback branch) into a count of whole microseconds.
 *
 * ts: timestamp to convert; must not be NULL.
 * Returns the seconds field scaled to microseconds plus the microseconds field.
 */
static uint64_t timestamp_usec(timestamp_t *ts) {
    /* ts is a pointer, so its fields are read with "->". Widen before
     * multiplying so the product is computed in 64 bits even on platforms
     * where time_t is a 32-bit type. */
    return ((uint64_t)MICROSECONDS_IN_SECOND * (uint64_t)ts->tv_sec) + (uint64_t)ts->tv_usec;
}
|
83
|
+
#endif
|
84
|
+
|
32
85
|
typedef struct {
|
33
86
|
size_t total_samples;
|
34
87
|
size_t caller_samples;
|
@@ -37,6 +90,11 @@ typedef struct {
|
|
37
90
|
st_table *lines;
|
38
91
|
} frame_data_t;
|
39
92
|
|
93
|
+
typedef struct {
|
94
|
+
uint64_t timestamp_usec;
|
95
|
+
int64_t delta_usec;
|
96
|
+
} sample_time_t;
|
97
|
+
|
40
98
|
static struct {
|
41
99
|
int running;
|
42
100
|
int raw;
|
@@ -46,16 +104,17 @@ static struct {
|
|
46
104
|
VALUE interval;
|
47
105
|
VALUE out;
|
48
106
|
VALUE metadata;
|
107
|
+
int ignore_gc;
|
49
108
|
|
50
109
|
VALUE *raw_samples;
|
51
110
|
size_t raw_samples_len;
|
52
111
|
size_t raw_samples_capa;
|
53
112
|
size_t raw_sample_index;
|
54
113
|
|
55
|
-
struct
|
56
|
-
|
57
|
-
size_t
|
58
|
-
size_t
|
114
|
+
struct timestamp_t last_sample_at;
|
115
|
+
sample_time_t *raw_sample_times;
|
116
|
+
size_t raw_sample_times_len;
|
117
|
+
size_t raw_sample_times_capa;
|
59
118
|
|
60
119
|
size_t overall_signals;
|
61
120
|
size_t overall_samples;
|
@@ -67,14 +126,17 @@ static struct {
|
|
67
126
|
|
68
127
|
VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
|
69
128
|
VALUE empty_string;
|
129
|
+
|
130
|
+
int buffer_count;
|
131
|
+
sample_time_t buffer_time;
|
70
132
|
VALUE frames_buffer[BUF_SIZE];
|
71
133
|
int lines_buffer[BUF_SIZE];
|
72
134
|
} _stackprof;
|
73
135
|
|
74
136
|
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
|
75
137
|
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
|
76
|
-
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames,
|
77
|
-
static VALUE sym_state, sym_marking, sym_sweeping;
|
138
|
+
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
|
139
|
+
static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
|
78
140
|
static VALUE sym_gc_samples, objtracer;
|
79
141
|
static VALUE gc_hook;
|
80
142
|
static VALUE rb_mStackProf;
|
@@ -88,6 +150,7 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
88
150
|
struct sigaction sa;
|
89
151
|
struct itimerval timer;
|
90
152
|
VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
|
153
|
+
int ignore_gc = 0;
|
91
154
|
int raw = 0, aggregate = 1;
|
92
155
|
|
93
156
|
if (_stackprof.running)
|
@@ -99,6 +162,9 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
99
162
|
mode = rb_hash_aref(opts, sym_mode);
|
100
163
|
interval = rb_hash_aref(opts, sym_interval);
|
101
164
|
out = rb_hash_aref(opts, sym_out);
|
165
|
+
if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
|
166
|
+
ignore_gc = 1;
|
167
|
+
}
|
102
168
|
|
103
169
|
VALUE metadata_val = rb_hash_aref(opts, sym_metadata);
|
104
170
|
if (RTEST(metadata_val)) {
|
@@ -115,6 +181,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
115
181
|
}
|
116
182
|
if (!RTEST(mode)) mode = sym_wall;
|
117
183
|
|
184
|
+
if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
|
185
|
+
rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
|
186
|
+
}
|
187
|
+
|
118
188
|
if (!_stackprof.frames) {
|
119
189
|
_stackprof.frames = st_init_numtable();
|
120
190
|
_stackprof.overall_signals = 0;
|
@@ -151,11 +221,12 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
151
221
|
_stackprof.aggregate = aggregate;
|
152
222
|
_stackprof.mode = mode;
|
153
223
|
_stackprof.interval = interval;
|
224
|
+
_stackprof.ignore_gc = ignore_gc;
|
154
225
|
_stackprof.metadata = metadata;
|
155
226
|
_stackprof.out = out;
|
156
227
|
|
157
228
|
if (raw) {
|
158
|
-
|
229
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
159
230
|
}
|
160
231
|
|
161
232
|
return Qtrue;
|
@@ -190,13 +261,19 @@ stackprof_stop(VALUE self)
|
|
190
261
|
return Qtrue;
|
191
262
|
}
|
192
263
|
|
264
|
+
#if SIZEOF_VOIDP == SIZEOF_LONG
|
265
|
+
# define PTR2NUM(x) (LONG2NUM((long)(x)))
|
266
|
+
#else
|
267
|
+
# define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
|
268
|
+
#endif
|
269
|
+
|
193
270
|
static int
|
194
271
|
frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
|
195
272
|
{
|
196
273
|
VALUE edges = (VALUE)arg;
|
197
274
|
|
198
275
|
intptr_t weight = (intptr_t)val;
|
199
|
-
rb_hash_aset(edges,
|
276
|
+
rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
|
200
277
|
return ST_CONTINUE;
|
201
278
|
}
|
202
279
|
|
@@ -223,7 +300,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
223
300
|
VALUE name, file, edges, lines;
|
224
301
|
VALUE line;
|
225
302
|
|
226
|
-
rb_hash_aset(results,
|
303
|
+
rb_hash_aset(results, PTR2NUM(frame), details);
|
227
304
|
|
228
305
|
if (FIXNUM_P(frame)) {
|
229
306
|
name = _stackprof.fake_frame_names[FIX2INT(frame)];
|
@@ -284,6 +361,8 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
284
361
|
rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples));
|
285
362
|
rb_hash_aset(results, sym_metadata, _stackprof.metadata);
|
286
363
|
|
364
|
+
_stackprof.metadata = Qnil;
|
365
|
+
|
287
366
|
frames = rb_hash_new();
|
288
367
|
rb_hash_aset(results, sym_frames, frames);
|
289
368
|
st_foreach(_stackprof.frames, frame_i, (st_data_t)frames);
|
@@ -293,7 +372,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
293
372
|
|
294
373
|
if (_stackprof.raw && _stackprof.raw_samples_len) {
|
295
374
|
size_t len, n, o;
|
296
|
-
VALUE raw_timestamp_deltas;
|
375
|
+
VALUE raw_sample_timestamps, raw_timestamp_deltas;
|
297
376
|
VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
|
298
377
|
|
299
378
|
for (n = 0; n < _stackprof.raw_samples_len; n++) {
|
@@ -301,7 +380,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
301
380
|
rb_ary_push(raw_samples, SIZET2NUM(len));
|
302
381
|
|
303
382
|
for (o = 0, n++; o < len; n++, o++)
|
304
|
-
rb_ary_push(raw_samples,
|
383
|
+
rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n]));
|
305
384
|
rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
|
306
385
|
}
|
307
386
|
|
@@ -313,17 +392,20 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
313
392
|
|
314
393
|
rb_hash_aset(results, sym_raw, raw_samples);
|
315
394
|
|
316
|
-
|
395
|
+
raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
396
|
+
raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
317
397
|
|
318
|
-
for (n = 0; n < _stackprof.
|
319
|
-
rb_ary_push(
|
398
|
+
for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
|
399
|
+
rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
|
400
|
+
rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
|
320
401
|
}
|
321
402
|
|
322
|
-
free(_stackprof.
|
323
|
-
_stackprof.
|
324
|
-
_stackprof.
|
325
|
-
_stackprof.
|
403
|
+
free(_stackprof.raw_sample_times);
|
404
|
+
_stackprof.raw_sample_times = NULL;
|
405
|
+
_stackprof.raw_sample_times_len = 0;
|
406
|
+
_stackprof.raw_sample_times_capa = 0;
|
326
407
|
|
408
|
+
rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
|
327
409
|
rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
|
328
410
|
|
329
411
|
_stackprof.raw = 0;
|
@@ -403,14 +485,14 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
|
|
403
485
|
}
|
404
486
|
|
405
487
|
void
|
406
|
-
stackprof_record_sample_for_stack(int num,
|
488
|
+
stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
|
407
489
|
{
|
408
490
|
int i, n;
|
409
491
|
VALUE prev_frame = Qnil;
|
410
492
|
|
411
493
|
_stackprof.overall_samples++;
|
412
494
|
|
413
|
-
if (_stackprof.raw) {
|
495
|
+
if (_stackprof.raw && num > 0) {
|
414
496
|
int found = 0;
|
415
497
|
|
416
498
|
/* If there's no sample buffer allocated, then allocate one. The buffer
|
@@ -462,20 +544,23 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
462
544
|
}
|
463
545
|
|
464
546
|
/* If there's no timestamp delta buffer, allocate one */
|
465
|
-
if (!_stackprof.
|
466
|
-
_stackprof.
|
467
|
-
_stackprof.
|
468
|
-
_stackprof.
|
547
|
+
if (!_stackprof.raw_sample_times) {
|
548
|
+
_stackprof.raw_sample_times_capa = 100;
|
549
|
+
_stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
550
|
+
_stackprof.raw_sample_times_len = 0;
|
469
551
|
}
|
470
552
|
|
471
553
|
/* Double the buffer size if it's too small */
|
472
|
-
while (_stackprof.
|
473
|
-
_stackprof.
|
474
|
-
_stackprof.
|
554
|
+
while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
|
555
|
+
_stackprof.raw_sample_times_capa *= 2;
|
556
|
+
_stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
475
557
|
}
|
476
558
|
|
477
|
-
/* Store the time delta (which is the amount of
|
478
|
-
_stackprof.
|
559
|
+
/* Store the sample timestamp and the time delta (the number of microseconds between samples). */
|
560
|
+
_stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
|
561
|
+
.timestamp_usec = sample_timestamp,
|
562
|
+
.delta_usec = timestamp_delta,
|
563
|
+
};
|
479
564
|
}
|
480
565
|
|
481
566
|
for (i = 0; i < num; i++) {
|
@@ -508,48 +593,59 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
508
593
|
}
|
509
594
|
|
510
595
|
if (_stackprof.raw) {
|
511
|
-
|
596
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
512
597
|
}
|
513
598
|
}
|
514
599
|
|
600
|
+
// buffer the current profile frames
|
601
|
+
// This must be async-signal-safe
|
602
|
+
// Returns immediately if another set of frames are already in the buffer
|
515
603
|
void
|
516
|
-
|
604
|
+
stackprof_buffer_sample(void)
|
517
605
|
{
|
518
|
-
|
606
|
+
if (_stackprof.buffer_count > 0) {
|
607
|
+
// Another sample is already pending
|
608
|
+
return;
|
609
|
+
}
|
610
|
+
|
611
|
+
uint64_t start_timestamp = 0;
|
612
|
+
int64_t timestamp_delta = 0;
|
519
613
|
int num;
|
520
614
|
if (_stackprof.raw) {
|
521
|
-
struct
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
|
615
|
+
struct timestamp_t t;
|
616
|
+
capture_timestamp(&t);
|
617
|
+
start_timestamp = timestamp_usec(&t);
|
618
|
+
timestamp_delta = delta_usec(&t, &_stackprof.last_sample_at);
|
526
619
|
}
|
620
|
+
|
527
621
|
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
|
528
|
-
|
622
|
+
|
623
|
+
_stackprof.buffer_count = num;
|
624
|
+
_stackprof.buffer_time.timestamp_usec = start_timestamp;
|
625
|
+
_stackprof.buffer_time.delta_usec = timestamp_delta;
|
529
626
|
}
|
530
627
|
|
531
628
|
void
|
532
|
-
stackprof_record_gc_samples()
|
629
|
+
stackprof_record_gc_samples(void)
|
533
630
|
{
|
534
|
-
|
535
|
-
|
631
|
+
int64_t delta_to_first_unrecorded_gc_sample = 0;
|
632
|
+
uint64_t start_timestamp = 0;
|
633
|
+
size_t i;
|
536
634
|
if (_stackprof.raw) {
|
537
|
-
struct
|
538
|
-
|
539
|
-
|
540
|
-
timersub(&t, &_stackprof.last_sample_at, &diff);
|
635
|
+
struct timestamp_t t;
|
636
|
+
capture_timestamp(&t);
|
637
|
+
start_timestamp = timestamp_usec(&t);
|
541
638
|
|
542
639
|
// We don't know when the GC samples were actually marked, so let's
|
543
640
|
// assume that they were marked at a perfectly regular interval.
|
544
|
-
delta_to_first_unrecorded_gc_sample = (
|
641
|
+
delta_to_first_unrecorded_gc_sample = delta_usec(&t, &_stackprof.last_sample_at) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
|
545
642
|
if (delta_to_first_unrecorded_gc_sample < 0) {
|
546
643
|
delta_to_first_unrecorded_gc_sample = 0;
|
547
644
|
}
|
548
645
|
}
|
549
646
|
|
550
|
-
|
551
647
|
for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
|
552
|
-
|
648
|
+
int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
|
553
649
|
|
554
650
|
if (_stackprof.unrecorded_gc_marking_samples) {
|
555
651
|
_stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
|
@@ -558,7 +654,7 @@ stackprof_record_gc_samples()
|
|
558
654
|
_stackprof.lines_buffer[1] = 0;
|
559
655
|
_stackprof.unrecorded_gc_marking_samples--;
|
560
656
|
|
561
|
-
stackprof_record_sample_for_stack(2, timestamp_delta);
|
657
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
562
658
|
} else if (_stackprof.unrecorded_gc_sweeping_samples) {
|
563
659
|
_stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
|
564
660
|
_stackprof.lines_buffer[0] = 0;
|
@@ -567,11 +663,11 @@ stackprof_record_gc_samples()
|
|
567
663
|
|
568
664
|
_stackprof.unrecorded_gc_sweeping_samples--;
|
569
665
|
|
570
|
-
stackprof_record_sample_for_stack(2, timestamp_delta);
|
666
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
571
667
|
} else {
|
572
668
|
_stackprof.frames_buffer[0] = FAKE_FRAME_GC;
|
573
669
|
_stackprof.lines_buffer[0] = 0;
|
574
|
-
stackprof_record_sample_for_stack(1, timestamp_delta);
|
670
|
+
stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
|
575
671
|
}
|
576
672
|
}
|
577
673
|
_stackprof.during_gc += _stackprof.unrecorded_gc_samples;
|
@@ -580,35 +676,61 @@ stackprof_record_gc_samples()
|
|
580
676
|
_stackprof.unrecorded_gc_sweeping_samples = 0;
|
581
677
|
}
|
582
678
|
|
679
|
+
// record the sample previously buffered by stackprof_buffer_sample
|
680
|
+
static void
|
681
|
+
stackprof_record_buffer(void)
|
682
|
+
{
|
683
|
+
stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
|
684
|
+
|
685
|
+
// reset the buffer
|
686
|
+
_stackprof.buffer_count = 0;
|
687
|
+
}
|
688
|
+
|
583
689
|
static void
|
584
|
-
|
690
|
+
stackprof_sample_and_record(void)
|
691
|
+
{
|
692
|
+
stackprof_buffer_sample();
|
693
|
+
stackprof_record_buffer();
|
694
|
+
}
|
695
|
+
|
696
|
+
static void
|
697
|
+
stackprof_job_record_gc(void *data)
|
585
698
|
{
|
586
|
-
static int in_signal_handler = 0;
|
587
|
-
if (in_signal_handler) return;
|
588
699
|
if (!_stackprof.running) return;
|
589
700
|
|
590
|
-
in_signal_handler++;
|
591
701
|
stackprof_record_gc_samples();
|
592
|
-
in_signal_handler--;
|
593
702
|
}
|
594
703
|
|
704
|
+
#ifdef USE_POSTPONED_JOB
|
595
705
|
static void
|
596
|
-
|
706
|
+
stackprof_job_sample_and_record(void *data)
|
597
707
|
{
|
598
|
-
static int in_signal_handler = 0;
|
599
|
-
if (in_signal_handler) return;
|
600
708
|
if (!_stackprof.running) return;
|
601
709
|
|
602
|
-
|
603
|
-
|
604
|
-
|
710
|
+
stackprof_sample_and_record();
|
711
|
+
}
|
712
|
+
#endif
|
713
|
+
|
714
|
+
static void
|
715
|
+
stackprof_job_record_buffer(void *data)
|
716
|
+
{
|
717
|
+
if (!_stackprof.running) return;
|
718
|
+
|
719
|
+
stackprof_record_buffer();
|
605
720
|
}
|
606
721
|
|
607
722
|
static void
|
608
723
|
stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
609
724
|
{
|
725
|
+
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
726
|
+
|
610
727
|
_stackprof.overall_signals++;
|
611
|
-
|
728
|
+
|
729
|
+
if (!_stackprof.running) return;
|
730
|
+
if (!ruby_native_thread_p()) return;
|
731
|
+
if (pthread_mutex_trylock(&lock)) return;
|
732
|
+
|
733
|
+
if (!_stackprof.ignore_gc && rb_during_gc()) {
|
612
734
|
VALUE mode = rb_gc_latest_gc_info(sym_state);
|
613
735
|
if (mode == sym_marking) {
|
614
736
|
_stackprof.unrecorded_gc_marking_samples++;
|
@@ -616,10 +738,19 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
|
616
738
|
_stackprof.unrecorded_gc_sweeping_samples++;
|
617
739
|
}
|
618
740
|
_stackprof.unrecorded_gc_samples++;
|
619
|
-
rb_postponed_job_register_one(0,
|
741
|
+
rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
|
620
742
|
} else {
|
621
|
-
|
743
|
+
#ifdef USE_POSTPONED_JOB
|
744
|
+
rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
|
745
|
+
#else
|
746
|
+
// Buffer a sample immediately; if a sample is already buffered this will
|
747
|
+
// return immediately
|
748
|
+
stackprof_buffer_sample();
|
749
|
+
// Enqueue a job to record the sample
|
750
|
+
rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
|
751
|
+
#endif
|
622
752
|
}
|
753
|
+
pthread_mutex_unlock(&lock);
|
623
754
|
}
|
624
755
|
|
625
756
|
static void
|
@@ -628,7 +759,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
|
|
628
759
|
_stackprof.overall_signals++;
|
629
760
|
if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
|
630
761
|
return;
|
631
|
-
|
762
|
+
stackprof_sample_and_record();
|
632
763
|
}
|
633
764
|
|
634
765
|
static VALUE
|
@@ -638,7 +769,7 @@ stackprof_sample(VALUE self)
|
|
638
769
|
return Qfalse;
|
639
770
|
|
640
771
|
_stackprof.overall_signals++;
|
641
|
-
|
772
|
+
stackprof_sample_and_record();
|
642
773
|
return Qtrue;
|
643
774
|
}
|
644
775
|
|
@@ -653,6 +784,9 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
653
784
|
static void
|
654
785
|
stackprof_gc_mark(void *data)
|
655
786
|
{
|
787
|
+
if (RTEST(_stackprof.metadata))
|
788
|
+
rb_gc_mark(_stackprof.metadata);
|
789
|
+
|
656
790
|
if (RTEST(_stackprof.out))
|
657
791
|
rb_gc_mark(_stackprof.out);
|
658
792
|
|
@@ -714,9 +848,11 @@ Init_stackprof(void)
|
|
714
848
|
S(mode);
|
715
849
|
S(interval);
|
716
850
|
S(raw);
|
851
|
+
S(raw_sample_timestamps);
|
717
852
|
S(raw_timestamp_deltas);
|
718
853
|
S(out);
|
719
854
|
S(metadata);
|
855
|
+
S(ignore_gc);
|
720
856
|
S(frames);
|
721
857
|
S(aggregate);
|
722
858
|
S(state);
|
@@ -735,9 +871,9 @@ Init_stackprof(void)
|
|
735
871
|
_stackprof.raw_samples_capa = 0;
|
736
872
|
_stackprof.raw_sample_index = 0;
|
737
873
|
|
738
|
-
_stackprof.
|
739
|
-
_stackprof.
|
740
|
-
_stackprof.
|
874
|
+
_stackprof.raw_sample_times = NULL;
|
875
|
+
_stackprof.raw_sample_times_len = 0;
|
876
|
+
_stackprof.raw_sample_times_capa = 0;
|
741
877
|
|
742
878
|
_stackprof.empty_string = rb_str_new_cstr("");
|
743
879
|
rb_global_variable(&_stackprof.empty_string);
|
data/lib/stackprof/report.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'pp'
|
2
4
|
require 'digest/md5'
|
3
5
|
|
@@ -38,7 +40,7 @@ module StackProf
|
|
38
40
|
end
|
39
41
|
|
40
42
|
def max_samples
|
41
|
-
@data[:max_samples] ||= frames.max_by{ |
|
43
|
+
@data[:max_samples] ||= @data[:frames].values.max_by{ |frame| frame[:samples] }[:samples]
|
42
44
|
end
|
43
45
|
|
44
46
|
def files
|
@@ -96,15 +98,7 @@ module StackProf
|
|
96
98
|
def print_flamegraph(f, skip_common, alphabetical=false)
|
97
99
|
raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
|
98
100
|
|
99
|
-
stacks =
|
100
|
-
max_x = 0
|
101
|
-
max_y = 0
|
102
|
-
while len = raw.shift
|
103
|
-
max_y = len if len > max_y
|
104
|
-
stack = raw.slice!(0, len+1)
|
105
|
-
stacks << stack
|
106
|
-
max_x += stack.last
|
107
|
-
end
|
101
|
+
stacks, max_x, max_y = flamegraph_stacks(raw)
|
108
102
|
|
109
103
|
stacks.sort! if alphabetical
|
110
104
|
|
@@ -156,8 +150,26 @@ module StackProf
|
|
156
150
|
f.puts '])'
|
157
151
|
end
|
158
152
|
|
153
|
+
def flamegraph_stacks(raw)
|
154
|
+
stacks = []
|
155
|
+
max_x = 0
|
156
|
+
max_y = 0
|
157
|
+
idx = 0
|
158
|
+
|
159
|
+
while len = raw[idx]
|
160
|
+
idx += 1
|
161
|
+
max_y = len if len > max_y
|
162
|
+
stack = raw.slice(idx, len+1)
|
163
|
+
idx += len+1
|
164
|
+
stacks << stack
|
165
|
+
max_x += stack.last
|
166
|
+
end
|
167
|
+
|
168
|
+
return stacks, max_x, max_y
|
169
|
+
end
|
170
|
+
|
159
171
|
def flamegraph_row(f, x, y, weight, addr)
|
160
|
-
frame = frames[addr]
|
172
|
+
frame = @data[:frames][addr]
|
161
173
|
f.print ',' if @rows_started
|
162
174
|
@rows_started = true
|
163
175
|
f.puts %{{"x":#{x},"y":#{y},"width":#{weight},"frame_id":#{addr},"frame":#{frame[:name].dump},"file":#{frame[:file].dump}}}
|
@@ -178,7 +190,7 @@ module StackProf
|
|
178
190
|
weight += stack.last
|
179
191
|
end
|
180
192
|
else
|
181
|
-
frame = frames[val]
|
193
|
+
frame = @data[:frames][val]
|
182
194
|
child_name = "#{ frame[:name] } : #{ frame[:file] }"
|
183
195
|
child_data = convert_to_d3_flame_graph_format(child_name, child_stacks, depth + 1)
|
184
196
|
weight += child_data["value"]
|
@@ -196,15 +208,7 @@ module StackProf
|
|
196
208
|
def print_d3_flamegraph(f=STDOUT, skip_common=true)
|
197
209
|
raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
|
198
210
|
|
199
|
-
stacks =
|
200
|
-
max_x = 0
|
201
|
-
max_y = 0
|
202
|
-
while len = raw.shift
|
203
|
-
max_y = len if len > max_y
|
204
|
-
stack = raw.slice!(0, len+1)
|
205
|
-
stacks << stack
|
206
|
-
max_x += stack.last
|
207
|
-
end
|
211
|
+
stacks, * = flamegraph_stacks(raw)
|
208
212
|
|
209
213
|
# d3-flame-graph supports only alphabetical flamegraphs
|
210
214
|
stacks.sort!
|
@@ -410,7 +414,7 @@ module StackProf
|
|
410
414
|
call, total = info.values_at(:samples, :total_samples)
|
411
415
|
break if total < node_minimum || (limit && index >= limit)
|
412
416
|
|
413
|
-
sample = ''
|
417
|
+
sample = ''.dup
|
414
418
|
sample << "#{call} (%2.1f%%)\\rof " % (call*100.0/overall_samples) if call < total
|
415
419
|
sample << "#{total} (%2.1f%%)\\r" % (total*100.0/overall_samples)
|
416
420
|
fontsize = (1.0 * call / max_samples) * 28 + 10
|
@@ -654,7 +658,8 @@ module StackProf
|
|
654
658
|
end
|
655
659
|
end
|
656
660
|
end
|
661
|
+
rescue SystemCallError
|
662
|
+
f.puts " SOURCE UNAVAILABLE"
|
657
663
|
end
|
658
|
-
|
659
664
|
end
|
660
665
|
end
|
data/lib/stackprof.rb
CHANGED
data/stackprof.gemspec
CHANGED
@@ -1,11 +1,18 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'stackprof'
|
3
|
-
s.version = '0.2.
|
3
|
+
s.version = '0.2.18'
|
4
4
|
s.homepage = 'http://github.com/tmm1/stackprof'
|
5
5
|
|
6
6
|
s.authors = 'Aman Gupta'
|
7
7
|
s.email = 'aman@tmm1.net'
|
8
8
|
|
9
|
+
s.metadata = {
|
10
|
+
'bug_tracker_uri' => 'https://github.com/tmm1/stackprof/issues',
|
11
|
+
'changelog_uri' => "https://github.com/tmm1/stackprof/blob/v#{s.version}/CHANGELOG.md",
|
12
|
+
'documentation_uri' => "https://www.rubydoc.info/gems/stackprof/#{s.version}",
|
13
|
+
'source_code_uri' => "https://github.com/tmm1/stackprof/tree/v#{s.version}"
|
14
|
+
}
|
15
|
+
|
9
16
|
s.files = `git ls-files`.split("\n")
|
10
17
|
s.extensions = 'ext/stackprof/extconf.rb'
|
11
18
|
|