stackprof 0.2.11 → 0.2.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ci.yml +43 -0
- data/.gitignore +2 -0
- data/CHANGELOG.md +18 -0
- data/README.md +87 -68
- data/Rakefile +21 -25
- data/bin/stackprof +115 -71
- data/ext/stackprof/extconf.rb +6 -0
- data/ext/stackprof/stackprof.c +352 -90
- data/lib/stackprof/autorun.rb +19 -0
- data/lib/stackprof/middleware.rb +23 -7
- data/lib/stackprof/report.rb +282 -18
- data/lib/stackprof/truffleruby.rb +37 -0
- data/lib/stackprof.rb +18 -1
- data/stackprof.gemspec +11 -2
- data/test/fixtures/profile.dump +1 -0
- data/test/fixtures/profile.json +1 -0
- data/test/test_middleware.rb +13 -7
- data/test/test_report.rb +24 -0
- data/test/test_stackprof.rb +163 -14
- data/test/test_truffleruby.rb +18 -0
- data/vendor/FlameGraph/flamegraph.pl +751 -85
- metadata +17 -9
- data/.travis.yml +0 -8
- data/Gemfile.lock +0 -27
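The bulk of the change is in the C extension, shown below. It adds a :metadata option (echoed back under :metadata in the results hash), an :ignore_gc option, validation that :interval is between 1 and 1,000,000 microseconds, per-sample timestamps for raw profiles, separate "(marking)" and "(sweeping)" fake frames, and a StackProf.use_postponed_job! toggle. As a rough usage sketch of the new options (option names come from the diff; the block body and metadata values are invented for illustration):

    require "stackprof"

    profile = StackProf.run(mode: :wall, raw: true, interval: 1000,
                            metadata: { revision: "abc123" },  # returned under profile[:metadata]
                            ignore_gc: true) do                # do not emit "(garbage collection)" fake frames
      100_000.times { "x" * 64 }
    end

    profile[:metadata]  #=> {:revision=>"abc123"}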
data/ext/stackprof/stackprof.c
CHANGED
@@ -7,24 +7,90 @@
 **********************************************************************/

 #include <ruby/ruby.h>
+#include <ruby/version.h>
 #include <ruby/debug.h>
 #include <ruby/st.h>
 #include <ruby/io.h>
 #include <ruby/intern.h>
+#include <ruby/vm.h>
 #include <signal.h>
 #include <sys/time.h>
+#include <time.h>
 #include <pthread.h>

 #define BUF_SIZE 2048
+#define MICROSECONDS_IN_SECOND 1000000
+#define NANOSECONDS_IN_SECOND 1000000000
+
+#define FAKE_FRAME_GC INT2FIX(0)
+#define FAKE_FRAME_MARK INT2FIX(1)
+#define FAKE_FRAME_SWEEP INT2FIX(2)
+
+static const char *fake_frame_cstrs[] = {
+    "(garbage collection)",
+    "(marking)",
+    "(sweeping)",
+};
+
+static int stackprof_use_postponed_job = 1;
+static int ruby_vm_running = 0;
+
+#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
+
+#ifdef _POSIX_MONOTONIC_CLOCK
+  #define timestamp_t timespec
+  typedef struct timestamp_t timestamp_t;
+
+  static void capture_timestamp(timestamp_t *ts) {
+      clock_gettime(CLOCK_MONOTONIC, ts);
+  }
+
+  static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
+      int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
+      if (end->tv_nsec < start->tv_nsec) {
+          result -= MICROSECONDS_IN_SECOND;
+          result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
+      } else {
+          result += (end->tv_nsec - start->tv_nsec) / 1000;
+      }
+      return result;
+  }
+
+  static uint64_t timestamp_usec(timestamp_t *ts) {
+      return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000);
+  }
+#else
+  #define timestamp_t timeval
+  typedef struct timestamp_t timestamp_t;
+
+  static void capture_timestamp(timestamp_t *ts) {
+      gettimeofday(ts, NULL);
+  }
+
+  static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
+      struct timeval diff;
+      timersub(end, start, &diff);
+      return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec;
+  }
+
+  static uint64_t timestamp_usec(timestamp_t *ts) {
+      return (MICROSECONDS_IN_SECOND * ts->tv_sec) + ts->tv_usec;
+  }
+#endif

 typedef struct {
     size_t total_samples;
     size_t caller_samples;
-    int already_accounted_in_total;
+    size_t seen_at_sample_number;
     st_table *edges;
     st_table *lines;
 } frame_data_t;

+typedef struct {
+    uint64_t timestamp_usec;
+    int64_t delta_usec;
+} sample_time_t;
+
 static struct {
     int running;
     int raw;
@@ -33,33 +99,42 @@ static struct {
     VALUE mode;
     VALUE interval;
     VALUE out;
+    VALUE metadata;
+    int ignore_gc;

     VALUE *raw_samples;
     size_t raw_samples_len;
     size_t raw_samples_capa;
     size_t raw_sample_index;

-    struct timeval last_sample_at;
-    int *raw_timestamp_deltas;
-    size_t raw_timestamp_deltas_len;
-    size_t raw_timestamp_deltas_capa;
+    struct timestamp_t last_sample_at;
+    sample_time_t *raw_sample_times;
+    size_t raw_sample_times_len;
+    size_t raw_sample_times_capa;

     size_t overall_signals;
     size_t overall_samples;
     size_t during_gc;
     size_t unrecorded_gc_samples;
+    size_t unrecorded_gc_marking_samples;
+    size_t unrecorded_gc_sweeping_samples;
     st_table *frames;

-    VALUE fake_gc_frame;
-    VALUE fake_gc_frame_name;
+    VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
     VALUE empty_string;
+
+    int buffer_count;
+    sample_time_t buffer_time;
     VALUE frames_buffer[BUF_SIZE];
     int lines_buffer[BUF_SIZE];
+
+    pthread_t target_thread;
 } _stackprof;

 static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
 static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
-static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_frames, sym_out, sym_aggregate, sym_raw_timestamp_deltas;
+static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
+static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
 static VALUE sym_gc_samples, objtracer;
 static VALUE gc_hook;
 static VALUE rb_mStackProf;
@@ -72,8 +147,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
 {
     struct sigaction sa;
     struct itimerval timer;
-    VALUE opts = Qnil, mode = Qnil, interval = Qnil, out = Qfalse;
+    VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
+    int ignore_gc = 0;
     int raw = 0, aggregate = 1;
+    VALUE metadata_val;

     if (_stackprof.running)
         return Qfalse;
@@ -84,6 +161,17 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
         mode = rb_hash_aref(opts, sym_mode);
         interval = rb_hash_aref(opts, sym_interval);
         out = rb_hash_aref(opts, sym_out);
+        if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
+            ignore_gc = 1;
+        }
+
+        metadata_val = rb_hash_aref(opts, sym_metadata);
+        if (RTEST(metadata_val)) {
+            if (!RB_TYPE_P(metadata_val, T_HASH))
+                rb_raise(rb_eArgError, "metadata should be a hash");
+
+            metadata = metadata_val;
+        }

         if (RTEST(rb_hash_aref(opts, sym_raw)))
             raw = 1;
@@ -92,6 +180,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
     }
     if (!RTEST(mode)) mode = sym_wall;

+    if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
+        rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
+    }
+
     if (!_stackprof.frames) {
         _stackprof.frames = st_init_numtable();
         _stackprof.overall_signals = 0;
@@ -128,10 +220,13 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
     _stackprof.aggregate = aggregate;
     _stackprof.mode = mode;
     _stackprof.interval = interval;
+    _stackprof.ignore_gc = ignore_gc;
+    _stackprof.metadata = metadata;
     _stackprof.out = out;
+    _stackprof.target_thread = pthread_self();

     if (raw) {
-        gettimeofday(&_stackprof.last_sample_at, NULL);
+        capture_timestamp(&_stackprof.last_sample_at);
     }

     return Qtrue;
@@ -166,13 +261,19 @@ stackprof_stop(VALUE self)
     return Qtrue;
 }

+#if SIZEOF_VOIDP == SIZEOF_LONG
+# define PTR2NUM(x) (LONG2NUM((long)(x)))
+#else
+# define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
+#endif
+
 static int
 frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
 {
     VALUE edges = (VALUE)arg;

     intptr_t weight = (intptr_t)val;
-    rb_hash_aset(edges, rb_obj_id((VALUE)key), INT2FIX(weight));
+    rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
     return ST_CONTINUE;
 }

@@ -199,10 +300,10 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
     VALUE name, file, edges, lines;
     VALUE line;

-    rb_hash_aset(results, rb_obj_id(frame), details);
+    rb_hash_aset(results, PTR2NUM(frame), details);

-    if (frame == _stackprof.fake_gc_frame) {
-        name = _stackprof.fake_gc_frame_name;
+    if (FIXNUM_P(frame)) {
+        name = _stackprof.fake_frame_names[FIX2INT(frame)];
         file = _stackprof.empty_string;
         line = INT2FIX(0);
     } else {
@@ -258,6 +359,9 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
     rb_hash_aset(results, sym_samples, SIZET2NUM(_stackprof.overall_samples));
     rb_hash_aset(results, sym_gc_samples, SIZET2NUM(_stackprof.during_gc));
     rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples));
+    rb_hash_aset(results, sym_metadata, _stackprof.metadata);
+
+    _stackprof.metadata = Qnil;

     frames = rb_hash_new();
     rb_hash_aset(results, sym_frames, frames);
@@ -268,6 +372,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)

     if (_stackprof.raw && _stackprof.raw_samples_len) {
         size_t len, n, o;
+        VALUE raw_sample_timestamps, raw_timestamp_deltas;
         VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);

         for (n = 0; n < _stackprof.raw_samples_len; n++) {
@@ -275,7 +380,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
             rb_ary_push(raw_samples, SIZET2NUM(len));

             for (o = 0, n++; o < len; n++, o++)
-                rb_ary_push(raw_samples, rb_obj_id(_stackprof.raw_samples[n]));
+                rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n]));
             rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
         }

@@ -287,17 +392,20 @@ stackprof_results(int argc, VALUE *argv, VALUE self)

         rb_hash_aset(results, sym_raw, raw_samples);

-        VALUE raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len);
+        raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
+        raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);

-        for (n = 0; n < _stackprof.raw_timestamp_deltas_len; n++) {
-            rb_ary_push(raw_timestamp_deltas, INT2FIX(_stackprof.raw_timestamp_deltas[n]));
+        for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
+            rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
+            rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
         }

-        free(_stackprof.raw_timestamp_deltas);
-        _stackprof.raw_timestamp_deltas = NULL;
-        _stackprof.raw_timestamp_deltas_len = 0;
-        _stackprof.raw_timestamp_deltas_capa = 0;
+        free(_stackprof.raw_sample_times);
+        _stackprof.raw_sample_times = NULL;
+        _stackprof.raw_sample_times_len = 0;
+        _stackprof.raw_sample_times_capa = 0;

+        rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
         rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);

         _stackprof.raw = 0;
@@ -308,11 +416,12 @@ stackprof_results(int argc, VALUE *argv, VALUE self)

     if (RTEST(_stackprof.out)) {
         VALUE file;
-        if (RB_TYPE_P(_stackprof.out, T_STRING)) {
-            file = rb_file_open_str(_stackprof.out, "w");
-        } else {
+        if (rb_respond_to(_stackprof.out, rb_intern("to_io"))) {
             file = rb_io_check_io(_stackprof.out);
+        } else {
+            file = rb_file_open_str(_stackprof.out, "w");
         }
+
         rb_marshal_dump(results, file);
         rb_io_flush(file);
         _stackprof.out = Qnil;
@@ -376,27 +485,39 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
 }

 void
-stackprof_record_sample_for_stack(int num, int timestamp_delta)
+stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
 {
     int i, n;
     VALUE prev_frame = Qnil;

     _stackprof.overall_samples++;

-    if (_stackprof.raw) {
+    if (_stackprof.raw && num > 0) {
         int found = 0;

+        /* If there's no sample buffer allocated, then allocate one. The buffer
+         * format is the number of frames (num), then the list of frames (from
+         * `_stackprof.raw_samples`), followed by the number of times this
+         * particular stack has been seen in a row. Each "new" stack is added
+         * to the end of the buffer, but if the previous stack is the same as
+         * the current stack, the counter will be incremented. */
         if (!_stackprof.raw_samples) {
             _stackprof.raw_samples_capa = num * 100;
             _stackprof.raw_samples = malloc(sizeof(VALUE) * _stackprof.raw_samples_capa);
         }

+        /* If we can't fit all the samples in the buffer, double the buffer size. */
         while (_stackprof.raw_samples_capa <= _stackprof.raw_samples_len + (num + 2)) {
             _stackprof.raw_samples_capa *= 2;
             _stackprof.raw_samples = realloc(_stackprof.raw_samples, sizeof(VALUE) * _stackprof.raw_samples_capa);
         }

+        /* If we've seen this stack before in the last sample, then increment the "seen" count. */
         if (_stackprof.raw_samples_len > 0 && _stackprof.raw_samples[_stackprof.raw_sample_index] == (VALUE)num) {
+            /* The number of samples could have been the same, but the stack
+             * might be different, so we need to check the stack here. Stacks
+             * in the raw buffer are stored in the opposite direction of stacks
+             * in the frames buffer that came from Ruby. */
             for (i = num-1, n = 0; i >= 0; i--, n++) {
                 VALUE frame = _stackprof.frames_buffer[i];
                 if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != frame)
@@ -408,7 +529,11 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
             }
         }

+        /* If we haven't seen the stack, then add it to the buffer along with
+         * the length of the stack and a 1 for the "seen" count */
         if (!found) {
+            /* Bump the `raw_sample_index` up so that the next iteration can
+             * find the previously recorded stack size. */
             _stackprof.raw_sample_index = _stackprof.raw_samples_len;
             _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num;
             for (i = num-1; i >= 0; i--) {
@@ -418,23 +543,24 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
             _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1;
         }

-        if (!_stackprof.raw_timestamp_deltas) {
-            _stackprof.raw_timestamp_deltas_capa = 100;
-            _stackprof.raw_timestamp_deltas = malloc(sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
-            _stackprof.raw_timestamp_deltas_len = 0;
+        /* If there's no timestamp delta buffer, allocate one */
+        if (!_stackprof.raw_sample_times) {
+            _stackprof.raw_sample_times_capa = 100;
+            _stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
+            _stackprof.raw_sample_times_len = 0;
         }

-        while (_stackprof.raw_timestamp_deltas_capa <= _stackprof.raw_timestamp_deltas_len + 1) {
-            _stackprof.raw_timestamp_deltas_capa *= 2;
-            _stackprof.raw_timestamp_deltas = realloc(_stackprof.raw_timestamp_deltas, sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
+        /* Double the buffer size if it's too small */
+        while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
+            _stackprof.raw_sample_times_capa *= 2;
+            _stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
         }

-        _stackprof.raw_timestamp_deltas[_stackprof.raw_timestamp_deltas_len++] = timestamp_delta;
-    }
-
-    for (i = 0; i < num; i++) {
-        VALUE frame = _stackprof.frames_buffer[i];
-        sample_for(frame)->already_accounted_in_total = 0;
+        /* Store the time delta (which is the amount of microseconds between samples). */
+        _stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
+            .timestamp_usec = sample_timestamp,
+            .delta_usec = timestamp_delta,
+        };
     }

     for (i = 0; i < num; i++) {
@@ -442,9 +568,10 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
         VALUE frame = _stackprof.frames_buffer[i];
         frame_data_t *frame_data = sample_for(frame);

-        if (!frame_data->already_accounted_in_total)
+        if (frame_data->seen_at_sample_number != _stackprof.overall_samples) {
             frame_data->total_samples++;
-        frame_data->already_accounted_in_total = 1;
+        }
+        frame_data->seen_at_sample_number = _stackprof.overall_samples;

         if (i == 0) {
             frame_data->caller_samples++;
@@ -455,10 +582,10 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
         }

         if (_stackprof.aggregate && line > 0) {
-            if (!frame_data->lines)
-                frame_data->lines = st_init_numtable();
             size_t half = (size_t)1<<(8*SIZEOF_SIZE_T/2);
             size_t increment = i == 0 ? half + 1 : half;
+            if (!frame_data->lines)
+                frame_data->lines = st_init_numtable();
             st_numtable_increment(frame_data->lines, (st_data_t)line, increment);
         }

@@ -466,90 +593,183 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
     }

     if (_stackprof.raw) {
-        gettimeofday(&_stackprof.last_sample_at, NULL);
+        capture_timestamp(&_stackprof.last_sample_at);
     }
 }

+// buffer the current profile frames
+// This must be async-signal-safe
+// Returns immediately if another set of frames are already in the buffer
 void
-stackprof_record_sample()
+stackprof_buffer_sample(void)
 {
-    int timestamp_delta = 0;
+    uint64_t start_timestamp = 0;
+    int64_t timestamp_delta = 0;
+    int num;
+
+    if (_stackprof.buffer_count > 0) {
+        // Another sample is already pending
+        return;
+    }
+
     if (_stackprof.raw) {
-        struct timeval t;
-        struct timeval diff;
-        gettimeofday(&t, NULL);
-        timersub(&t, &_stackprof.last_sample_at, &diff);
-        timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
+        struct timestamp_t t;
+        capture_timestamp(&t);
+        start_timestamp = timestamp_usec(&t);
+        timestamp_delta = delta_usec(&_stackprof.last_sample_at, &t);
     }
-    int num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
-    stackprof_record_sample_for_stack(num, timestamp_delta);
+
+    num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
+
+    _stackprof.buffer_count = num;
+    _stackprof.buffer_time.timestamp_usec = start_timestamp;
+    _stackprof.buffer_time.delta_usec = timestamp_delta;
 }

 void
-stackprof_record_gc_samples()
+stackprof_record_gc_samples(void)
 {
-    int delta_to_first_unrecorded_gc_sample = 0;
+    int64_t delta_to_first_unrecorded_gc_sample = 0;
+    uint64_t start_timestamp = 0;
+    size_t i;
     if (_stackprof.raw) {
-        struct timeval t;
-        struct timeval diff;
-        gettimeofday(&t, NULL);
-        timersub(&t, &_stackprof.last_sample_at, &diff);
+        struct timestamp_t t;
+        capture_timestamp(&t);
+        start_timestamp = timestamp_usec(&t);

         // We don't know when the GC samples were actually marked, so let's
         // assume that they were marked at a perfectly regular interval.
-        delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
+        delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
         if (delta_to_first_unrecorded_gc_sample < 0) {
             delta_to_first_unrecorded_gc_sample = 0;
         }
     }

-    int i;
-
-    _stackprof.frames_buffer[0] = _stackprof.fake_gc_frame;
-    _stackprof.lines_buffer[0] = 0;
-
     for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
-        int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
-        stackprof_record_sample_for_stack(1, timestamp_delta);
+        int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
+
+        if (_stackprof.unrecorded_gc_marking_samples) {
+            _stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
+            _stackprof.lines_buffer[0] = 0;
+            _stackprof.frames_buffer[1] = FAKE_FRAME_GC;
+            _stackprof.lines_buffer[1] = 0;
+            _stackprof.unrecorded_gc_marking_samples--;
+
+            stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
+        } else if (_stackprof.unrecorded_gc_sweeping_samples) {
+            _stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
+            _stackprof.lines_buffer[0] = 0;
+            _stackprof.frames_buffer[1] = FAKE_FRAME_GC;
+            _stackprof.lines_buffer[1] = 0;
+
+            _stackprof.unrecorded_gc_sweeping_samples--;
+
+            stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
+        } else {
+            _stackprof.frames_buffer[0] = FAKE_FRAME_GC;
+            _stackprof.lines_buffer[0] = 0;
+            stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
+        }
     }
     _stackprof.during_gc += _stackprof.unrecorded_gc_samples;
     _stackprof.unrecorded_gc_samples = 0;
+    _stackprof.unrecorded_gc_marking_samples = 0;
+    _stackprof.unrecorded_gc_sweeping_samples = 0;
 }

+// record the sample previously buffered by stackprof_buffer_sample
 static void
-stackprof_gc_job_handler(void *data)
+stackprof_record_buffer(void)
+{
+    stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
+
+    // reset the buffer
+    _stackprof.buffer_count = 0;
+}
+
+static void
+stackprof_sample_and_record(void)
+{
+    stackprof_buffer_sample();
+    stackprof_record_buffer();
+}
+
+static void
+stackprof_job_record_gc(void *data)
 {
-    static int in_signal_handler = 0;
-    if (in_signal_handler) return;
     if (!_stackprof.running) return;

-    in_signal_handler++;
     stackprof_record_gc_samples();
-    in_signal_handler--;
 }

 static void
-stackprof_job_handler(void *data)
+stackprof_job_sample_and_record(void *data)
 {
-    static int in_signal_handler = 0;
-    if (in_signal_handler) return;
     if (!_stackprof.running) return;

-    in_signal_handler++;
-    stackprof_record_sample();
-    in_signal_handler--;
+    stackprof_sample_and_record();
+}
+
+static void
+stackprof_job_record_buffer(void *data)
+{
+    if (!_stackprof.running) return;
+
+    stackprof_record_buffer();
 }

 static void
 stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
 {
+    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
     _stackprof.overall_signals++;
-    if (rb_during_gc()) {
+
+    if (!_stackprof.running) return;
+
+    // There's a possibility that the signal handler is invoked *after* the Ruby
+    // VM has been shut down (e.g. after ruby_cleanup(0)). In this case, things
+    // that rely on global VM state (e.g. rb_during_gc) will segfault.
+    if (!ruby_vm_running) return;
+
+    if (_stackprof.mode == sym_wall) {
+        // In "wall" mode, the SIGALRM signal will arrive at an arbitrary thread.
+        // In order to provide more useful results, especially under threaded web
+        // servers, we want to forward this signal to the original thread
+        // StackProf was started from.
+        // According to POSIX.1-2008 TC1 pthread_kill and pthread_self should be
+        // async-signal-safe.
+        if (pthread_self() != _stackprof.target_thread) {
+            pthread_kill(_stackprof.target_thread, sig);
+            return;
+        }
+    } else {
+        if (!ruby_native_thread_p()) return;
+    }
+
+    if (pthread_mutex_trylock(&lock)) return;
+
+    if (!_stackprof.ignore_gc && rb_during_gc()) {
+        VALUE mode = rb_gc_latest_gc_info(sym_state);
+        if (mode == sym_marking) {
+            _stackprof.unrecorded_gc_marking_samples++;
+        } else if (mode == sym_sweeping) {
+            _stackprof.unrecorded_gc_sweeping_samples++;
+        }
         _stackprof.unrecorded_gc_samples++;
-        rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0);
+        rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
     } else {
-        rb_postponed_job_register_one(0, stackprof_job_handler, (void*)0);
+        if (stackprof_use_postponed_job) {
+            rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
+        } else {
+            // Buffer a sample immediately, if an existing sample exists this will
+            // return immediately
+            stackprof_buffer_sample();
+            // Enqueue a job to record the sample
+            rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
+        }
     }
+    pthread_mutex_unlock(&lock);
 }

 static void
@@ -558,7 +778,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
     _stackprof.overall_signals++;
     if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
         return;
-    stackprof_job_handler(0);
+    stackprof_sample_and_record();
 }

 static VALUE
@@ -568,7 +788,7 @@ stackprof_sample(VALUE self)
         return Qfalse;

     _stackprof.overall_signals++;
-    stackprof_job_handler(0);
+    stackprof_sample_and_record();
     return Qtrue;
 }

@@ -583,11 +803,18 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
 static void
 stackprof_gc_mark(void *data)
 {
+    if (RTEST(_stackprof.metadata))
+        rb_gc_mark(_stackprof.metadata);
+
     if (RTEST(_stackprof.out))
         rb_gc_mark(_stackprof.out);

     if (_stackprof.frames)
         st_foreach(_stackprof.frames, frame_mark_i, 0);
+
+    for (int i = 0; i < _stackprof.buffer_count; i++) {
+        rb_gc_mark(_stackprof.frames_buffer[i]);
+    }
 }

 static void
@@ -622,9 +849,32 @@ stackprof_atfork_child(void)
     stackprof_stop(rb_mStackProf);
 }

+static VALUE
+stackprof_use_postponed_job_l(VALUE self)
+{
+    stackprof_use_postponed_job = 1;
+    return Qnil;
+}
+
+static void
+stackprof_at_exit(ruby_vm_t* vm)
+{
+    ruby_vm_running = 0;
+}
+
 void
 Init_stackprof(void)
 {
+    size_t i;
+    /*
+     * As of Ruby 3.0, it should be safe to read stack frames at any time, unless YJIT is enabled
+     * See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
+     */
+    stackprof_use_postponed_job = RUBY_API_VERSION_MAJOR < 3;
+
+    ruby_vm_running = 1;
+    ruby_vm_at_exit(stackprof_at_exit);
+
 #define S(name) sym_##name = ID2SYM(rb_intern(#name));
     S(object);
     S(custom);
@@ -643,12 +893,21 @@ Init_stackprof(void)
     S(mode);
     S(interval);
     S(raw);
+    S(raw_sample_timestamps);
     S(raw_timestamp_deltas);
     S(out);
+    S(metadata);
+    S(ignore_gc);
     S(frames);
     S(aggregate);
+    S(state);
+    S(marking);
+    S(sweeping);
 #undef S

+    /* Need to run this to warm the symbol table before we call this during GC */
+    rb_gc_latest_gc_info(sym_state);
+
     gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, &_stackprof);
     rb_global_variable(&gc_hook);

@@ -657,16 +916,18 @@ Init_stackprof(void)
     _stackprof.raw_samples_capa = 0;
     _stackprof.raw_sample_index = 0;

-    _stackprof.raw_timestamp_deltas = NULL;
-    _stackprof.raw_timestamp_deltas_len = 0;
-    _stackprof.raw_timestamp_deltas_capa = 0;
+    _stackprof.raw_sample_times = NULL;
+    _stackprof.raw_sample_times_len = 0;
+    _stackprof.raw_sample_times_capa = 0;

-    _stackprof.fake_gc_frame = INT2FIX(0x9C);
     _stackprof.empty_string = rb_str_new_cstr("");
-    _stackprof.fake_gc_frame_name = rb_str_new_cstr("(garbage collection)");
-    rb_global_variable(&_stackprof.fake_gc_frame_name);
     rb_global_variable(&_stackprof.empty_string);

+    for (i = 0; i < TOTAL_FAKE_FRAMES; i++) {
+        _stackprof.fake_frame_names[i] = rb_str_new_cstr(fake_frame_cstrs[i]);
+        rb_global_variable(&_stackprof.fake_frame_names[i]);
+    }
+
     rb_mStackProf = rb_define_module("StackProf");
     rb_define_singleton_method(rb_mStackProf, "running?", stackprof_running_p, 0);
     rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);
@@ -674,6 +935,7 @@ Init_stackprof(void)
     rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
     rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
    rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
+    rb_define_singleton_method(rb_mStackProf, "use_postponed_job!", stackprof_use_postponed_job_l, 0);

     pthread_atfork(stackprof_atfork_prepare, stackprof_atfork_parent, stackprof_atfork_child);
 }