stackprof 0.2.12 → 0.2.26
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/workflows/ci.yml +43 -0
- data/.gitignore +1 -1
- data/CHANGELOG.md +17 -2
- data/README.md +66 -51
- data/Rakefile +21 -25
- data/bin/stackprof +115 -71
- data/ext/stackprof/extconf.rb +6 -0
- data/ext/stackprof/stackprof.c +392 -84
- data/lib/stackprof/autorun.rb +19 -0
- data/lib/stackprof/middleware.rb +8 -2
- data/lib/stackprof/report.rb +280 -16
- data/lib/stackprof/truffleruby.rb +37 -0
- data/lib/stackprof.rb +22 -1
- data/stackprof.gemspec +11 -3
- data/test/fixtures/profile.dump +1 -0
- data/test/fixtures/profile.json +1 -0
- data/test/test_middleware.rb +36 -17
- data/test/test_report.rb +25 -1
- data/test/test_stackprof.rb +153 -15
- data/test/test_truffleruby.rb +18 -0
- data/vendor/FlameGraph/flamegraph.pl +751 -85
- metadata +16 -23
- data/.travis.yml +0 -8
- data/Gemfile.lock +0 -27
data/ext/stackprof/stackprof.c
CHANGED
@@ -7,15 +7,76 @@
|
|
7
7
|
**********************************************************************/
|
8
8
|
|
9
9
|
#include <ruby/ruby.h>
|
10
|
+
#include <ruby/version.h>
|
10
11
|
#include <ruby/debug.h>
|
11
12
|
#include <ruby/st.h>
|
12
13
|
#include <ruby/io.h>
|
13
14
|
#include <ruby/intern.h>
|
15
|
+
#include <ruby/vm.h>
|
14
16
|
#include <signal.h>
|
15
17
|
#include <sys/time.h>
|
18
|
+
#include <time.h>
|
16
19
|
#include <pthread.h>
|
17
20
|
|
18
21
|
/* Capacity (in frames) of the per-sample frame and line buffers. */
#define BUF_SIZE 2048
#define MICROSECONDS_IN_SECOND 1000000
#define NANOSECONDS_IN_SECOND 1000000000

/*
 * Synthetic frames used to attribute samples taken during garbage
 * collection. Each value is a Fixnum whose integer payload is the index of
 * the matching label in fake_frame_cstrs below.
 */
#define FAKE_FRAME_GC    INT2FIX(0)
#define FAKE_FRAME_MARK  INT2FIX(1)
#define FAKE_FRAME_SWEEP INT2FIX(2)

/* Display names of the synthetic frames, indexed by the FAKE_FRAME_* payload. */
static const char *fake_frame_cstrs[] = {
    "(garbage collection)",
    "(marking)",
    "(sweeping)",
};

/* Non-zero: take the sample inside a postponed job instead of in the signal handler. */
static int stackprof_use_postponed_job = 1;
/* Non-zero while the Ruby VM is up; consulted by the signal handler before touching VM state. */
static int ruby_vm_running = 0;

/* Number of synthetic frames; derived from the array so it tracks the element type. */
#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(fake_frame_cstrs[0]))
|
39
|
+
|
40
|
+
#ifdef _POSIX_MONOTONIC_CLOCK
|
41
|
+
#define timestamp_t timespec
|
42
|
+
typedef struct timestamp_t timestamp_t;
|
43
|
+
|
44
|
+
static void capture_timestamp(timestamp_t *ts) {
|
45
|
+
clock_gettime(CLOCK_MONOTONIC, ts);
|
46
|
+
}
|
47
|
+
|
48
|
+
static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
|
49
|
+
int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
|
50
|
+
if (end->tv_nsec < start->tv_nsec) {
|
51
|
+
result -= MICROSECONDS_IN_SECOND;
|
52
|
+
result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
|
53
|
+
} else {
|
54
|
+
result += (end->tv_nsec - start->tv_nsec) / 1000;
|
55
|
+
}
|
56
|
+
return result;
|
57
|
+
}
|
58
|
+
|
59
|
+
static uint64_t timestamp_usec(timestamp_t *ts) {
|
60
|
+
return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000);
|
61
|
+
}
|
62
|
+
#else
|
63
|
+
#define timestamp_t timeval
|
64
|
+
typedef struct timestamp_t timestamp_t;
|
65
|
+
|
66
|
+
static void capture_timestamp(timestamp_t *ts) {
|
67
|
+
gettimeofday(ts, NULL);
|
68
|
+
}
|
69
|
+
|
70
|
+
static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
|
71
|
+
struct timeval diff;
|
72
|
+
timersub(end, start, &diff);
|
73
|
+
return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec;
|
74
|
+
}
|
75
|
+
|
76
|
+
static uint64_t timestamp_usec(timestamp_t *ts) {
|
77
|
+
return (MICROSECONDS_IN_SECOND * ts.tv_sec) + diff.tv_usec
|
78
|
+
}
|
79
|
+
#endif
|
19
80
|
|
20
81
|
typedef struct {
|
21
82
|
size_t total_samples;
|
@@ -25,6 +86,11 @@ typedef struct {
|
|
25
86
|
st_table *lines;
|
26
87
|
} frame_data_t;
|
27
88
|
|
89
|
+
/* Timing information recorded for one raw sample. */
typedef struct {
    uint64_t timestamp_usec; /* time the sample was captured, in microseconds */
    int64_t delta_usec;      /* microseconds elapsed since the previous sample */
} sample_time_t;
|
93
|
+
|
28
94
|
static struct {
|
29
95
|
int running;
|
30
96
|
int raw;
|
@@ -33,33 +99,44 @@ static struct {
|
|
33
99
|
VALUE mode;
|
34
100
|
VALUE interval;
|
35
101
|
VALUE out;
|
102
|
+
VALUE metadata;
|
103
|
+
int ignore_gc;
|
36
104
|
|
37
|
-
|
105
|
+
uint64_t *raw_samples;
|
38
106
|
size_t raw_samples_len;
|
39
107
|
size_t raw_samples_capa;
|
40
108
|
size_t raw_sample_index;
|
41
109
|
|
42
|
-
struct
|
43
|
-
|
44
|
-
size_t
|
45
|
-
size_t
|
110
|
+
struct timestamp_t last_sample_at;
|
111
|
+
sample_time_t *raw_sample_times;
|
112
|
+
size_t raw_sample_times_len;
|
113
|
+
size_t raw_sample_times_capa;
|
46
114
|
|
47
115
|
size_t overall_signals;
|
48
116
|
size_t overall_samples;
|
49
117
|
size_t during_gc;
|
50
118
|
size_t unrecorded_gc_samples;
|
119
|
+
size_t unrecorded_gc_marking_samples;
|
120
|
+
size_t unrecorded_gc_sweeping_samples;
|
51
121
|
st_table *frames;
|
52
122
|
|
53
|
-
|
54
|
-
|
123
|
+
timestamp_t gc_start_timestamp;
|
124
|
+
|
125
|
+
VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
|
55
126
|
VALUE empty_string;
|
127
|
+
|
128
|
+
int buffer_count;
|
129
|
+
sample_time_t buffer_time;
|
56
130
|
VALUE frames_buffer[BUF_SIZE];
|
57
131
|
int lines_buffer[BUF_SIZE];
|
132
|
+
|
133
|
+
pthread_t target_thread;
|
58
134
|
} _stackprof;
|
59
135
|
|
60
136
|
/* Symbols used as option keys, result-hash keys and GC state names. */
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_raw_lines, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
static VALUE sym_gc_samples, objtracer;
/* Typed-data object wrapping _stackprof; registered as a GC root in Init_stackprof. */
static VALUE gc_hook;
/* The StackProf module object. */
static VALUE rb_mStackProf;
|
@@ -72,8 +149,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
72
149
|
{
|
73
150
|
struct sigaction sa;
|
74
151
|
struct itimerval timer;
|
75
|
-
VALUE opts = Qnil, mode = Qnil, interval = Qnil, out = Qfalse;
|
152
|
+
VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
|
153
|
+
int ignore_gc = 0;
|
76
154
|
int raw = 0, aggregate = 1;
|
155
|
+
VALUE metadata_val;
|
77
156
|
|
78
157
|
if (_stackprof.running)
|
79
158
|
return Qfalse;
|
@@ -84,6 +163,17 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
84
163
|
mode = rb_hash_aref(opts, sym_mode);
|
85
164
|
interval = rb_hash_aref(opts, sym_interval);
|
86
165
|
out = rb_hash_aref(opts, sym_out);
|
166
|
+
if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
|
167
|
+
ignore_gc = 1;
|
168
|
+
}
|
169
|
+
|
170
|
+
metadata_val = rb_hash_aref(opts, sym_metadata);
|
171
|
+
if (RTEST(metadata_val)) {
|
172
|
+
if (!RB_TYPE_P(metadata_val, T_HASH))
|
173
|
+
rb_raise(rb_eArgError, "metadata should be a hash");
|
174
|
+
|
175
|
+
metadata = metadata_val;
|
176
|
+
}
|
87
177
|
|
88
178
|
if (RTEST(rb_hash_aref(opts, sym_raw)))
|
89
179
|
raw = 1;
|
@@ -92,6 +182,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
92
182
|
}
|
93
183
|
if (!RTEST(mode)) mode = sym_wall;
|
94
184
|
|
185
|
+
if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
|
186
|
+
rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
|
187
|
+
}
|
188
|
+
|
95
189
|
if (!_stackprof.frames) {
|
96
190
|
_stackprof.frames = st_init_numtable();
|
97
191
|
_stackprof.overall_signals = 0;
|
@@ -128,10 +222,13 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
128
222
|
_stackprof.aggregate = aggregate;
|
129
223
|
_stackprof.mode = mode;
|
130
224
|
_stackprof.interval = interval;
|
225
|
+
_stackprof.ignore_gc = ignore_gc;
|
226
|
+
_stackprof.metadata = metadata;
|
131
227
|
_stackprof.out = out;
|
228
|
+
_stackprof.target_thread = pthread_self();
|
132
229
|
|
133
230
|
if (raw) {
|
134
|
-
|
231
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
135
232
|
}
|
136
233
|
|
137
234
|
return Qtrue;
|
@@ -166,13 +263,19 @@ stackprof_stop(VALUE self)
|
|
166
263
|
return Qtrue;
|
167
264
|
}
|
168
265
|
|
266
|
+
/*
 * PTR2NUM(x): convert a pointer-sized value to a Ruby Integer. Uses
 * LONG2NUM when `long` is as wide as a pointer, LL2NUM otherwise.
 */
#if SIZEOF_VOIDP == SIZEOF_LONG
#  define PTR2NUM(x)   (LONG2NUM((long)(x)))
#else
#  define PTR2NUM(x)   (LL2NUM((LONG_LONG)(x)))
#endif
|
271
|
+
|
169
272
|
static int
|
170
273
|
frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
|
171
274
|
{
|
172
275
|
VALUE edges = (VALUE)arg;
|
173
276
|
|
174
277
|
intptr_t weight = (intptr_t)val;
|
175
|
-
rb_hash_aset(edges,
|
278
|
+
rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
|
176
279
|
return ST_CONTINUE;
|
177
280
|
}
|
178
281
|
|
@@ -199,10 +302,10 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
199
302
|
VALUE name, file, edges, lines;
|
200
303
|
VALUE line;
|
201
304
|
|
202
|
-
rb_hash_aset(results,
|
305
|
+
rb_hash_aset(results, PTR2NUM(frame), details);
|
203
306
|
|
204
|
-
if (frame
|
205
|
-
name = _stackprof.
|
307
|
+
if (FIXNUM_P(frame)) {
|
308
|
+
name = _stackprof.fake_frame_names[FIX2INT(frame)];
|
206
309
|
file = _stackprof.empty_string;
|
207
310
|
line = INT2FIX(0);
|
208
311
|
} else {
|
@@ -258,6 +361,9 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
258
361
|
rb_hash_aset(results, sym_samples, SIZET2NUM(_stackprof.overall_samples));
|
259
362
|
rb_hash_aset(results, sym_gc_samples, SIZET2NUM(_stackprof.during_gc));
|
260
363
|
rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples));
|
364
|
+
rb_hash_aset(results, sym_metadata, _stackprof.metadata);
|
365
|
+
|
366
|
+
_stackprof.metadata = Qnil;
|
261
367
|
|
262
368
|
frames = rb_hash_new();
|
263
369
|
rb_hash_aset(results, sym_frames, frames);
|
@@ -268,16 +374,25 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
268
374
|
|
269
375
|
if (_stackprof.raw && _stackprof.raw_samples_len) {
|
270
376
|
size_t len, n, o;
|
271
|
-
VALUE raw_timestamp_deltas;
|
377
|
+
VALUE raw_sample_timestamps, raw_timestamp_deltas;
|
272
378
|
VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
|
379
|
+
VALUE raw_lines = rb_ary_new_capa(_stackprof.raw_samples_len);
|
273
380
|
|
274
381
|
for (n = 0; n < _stackprof.raw_samples_len; n++) {
|
275
382
|
len = (size_t)_stackprof.raw_samples[n];
|
276
383
|
rb_ary_push(raw_samples, SIZET2NUM(len));
|
384
|
+
rb_ary_push(raw_lines, SIZET2NUM(len));
|
385
|
+
|
386
|
+
for (o = 0, n++; o < len; n++, o++) {
|
387
|
+
// Line is in the upper 16 bits
|
388
|
+
rb_ary_push(raw_lines, INT2NUM(_stackprof.raw_samples[n] >> 48));
|
389
|
+
|
390
|
+
VALUE frame = _stackprof.raw_samples[n] & ~((uint64_t)0xFFFF << 48);
|
391
|
+
rb_ary_push(raw_samples, PTR2NUM(frame));
|
392
|
+
}
|
277
393
|
|
278
|
-
for (o = 0, n++; o < len; n++, o++)
|
279
|
-
rb_ary_push(raw_samples, rb_obj_id(_stackprof.raw_samples[n]));
|
280
394
|
rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
|
395
|
+
rb_ary_push(raw_lines, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
|
281
396
|
}
|
282
397
|
|
283
398
|
free(_stackprof.raw_samples);
|
@@ -287,18 +402,22 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
287
402
|
_stackprof.raw_sample_index = 0;
|
288
403
|
|
289
404
|
rb_hash_aset(results, sym_raw, raw_samples);
|
405
|
+
rb_hash_aset(results, sym_raw_lines, raw_lines);
|
290
406
|
|
291
|
-
|
407
|
+
raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
408
|
+
raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
292
409
|
|
293
|
-
for (n = 0; n < _stackprof.
|
294
|
-
rb_ary_push(
|
410
|
+
for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
|
411
|
+
rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
|
412
|
+
rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
|
295
413
|
}
|
296
414
|
|
297
|
-
free(_stackprof.
|
298
|
-
_stackprof.
|
299
|
-
_stackprof.
|
300
|
-
_stackprof.
|
415
|
+
free(_stackprof.raw_sample_times);
|
416
|
+
_stackprof.raw_sample_times = NULL;
|
417
|
+
_stackprof.raw_sample_times_len = 0;
|
418
|
+
_stackprof.raw_sample_times_capa = 0;
|
301
419
|
|
420
|
+
rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
|
302
421
|
rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
|
303
422
|
|
304
423
|
_stackprof.raw = 0;
|
@@ -309,11 +428,12 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
309
428
|
|
310
429
|
if (RTEST(_stackprof.out)) {
|
311
430
|
VALUE file;
|
312
|
-
if (
|
313
|
-
file = rb_file_open_str(_stackprof.out, "w");
|
314
|
-
} else {
|
431
|
+
if (rb_respond_to(_stackprof.out, rb_intern("to_io"))) {
|
315
432
|
file = rb_io_check_io(_stackprof.out);
|
433
|
+
} else {
|
434
|
+
file = rb_file_open_str(_stackprof.out, "w");
|
316
435
|
}
|
436
|
+
|
317
437
|
rb_marshal_dump(results, file);
|
318
438
|
rb_io_flush(file);
|
319
439
|
_stackprof.out = Qnil;
|
@@ -377,30 +497,47 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
|
|
377
497
|
}
|
378
498
|
|
379
499
|
void
|
380
|
-
stackprof_record_sample_for_stack(int num,
|
500
|
+
stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
|
381
501
|
{
|
382
502
|
int i, n;
|
383
503
|
VALUE prev_frame = Qnil;
|
384
504
|
|
385
505
|
_stackprof.overall_samples++;
|
386
506
|
|
387
|
-
if (_stackprof.raw) {
|
507
|
+
if (_stackprof.raw && num > 0) {
|
388
508
|
int found = 0;
|
389
509
|
|
510
|
+
/* If there's no sample buffer allocated, then allocate one. The buffer
|
511
|
+
* format is the number of frames (num), then the list of frames (from
|
512
|
+
* `_stackprof.raw_samples`), followed by the number of times this
|
513
|
+
* particular stack has been seen in a row. Each "new" stack is added
|
514
|
+
* to the end of the buffer, but if the previous stack is the same as
|
515
|
+
* the current stack, the counter will be incremented. */
|
390
516
|
if (!_stackprof.raw_samples) {
|
391
517
|
_stackprof.raw_samples_capa = num * 100;
|
392
518
|
_stackprof.raw_samples = malloc(sizeof(VALUE) * _stackprof.raw_samples_capa);
|
393
519
|
}
|
394
520
|
|
521
|
+
/* If we can't fit all the samples in the buffer, double the buffer size. */
|
395
522
|
while (_stackprof.raw_samples_capa <= _stackprof.raw_samples_len + (num + 2)) {
|
396
523
|
_stackprof.raw_samples_capa *= 2;
|
397
524
|
_stackprof.raw_samples = realloc(_stackprof.raw_samples, sizeof(VALUE) * _stackprof.raw_samples_capa);
|
398
525
|
}
|
399
526
|
|
527
|
+
/* If we've seen this stack before in the last sample, then increment the "seen" count. */
|
400
528
|
if (_stackprof.raw_samples_len > 0 && _stackprof.raw_samples[_stackprof.raw_sample_index] == (VALUE)num) {
|
529
|
+
/* The number of samples could have been the same, but the stack
|
530
|
+
* might be different, so we need to check the stack here. Stacks
|
531
|
+
* in the raw buffer are stored in the opposite direction of stacks
|
532
|
+
* in the frames buffer that came from Ruby. */
|
401
533
|
for (i = num-1, n = 0; i >= 0; i--, n++) {
|
402
534
|
VALUE frame = _stackprof.frames_buffer[i];
|
403
|
-
|
535
|
+
int line = _stackprof.lines_buffer[i];
|
536
|
+
|
537
|
+
// Encode the line in to the upper 16 bits.
|
538
|
+
uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame;
|
539
|
+
|
540
|
+
if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != key)
|
404
541
|
break;
|
405
542
|
}
|
406
543
|
if (i == -1) {
|
@@ -409,28 +546,43 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
409
546
|
}
|
410
547
|
}
|
411
548
|
|
549
|
+
/* If we haven't seen the stack, then add it to the buffer along with
|
550
|
+
* the length of the stack and a 1 for the "seen" count */
|
412
551
|
if (!found) {
|
552
|
+
/* Bump the `raw_sample_index` up so that the next iteration can
|
553
|
+
* find the previously recorded stack size. */
|
413
554
|
_stackprof.raw_sample_index = _stackprof.raw_samples_len;
|
414
555
|
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num;
|
415
556
|
for (i = num-1; i >= 0; i--) {
|
416
557
|
VALUE frame = _stackprof.frames_buffer[i];
|
417
|
-
_stackprof.
|
558
|
+
int line = _stackprof.lines_buffer[i];
|
559
|
+
|
560
|
+
// Encode the line in to the upper 16 bits.
|
561
|
+
uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame;
|
562
|
+
|
563
|
+
_stackprof.raw_samples[_stackprof.raw_samples_len++] = key;
|
418
564
|
}
|
419
565
|
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1;
|
420
566
|
}
|
421
567
|
|
422
|
-
|
423
|
-
|
424
|
-
_stackprof.
|
425
|
-
_stackprof.
|
568
|
+
/* If there's no timestamp delta buffer, allocate one */
|
569
|
+
if (!_stackprof.raw_sample_times) {
|
570
|
+
_stackprof.raw_sample_times_capa = 100;
|
571
|
+
_stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
572
|
+
_stackprof.raw_sample_times_len = 0;
|
426
573
|
}
|
427
574
|
|
428
|
-
|
429
|
-
|
430
|
-
_stackprof.
|
575
|
+
/* Double the buffer size if it's too small */
|
576
|
+
while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
|
577
|
+
_stackprof.raw_sample_times_capa *= 2;
|
578
|
+
_stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
431
579
|
}
|
432
580
|
|
433
|
-
|
581
|
+
/* Store the time delta (which is the amount of microseconds between samples). */
|
582
|
+
_stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
|
583
|
+
.timestamp_usec = sample_timestamp,
|
584
|
+
.delta_usec = timestamp_delta,
|
585
|
+
};
|
434
586
|
}
|
435
587
|
|
436
588
|
for (i = 0; i < num; i++) {
|
@@ -463,90 +615,187 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
463
615
|
}
|
464
616
|
|
465
617
|
if (_stackprof.raw) {
|
466
|
-
|
618
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
467
619
|
}
|
468
620
|
}
|
469
621
|
|
622
|
+
// buffer the current profile frames
|
623
|
+
// This must be async-signal-safe
|
624
|
+
// Returns immediately if another set of frames are already in the buffer
|
470
625
|
void
|
471
|
-
|
626
|
+
stackprof_buffer_sample(void)
|
472
627
|
{
|
473
|
-
|
628
|
+
uint64_t start_timestamp = 0;
|
629
|
+
int64_t timestamp_delta = 0;
|
474
630
|
int num;
|
631
|
+
|
632
|
+
if (_stackprof.buffer_count > 0) {
|
633
|
+
// Another sample is already pending
|
634
|
+
return;
|
635
|
+
}
|
636
|
+
|
475
637
|
if (_stackprof.raw) {
|
476
|
-
struct
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
|
638
|
+
struct timestamp_t t;
|
639
|
+
capture_timestamp(&t);
|
640
|
+
start_timestamp = timestamp_usec(&t);
|
641
|
+
timestamp_delta = delta_usec(&_stackprof.last_sample_at, &t);
|
481
642
|
}
|
643
|
+
|
482
644
|
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
|
483
|
-
|
645
|
+
|
646
|
+
_stackprof.buffer_count = num;
|
647
|
+
_stackprof.buffer_time.timestamp_usec = start_timestamp;
|
648
|
+
_stackprof.buffer_time.delta_usec = timestamp_delta;
|
484
649
|
}
|
485
650
|
|
651
|
+
// Postponed job
|
486
652
|
void
|
487
|
-
stackprof_record_gc_samples()
|
653
|
+
stackprof_record_gc_samples(void)
|
488
654
|
{
|
489
|
-
|
490
|
-
|
655
|
+
int64_t delta_to_first_unrecorded_gc_sample = 0;
|
656
|
+
uint64_t start_timestamp = 0;
|
657
|
+
size_t i;
|
491
658
|
if (_stackprof.raw) {
|
492
|
-
struct
|
493
|
-
|
494
|
-
gettimeofday(&t, NULL);
|
495
|
-
timersub(&t, &_stackprof.last_sample_at, &diff);
|
659
|
+
struct timestamp_t t = _stackprof.gc_start_timestamp;
|
660
|
+
start_timestamp = timestamp_usec(&t);
|
496
661
|
|
497
662
|
// We don't know when the GC samples were actually marked, so let's
|
498
663
|
// assume that they were marked at a perfectly regular interval.
|
499
|
-
delta_to_first_unrecorded_gc_sample = (
|
664
|
+
delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
|
500
665
|
if (delta_to_first_unrecorded_gc_sample < 0) {
|
501
666
|
delta_to_first_unrecorded_gc_sample = 0;
|
502
667
|
}
|
503
668
|
}
|
504
669
|
|
505
|
-
_stackprof.frames_buffer[0] = _stackprof.fake_gc_frame;
|
506
|
-
_stackprof.lines_buffer[0] = 0;
|
507
|
-
|
508
670
|
for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
|
509
|
-
|
510
|
-
|
671
|
+
int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
|
672
|
+
|
673
|
+
if (_stackprof.unrecorded_gc_marking_samples) {
|
674
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
|
675
|
+
_stackprof.lines_buffer[0] = 0;
|
676
|
+
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
|
677
|
+
_stackprof.lines_buffer[1] = 0;
|
678
|
+
_stackprof.unrecorded_gc_marking_samples--;
|
679
|
+
|
680
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
681
|
+
} else if (_stackprof.unrecorded_gc_sweeping_samples) {
|
682
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
|
683
|
+
_stackprof.lines_buffer[0] = 0;
|
684
|
+
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
|
685
|
+
_stackprof.lines_buffer[1] = 0;
|
686
|
+
|
687
|
+
_stackprof.unrecorded_gc_sweeping_samples--;
|
688
|
+
|
689
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
690
|
+
} else {
|
691
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_GC;
|
692
|
+
_stackprof.lines_buffer[0] = 0;
|
693
|
+
stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
|
694
|
+
}
|
511
695
|
}
|
512
696
|
_stackprof.during_gc += _stackprof.unrecorded_gc_samples;
|
513
697
|
_stackprof.unrecorded_gc_samples = 0;
|
698
|
+
_stackprof.unrecorded_gc_marking_samples = 0;
|
699
|
+
_stackprof.unrecorded_gc_sweeping_samples = 0;
|
700
|
+
}
|
701
|
+
|
702
|
+
// record the sample previously buffered by stackprof_buffer_sample
|
703
|
+
static void
|
704
|
+
stackprof_record_buffer(void)
|
705
|
+
{
|
706
|
+
stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
|
707
|
+
|
708
|
+
// reset the buffer
|
709
|
+
_stackprof.buffer_count = 0;
|
514
710
|
}
|
515
711
|
|
516
712
|
/* Buffer the current stack and record it immediately, in one step. */
static void
stackprof_sample_and_record(void)
{
    stackprof_buffer_sample();
    stackprof_record_buffer();
}
|
718
|
+
|
719
|
+
static void
|
720
|
+
stackprof_job_record_gc(void *data)
|
518
721
|
{
|
519
|
-
static int in_signal_handler = 0;
|
520
|
-
if (in_signal_handler) return;
|
521
722
|
if (!_stackprof.running) return;
|
522
723
|
|
523
|
-
in_signal_handler++;
|
524
724
|
stackprof_record_gc_samples();
|
525
|
-
in_signal_handler--;
|
526
725
|
}
|
527
726
|
|
528
727
|
static void
|
529
|
-
|
728
|
+
stackprof_job_sample_and_record(void *data)
|
729
|
+
{
|
730
|
+
if (!_stackprof.running) return;
|
731
|
+
|
732
|
+
stackprof_sample_and_record();
|
733
|
+
}
|
734
|
+
|
735
|
+
static void
|
736
|
+
stackprof_job_record_buffer(void *data)
|
530
737
|
{
|
531
|
-
static int in_signal_handler = 0;
|
532
|
-
if (in_signal_handler) return;
|
533
738
|
if (!_stackprof.running) return;
|
534
739
|
|
535
|
-
|
536
|
-
stackprof_record_sample();
|
537
|
-
in_signal_handler--;
|
740
|
+
stackprof_record_buffer();
|
538
741
|
}
|
539
742
|
|
540
743
|
static void
|
541
744
|
stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
542
745
|
{
|
746
|
+
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
747
|
+
|
543
748
|
_stackprof.overall_signals++;
|
544
|
-
|
749
|
+
|
750
|
+
if (!_stackprof.running) return;
|
751
|
+
|
752
|
+
// There's a possibility that the signal handler is invoked *after* the Ruby
|
753
|
+
// VM has been shut down (e.g. after ruby_cleanup(0)). In this case, things
|
754
|
+
// that rely on global VM state (e.g. rb_during_gc) will segfault.
|
755
|
+
if (!ruby_vm_running) return;
|
756
|
+
|
757
|
+
if (_stackprof.mode == sym_wall) {
|
758
|
+
// In "wall" mode, the SIGALRM signal will arrive at an arbitrary thread.
|
759
|
+
// In order to provide more useful results, especially under threaded web
|
760
|
+
// servers, we want to forward this signal to the original thread
|
761
|
+
// StackProf was started from.
|
762
|
+
// According to POSIX.1-2008 TC1 pthread_kill and pthread_self should be
|
763
|
+
// async-signal-safe.
|
764
|
+
if (pthread_self() != _stackprof.target_thread) {
|
765
|
+
pthread_kill(_stackprof.target_thread, sig);
|
766
|
+
return;
|
767
|
+
}
|
768
|
+
} else {
|
769
|
+
if (!ruby_native_thread_p()) return;
|
770
|
+
}
|
771
|
+
|
772
|
+
if (pthread_mutex_trylock(&lock)) return;
|
773
|
+
|
774
|
+
if (!_stackprof.ignore_gc && rb_during_gc()) {
|
775
|
+
VALUE mode = rb_gc_latest_gc_info(sym_state);
|
776
|
+
if (mode == sym_marking) {
|
777
|
+
_stackprof.unrecorded_gc_marking_samples++;
|
778
|
+
} else if (mode == sym_sweeping) {
|
779
|
+
_stackprof.unrecorded_gc_sweeping_samples++;
|
780
|
+
}
|
781
|
+
if(!_stackprof.unrecorded_gc_samples) {
|
782
|
+
// record start
|
783
|
+
capture_timestamp(&_stackprof.gc_start_timestamp);
|
784
|
+
}
|
545
785
|
_stackprof.unrecorded_gc_samples++;
|
546
|
-
rb_postponed_job_register_one(0,
|
786
|
+
rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
|
547
787
|
} else {
|
548
|
-
|
788
|
+
if (stackprof_use_postponed_job) {
|
789
|
+
rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
|
790
|
+
} else {
|
791
|
+
// Buffer a sample immediately, if an existing sample exists this will
|
792
|
+
// return immediately
|
793
|
+
stackprof_buffer_sample();
|
794
|
+
// Enqueue a job to record the sample
|
795
|
+
rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
|
796
|
+
}
|
549
797
|
}
|
798
|
+
pthread_mutex_unlock(&lock);
|
550
799
|
}
|
551
800
|
|
552
801
|
static void
|
@@ -555,7 +804,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
|
|
555
804
|
_stackprof.overall_signals++;
|
556
805
|
if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
|
557
806
|
return;
|
558
|
-
|
807
|
+
stackprof_sample_and_record();
|
559
808
|
}
|
560
809
|
|
561
810
|
static VALUE
|
@@ -565,7 +814,7 @@ stackprof_sample(VALUE self)
|
|
565
814
|
return Qfalse;
|
566
815
|
|
567
816
|
_stackprof.overall_signals++;
|
568
|
-
|
817
|
+
stackprof_sample_and_record();
|
569
818
|
return Qtrue;
|
570
819
|
}
|
571
820
|
|
@@ -580,11 +829,25 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
580
829
|
static void
|
581
830
|
stackprof_gc_mark(void *data)
|
582
831
|
{
|
832
|
+
if (RTEST(_stackprof.metadata))
|
833
|
+
rb_gc_mark(_stackprof.metadata);
|
834
|
+
|
583
835
|
if (RTEST(_stackprof.out))
|
584
836
|
rb_gc_mark(_stackprof.out);
|
585
837
|
|
586
838
|
if (_stackprof.frames)
|
587
839
|
st_foreach(_stackprof.frames, frame_mark_i, 0);
|
840
|
+
|
841
|
+
int i;
|
842
|
+
for (i = 0; i < _stackprof.buffer_count; i++) {
|
843
|
+
rb_gc_mark(_stackprof.frames_buffer[i]);
|
844
|
+
}
|
845
|
+
}
|
846
|
+
|
847
|
+
static size_t
|
848
|
+
stackprof_memsize(const void *data)
|
849
|
+
{
|
850
|
+
return sizeof(_stackprof);
|
588
851
|
}
|
589
852
|
|
590
853
|
static void
|
@@ -619,9 +882,41 @@ stackprof_atfork_child(void)
|
|
619
882
|
stackprof_stop(rb_mStackProf);
|
620
883
|
}
|
621
884
|
|
885
|
+
static VALUE
|
886
|
+
stackprof_use_postponed_job_l(VALUE self)
|
887
|
+
{
|
888
|
+
stackprof_use_postponed_job = 1;
|
889
|
+
return Qnil;
|
890
|
+
}
|
891
|
+
|
892
|
+
static void
|
893
|
+
stackprof_at_exit(ruby_vm_t* vm)
|
894
|
+
{
|
895
|
+
ruby_vm_running = 0;
|
896
|
+
}
|
897
|
+
|
898
|
+
static const rb_data_type_t stackprof_type = {
|
899
|
+
"StackProf",
|
900
|
+
{
|
901
|
+
stackprof_gc_mark,
|
902
|
+
NULL,
|
903
|
+
stackprof_memsize,
|
904
|
+
}
|
905
|
+
};
|
906
|
+
|
622
907
|
void
|
623
908
|
Init_stackprof(void)
|
624
909
|
{
|
910
|
+
size_t i;
|
911
|
+
/*
|
912
|
+
* As of Ruby 3.0, it should be safe to read stack frames at any time, unless YJIT is enabled
|
913
|
+
* See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
|
914
|
+
*/
|
915
|
+
stackprof_use_postponed_job = RUBY_API_VERSION_MAJOR < 3;
|
916
|
+
|
917
|
+
ruby_vm_running = 1;
|
918
|
+
ruby_vm_at_exit(stackprof_at_exit);
|
919
|
+
|
625
920
|
#define S(name) sym_##name = ID2SYM(rb_intern(#name));
|
626
921
|
S(object);
|
627
922
|
S(custom);
|
@@ -640,30 +935,42 @@ Init_stackprof(void)
|
|
640
935
|
S(mode);
|
641
936
|
S(interval);
|
642
937
|
S(raw);
|
938
|
+
S(raw_lines);
|
939
|
+
S(raw_sample_timestamps);
|
643
940
|
S(raw_timestamp_deltas);
|
644
941
|
S(out);
|
942
|
+
S(metadata);
|
943
|
+
S(ignore_gc);
|
645
944
|
S(frames);
|
646
945
|
S(aggregate);
|
946
|
+
S(state);
|
947
|
+
S(marking);
|
948
|
+
S(sweeping);
|
647
949
|
#undef S
|
648
950
|
|
649
|
-
|
951
|
+
/* Need to run this to warm the symbol table before we call this during GC */
|
952
|
+
rb_gc_latest_gc_info(sym_state);
|
953
|
+
|
650
954
|
rb_global_variable(&gc_hook);
|
955
|
+
gc_hook = TypedData_Wrap_Struct(rb_cObject, &stackprof_type, &_stackprof);
|
651
956
|
|
652
957
|
_stackprof.raw_samples = NULL;
|
653
958
|
_stackprof.raw_samples_len = 0;
|
654
959
|
_stackprof.raw_samples_capa = 0;
|
655
960
|
_stackprof.raw_sample_index = 0;
|
656
961
|
|
657
|
-
_stackprof.
|
658
|
-
_stackprof.
|
659
|
-
_stackprof.
|
962
|
+
_stackprof.raw_sample_times = NULL;
|
963
|
+
_stackprof.raw_sample_times_len = 0;
|
964
|
+
_stackprof.raw_sample_times_capa = 0;
|
660
965
|
|
661
|
-
_stackprof.fake_gc_frame = INT2FIX(0x9C);
|
662
966
|
_stackprof.empty_string = rb_str_new_cstr("");
|
663
|
-
_stackprof.fake_gc_frame_name = rb_str_new_cstr("(garbage collection)");
|
664
|
-
rb_global_variable(&_stackprof.fake_gc_frame_name);
|
665
967
|
rb_global_variable(&_stackprof.empty_string);
|
666
968
|
|
969
|
+
for (i = 0; i < TOTAL_FAKE_FRAMES; i++) {
|
970
|
+
_stackprof.fake_frame_names[i] = rb_str_new_cstr(fake_frame_cstrs[i]);
|
971
|
+
rb_global_variable(&_stackprof.fake_frame_names[i]);
|
972
|
+
}
|
973
|
+
|
667
974
|
rb_mStackProf = rb_define_module("StackProf");
|
668
975
|
rb_define_singleton_method(rb_mStackProf, "running?", stackprof_running_p, 0);
|
669
976
|
rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);
|
@@ -671,6 +978,7 @@ Init_stackprof(void)
|
|
671
978
|
rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
|
672
979
|
rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
|
673
980
|
rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
|
981
|
+
rb_define_singleton_method(rb_mStackProf, "use_postponed_job!", stackprof_use_postponed_job_l, 0);
|
674
982
|
|
675
983
|
pthread_atfork(stackprof_atfork_prepare, stackprof_atfork_parent, stackprof_atfork_child);
|
676
984
|
}
|