stackprof 0.2.12 → 0.2.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ci.yml +43 -0
- data/.gitignore +1 -1
- data/CHANGELOG.md +17 -2
- data/README.md +66 -51
- data/Rakefile +21 -25
- data/bin/stackprof +115 -71
- data/ext/stackprof/extconf.rb +6 -0
- data/ext/stackprof/stackprof.c +392 -84
- data/lib/stackprof/autorun.rb +19 -0
- data/lib/stackprof/middleware.rb +8 -2
- data/lib/stackprof/report.rb +280 -16
- data/lib/stackprof/truffleruby.rb +37 -0
- data/lib/stackprof.rb +22 -1
- data/stackprof.gemspec +11 -3
- data/test/fixtures/profile.dump +1 -0
- data/test/fixtures/profile.json +1 -0
- data/test/test_middleware.rb +36 -17
- data/test/test_report.rb +25 -1
- data/test/test_stackprof.rb +153 -15
- data/test/test_truffleruby.rb +18 -0
- data/vendor/FlameGraph/flamegraph.pl +751 -85
- metadata +16 -23
- data/.travis.yml +0 -8
- data/Gemfile.lock +0 -27
data/ext/stackprof/stackprof.c
CHANGED
@@ -7,15 +7,76 @@
|
|
7
7
|
**********************************************************************/
|
8
8
|
|
9
9
|
#include <ruby/ruby.h>
|
10
|
+
#include <ruby/version.h>
|
10
11
|
#include <ruby/debug.h>
|
11
12
|
#include <ruby/st.h>
|
12
13
|
#include <ruby/io.h>
|
13
14
|
#include <ruby/intern.h>
|
15
|
+
#include <ruby/vm.h>
|
14
16
|
#include <signal.h>
|
15
17
|
#include <sys/time.h>
|
18
|
+
#include <time.h>
|
16
19
|
#include <pthread.h>
|
17
20
|
|
18
21
|
#define BUF_SIZE 2048
|
22
|
+
#define MICROSECONDS_IN_SECOND 1000000
|
23
|
+
#define NANOSECONDS_IN_SECOND 1000000000
|
24
|
+
|
25
|
+
#define FAKE_FRAME_GC INT2FIX(0)
|
26
|
+
#define FAKE_FRAME_MARK INT2FIX(1)
|
27
|
+
#define FAKE_FRAME_SWEEP INT2FIX(2)
|
28
|
+
|
29
|
+
static const char *fake_frame_cstrs[] = {
|
30
|
+
"(garbage collection)",
|
31
|
+
"(marking)",
|
32
|
+
"(sweeping)",
|
33
|
+
};
|
34
|
+
|
35
|
+
static int stackprof_use_postponed_job = 1;
|
36
|
+
static int ruby_vm_running = 0;
|
37
|
+
|
38
|
+
#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
|
39
|
+
|
40
|
+
#ifdef _POSIX_MONOTONIC_CLOCK
|
41
|
+
#define timestamp_t timespec
|
42
|
+
typedef struct timestamp_t timestamp_t;
|
43
|
+
|
44
|
+
/* Store the current CLOCK_MONOTONIC reading in *ts. */
static void
capture_timestamp(timestamp_t *ts)
{
    clock_gettime(CLOCK_MONOTONIC, ts);
}
|
47
|
+
|
48
|
+
/*
 * Microseconds elapsed from *start to *end.
 *
 * The nanosecond part is truncated (not rounded) to whole microseconds.
 * When end's nanosecond field is smaller than start's, one second is
 * borrowed so the nanosecond difference stays non-negative.
 */
static int64_t
delta_usec(timestamp_t *start, timestamp_t *end)
{
    int64_t usec = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);

    if (end->tv_nsec >= start->tv_nsec) {
        usec += (end->tv_nsec - start->tv_nsec) / 1000;
        return usec;
    }

    /* Borrow one second from the whole-second difference. */
    usec -= MICROSECONDS_IN_SECOND;
    usec += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
    return usec;
}
|
58
|
+
|
59
|
+
/*
 * Express *ts as a single microsecond count: whole seconds scaled to
 * microseconds plus the nanosecond field truncated to microseconds.
 */
static uint64_t
timestamp_usec(timestamp_t *ts)
{
    return (ts->tv_nsec / 1000) + (MICROSECONDS_IN_SECOND * ts->tv_sec);
}
|
62
|
+
#else
|
63
|
+
#define timestamp_t timeval
|
64
|
+
typedef struct timestamp_t timestamp_t;
|
65
|
+
|
66
|
+
/* Store the current gettimeofday() reading in *ts (no timezone requested). */
static void
capture_timestamp(timestamp_t *ts)
{
    gettimeofday(ts, NULL);
}
|
69
|
+
|
70
|
+
/*
 * Microseconds elapsed from *start to *end, computed with timersub() and
 * then flattened into a single microsecond count.
 */
static int64_t
delta_usec(timestamp_t *start, timestamp_t *end)
{
    struct timeval elapsed;

    timersub(end, start, &elapsed);

    return elapsed.tv_usec + (MICROSECONDS_IN_SECOND * elapsed.tv_sec);
}
|
75
|
+
|
76
|
+
/*
 * Express *ts (a struct timeval in this gettimeofday() fallback branch) as a
 * single microsecond count: whole seconds scaled to microseconds plus the
 * microsecond field.
 *
 * The previous body did not compile when this branch was selected: it used
 * `.` on the pointer parameter, referenced an undeclared `diff`, and lacked
 * the terminating semicolon.
 */
static uint64_t
timestamp_usec(timestamp_t *ts)
{
    return (MICROSECONDS_IN_SECOND * ts->tv_sec) + ts->tv_usec;
}
|
79
|
+
#endif
|
19
80
|
|
20
81
|
typedef struct {
|
21
82
|
size_t total_samples;
|
@@ -25,6 +86,11 @@ typedef struct {
|
|
25
86
|
st_table *lines;
|
26
87
|
} frame_data_t;
|
27
88
|
|
89
|
+
/* Timing information kept for one raw sample. */
typedef struct {
    /* Microsecond timestamp attributed to the sample (see timestamp_usec()). */
    uint64_t timestamp_usec;
    /* Microseconds attributed to the sample since the previous one (see delta_usec()). */
    int64_t delta_usec;
} sample_time_t;
|
93
|
+
|
28
94
|
static struct {
|
29
95
|
int running;
|
30
96
|
int raw;
|
@@ -33,33 +99,44 @@ static struct {
|
|
33
99
|
VALUE mode;
|
34
100
|
VALUE interval;
|
35
101
|
VALUE out;
|
102
|
+
VALUE metadata;
|
103
|
+
int ignore_gc;
|
36
104
|
|
37
|
-
|
105
|
+
uint64_t *raw_samples;
|
38
106
|
size_t raw_samples_len;
|
39
107
|
size_t raw_samples_capa;
|
40
108
|
size_t raw_sample_index;
|
41
109
|
|
42
|
-
struct
|
43
|
-
|
44
|
-
size_t
|
45
|
-
size_t
|
110
|
+
struct timestamp_t last_sample_at;
|
111
|
+
sample_time_t *raw_sample_times;
|
112
|
+
size_t raw_sample_times_len;
|
113
|
+
size_t raw_sample_times_capa;
|
46
114
|
|
47
115
|
size_t overall_signals;
|
48
116
|
size_t overall_samples;
|
49
117
|
size_t during_gc;
|
50
118
|
size_t unrecorded_gc_samples;
|
119
|
+
size_t unrecorded_gc_marking_samples;
|
120
|
+
size_t unrecorded_gc_sweeping_samples;
|
51
121
|
st_table *frames;
|
52
122
|
|
53
|
-
|
54
|
-
|
123
|
+
timestamp_t gc_start_timestamp;
|
124
|
+
|
125
|
+
VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
|
55
126
|
VALUE empty_string;
|
127
|
+
|
128
|
+
int buffer_count;
|
129
|
+
sample_time_t buffer_time;
|
56
130
|
VALUE frames_buffer[BUF_SIZE];
|
57
131
|
int lines_buffer[BUF_SIZE];
|
132
|
+
|
133
|
+
pthread_t target_thread;
|
58
134
|
} _stackprof;
|
59
135
|
|
60
136
|
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
|
61
137
|
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
|
62
|
-
static VALUE sym_version, sym_mode, sym_interval, sym_raw,
|
138
|
+
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_raw_lines, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
|
139
|
+
static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
|
63
140
|
static VALUE sym_gc_samples, objtracer;
|
64
141
|
static VALUE gc_hook;
|
65
142
|
static VALUE rb_mStackProf;
|
@@ -72,8 +149,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
72
149
|
{
|
73
150
|
struct sigaction sa;
|
74
151
|
struct itimerval timer;
|
75
|
-
VALUE opts = Qnil, mode = Qnil, interval = Qnil, out = Qfalse;
|
152
|
+
VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
|
153
|
+
int ignore_gc = 0;
|
76
154
|
int raw = 0, aggregate = 1;
|
155
|
+
VALUE metadata_val;
|
77
156
|
|
78
157
|
if (_stackprof.running)
|
79
158
|
return Qfalse;
|
@@ -84,6 +163,17 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
84
163
|
mode = rb_hash_aref(opts, sym_mode);
|
85
164
|
interval = rb_hash_aref(opts, sym_interval);
|
86
165
|
out = rb_hash_aref(opts, sym_out);
|
166
|
+
if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
|
167
|
+
ignore_gc = 1;
|
168
|
+
}
|
169
|
+
|
170
|
+
metadata_val = rb_hash_aref(opts, sym_metadata);
|
171
|
+
if (RTEST(metadata_val)) {
|
172
|
+
if (!RB_TYPE_P(metadata_val, T_HASH))
|
173
|
+
rb_raise(rb_eArgError, "metadata should be a hash");
|
174
|
+
|
175
|
+
metadata = metadata_val;
|
176
|
+
}
|
87
177
|
|
88
178
|
if (RTEST(rb_hash_aref(opts, sym_raw)))
|
89
179
|
raw = 1;
|
@@ -92,6 +182,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
92
182
|
}
|
93
183
|
if (!RTEST(mode)) mode = sym_wall;
|
94
184
|
|
185
|
+
if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
|
186
|
+
rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
|
187
|
+
}
|
188
|
+
|
95
189
|
if (!_stackprof.frames) {
|
96
190
|
_stackprof.frames = st_init_numtable();
|
97
191
|
_stackprof.overall_signals = 0;
|
@@ -128,10 +222,13 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
128
222
|
_stackprof.aggregate = aggregate;
|
129
223
|
_stackprof.mode = mode;
|
130
224
|
_stackprof.interval = interval;
|
225
|
+
_stackprof.ignore_gc = ignore_gc;
|
226
|
+
_stackprof.metadata = metadata;
|
131
227
|
_stackprof.out = out;
|
228
|
+
_stackprof.target_thread = pthread_self();
|
132
229
|
|
133
230
|
if (raw) {
|
134
|
-
|
231
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
135
232
|
}
|
136
233
|
|
137
234
|
return Qtrue;
|
@@ -166,13 +263,19 @@ stackprof_stop(VALUE self)
|
|
166
263
|
return Qtrue;
|
167
264
|
}
|
168
265
|
|
266
|
+
#if SIZEOF_VOIDP == SIZEOF_LONG
|
267
|
+
# define PTR2NUM(x) (LONG2NUM((long)(x)))
|
268
|
+
#else
|
269
|
+
# define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
|
270
|
+
#endif
|
271
|
+
|
169
272
|
static int
|
170
273
|
frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
|
171
274
|
{
|
172
275
|
VALUE edges = (VALUE)arg;
|
173
276
|
|
174
277
|
intptr_t weight = (intptr_t)val;
|
175
|
-
rb_hash_aset(edges,
|
278
|
+
rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
|
176
279
|
return ST_CONTINUE;
|
177
280
|
}
|
178
281
|
|
@@ -199,10 +302,10 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
199
302
|
VALUE name, file, edges, lines;
|
200
303
|
VALUE line;
|
201
304
|
|
202
|
-
rb_hash_aset(results,
|
305
|
+
rb_hash_aset(results, PTR2NUM(frame), details);
|
203
306
|
|
204
|
-
if (frame
|
205
|
-
name = _stackprof.
|
307
|
+
if (FIXNUM_P(frame)) {
|
308
|
+
name = _stackprof.fake_frame_names[FIX2INT(frame)];
|
206
309
|
file = _stackprof.empty_string;
|
207
310
|
line = INT2FIX(0);
|
208
311
|
} else {
|
@@ -258,6 +361,9 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
258
361
|
rb_hash_aset(results, sym_samples, SIZET2NUM(_stackprof.overall_samples));
|
259
362
|
rb_hash_aset(results, sym_gc_samples, SIZET2NUM(_stackprof.during_gc));
|
260
363
|
rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples));
|
364
|
+
rb_hash_aset(results, sym_metadata, _stackprof.metadata);
|
365
|
+
|
366
|
+
_stackprof.metadata = Qnil;
|
261
367
|
|
262
368
|
frames = rb_hash_new();
|
263
369
|
rb_hash_aset(results, sym_frames, frames);
|
@@ -268,16 +374,25 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
268
374
|
|
269
375
|
if (_stackprof.raw && _stackprof.raw_samples_len) {
|
270
376
|
size_t len, n, o;
|
271
|
-
VALUE raw_timestamp_deltas;
|
377
|
+
VALUE raw_sample_timestamps, raw_timestamp_deltas;
|
272
378
|
VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
|
379
|
+
VALUE raw_lines = rb_ary_new_capa(_stackprof.raw_samples_len);
|
273
380
|
|
274
381
|
for (n = 0; n < _stackprof.raw_samples_len; n++) {
|
275
382
|
len = (size_t)_stackprof.raw_samples[n];
|
276
383
|
rb_ary_push(raw_samples, SIZET2NUM(len));
|
384
|
+
rb_ary_push(raw_lines, SIZET2NUM(len));
|
385
|
+
|
386
|
+
for (o = 0, n++; o < len; n++, o++) {
|
387
|
+
// Line is in the upper 16 bits
|
388
|
+
rb_ary_push(raw_lines, INT2NUM(_stackprof.raw_samples[n] >> 48));
|
389
|
+
|
390
|
+
VALUE frame = _stackprof.raw_samples[n] & ~((uint64_t)0xFFFF << 48);
|
391
|
+
rb_ary_push(raw_samples, PTR2NUM(frame));
|
392
|
+
}
|
277
393
|
|
278
|
-
for (o = 0, n++; o < len; n++, o++)
|
279
|
-
rb_ary_push(raw_samples, rb_obj_id(_stackprof.raw_samples[n]));
|
280
394
|
rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
|
395
|
+
rb_ary_push(raw_lines, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
|
281
396
|
}
|
282
397
|
|
283
398
|
free(_stackprof.raw_samples);
|
@@ -287,18 +402,22 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
287
402
|
_stackprof.raw_sample_index = 0;
|
288
403
|
|
289
404
|
rb_hash_aset(results, sym_raw, raw_samples);
|
405
|
+
rb_hash_aset(results, sym_raw_lines, raw_lines);
|
290
406
|
|
291
|
-
|
407
|
+
raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
408
|
+
raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
292
409
|
|
293
|
-
for (n = 0; n < _stackprof.
|
294
|
-
rb_ary_push(
|
410
|
+
for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
|
411
|
+
rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
|
412
|
+
rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
|
295
413
|
}
|
296
414
|
|
297
|
-
free(_stackprof.
|
298
|
-
_stackprof.
|
299
|
-
_stackprof.
|
300
|
-
_stackprof.
|
415
|
+
free(_stackprof.raw_sample_times);
|
416
|
+
_stackprof.raw_sample_times = NULL;
|
417
|
+
_stackprof.raw_sample_times_len = 0;
|
418
|
+
_stackprof.raw_sample_times_capa = 0;
|
301
419
|
|
420
|
+
rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
|
302
421
|
rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
|
303
422
|
|
304
423
|
_stackprof.raw = 0;
|
@@ -309,11 +428,12 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
309
428
|
|
310
429
|
if (RTEST(_stackprof.out)) {
|
311
430
|
VALUE file;
|
312
|
-
if (
|
313
|
-
file = rb_file_open_str(_stackprof.out, "w");
|
314
|
-
} else {
|
431
|
+
if (rb_respond_to(_stackprof.out, rb_intern("to_io"))) {
|
315
432
|
file = rb_io_check_io(_stackprof.out);
|
433
|
+
} else {
|
434
|
+
file = rb_file_open_str(_stackprof.out, "w");
|
316
435
|
}
|
436
|
+
|
317
437
|
rb_marshal_dump(results, file);
|
318
438
|
rb_io_flush(file);
|
319
439
|
_stackprof.out = Qnil;
|
@@ -377,30 +497,47 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
|
|
377
497
|
}
|
378
498
|
|
379
499
|
void
|
380
|
-
stackprof_record_sample_for_stack(int num,
|
500
|
+
stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
|
381
501
|
{
|
382
502
|
int i, n;
|
383
503
|
VALUE prev_frame = Qnil;
|
384
504
|
|
385
505
|
_stackprof.overall_samples++;
|
386
506
|
|
387
|
-
if (_stackprof.raw) {
|
507
|
+
if (_stackprof.raw && num > 0) {
|
388
508
|
int found = 0;
|
389
509
|
|
510
|
+
/* If there's no sample buffer allocated, then allocate one. The buffer
|
511
|
+
* format is the number of frames (num), then the list of frames (from
|
512
|
+
* `_stackprof.raw_samples`), followed by the number of times this
|
513
|
+
* particular stack has been seen in a row. Each "new" stack is added
|
514
|
+
* to the end of the buffer, but if the previous stack is the same as
|
515
|
+
* the current stack, the counter will be incremented. */
|
390
516
|
if (!_stackprof.raw_samples) {
|
391
517
|
_stackprof.raw_samples_capa = num * 100;
|
392
518
|
_stackprof.raw_samples = malloc(sizeof(VALUE) * _stackprof.raw_samples_capa);
|
393
519
|
}
|
394
520
|
|
521
|
+
/* If we can't fit all the samples in the buffer, double the buffer size. */
|
395
522
|
while (_stackprof.raw_samples_capa <= _stackprof.raw_samples_len + (num + 2)) {
|
396
523
|
_stackprof.raw_samples_capa *= 2;
|
397
524
|
_stackprof.raw_samples = realloc(_stackprof.raw_samples, sizeof(VALUE) * _stackprof.raw_samples_capa);
|
398
525
|
}
|
399
526
|
|
527
|
+
/* If we've seen this stack before in the last sample, then increment the "seen" count. */
|
400
528
|
if (_stackprof.raw_samples_len > 0 && _stackprof.raw_samples[_stackprof.raw_sample_index] == (VALUE)num) {
|
529
|
+
/* The number of samples could have been the same, but the stack
|
530
|
+
* might be different, so we need to check the stack here. Stacks
|
531
|
+
* in the raw buffer are stored in the opposite direction of stacks
|
532
|
+
* in the frames buffer that came from Ruby. */
|
401
533
|
for (i = num-1, n = 0; i >= 0; i--, n++) {
|
402
534
|
VALUE frame = _stackprof.frames_buffer[i];
|
403
|
-
|
535
|
+
int line = _stackprof.lines_buffer[i];
|
536
|
+
|
537
|
+
// Encode the line into the upper 16 bits.
|
538
|
+
uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame;
|
539
|
+
|
540
|
+
if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != key)
|
404
541
|
break;
|
405
542
|
}
|
406
543
|
if (i == -1) {
|
@@ -409,28 +546,43 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
409
546
|
}
|
410
547
|
}
|
411
548
|
|
549
|
+
/* If we haven't seen the stack, then add it to the buffer along with
|
550
|
+
* the length of the stack and a 1 for the "seen" count */
|
412
551
|
if (!found) {
|
552
|
+
/* Bump the `raw_sample_index` up so that the next iteration can
|
553
|
+
* find the previously recorded stack size. */
|
413
554
|
_stackprof.raw_sample_index = _stackprof.raw_samples_len;
|
414
555
|
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num;
|
415
556
|
for (i = num-1; i >= 0; i--) {
|
416
557
|
VALUE frame = _stackprof.frames_buffer[i];
|
417
|
-
_stackprof.
|
558
|
+
int line = _stackprof.lines_buffer[i];
|
559
|
+
|
560
|
+
// Encode the line into the upper 16 bits.
|
561
|
+
uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame;
|
562
|
+
|
563
|
+
_stackprof.raw_samples[_stackprof.raw_samples_len++] = key;
|
418
564
|
}
|
419
565
|
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1;
|
420
566
|
}
|
421
567
|
|
422
|
-
|
423
|
-
|
424
|
-
_stackprof.
|
425
|
-
_stackprof.
|
568
|
+
/* If there's no timestamp delta buffer, allocate one */
|
569
|
+
if (!_stackprof.raw_sample_times) {
|
570
|
+
_stackprof.raw_sample_times_capa = 100;
|
571
|
+
_stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
572
|
+
_stackprof.raw_sample_times_len = 0;
|
426
573
|
}
|
427
574
|
|
428
|
-
|
429
|
-
|
430
|
-
_stackprof.
|
575
|
+
/* Double the buffer size if it's too small */
|
576
|
+
while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
|
577
|
+
_stackprof.raw_sample_times_capa *= 2;
|
578
|
+
_stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
431
579
|
}
|
432
580
|
|
433
|
-
|
581
|
+
/* Store the time delta (which is the amount of microseconds between samples). */
|
582
|
+
_stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
|
583
|
+
.timestamp_usec = sample_timestamp,
|
584
|
+
.delta_usec = timestamp_delta,
|
585
|
+
};
|
434
586
|
}
|
435
587
|
|
436
588
|
for (i = 0; i < num; i++) {
|
@@ -463,90 +615,187 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
463
615
|
}
|
464
616
|
|
465
617
|
if (_stackprof.raw) {
|
466
|
-
|
618
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
467
619
|
}
|
468
620
|
}
|
469
621
|
|
622
|
+
// buffer the current profile frames
|
623
|
+
// This must be async-signal-safe
|
624
|
+
// Returns immediately if another set of frames are already in the buffer
|
470
625
|
void
|
471
|
-
|
626
|
+
stackprof_buffer_sample(void)
|
472
627
|
{
|
473
|
-
|
628
|
+
uint64_t start_timestamp = 0;
|
629
|
+
int64_t timestamp_delta = 0;
|
474
630
|
int num;
|
631
|
+
|
632
|
+
if (_stackprof.buffer_count > 0) {
|
633
|
+
// Another sample is already pending
|
634
|
+
return;
|
635
|
+
}
|
636
|
+
|
475
637
|
if (_stackprof.raw) {
|
476
|
-
struct
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
|
638
|
+
struct timestamp_t t;
|
639
|
+
capture_timestamp(&t);
|
640
|
+
start_timestamp = timestamp_usec(&t);
|
641
|
+
timestamp_delta = delta_usec(&_stackprof.last_sample_at, &t);
|
481
642
|
}
|
643
|
+
|
482
644
|
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
|
483
|
-
|
645
|
+
|
646
|
+
_stackprof.buffer_count = num;
|
647
|
+
_stackprof.buffer_time.timestamp_usec = start_timestamp;
|
648
|
+
_stackprof.buffer_time.delta_usec = timestamp_delta;
|
484
649
|
}
|
485
650
|
|
651
|
+
// Postponed job
|
486
652
|
void
|
487
|
-
stackprof_record_gc_samples()
|
653
|
+
stackprof_record_gc_samples(void)
|
488
654
|
{
|
489
|
-
|
490
|
-
|
655
|
+
int64_t delta_to_first_unrecorded_gc_sample = 0;
|
656
|
+
uint64_t start_timestamp = 0;
|
657
|
+
size_t i;
|
491
658
|
if (_stackprof.raw) {
|
492
|
-
struct
|
493
|
-
|
494
|
-
gettimeofday(&t, NULL);
|
495
|
-
timersub(&t, &_stackprof.last_sample_at, &diff);
|
659
|
+
struct timestamp_t t = _stackprof.gc_start_timestamp;
|
660
|
+
start_timestamp = timestamp_usec(&t);
|
496
661
|
|
497
662
|
// We don't know when the GC samples were actually marked, so let's
|
498
663
|
// assume that they were marked at a perfectly regular interval.
|
499
|
-
delta_to_first_unrecorded_gc_sample = (
|
664
|
+
delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
|
500
665
|
if (delta_to_first_unrecorded_gc_sample < 0) {
|
501
666
|
delta_to_first_unrecorded_gc_sample = 0;
|
502
667
|
}
|
503
668
|
}
|
504
669
|
|
505
|
-
_stackprof.frames_buffer[0] = _stackprof.fake_gc_frame;
|
506
|
-
_stackprof.lines_buffer[0] = 0;
|
507
|
-
|
508
670
|
for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
|
509
|
-
|
510
|
-
|
671
|
+
int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
|
672
|
+
|
673
|
+
if (_stackprof.unrecorded_gc_marking_samples) {
|
674
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
|
675
|
+
_stackprof.lines_buffer[0] = 0;
|
676
|
+
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
|
677
|
+
_stackprof.lines_buffer[1] = 0;
|
678
|
+
_stackprof.unrecorded_gc_marking_samples--;
|
679
|
+
|
680
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
681
|
+
} else if (_stackprof.unrecorded_gc_sweeping_samples) {
|
682
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
|
683
|
+
_stackprof.lines_buffer[0] = 0;
|
684
|
+
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
|
685
|
+
_stackprof.lines_buffer[1] = 0;
|
686
|
+
|
687
|
+
_stackprof.unrecorded_gc_sweeping_samples--;
|
688
|
+
|
689
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
690
|
+
} else {
|
691
|
+
_stackprof.frames_buffer[0] = FAKE_FRAME_GC;
|
692
|
+
_stackprof.lines_buffer[0] = 0;
|
693
|
+
stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
|
694
|
+
}
|
511
695
|
}
|
512
696
|
_stackprof.during_gc += _stackprof.unrecorded_gc_samples;
|
513
697
|
_stackprof.unrecorded_gc_samples = 0;
|
698
|
+
_stackprof.unrecorded_gc_marking_samples = 0;
|
699
|
+
_stackprof.unrecorded_gc_sweeping_samples = 0;
|
700
|
+
}
|
701
|
+
|
702
|
+
// record the sample previously buffered by stackprof_buffer_sample
|
703
|
+
static void
|
704
|
+
stackprof_record_buffer(void)
|
705
|
+
{
|
706
|
+
stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
|
707
|
+
|
708
|
+
// reset the buffer
|
709
|
+
_stackprof.buffer_count = 0;
|
514
710
|
}
|
515
711
|
|
516
712
|
/* Capture the current stack into the buffer and record it straight away. */
static void
stackprof_sample_and_record(void)
{
    stackprof_buffer_sample();   /* no-op if a sample is already buffered */
    stackprof_record_buffer();
}
|
718
|
+
|
719
|
+
static void
|
720
|
+
stackprof_job_record_gc(void *data)
|
518
721
|
{
|
519
|
-
static int in_signal_handler = 0;
|
520
|
-
if (in_signal_handler) return;
|
521
722
|
if (!_stackprof.running) return;
|
522
723
|
|
523
|
-
in_signal_handler++;
|
524
724
|
stackprof_record_gc_samples();
|
525
|
-
in_signal_handler--;
|
526
725
|
}
|
527
726
|
|
528
727
|
static void
|
529
|
-
|
728
|
+
stackprof_job_sample_and_record(void *data)
|
729
|
+
{
|
730
|
+
if (!_stackprof.running) return;
|
731
|
+
|
732
|
+
stackprof_sample_and_record();
|
733
|
+
}
|
734
|
+
|
735
|
+
static void
|
736
|
+
stackprof_job_record_buffer(void *data)
|
530
737
|
{
|
531
|
-
static int in_signal_handler = 0;
|
532
|
-
if (in_signal_handler) return;
|
533
738
|
if (!_stackprof.running) return;
|
534
739
|
|
535
|
-
|
536
|
-
stackprof_record_sample();
|
537
|
-
in_signal_handler--;
|
740
|
+
stackprof_record_buffer();
|
538
741
|
}
|
539
742
|
|
540
743
|
static void
|
541
744
|
stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
542
745
|
{
|
746
|
+
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
747
|
+
|
543
748
|
_stackprof.overall_signals++;
|
544
|
-
|
749
|
+
|
750
|
+
if (!_stackprof.running) return;
|
751
|
+
|
752
|
+
// There's a possibility that the signal handler is invoked *after* the Ruby
|
753
|
+
// VM has been shut down (e.g. after ruby_cleanup(0)). In this case, things
|
754
|
+
// that rely on global VM state (e.g. rb_during_gc) will segfault.
|
755
|
+
if (!ruby_vm_running) return;
|
756
|
+
|
757
|
+
if (_stackprof.mode == sym_wall) {
|
758
|
+
// In "wall" mode, the SIGALRM signal will arrive at an arbitrary thread.
|
759
|
+
// In order to provide more useful results, especially under threaded web
|
760
|
+
// servers, we want to forward this signal to the original thread
|
761
|
+
// StackProf was started from.
|
762
|
+
// According to POSIX.1-2008 TC1 pthread_kill and pthread_self should be
|
763
|
+
// async-signal-safe.
|
764
|
+
if (pthread_self() != _stackprof.target_thread) {
|
765
|
+
pthread_kill(_stackprof.target_thread, sig);
|
766
|
+
return;
|
767
|
+
}
|
768
|
+
} else {
|
769
|
+
if (!ruby_native_thread_p()) return;
|
770
|
+
}
|
771
|
+
|
772
|
+
if (pthread_mutex_trylock(&lock)) return;
|
773
|
+
|
774
|
+
if (!_stackprof.ignore_gc && rb_during_gc()) {
|
775
|
+
VALUE mode = rb_gc_latest_gc_info(sym_state);
|
776
|
+
if (mode == sym_marking) {
|
777
|
+
_stackprof.unrecorded_gc_marking_samples++;
|
778
|
+
} else if (mode == sym_sweeping) {
|
779
|
+
_stackprof.unrecorded_gc_sweeping_samples++;
|
780
|
+
}
|
781
|
+
if(!_stackprof.unrecorded_gc_samples) {
|
782
|
+
// record start
|
783
|
+
capture_timestamp(&_stackprof.gc_start_timestamp);
|
784
|
+
}
|
545
785
|
_stackprof.unrecorded_gc_samples++;
|
546
|
-
rb_postponed_job_register_one(0,
|
786
|
+
rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
|
547
787
|
} else {
|
548
|
-
|
788
|
+
if (stackprof_use_postponed_job) {
|
789
|
+
rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
|
790
|
+
} else {
|
791
|
+
// Buffer a sample immediately, if an existing sample exists this will
|
792
|
+
// return immediately
|
793
|
+
stackprof_buffer_sample();
|
794
|
+
// Enqueue a job to record the sample
|
795
|
+
rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
|
796
|
+
}
|
549
797
|
}
|
798
|
+
pthread_mutex_unlock(&lock);
|
550
799
|
}
|
551
800
|
|
552
801
|
static void
|
@@ -555,7 +804,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
|
|
555
804
|
_stackprof.overall_signals++;
|
556
805
|
if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
|
557
806
|
return;
|
558
|
-
|
807
|
+
stackprof_sample_and_record();
|
559
808
|
}
|
560
809
|
|
561
810
|
static VALUE
|
@@ -565,7 +814,7 @@ stackprof_sample(VALUE self)
|
|
565
814
|
return Qfalse;
|
566
815
|
|
567
816
|
_stackprof.overall_signals++;
|
568
|
-
|
817
|
+
stackprof_sample_and_record();
|
569
818
|
return Qtrue;
|
570
819
|
}
|
571
820
|
|
@@ -580,11 +829,25 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
580
829
|
static void
|
581
830
|
stackprof_gc_mark(void *data)
|
582
831
|
{
|
832
|
+
if (RTEST(_stackprof.metadata))
|
833
|
+
rb_gc_mark(_stackprof.metadata);
|
834
|
+
|
583
835
|
if (RTEST(_stackprof.out))
|
584
836
|
rb_gc_mark(_stackprof.out);
|
585
837
|
|
586
838
|
if (_stackprof.frames)
|
587
839
|
st_foreach(_stackprof.frames, frame_mark_i, 0);
|
840
|
+
|
841
|
+
int i;
|
842
|
+
for (i = 0; i < _stackprof.buffer_count; i++) {
|
843
|
+
rb_gc_mark(_stackprof.frames_buffer[i]);
|
844
|
+
}
|
845
|
+
}
|
846
|
+
|
847
|
+
static size_t
|
848
|
+
stackprof_memsize(const void *data)
|
849
|
+
{
|
850
|
+
return sizeof(_stackprof);
|
588
851
|
}
|
589
852
|
|
590
853
|
static void
|
@@ -619,9 +882,41 @@ stackprof_atfork_child(void)
|
|
619
882
|
stackprof_stop(rb_mStackProf);
|
620
883
|
}
|
621
884
|
|
885
|
+
static VALUE
|
886
|
+
stackprof_use_postponed_job_l(VALUE self)
|
887
|
+
{
|
888
|
+
stackprof_use_postponed_job = 1;
|
889
|
+
return Qnil;
|
890
|
+
}
|
891
|
+
|
892
|
+
static void
|
893
|
+
stackprof_at_exit(ruby_vm_t* vm)
|
894
|
+
{
|
895
|
+
ruby_vm_running = 0;
|
896
|
+
}
|
897
|
+
|
898
|
+
static const rb_data_type_t stackprof_type = {
|
899
|
+
"StackProf",
|
900
|
+
{
|
901
|
+
stackprof_gc_mark,
|
902
|
+
NULL,
|
903
|
+
stackprof_memsize,
|
904
|
+
}
|
905
|
+
};
|
906
|
+
|
622
907
|
void
|
623
908
|
Init_stackprof(void)
|
624
909
|
{
|
910
|
+
size_t i;
|
911
|
+
/*
|
912
|
+
* As of Ruby 3.0, it should be safe to read stack frames at any time, unless YJIT is enabled
|
913
|
+
* See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
|
914
|
+
*/
|
915
|
+
stackprof_use_postponed_job = RUBY_API_VERSION_MAJOR < 3;
|
916
|
+
|
917
|
+
ruby_vm_running = 1;
|
918
|
+
ruby_vm_at_exit(stackprof_at_exit);
|
919
|
+
|
625
920
|
#define S(name) sym_##name = ID2SYM(rb_intern(#name));
|
626
921
|
S(object);
|
627
922
|
S(custom);
|
@@ -640,30 +935,42 @@ Init_stackprof(void)
|
|
640
935
|
S(mode);
|
641
936
|
S(interval);
|
642
937
|
S(raw);
|
938
|
+
S(raw_lines);
|
939
|
+
S(raw_sample_timestamps);
|
643
940
|
S(raw_timestamp_deltas);
|
644
941
|
S(out);
|
942
|
+
S(metadata);
|
943
|
+
S(ignore_gc);
|
645
944
|
S(frames);
|
646
945
|
S(aggregate);
|
946
|
+
S(state);
|
947
|
+
S(marking);
|
948
|
+
S(sweeping);
|
647
949
|
#undef S
|
648
950
|
|
649
|
-
|
951
|
+
/* Need to run this to warm the symbol table before we call this during GC */
|
952
|
+
rb_gc_latest_gc_info(sym_state);
|
953
|
+
|
650
954
|
rb_global_variable(&gc_hook);
|
955
|
+
gc_hook = TypedData_Wrap_Struct(rb_cObject, &stackprof_type, &_stackprof);
|
651
956
|
|
652
957
|
_stackprof.raw_samples = NULL;
|
653
958
|
_stackprof.raw_samples_len = 0;
|
654
959
|
_stackprof.raw_samples_capa = 0;
|
655
960
|
_stackprof.raw_sample_index = 0;
|
656
961
|
|
657
|
-
_stackprof.
|
658
|
-
_stackprof.
|
659
|
-
_stackprof.
|
962
|
+
_stackprof.raw_sample_times = NULL;
|
963
|
+
_stackprof.raw_sample_times_len = 0;
|
964
|
+
_stackprof.raw_sample_times_capa = 0;
|
660
965
|
|
661
|
-
_stackprof.fake_gc_frame = INT2FIX(0x9C);
|
662
966
|
_stackprof.empty_string = rb_str_new_cstr("");
|
663
|
-
_stackprof.fake_gc_frame_name = rb_str_new_cstr("(garbage collection)");
|
664
|
-
rb_global_variable(&_stackprof.fake_gc_frame_name);
|
665
967
|
rb_global_variable(&_stackprof.empty_string);
|
666
968
|
|
969
|
+
for (i = 0; i < TOTAL_FAKE_FRAMES; i++) {
|
970
|
+
_stackprof.fake_frame_names[i] = rb_str_new_cstr(fake_frame_cstrs[i]);
|
971
|
+
rb_global_variable(&_stackprof.fake_frame_names[i]);
|
972
|
+
}
|
973
|
+
|
667
974
|
rb_mStackProf = rb_define_module("StackProf");
|
668
975
|
rb_define_singleton_method(rb_mStackProf, "running?", stackprof_running_p, 0);
|
669
976
|
rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);
|
@@ -671,6 +978,7 @@ Init_stackprof(void)
|
|
671
978
|
rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
|
672
979
|
rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
|
673
980
|
rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
|
981
|
+
rb_define_singleton_method(rb_mStackProf, "use_postponed_job!", stackprof_use_postponed_job_l, 0);
|
674
982
|
|
675
983
|
pthread_atfork(stackprof_atfork_prepare, stackprof_atfork_parent, stackprof_atfork_child);
|
676
984
|
}
|