stackprof 0.2.10 → 0.2.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ci.yml +43 -0
- data/.gitignore +2 -0
- data/CHANGELOG.md +18 -0
- data/README.md +87 -67
- data/Rakefile +21 -25
- data/bin/stackprof +115 -70
- data/ext/stackprof/extconf.rb +6 -0
- data/ext/stackprof/stackprof.c +434 -37
- data/lib/stackprof/autorun.rb +19 -0
- data/lib/stackprof/flamegraph/flamegraph.js +926 -300
- data/lib/stackprof/flamegraph/viewer.html +29 -23
- data/lib/stackprof/middleware.rb +23 -7
- data/lib/stackprof/report.rb +323 -18
- data/lib/stackprof/truffleruby.rb +37 -0
- data/lib/stackprof.rb +18 -1
- data/sample.rb +3 -3
- data/stackprof.gemspec +11 -2
- data/test/fixtures/profile.dump +1 -0
- data/test/fixtures/profile.json +1 -0
- data/test/test_middleware.rb +13 -7
- data/test/test_report.rb +24 -0
- data/test/test_stackprof.rb +177 -25
- data/test/test_truffleruby.rb +18 -0
- data/vendor/FlameGraph/flamegraph.pl +751 -85
- metadata +17 -10
- data/.travis.yml +0 -8
- data/Gemfile.lock +0 -24
data/ext/stackprof/stackprof.c
CHANGED
@@ -7,23 +7,90 @@
  **********************************************************************/
 
 #include <ruby/ruby.h>
+#include <ruby/version.h>
 #include <ruby/debug.h>
 #include <ruby/st.h>
 #include <ruby/io.h>
 #include <ruby/intern.h>
+#include <ruby/vm.h>
 #include <signal.h>
 #include <sys/time.h>
+#include <time.h>
 #include <pthread.h>
 
 #define BUF_SIZE 2048
+#define MICROSECONDS_IN_SECOND 1000000
+#define NANOSECONDS_IN_SECOND 1000000000
+
+#define FAKE_FRAME_GC INT2FIX(0)
+#define FAKE_FRAME_MARK INT2FIX(1)
+#define FAKE_FRAME_SWEEP INT2FIX(2)
+
+static const char *fake_frame_cstrs[] = {
+    "(garbage collection)",
+    "(marking)",
+    "(sweeping)",
+};
+
+static int stackprof_use_postponed_job = 1;
+static int ruby_vm_running = 0;
+
+#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
+
+#ifdef _POSIX_MONOTONIC_CLOCK
+  #define timestamp_t timespec
+  typedef struct timestamp_t timestamp_t;
+
+  static void capture_timestamp(timestamp_t *ts) {
+      clock_gettime(CLOCK_MONOTONIC, ts);
+  }
+
+  static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
+      int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
+      if (end->tv_nsec < start->tv_nsec) {
+          result -= MICROSECONDS_IN_SECOND;
+          result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
+      } else {
+          result += (end->tv_nsec - start->tv_nsec) / 1000;
+      }
+      return result;
+  }
+
+  static uint64_t timestamp_usec(timestamp_t *ts) {
+      return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000);
+  }
+#else
+  #define timestamp_t timeval
+  typedef struct timestamp_t timestamp_t;
+
+  static void capture_timestamp(timestamp_t *ts) {
+      gettimeofday(ts, NULL);
+  }
+
+  static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
+      struct timeval diff;
+      timersub(end, start, &diff);
+      return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec;
+  }
+
+  static uint64_t timestamp_usec(timestamp_t *ts) {
+      return (MICROSECONDS_IN_SECOND * ts.tv_sec) + diff.tv_usec
+  }
+#endif
 
 typedef struct {
     size_t total_samples;
     size_t caller_samples;
+    size_t seen_at_sample_number;
     st_table *edges;
     st_table *lines;
 } frame_data_t;
 
+typedef struct {
+    uint64_t timestamp_usec;
+    int64_t delta_usec;
+} sample_time_t;
+
 static struct {
     int running;
     int raw;
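The hunk above introduces the monotonic-clock helpers that the raw-sample timing added later in this file relies on. As a rough standalone illustration (not code from the gem; `demo_delta_usec` and the ~1.5 ms pause are made up for the example), the same microsecond arithmetic on the `clock_gettime(CLOCK_MONOTONIC, ...)` path looks like this on a POSIX system:

/* Standalone illustration of the microsecond arithmetic used by the new
 * delta_usec()/timestamp_usec() helpers (CLOCK_MONOTONIC path).
 * Build with: cc -o ts_demo ts_demo.c */
#include <stdio.h>
#include <stdint.h>
#include <time.h>

#define MICROSECONDS_IN_SECOND 1000000
#define NANOSECONDS_IN_SECOND  1000000000

/* Microseconds elapsed between two monotonic timestamps. */
static int64_t demo_delta_usec(const struct timespec *start, const struct timespec *end) {
    int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
    if (end->tv_nsec < start->tv_nsec) {
        /* Borrow one second when the nanosecond field wrapped past the start value. */
        result -= MICROSECONDS_IN_SECOND;
        result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
    } else {
        result += (end->tv_nsec - start->tv_nsec) / 1000;
    }
    return result;
}

int main(void) {
    struct timespec a, b;
    struct timespec pause = {0, 1500000};   /* ~1.5 ms */

    clock_gettime(CLOCK_MONOTONIC, &a);
    nanosleep(&pause, NULL);
    clock_gettime(CLOCK_MONOTONIC, &b);
    printf("elapsed: %lld usec\n", (long long)demo_delta_usec(&a, &b));
    return 0;
}

Borrowing a full second when `tv_nsec` wraps keeps the delta exact in integer arithmetic, which matters because these deltas are accumulated once per sample.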
@@ -32,24 +99,42 @@ static struct {
     VALUE mode;
     VALUE interval;
     VALUE out;
+    VALUE metadata;
+    int ignore_gc;
 
     VALUE *raw_samples;
     size_t raw_samples_len;
     size_t raw_samples_capa;
     size_t raw_sample_index;
 
+    struct timestamp_t last_sample_at;
+    sample_time_t *raw_sample_times;
+    size_t raw_sample_times_len;
+    size_t raw_sample_times_capa;
+
     size_t overall_signals;
     size_t overall_samples;
     size_t during_gc;
+    size_t unrecorded_gc_samples;
+    size_t unrecorded_gc_marking_samples;
+    size_t unrecorded_gc_sweeping_samples;
     st_table *frames;
 
+    VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
+    VALUE empty_string;
+
+    int buffer_count;
+    sample_time_t buffer_time;
     VALUE frames_buffer[BUF_SIZE];
     int lines_buffer[BUF_SIZE];
+
+    pthread_t target_thread;
 } _stackprof;
 
 static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
 static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
-static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_frames,
+static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
+static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
 static VALUE sym_gc_samples, objtracer;
 static VALUE gc_hook;
 static VALUE rb_mStackProf;
@@ -62,8 +147,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
 {
     struct sigaction sa;
     struct itimerval timer;
-    VALUE opts = Qnil, mode = Qnil, interval = Qnil, out = Qfalse;
+    VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
+    int ignore_gc = 0;
     int raw = 0, aggregate = 1;
+    VALUE metadata_val;
 
     if (_stackprof.running)
         return Qfalse;
@@ -74,6 +161,17 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
         mode = rb_hash_aref(opts, sym_mode);
         interval = rb_hash_aref(opts, sym_interval);
         out = rb_hash_aref(opts, sym_out);
+        if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
+            ignore_gc = 1;
+        }
+
+        metadata_val = rb_hash_aref(opts, sym_metadata);
+        if (RTEST(metadata_val)) {
+            if (!RB_TYPE_P(metadata_val, T_HASH))
+                rb_raise(rb_eArgError, "metadata should be a hash");
+
+            metadata = metadata_val;
+        }
 
         if (RTEST(rb_hash_aref(opts, sym_raw)))
             raw = 1;
@@ -82,6 +180,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
     }
     if (!RTEST(mode)) mode = sym_wall;
 
+    if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
+        rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
+    }
+
     if (!_stackprof.frames) {
         _stackprof.frames = st_init_numtable();
         _stackprof.overall_signals = 0;
@@ -118,7 +220,14 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
     _stackprof.aggregate = aggregate;
     _stackprof.mode = mode;
     _stackprof.interval = interval;
+    _stackprof.ignore_gc = ignore_gc;
+    _stackprof.metadata = metadata;
     _stackprof.out = out;
+    _stackprof.target_thread = pthread_self();
+
+    if (raw) {
+        capture_timestamp(&_stackprof.last_sample_at);
+    }
 
     return Qtrue;
 }
@@ -152,13 +261,19 @@ stackprof_stop(VALUE self)
     return Qtrue;
 }
 
+#if SIZEOF_VOIDP == SIZEOF_LONG
+# define PTR2NUM(x) (LONG2NUM((long)(x)))
+#else
+# define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
+#endif
+
 static int
 frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
 {
     VALUE edges = (VALUE)arg;
 
     intptr_t weight = (intptr_t)val;
-    rb_hash_aset(edges,
+    rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
     return ST_CONTINUE;
 }
 
@@ -185,18 +300,26 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
     VALUE name, file, edges, lines;
     VALUE line;
 
-    rb_hash_aset(results,
+    rb_hash_aset(results, PTR2NUM(frame), details);
 
-
-
+    if (FIXNUM_P(frame)) {
+        name = _stackprof.fake_frame_names[FIX2INT(frame)];
+        file = _stackprof.empty_string;
+        line = INT2FIX(0);
+    } else {
+        name = rb_profile_frame_full_label(frame);
 
-
-
-
-
+        file = rb_profile_frame_absolute_path(frame);
+        if (NIL_P(file))
+            file = rb_profile_frame_path(frame);
+        line = rb_profile_frame_first_lineno(frame);
+    }
 
-
+    rb_hash_aset(details, sym_name, name);
+    rb_hash_aset(details, sym_file, file);
+    if (line != INT2FIX(0)) {
         rb_hash_aset(details, sym_line, line);
+    }
 
     rb_hash_aset(details, sym_total_samples, SIZET2NUM(frame_data->total_samples));
     rb_hash_aset(details, sym_samples, SIZET2NUM(frame_data->caller_samples));
@@ -230,12 +353,15 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
         return Qnil;
 
     results = rb_hash_new();
-    rb_hash_aset(results, sym_version, DBL2NUM(1.
+    rb_hash_aset(results, sym_version, DBL2NUM(1.2));
     rb_hash_aset(results, sym_mode, _stackprof.mode);
     rb_hash_aset(results, sym_interval, _stackprof.interval);
     rb_hash_aset(results, sym_samples, SIZET2NUM(_stackprof.overall_samples));
     rb_hash_aset(results, sym_gc_samples, SIZET2NUM(_stackprof.during_gc));
     rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples));
+    rb_hash_aset(results, sym_metadata, _stackprof.metadata);
+
+    _stackprof.metadata = Qnil;
 
     frames = rb_hash_new();
     rb_hash_aset(results, sym_frames, frames);
@@ -246,6 +372,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
 
     if (_stackprof.raw && _stackprof.raw_samples_len) {
         size_t len, n, o;
+        VALUE raw_sample_timestamps, raw_timestamp_deltas;
         VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
 
         for (n = 0; n < _stackprof.raw_samples_len; n++) {
@@ -253,7 +380,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
             rb_ary_push(raw_samples, SIZET2NUM(len));
 
             for (o = 0, n++; o < len; n++, o++)
-                rb_ary_push(raw_samples,
+                rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n]));
             rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
         }
 
@@ -262,9 +389,26 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
         _stackprof.raw_samples_len = 0;
         _stackprof.raw_samples_capa = 0;
         _stackprof.raw_sample_index = 0;
-        _stackprof.raw = 0;
 
         rb_hash_aset(results, sym_raw, raw_samples);
+
+        raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
+        raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
+
+        for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
+            rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
+            rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
+        }
+
+        free(_stackprof.raw_sample_times);
+        _stackprof.raw_sample_times = NULL;
+        _stackprof.raw_sample_times_len = 0;
+        _stackprof.raw_sample_times_capa = 0;
+
+        rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
+        rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
+
+        _stackprof.raw = 0;
     }
 
     if (argc == 1)
@@ -272,11 +416,12 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
 
     if (RTEST(_stackprof.out)) {
         VALUE file;
-        if (
-            file = rb_file_open_str(_stackprof.out, "w");
-        } else {
+        if (rb_respond_to(_stackprof.out, rb_intern("to_io"))) {
             file = rb_io_check_io(_stackprof.out);
+        } else {
+            file = rb_file_open_str(_stackprof.out, "w");
         }
+
         rb_marshal_dump(results, file);
         rb_io_flush(file);
         _stackprof.out = Qnil;
@@ -340,28 +485,39 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
 }
 
 void
-
+stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
 {
-    int
+    int i, n;
     VALUE prev_frame = Qnil;
 
     _stackprof.overall_samples++;
-    num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
 
-    if (_stackprof.raw) {
+    if (_stackprof.raw && num > 0) {
         int found = 0;
 
+        /* If there's no sample buffer allocated, then allocate one. The buffer
+         * format is the number of frames (num), then the list of frames (from
+         * `_stackprof.raw_samples`), followed by the number of times this
+         * particular stack has been seen in a row. Each "new" stack is added
+         * to the end of the buffer, but if the previous stack is the same as
+         * the current stack, the counter will be incremented. */
         if (!_stackprof.raw_samples) {
             _stackprof.raw_samples_capa = num * 100;
             _stackprof.raw_samples = malloc(sizeof(VALUE) * _stackprof.raw_samples_capa);
         }
 
-
+        /* If we can't fit all the samples in the buffer, double the buffer size. */
+        while (_stackprof.raw_samples_capa <= _stackprof.raw_samples_len + (num + 2)) {
            _stackprof.raw_samples_capa *= 2;
            _stackprof.raw_samples = realloc(_stackprof.raw_samples, sizeof(VALUE) * _stackprof.raw_samples_capa);
        }
 
+        /* If we've seen this stack before in the last sample, then increment the "seen" count. */
         if (_stackprof.raw_samples_len > 0 && _stackprof.raw_samples[_stackprof.raw_sample_index] == (VALUE)num) {
+            /* The number of samples could have been the same, but the stack
+             * might be different, so we need to check the stack here. Stacks
+             * in the raw buffer are stored in the opposite direction of stacks
+             * in the frames buffer that came from Ruby. */
             for (i = num-1, n = 0; i >= 0; i--, n++) {
                 VALUE frame = _stackprof.frames_buffer[i];
                 if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != frame)
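The comments added in the hunk above describe the raw-sample buffer as a run-length encoding: each record is the stack depth, then the frames, then a count of how many consecutive samples hit that exact stack. A simplified standalone sketch of that layout (plain ints in place of Ruby VALUEs, fixed capacity, no frame-order reversal, and made-up names such as `push_stack`) behaves like this:

/* Demo of a [length][frames...][repeat count] run-length buffer for stacks. */
#include <stdio.h>
#include <string.h>

#define MAX_BUF 64

static int buf[MAX_BUF];
static int buf_len = 0;
static int last_record = 0;   /* index of the most recent [length] slot */

static void push_stack(const int *frames, int num) {
    if (buf_len > 0 && buf[last_record] == num &&
        memcmp(&buf[last_record + 1], frames, num * sizeof(int)) == 0) {
        buf[last_record + 1 + num]++;      /* same stack as last time: bump the count */
        return;
    }
    last_record = buf_len;
    buf[buf_len++] = num;                  /* stack length */
    memcpy(&buf[buf_len], frames, num * sizeof(int));
    buf_len += num;
    buf[buf_len++] = 1;                    /* seen-in-a-row count */
}

int main(void) {
    int stack_a[] = {10, 20, 30};
    int stack_b[] = {10, 20};
    push_stack(stack_a, 3);
    push_stack(stack_a, 3);                /* repeat: only the trailing count changes */
    push_stack(stack_b, 2);
    for (int i = 0; i < buf_len; i++) printf("%d ", buf[i]);
    printf("\n");                          /* prints: 3 10 20 30 2 2 10 20 1 */
    return 0;
}

The payoff is that an idle program sampling the same stack thousands of times stores it once plus a counter, which is why the real buffer can grow by doubling only occasionally.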
@@ -373,7 +529,11 @@ stackprof_record_sample()
             }
         }
 
+        /* If we haven't seen the stack, then add it to the buffer along with
+         * the length of the stack and a 1 for the "seen" count */
         if (!found) {
+            /* Bump the `raw_sample_index` up so that the next iteration can
+             * find the previously recorded stack size. */
             _stackprof.raw_sample_index = _stackprof.raw_samples_len;
             _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num;
             for (i = num-1; i >= 0; i--) {
@@ -382,6 +542,25 @@ stackprof_record_sample()
             }
             _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1;
         }
+
+        /* If there's no timestamp delta buffer, allocate one */
+        if (!_stackprof.raw_sample_times) {
+            _stackprof.raw_sample_times_capa = 100;
+            _stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
+            _stackprof.raw_sample_times_len = 0;
+        }
+
+        /* Double the buffer size if it's too small */
+        while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
+            _stackprof.raw_sample_times_capa *= 2;
+            _stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
+        }
+
+        /* Store the time delta (which is the amount of microseconds between samples). */
+        _stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
+            .timestamp_usec = sample_timestamp,
+            .delta_usec = timestamp_delta,
+        };
     }
 
     for (i = 0; i < num; i++) {
@@ -389,7 +568,10 @@ stackprof_record_sample()
         VALUE frame = _stackprof.frames_buffer[i];
         frame_data_t *frame_data = sample_for(frame);
 
-        frame_data->
+        if (frame_data->seen_at_sample_number != _stackprof.overall_samples) {
+            frame_data->total_samples++;
+        }
+        frame_data->seen_at_sample_number = _stackprof.overall_samples;
 
         if (i == 0) {
             frame_data->caller_samples++;
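The `seen_at_sample_number` check added above keeps a recursive function from inflating its `total_samples` when it appears several times within a single stack. A minimal standalone sketch of the same tagging idea (hypothetical `frame_stats`/`count_stack` names, not the gem's types):

/* Count each distinct frame at most once per sample by tagging it with the
 * current sample number, avoiding a per-sample "already seen" set. */
#include <stdio.h>
#include <stddef.h>

typedef struct {
    size_t total_samples;
    size_t seen_at_sample_number;
} frame_stats;

static void count_stack(frame_stats **stack, int depth, size_t sample_number) {
    for (int i = 0; i < depth; i++) {
        frame_stats *f = stack[i];
        if (f->seen_at_sample_number != sample_number)
            f->total_samples++;                 /* first appearance in this sample */
        f->seen_at_sample_number = sample_number;
    }
}

int main(void) {
    frame_stats fib = {0, 0}, main_fn = {0, 0};
    /* A recursive stack: fib -> fib -> fib -> main */
    frame_stats *stack[] = {&fib, &fib, &fib, &main_fn};
    count_stack(stack, 4, 1);
    printf("fib=%zu main=%zu\n", fib.total_samples, main_fn.total_samples); /* fib=1 main=1 */
    return 0;
}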
@@ -400,37 +582,194 @@ stackprof_record_sample()
         }
 
         if (_stackprof.aggregate && line > 0) {
-            if (!frame_data->lines)
-                frame_data->lines = st_init_numtable();
             size_t half = (size_t)1<<(8*SIZEOF_SIZE_T/2);
             size_t increment = i == 0 ? half + 1 : half;
+            if (!frame_data->lines)
+                frame_data->lines = st_init_numtable();
             st_numtable_increment(frame_data->lines, (st_data_t)line, increment);
         }
 
         prev_frame = frame;
     }
+
+    if (_stackprof.raw) {
+        capture_timestamp(&_stackprof.last_sample_at);
+    }
+}
+
+// buffer the current profile frames
+// This must be async-signal-safe
+// Returns immediately if another set of frames are already in the buffer
+void
+stackprof_buffer_sample(void)
+{
+    uint64_t start_timestamp = 0;
+    int64_t timestamp_delta = 0;
+    int num;
+
+    if (_stackprof.buffer_count > 0) {
+        // Another sample is already pending
+        return;
+    }
+
+    if (_stackprof.raw) {
+        struct timestamp_t t;
+        capture_timestamp(&t);
+        start_timestamp = timestamp_usec(&t);
+        timestamp_delta = delta_usec(&_stackprof.last_sample_at, &t);
+    }
+
+    num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
+
+    _stackprof.buffer_count = num;
+    _stackprof.buffer_time.timestamp_usec = start_timestamp;
+    _stackprof.buffer_time.delta_usec = timestamp_delta;
+}
+
+void
+stackprof_record_gc_samples(void)
+{
+    int64_t delta_to_first_unrecorded_gc_sample = 0;
+    uint64_t start_timestamp = 0;
+    size_t i;
+    if (_stackprof.raw) {
+        struct timestamp_t t;
+        capture_timestamp(&t);
+        start_timestamp = timestamp_usec(&t);
+
+        // We don't know when the GC samples were actually marked, so let's
+        // assume that they were marked at a perfectly regular interval.
+        delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
+        if (delta_to_first_unrecorded_gc_sample < 0) {
+            delta_to_first_unrecorded_gc_sample = 0;
+        }
+    }
+
+    for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
+        int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
+
+        if (_stackprof.unrecorded_gc_marking_samples) {
+            _stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
+            _stackprof.lines_buffer[0] = 0;
+            _stackprof.frames_buffer[1] = FAKE_FRAME_GC;
+            _stackprof.lines_buffer[1] = 0;
+            _stackprof.unrecorded_gc_marking_samples--;
+
+            stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
+        } else if (_stackprof.unrecorded_gc_sweeping_samples) {
+            _stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
+            _stackprof.lines_buffer[0] = 0;
+            _stackprof.frames_buffer[1] = FAKE_FRAME_GC;
+            _stackprof.lines_buffer[1] = 0;
+
+            _stackprof.unrecorded_gc_sweeping_samples--;
+
+            stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
+        } else {
+            _stackprof.frames_buffer[0] = FAKE_FRAME_GC;
+            _stackprof.lines_buffer[0] = 0;
+            stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
+        }
+    }
+    _stackprof.during_gc += _stackprof.unrecorded_gc_samples;
+    _stackprof.unrecorded_gc_samples = 0;
+    _stackprof.unrecorded_gc_marking_samples = 0;
+    _stackprof.unrecorded_gc_sweeping_samples = 0;
+}
+
+// record the sample previously buffered by stackprof_buffer_sample
+static void
+stackprof_record_buffer(void)
+{
+    stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
+
+    // reset the buffer
+    _stackprof.buffer_count = 0;
+}
+
+static void
+stackprof_sample_and_record(void)
+{
+    stackprof_buffer_sample();
+    stackprof_record_buffer();
+}
+
+static void
+stackprof_job_record_gc(void *data)
+{
+    if (!_stackprof.running) return;
+
+    stackprof_record_gc_samples();
+}
+
+static void
+stackprof_job_sample_and_record(void *data)
+{
+    if (!_stackprof.running) return;
+
+    stackprof_sample_and_record();
 }
 
 static void
-
+stackprof_job_record_buffer(void *data)
 {
-    static int in_signal_handler = 0;
-    if (in_signal_handler) return;
     if (!_stackprof.running) return;
 
-
-    stackprof_record_sample();
-    in_signal_handler--;
+    stackprof_record_buffer();
 }
 
 static void
 stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
 {
+    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
     _stackprof.overall_signals++;
-
-
-
-
+
+    if (!_stackprof.running) return;
+
+    // There's a possibility that the signal handler is invoked *after* the Ruby
+    // VM has been shut down (e.g. after ruby_cleanup(0)). In this case, things
+    // that rely on global VM state (e.g. rb_during_gc) will segfault.
+    if (!ruby_vm_running) return;
+
+    if (_stackprof.mode == sym_wall) {
+        // In "wall" mode, the SIGALRM signal will arrive at an arbitrary thread.
+        // In order to provide more useful results, especially under threaded web
+        // servers, we want to forward this signal to the original thread
+        // StackProf was started from.
+        // According to POSIX.1-2008 TC1 pthread_kill and pthread_self should be
+        // async-signal-safe.
+        if (pthread_self() != _stackprof.target_thread) {
+            pthread_kill(_stackprof.target_thread, sig);
+            return;
+        }
+    } else {
+        if (!ruby_native_thread_p()) return;
+    }
+
+    if (pthread_mutex_trylock(&lock)) return;
+
+    if (!_stackprof.ignore_gc && rb_during_gc()) {
+        VALUE mode = rb_gc_latest_gc_info(sym_state);
+        if (mode == sym_marking) {
+            _stackprof.unrecorded_gc_marking_samples++;
+        } else if (mode == sym_sweeping) {
+            _stackprof.unrecorded_gc_sweeping_samples++;
+        }
+        _stackprof.unrecorded_gc_samples++;
+        rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
+    } else {
+        if (stackprof_use_postponed_job) {
+            rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
+        } else {
+            // Buffer a sample immediately, if an existing sample exists this will
+            // return immediately
+            stackprof_buffer_sample();
+            // Enqueue a job to record the sample
+            rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
+        }
+    }
+    pthread_mutex_unlock(&lock);
 }
 
 static void
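The rewritten signal handler above forwards wall-mode SIGALRM to the thread that started profiling, guards re-entry with a non-blocking trylock, and defers the real sampling work to a postponed job. A stripped-down sketch of just the forward-and-trylock pattern (demo-only names like `on_sigalrm`; the trylock mirrors the gem's approach even though pthread mutexes are not formally async-signal-safe):

/* Build with: cc -pthread sig_demo.c */
#include <pthread.h>
#include <signal.h>
#include <stdatomic.h>

static pthread_t target_thread;
static atomic_int pending_samples;

static void on_sigalrm(int sig) {
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* Wrong thread: re-send the signal to the thread that started profiling. */
    if (!pthread_equal(pthread_self(), target_thread)) {
        pthread_kill(target_thread, sig);
        return;
    }
    /* Already handling a signal: drop this one instead of blocking. */
    if (pthread_mutex_trylock(&lock))
        return;
    /* Only minimal bookkeeping happens in the handler; the heavy work would be
     * deferred to a postponed job, as the diff above does. */
    atomic_fetch_add(&pending_samples, 1);
    pthread_mutex_unlock(&lock);
}

int main(void) {
    struct sigaction sa;
    sa.sa_handler = on_sigalrm;
    sa.sa_flags = 0;
    sigemptyset(&sa.sa_mask);

    target_thread = pthread_self();
    sigaction(SIGALRM, &sa, NULL);
    raise(SIGALRM);                         /* deliver one signal to ourselves */
    return atomic_load(&pending_samples) == 1 ? 0 : 1;
}

Forwarding to a fixed target thread is what makes the results meaningful under threaded web servers, where the timer signal can otherwise land on any worker.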
@@ -439,7 +778,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
     _stackprof.overall_signals++;
     if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
         return;
-
+    stackprof_sample_and_record();
 }
 
 static VALUE
@@ -449,7 +788,7 @@ stackprof_sample(VALUE self)
         return Qfalse;
 
     _stackprof.overall_signals++;
-
+    stackprof_sample_and_record();
     return Qtrue;
 }
 
@@ -464,11 +803,18 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
 static void
 stackprof_gc_mark(void *data)
 {
+    if (RTEST(_stackprof.metadata))
+        rb_gc_mark(_stackprof.metadata);
+
     if (RTEST(_stackprof.out))
         rb_gc_mark(_stackprof.out);
 
     if (_stackprof.frames)
         st_foreach(_stackprof.frames, frame_mark_i, 0);
+
+    for (int i = 0; i < _stackprof.buffer_count; i++) {
+        rb_gc_mark(_stackprof.frames_buffer[i]);
+    }
 }
 
 static void
@@ -503,9 +849,32 @@ stackprof_atfork_child(void)
     stackprof_stop(rb_mStackProf);
 }
 
+static VALUE
+stackprof_use_postponed_job_l(VALUE self)
+{
+    stackprof_use_postponed_job = 1;
+    return Qnil;
+}
+
+static void
+stackprof_at_exit(ruby_vm_t* vm)
+{
+    ruby_vm_running = 0;
+}
+
 void
 Init_stackprof(void)
 {
+    size_t i;
+    /*
+     * As of Ruby 3.0, it should be safe to read stack frames at any time, unless YJIT is enabled
+     * See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
+     */
+    stackprof_use_postponed_job = RUBY_API_VERSION_MAJOR < 3;
+
+    ruby_vm_running = 1;
+    ruby_vm_at_exit(stackprof_at_exit);
+
 #define S(name) sym_##name = ID2SYM(rb_intern(#name));
     S(object);
     S(custom);
@@ -524,14 +893,41 @@ Init_stackprof(void)
     S(mode);
     S(interval);
     S(raw);
+    S(raw_sample_timestamps);
+    S(raw_timestamp_deltas);
     S(out);
+    S(metadata);
+    S(ignore_gc);
     S(frames);
     S(aggregate);
+    S(state);
+    S(marking);
+    S(sweeping);
 #undef S
 
+    /* Need to run this to warm the symbol table before we call this during GC */
+    rb_gc_latest_gc_info(sym_state);
+
     gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, &_stackprof);
     rb_global_variable(&gc_hook);
 
+    _stackprof.raw_samples = NULL;
+    _stackprof.raw_samples_len = 0;
+    _stackprof.raw_samples_capa = 0;
+    _stackprof.raw_sample_index = 0;
+
+    _stackprof.raw_sample_times = NULL;
+    _stackprof.raw_sample_times_len = 0;
+    _stackprof.raw_sample_times_capa = 0;
+
+    _stackprof.empty_string = rb_str_new_cstr("");
+    rb_global_variable(&_stackprof.empty_string);
+
+    for (i = 0; i < TOTAL_FAKE_FRAMES; i++) {
+        _stackprof.fake_frame_names[i] = rb_str_new_cstr(fake_frame_cstrs[i]);
+        rb_global_variable(&_stackprof.fake_frame_names[i]);
+    }
+
     rb_mStackProf = rb_define_module("StackProf");
     rb_define_singleton_method(rb_mStackProf, "running?", stackprof_running_p, 0);
     rb_define_singleton_method(rb_mStackProf, "run", stackprof_run, -1);
@@ -539,6 +935,7 @@ Init_stackprof(void)
     rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
     rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
     rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
+    rb_define_singleton_method(rb_mStackProf, "use_postponed_job!", stackprof_use_postponed_job_l, 0);
 
     pthread_atfork(stackprof_atfork_prepare, stackprof_atfork_parent, stackprof_atfork_child);
 }