stackprof 0.2.15 → 0.2.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +43 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +16 -1
- data/README.md +57 -51
- data/Rakefile +21 -25
- data/bin/stackprof +115 -81
- data/ext/stackprof/extconf.rb +6 -0
- data/ext/stackprof/stackprof.c +335 -85
- data/lib/stackprof/autorun.rb +19 -0
- data/lib/stackprof/report.rb +65 -26
- data/lib/stackprof/truffleruby.rb +37 -0
- data/lib/stackprof.rb +19 -2
- data/stackprof.gemspec +8 -2
- data/test/fixtures/profile.dump +1 -0
- data/test/fixtures/profile.json +1 -0
- data/test/test_middleware.rb +30 -17
- data/test/test_report.rb +25 -1
- data/test/test_stackprof.rb +88 -15
- data/test/test_truffleruby.rb +18 -0
- data/vendor/FlameGraph/flamegraph.pl +751 -85
- metadata +14 -24
- data/.travis.yml +0 -21
- data/Dockerfile +0 -21
- data/Gemfile.lock +0 -27
data/ext/stackprof/stackprof.c
CHANGED
|
@@ -7,15 +7,20 @@
|
|
|
7
7
|
**********************************************************************/
|
|
8
8
|
|
|
9
9
|
#include <ruby/ruby.h>
|
|
10
|
+
#include <ruby/version.h>
|
|
10
11
|
#include <ruby/debug.h>
|
|
11
12
|
#include <ruby/st.h>
|
|
12
13
|
#include <ruby/io.h>
|
|
13
14
|
#include <ruby/intern.h>
|
|
15
|
+
#include <ruby/vm.h>
|
|
14
16
|
#include <signal.h>
|
|
15
17
|
#include <sys/time.h>
|
|
18
|
+
#include <time.h>
|
|
16
19
|
#include <pthread.h>
|
|
17
20
|
|
|
18
21
|
#define BUF_SIZE 2048
|
|
22
|
+
#define MICROSECONDS_IN_SECOND 1000000
|
|
23
|
+
#define NANOSECONDS_IN_SECOND 1000000000
|
|
19
24
|
|
|
20
25
|
#define FAKE_FRAME_GC INT2FIX(0)
|
|
21
26
|
#define FAKE_FRAME_MARK INT2FIX(1)
|
|
@@ -27,8 +32,52 @@ static const char *fake_frame_cstrs[] = {
|
|
|
27
32
|
"(sweeping)",
|
|
28
33
|
};
|
|
29
34
|
|
|
35
|
+
static int stackprof_use_postponed_job = 1;
|
|
36
|
+
static int ruby_vm_running = 0;
|
|
37
|
+
|
|
30
38
|
#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
|
|
31
39
|
|
|
40
|
+
#ifdef _POSIX_MONOTONIC_CLOCK
|
|
41
|
+
#define timestamp_t timespec
|
|
42
|
+
typedef struct timestamp_t timestamp_t;
|
|
43
|
+
|
|
44
|
+
/* Monotonic-clock variant (compiled when _POSIX_MONOTONIC_CLOCK is defined):
 * store the current CLOCK_MONOTONIC reading in *ts. In this branch
 * timestamp_t is struct timespec. */
static void capture_timestamp(timestamp_t *ts) {
|
|
45
|
+
/* The return value of clock_gettime is not checked. */
clock_gettime(CLOCK_MONOTONIC, ts);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/* Return the time elapsed from *start to *end, in microseconds.
 * The seconds difference is widened to int64_t before scaling so the
 * multiplication cannot overflow on platforms where time_t is 32 bits. */
static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
|
|
49
|
+
int64_t result = (int64_t)(end->tv_sec - start->tv_sec) * MICROSECONDS_IN_SECOND;
|
|
50
|
+
if (end->tv_nsec < start->tv_nsec) {
|
|
51
|
+
/* Borrow one second so the nanosecond difference below is non-negative. */
result -= MICROSECONDS_IN_SECOND;
|
|
52
|
+
result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
|
|
53
|
+
} else {
|
|
54
|
+
result += (end->tv_nsec - start->tv_nsec) / 1000;
|
|
55
|
+
}
|
|
56
|
+
return result;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/* Convert *ts (a struct timespec in this branch) to a single microsecond
 * count. tv_sec is widened to uint64_t before scaling so the product cannot
 * overflow on platforms where time_t is 32 bits. */
static uint64_t timestamp_usec(timestamp_t *ts) {
|
|
60
|
+
return ((uint64_t)ts->tv_sec * MICROSECONDS_IN_SECOND) + (uint64_t)(ts->tv_nsec / 1000);
|
|
61
|
+
}
|
|
62
|
+
#else
|
|
63
|
+
#define timestamp_t timeval
|
|
64
|
+
typedef struct timestamp_t timestamp_t;
|
|
65
|
+
|
|
66
|
+
/* Fallback variant (compiled when _POSIX_MONOTONIC_CLOCK is not defined):
 * store the current gettimeofday() reading in *ts. In this branch
 * timestamp_t is struct timeval. */
static void capture_timestamp(timestamp_t *ts) {
|
|
67
|
+
/* The return value of gettimeofday is not checked. */
gettimeofday(ts, NULL);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/* Return the time elapsed from *start to *end, in microseconds (timeval
 * variant). tv_sec is widened to int64_t before scaling so the product cannot
 * overflow on platforms where time_t is 32 bits. */
static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
|
|
71
|
+
struct timeval diff;
|
|
72
|
+
timersub(end, start, &diff);
|
|
73
|
+
return ((int64_t)diff.tv_sec * MICROSECONDS_IN_SECOND) + diff.tv_usec;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/* Convert *ts (a struct timeval in this branch) to a single microsecond
 * count. ts is a pointer, so its members are read with ->; both terms come
 * from *ts. tv_sec is widened to uint64_t before scaling to avoid overflow
 * where time_t is 32 bits. */
static uint64_t timestamp_usec(timestamp_t *ts) {
|
|
77
|
+
return ((uint64_t)ts->tv_sec * MICROSECONDS_IN_SECOND) + (uint64_t)ts->tv_usec;
|
|
78
|
+
}
|
|
79
|
+
#endif
|
|
80
|
+
|
|
32
81
|
typedef struct {
|
|
33
82
|
size_t total_samples;
|
|
34
83
|
size_t caller_samples;
|
|
@@ -37,7 +86,24 @@ typedef struct {
|
|
|
37
86
|
st_table *lines;
|
|
38
87
|
} frame_data_t;
|
|
39
88
|
|
|
89
|
+
/* Timing information stored for one raw sample. */
typedef struct {
|
|
90
|
+
/* Microsecond timestamp of the sample, as produced by timestamp_usec();
 * reported in the results under :raw_sample_timestamps. */
uint64_t timestamp_usec;
|
|
91
|
+
/* Microseconds elapsed since the previous sample, as produced by
 * delta_usec(); reported in the results under :raw_timestamp_deltas. */
int64_t delta_usec;
|
|
92
|
+
} sample_time_t;
|
|
93
|
+
|
|
94
|
+
/* We need to ensure that various memory operations are visible across
|
|
95
|
+
* threads. Ruby doesn't offer a portable way to do this sort of detection
|
|
96
|
+
* across all the Ruby versions we support, so we use something that casts a
|
|
97
|
+
* wide net (Clang, along with ICC, defines __GNUC__). */
|
|
98
|
+
#if defined(__GNUC__) && defined(__ATOMIC_SEQ_CST)
|
|
99
|
+
#define STACKPROF_HAVE_ATOMICS 1
|
|
100
|
+
#else
|
|
101
|
+
#define STACKPROF_HAVE_ATOMICS 0
|
|
102
|
+
#endif
|
|
103
|
+
|
|
40
104
|
static struct {
|
|
105
|
+
/* Access this field with the `STACKPROF_RUNNING` macro, below, since we
|
|
106
|
+
* can't properly express that this field has an atomic type. */
|
|
41
107
|
int running;
|
|
42
108
|
int raw;
|
|
43
109
|
int aggregate;
|
|
@@ -46,16 +112,17 @@ static struct {
|
|
|
46
112
|
VALUE interval;
|
|
47
113
|
VALUE out;
|
|
48
114
|
VALUE metadata;
|
|
115
|
+
int ignore_gc;
|
|
49
116
|
|
|
50
|
-
|
|
117
|
+
uint64_t *raw_samples;
|
|
51
118
|
size_t raw_samples_len;
|
|
52
119
|
size_t raw_samples_capa;
|
|
53
120
|
size_t raw_sample_index;
|
|
54
121
|
|
|
55
|
-
struct
|
|
56
|
-
|
|
57
|
-
size_t
|
|
58
|
-
size_t
|
|
122
|
+
struct timestamp_t last_sample_at;
|
|
123
|
+
sample_time_t *raw_sample_times;
|
|
124
|
+
size_t raw_sample_times_len;
|
|
125
|
+
size_t raw_sample_times_capa;
|
|
59
126
|
|
|
60
127
|
size_t overall_signals;
|
|
61
128
|
size_t overall_samples;
|
|
@@ -65,16 +132,29 @@ static struct {
|
|
|
65
132
|
size_t unrecorded_gc_sweeping_samples;
|
|
66
133
|
st_table *frames;
|
|
67
134
|
|
|
135
|
+
timestamp_t gc_start_timestamp;
|
|
136
|
+
|
|
68
137
|
VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
|
|
69
138
|
VALUE empty_string;
|
|
139
|
+
|
|
140
|
+
int buffer_count;
|
|
141
|
+
sample_time_t buffer_time;
|
|
70
142
|
VALUE frames_buffer[BUF_SIZE];
|
|
71
143
|
int lines_buffer[BUF_SIZE];
|
|
144
|
+
|
|
145
|
+
pthread_t target_thread;
|
|
72
146
|
} _stackprof;
|
|
73
147
|
|
|
148
|
+
#if STACKPROF_HAVE_ATOMICS
|
|
149
|
+
#define STACKPROF_RUNNING() __atomic_load_n(&_stackprof.running, __ATOMIC_ACQUIRE)
|
|
150
|
+
#else
|
|
151
|
+
#define STACKPROF_RUNNING() _stackprof.running
|
|
152
|
+
#endif
|
|
153
|
+
|
|
74
154
|
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
|
|
75
155
|
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
|
|
76
|
-
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames,
|
|
77
|
-
static VALUE sym_state, sym_marking, sym_sweeping;
|
|
156
|
+
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_raw_lines, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
|
|
157
|
+
static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
|
|
78
158
|
static VALUE sym_gc_samples, objtracer;
|
|
79
159
|
static VALUE gc_hook;
|
|
80
160
|
static VALUE rb_mStackProf;
|
|
@@ -88,9 +168,11 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
|
88
168
|
struct sigaction sa;
|
|
89
169
|
struct itimerval timer;
|
|
90
170
|
VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
|
|
171
|
+
int ignore_gc = 0;
|
|
91
172
|
int raw = 0, aggregate = 1;
|
|
173
|
+
VALUE metadata_val;
|
|
92
174
|
|
|
93
|
-
if (
|
|
175
|
+
if (STACKPROF_RUNNING())
|
|
94
176
|
return Qfalse;
|
|
95
177
|
|
|
96
178
|
rb_scan_args(argc, argv, "0:", &opts);
|
|
@@ -99,8 +181,11 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
|
99
181
|
mode = rb_hash_aref(opts, sym_mode);
|
|
100
182
|
interval = rb_hash_aref(opts, sym_interval);
|
|
101
183
|
out = rb_hash_aref(opts, sym_out);
|
|
184
|
+
if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
|
|
185
|
+
ignore_gc = 1;
|
|
186
|
+
}
|
|
102
187
|
|
|
103
|
-
|
|
188
|
+
metadata_val = rb_hash_aref(opts, sym_metadata);
|
|
104
189
|
if (RTEST(metadata_val)) {
|
|
105
190
|
if (!RB_TYPE_P(metadata_val, T_HASH))
|
|
106
191
|
rb_raise(rb_eArgError, "metadata should be a hash");
|
|
@@ -115,6 +200,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
|
115
200
|
}
|
|
116
201
|
if (!RTEST(mode)) mode = sym_wall;
|
|
117
202
|
|
|
203
|
+
if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
|
|
204
|
+
rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
|
|
205
|
+
}
|
|
206
|
+
|
|
118
207
|
if (!_stackprof.frames) {
|
|
119
208
|
_stackprof.frames = st_init_numtable();
|
|
120
209
|
_stackprof.overall_signals = 0;
|
|
@@ -146,16 +235,24 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
|
|
|
146
235
|
rb_raise(rb_eArgError, "unknown profiler mode");
|
|
147
236
|
}
|
|
148
237
|
|
|
149
|
-
_stackprof.running = 1;
|
|
150
238
|
_stackprof.raw = raw;
|
|
151
239
|
_stackprof.aggregate = aggregate;
|
|
152
240
|
_stackprof.mode = mode;
|
|
153
241
|
_stackprof.interval = interval;
|
|
242
|
+
_stackprof.ignore_gc = ignore_gc;
|
|
154
243
|
_stackprof.metadata = metadata;
|
|
155
244
|
_stackprof.out = out;
|
|
245
|
+
_stackprof.target_thread = pthread_self();
|
|
246
|
+
/* We need to ensure previous initialization stores are visible across
|
|
247
|
+
* threads. */
|
|
248
|
+
#if STACKPROF_HAVE_ATOMICS
|
|
249
|
+
__atomic_store_n(&_stackprof.running, 1, __ATOMIC_SEQ_CST);
|
|
250
|
+
#else
|
|
251
|
+
_stackprof.running = 1;
|
|
252
|
+
#endif
|
|
156
253
|
|
|
157
254
|
if (raw) {
|
|
158
|
-
|
|
255
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
|
159
256
|
}
|
|
160
257
|
|
|
161
258
|
return Qtrue;
|
|
@@ -167,9 +264,15 @@ stackprof_stop(VALUE self)
|
|
|
167
264
|
struct sigaction sa;
|
|
168
265
|
struct itimerval timer;
|
|
169
266
|
|
|
267
|
+
#if STACKPROF_HAVE_ATOMICS
|
|
268
|
+
int was_running = __atomic_exchange_n(&_stackprof.running, 0, __ATOMIC_SEQ_CST);
|
|
269
|
+
if (!was_running)
|
|
270
|
+
return Qfalse;
|
|
271
|
+
#else
|
|
170
272
|
if (!_stackprof.running)
|
|
171
273
|
return Qfalse;
|
|
172
274
|
_stackprof.running = 0;
|
|
275
|
+
#endif
|
|
173
276
|
|
|
174
277
|
if (_stackprof.mode == sym_object) {
|
|
175
278
|
rb_tracepoint_disable(objtracer);
|
|
@@ -190,13 +293,19 @@ stackprof_stop(VALUE self)
|
|
|
190
293
|
return Qtrue;
|
|
191
294
|
}
|
|
192
295
|
|
|
296
|
+
#if SIZEOF_VOIDP == SIZEOF_LONG
|
|
297
|
+
# define PTR2NUM(x) (LONG2NUM((long)(x)))
|
|
298
|
+
#else
|
|
299
|
+
# define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
|
|
300
|
+
#endif
|
|
301
|
+
|
|
193
302
|
static int
|
|
194
303
|
frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
195
304
|
{
|
|
196
305
|
VALUE edges = (VALUE)arg;
|
|
197
306
|
|
|
198
307
|
intptr_t weight = (intptr_t)val;
|
|
199
|
-
rb_hash_aset(edges,
|
|
308
|
+
rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
|
|
200
309
|
return ST_CONTINUE;
|
|
201
310
|
}
|
|
202
311
|
|
|
@@ -223,7 +332,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
|
|
|
223
332
|
VALUE name, file, edges, lines;
|
|
224
333
|
VALUE line;
|
|
225
334
|
|
|
226
|
-
rb_hash_aset(results,
|
|
335
|
+
rb_hash_aset(results, PTR2NUM(frame), details);
|
|
227
336
|
|
|
228
337
|
if (FIXNUM_P(frame)) {
|
|
229
338
|
name = _stackprof.fake_frame_names[FIX2INT(frame)];
|
|
@@ -272,7 +381,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
|
272
381
|
{
|
|
273
382
|
VALUE results, frames;
|
|
274
383
|
|
|
275
|
-
if (!_stackprof.frames ||
|
|
384
|
+
if (!_stackprof.frames || STACKPROF_RUNNING())
|
|
276
385
|
return Qnil;
|
|
277
386
|
|
|
278
387
|
results = rb_hash_new();
|
|
@@ -295,16 +404,25 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
|
295
404
|
|
|
296
405
|
if (_stackprof.raw && _stackprof.raw_samples_len) {
|
|
297
406
|
size_t len, n, o;
|
|
298
|
-
VALUE raw_timestamp_deltas;
|
|
407
|
+
VALUE raw_sample_timestamps, raw_timestamp_deltas;
|
|
299
408
|
VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
|
|
409
|
+
VALUE raw_lines = rb_ary_new_capa(_stackprof.raw_samples_len);
|
|
300
410
|
|
|
301
411
|
for (n = 0; n < _stackprof.raw_samples_len; n++) {
|
|
302
412
|
len = (size_t)_stackprof.raw_samples[n];
|
|
303
413
|
rb_ary_push(raw_samples, SIZET2NUM(len));
|
|
414
|
+
rb_ary_push(raw_lines, SIZET2NUM(len));
|
|
415
|
+
|
|
416
|
+
for (o = 0, n++; o < len; n++, o++) {
|
|
417
|
+
// Line is in the upper 16 bits
|
|
418
|
+
rb_ary_push(raw_lines, INT2NUM(_stackprof.raw_samples[n] >> 48));
|
|
419
|
+
|
|
420
|
+
VALUE frame = _stackprof.raw_samples[n] & ~((uint64_t)0xFFFF << 48);
|
|
421
|
+
rb_ary_push(raw_samples, PTR2NUM(frame));
|
|
422
|
+
}
|
|
304
423
|
|
|
305
|
-
for (o = 0, n++; o < len; n++, o++)
|
|
306
|
-
rb_ary_push(raw_samples, rb_obj_id(_stackprof.raw_samples[n]));
|
|
307
424
|
rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
|
|
425
|
+
rb_ary_push(raw_lines, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
|
|
308
426
|
}
|
|
309
427
|
|
|
310
428
|
free(_stackprof.raw_samples);
|
|
@@ -314,18 +432,22 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
|
|
|
314
432
|
_stackprof.raw_sample_index = 0;
|
|
315
433
|
|
|
316
434
|
rb_hash_aset(results, sym_raw, raw_samples);
|
|
435
|
+
rb_hash_aset(results, sym_raw_lines, raw_lines);
|
|
317
436
|
|
|
318
|
-
|
|
437
|
+
raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
|
438
|
+
raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
|
|
319
439
|
|
|
320
|
-
for (n = 0; n < _stackprof.
|
|
321
|
-
rb_ary_push(
|
|
440
|
+
for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
|
|
441
|
+
rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
|
|
442
|
+
rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
|
|
322
443
|
}
|
|
323
444
|
|
|
324
|
-
free(_stackprof.
|
|
325
|
-
_stackprof.
|
|
326
|
-
_stackprof.
|
|
327
|
-
_stackprof.
|
|
445
|
+
free(_stackprof.raw_sample_times);
|
|
446
|
+
_stackprof.raw_sample_times = NULL;
|
|
447
|
+
_stackprof.raw_sample_times_len = 0;
|
|
448
|
+
_stackprof.raw_sample_times_capa = 0;
|
|
328
449
|
|
|
450
|
+
rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
|
|
329
451
|
rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
|
|
330
452
|
|
|
331
453
|
_stackprof.raw = 0;
|
|
@@ -363,7 +485,7 @@ stackprof_run(int argc, VALUE *argv, VALUE self)
|
|
|
363
485
|
static VALUE
|
|
364
486
|
stackprof_running_p(VALUE self)
|
|
365
487
|
{
|
|
366
|
-
return
|
|
488
|
+
return STACKPROF_RUNNING() ? Qtrue : Qfalse;
|
|
367
489
|
}
|
|
368
490
|
|
|
369
491
|
static inline frame_data_t *
|
|
@@ -405,14 +527,14 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
|
|
|
405
527
|
}
|
|
406
528
|
|
|
407
529
|
void
|
|
408
|
-
stackprof_record_sample_for_stack(int num,
|
|
530
|
+
stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
|
|
409
531
|
{
|
|
410
532
|
int i, n;
|
|
411
533
|
VALUE prev_frame = Qnil;
|
|
412
534
|
|
|
413
535
|
_stackprof.overall_samples++;
|
|
414
536
|
|
|
415
|
-
if (_stackprof.raw) {
|
|
537
|
+
if (_stackprof.raw && num > 0) {
|
|
416
538
|
int found = 0;
|
|
417
539
|
|
|
418
540
|
/* If there's no sample buffer allocated, then allocate one. The buffer
|
|
@@ -440,7 +562,12 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
|
440
562
|
* in the frames buffer that came from Ruby. */
|
|
441
563
|
for (i = num-1, n = 0; i >= 0; i--, n++) {
|
|
442
564
|
VALUE frame = _stackprof.frames_buffer[i];
|
|
443
|
-
|
|
565
|
+
int line = _stackprof.lines_buffer[i];
|
|
566
|
+
|
|
567
|
+
// Encode the line into the upper 16 bits.
|
|
568
|
+
uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame;
|
|
569
|
+
|
|
570
|
+
if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != key)
|
|
444
571
|
break;
|
|
445
572
|
}
|
|
446
573
|
if (i == -1) {
|
|
@@ -458,26 +585,34 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
|
458
585
|
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num;
|
|
459
586
|
for (i = num-1; i >= 0; i--) {
|
|
460
587
|
VALUE frame = _stackprof.frames_buffer[i];
|
|
461
|
-
_stackprof.
|
|
588
|
+
int line = _stackprof.lines_buffer[i];
|
|
589
|
+
|
|
590
|
+
// Encode the line into the upper 16 bits.
|
|
591
|
+
uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame;
|
|
592
|
+
|
|
593
|
+
_stackprof.raw_samples[_stackprof.raw_samples_len++] = key;
|
|
462
594
|
}
|
|
463
595
|
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1;
|
|
464
596
|
}
|
|
465
597
|
|
|
466
598
|
/* If there's no timestamp delta buffer, allocate one */
|
|
467
|
-
if (!_stackprof.
|
|
468
|
-
_stackprof.
|
|
469
|
-
_stackprof.
|
|
470
|
-
_stackprof.
|
|
599
|
+
if (!_stackprof.raw_sample_times) {
|
|
600
|
+
_stackprof.raw_sample_times_capa = 100;
|
|
601
|
+
_stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
|
602
|
+
_stackprof.raw_sample_times_len = 0;
|
|
471
603
|
}
|
|
472
604
|
|
|
473
605
|
/* Double the buffer size if it's too small */
|
|
474
|
-
while (_stackprof.
|
|
475
|
-
_stackprof.
|
|
476
|
-
_stackprof.
|
|
606
|
+
while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
|
|
607
|
+
_stackprof.raw_sample_times_capa *= 2;
|
|
608
|
+
_stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
|
|
477
609
|
}
|
|
478
610
|
|
|
479
|
-
/* Store the time delta (which is the amount of
|
|
480
|
-
_stackprof.
|
|
611
|
+
/* Store the time delta (which is the amount of microseconds between samples). */
|
|
612
|
+
_stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
|
|
613
|
+
.timestamp_usec = sample_timestamp,
|
|
614
|
+
.delta_usec = timestamp_delta,
|
|
615
|
+
};
|
|
481
616
|
}
|
|
482
617
|
|
|
483
618
|
for (i = 0; i < num; i++) {
|
|
@@ -510,48 +645,60 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
|
|
|
510
645
|
}
|
|
511
646
|
|
|
512
647
|
if (_stackprof.raw) {
|
|
513
|
-
|
|
648
|
+
capture_timestamp(&_stackprof.last_sample_at);
|
|
514
649
|
}
|
|
515
650
|
}
|
|
516
651
|
|
|
652
|
+
// buffer the current profile frames
|
|
653
|
+
// This must be async-signal-safe
|
|
654
|
+
// Returns immediately if another set of frames is already in the buffer
|
|
517
655
|
void
|
|
518
|
-
|
|
656
|
+
stackprof_buffer_sample(void)
|
|
519
657
|
{
|
|
520
|
-
|
|
658
|
+
uint64_t start_timestamp = 0;
|
|
659
|
+
int64_t timestamp_delta = 0;
|
|
521
660
|
int num;
|
|
661
|
+
|
|
662
|
+
if (_stackprof.buffer_count > 0) {
|
|
663
|
+
// Another sample is already pending
|
|
664
|
+
return;
|
|
665
|
+
}
|
|
666
|
+
|
|
522
667
|
if (_stackprof.raw) {
|
|
523
|
-
struct
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
|
|
668
|
+
struct timestamp_t t;
|
|
669
|
+
capture_timestamp(&t);
|
|
670
|
+
start_timestamp = timestamp_usec(&t);
|
|
671
|
+
timestamp_delta = delta_usec(&_stackprof.last_sample_at, &t);
|
|
528
672
|
}
|
|
673
|
+
|
|
529
674
|
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
|
|
530
|
-
|
|
675
|
+
|
|
676
|
+
_stackprof.buffer_count = num;
|
|
677
|
+
_stackprof.buffer_time.timestamp_usec = start_timestamp;
|
|
678
|
+
_stackprof.buffer_time.delta_usec = timestamp_delta;
|
|
531
679
|
}
|
|
532
680
|
|
|
681
|
+
// Postponed job
|
|
533
682
|
void
|
|
534
|
-
stackprof_record_gc_samples()
|
|
683
|
+
stackprof_record_gc_samples(void)
|
|
535
684
|
{
|
|
536
|
-
|
|
537
|
-
|
|
685
|
+
int64_t delta_to_first_unrecorded_gc_sample = 0;
|
|
686
|
+
uint64_t start_timestamp = 0;
|
|
687
|
+
size_t i;
|
|
538
688
|
if (_stackprof.raw) {
|
|
539
|
-
struct
|
|
540
|
-
|
|
541
|
-
gettimeofday(&t, NULL);
|
|
542
|
-
timersub(&t, &_stackprof.last_sample_at, &diff);
|
|
689
|
+
struct timestamp_t t = _stackprof.gc_start_timestamp;
|
|
690
|
+
start_timestamp = timestamp_usec(&t);
|
|
543
691
|
|
|
544
692
|
// We don't know when the GC samples were actually marked, so let's
|
|
545
693
|
// assume that they were marked at a perfectly regular interval.
|
|
546
|
-
delta_to_first_unrecorded_gc_sample = (
|
|
694
|
+
delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
|
|
547
695
|
if (delta_to_first_unrecorded_gc_sample < 0) {
|
|
548
696
|
delta_to_first_unrecorded_gc_sample = 0;
|
|
549
697
|
}
|
|
550
698
|
}
|
|
551
699
|
|
|
552
|
-
|
|
553
700
|
for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
|
|
554
|
-
|
|
701
|
+
int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
|
|
555
702
|
|
|
556
703
|
if (_stackprof.unrecorded_gc_marking_samples) {
|
|
557
704
|
_stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
|
|
@@ -560,7 +707,7 @@ stackprof_record_gc_samples()
|
|
|
560
707
|
_stackprof.lines_buffer[1] = 0;
|
|
561
708
|
_stackprof.unrecorded_gc_marking_samples--;
|
|
562
709
|
|
|
563
|
-
stackprof_record_sample_for_stack(2, timestamp_delta);
|
|
710
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
|
564
711
|
} else if (_stackprof.unrecorded_gc_sweeping_samples) {
|
|
565
712
|
_stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
|
|
566
713
|
_stackprof.lines_buffer[0] = 0;
|
|
@@ -569,11 +716,11 @@ stackprof_record_gc_samples()
|
|
|
569
716
|
|
|
570
717
|
_stackprof.unrecorded_gc_sweeping_samples--;
|
|
571
718
|
|
|
572
|
-
stackprof_record_sample_for_stack(2, timestamp_delta);
|
|
719
|
+
stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
|
|
573
720
|
} else {
|
|
574
721
|
_stackprof.frames_buffer[0] = FAKE_FRAME_GC;
|
|
575
722
|
_stackprof.lines_buffer[0] = 0;
|
|
576
|
-
stackprof_record_sample_for_stack(1, timestamp_delta);
|
|
723
|
+
stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
|
|
577
724
|
}
|
|
578
725
|
}
|
|
579
726
|
_stackprof.during_gc += _stackprof.unrecorded_gc_samples;
|
|
@@ -582,46 +729,103 @@ stackprof_record_gc_samples()
|
|
|
582
729
|
_stackprof.unrecorded_gc_sweeping_samples = 0;
|
|
583
730
|
}
|
|
584
731
|
|
|
732
|
+
// Record the sample previously buffered by stackprof_buffer_sample: pass the
// buffered frame count and its timing (timestamp and delta, in microseconds)
// to stackprof_record_sample_for_stack, then mark the buffer as empty.
|
|
733
|
+
static void
|
|
734
|
+
stackprof_record_buffer(void)
|
|
735
|
+
{
|
|
736
|
+
stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
|
|
737
|
+
|
|
738
|
+
// reset the buffer; stackprof_buffer_sample returns early while
// buffer_count is greater than zero
|
|
739
|
+
_stackprof.buffer_count = 0;
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
// Take a sample and record it in one step: buffer the current frames, then
// immediately record whatever is in the buffer.
static void
|
|
743
|
+
stackprof_sample_and_record(void)
|
|
744
|
+
{
|
|
745
|
+
// Does nothing if a previously buffered sample is still pending; in that
// case the call below records the pending sample instead.
stackprof_buffer_sample();
|
|
746
|
+
stackprof_record_buffer();
|
|
747
|
+
}
|
|
748
|
+
|
|
585
749
|
static void
|
|
586
|
-
|
|
750
|
+
stackprof_job_record_gc(void *data)
|
|
587
751
|
{
|
|
588
|
-
|
|
589
|
-
if (in_signal_handler) return;
|
|
590
|
-
if (!_stackprof.running) return;
|
|
752
|
+
if (!STACKPROF_RUNNING()) return;
|
|
591
753
|
|
|
592
|
-
in_signal_handler++;
|
|
593
754
|
stackprof_record_gc_samples();
|
|
594
|
-
in_signal_handler--;
|
|
595
755
|
}
|
|
596
756
|
|
|
597
757
|
static void
|
|
598
|
-
|
|
758
|
+
stackprof_job_sample_and_record(void *data)
|
|
599
759
|
{
|
|
600
|
-
|
|
601
|
-
if (in_signal_handler) return;
|
|
602
|
-
if (!_stackprof.running) return;
|
|
760
|
+
if (!STACKPROF_RUNNING()) return;
|
|
603
761
|
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
762
|
+
stackprof_sample_and_record();
|
|
763
|
+
}
|
|
764
|
+
|
|
765
|
+
// Postponed-job callback: record the sample that the signal handler already
// buffered. The signal handler registers it when stackprof_use_postponed_job
// is 0. `data` is unused.
static void
|
|
766
|
+
stackprof_job_record_buffer(void *data)
|
|
767
|
+
{
|
|
768
|
+
// Drop the buffered sample's recording if profiling is no longer running.
if (!STACKPROF_RUNNING()) return;
|
|
769
|
+
|
|
770
|
+
stackprof_record_buffer();
|
|
607
771
|
}
|
|
608
772
|
|
|
609
773
|
static void
|
|
610
774
|
stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
|
|
611
775
|
{
|
|
776
|
+
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
|
777
|
+
|
|
612
778
|
_stackprof.overall_signals++;
|
|
613
|
-
|
|
779
|
+
|
|
780
|
+
if (!STACKPROF_RUNNING()) return;
|
|
781
|
+
|
|
782
|
+
// There's a possibility that the signal handler is invoked *after* the Ruby
|
|
783
|
+
// VM has been shut down (e.g. after ruby_cleanup(0)). In this case, things
|
|
784
|
+
// that rely on global VM state (e.g. rb_during_gc) will segfault.
|
|
785
|
+
if (!ruby_vm_running) return;
|
|
786
|
+
|
|
787
|
+
if (_stackprof.mode == sym_wall) {
|
|
788
|
+
// In "wall" mode, the SIGALRM signal will arrive at an arbitrary thread.
|
|
789
|
+
// In order to provide more useful results, especially under threaded web
|
|
790
|
+
// servers, we want to forward this signal to the original thread
|
|
791
|
+
// StackProf was started from.
|
|
792
|
+
// According to POSIX.1-2008 TC1 pthread_kill and pthread_self should be
|
|
793
|
+
// async-signal-safe.
|
|
794
|
+
if (pthread_self() != _stackprof.target_thread) {
|
|
795
|
+
pthread_kill(_stackprof.target_thread, sig);
|
|
796
|
+
return;
|
|
797
|
+
}
|
|
798
|
+
} else {
|
|
799
|
+
if (!ruby_native_thread_p()) return;
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
if (pthread_mutex_trylock(&lock)) return;
|
|
803
|
+
|
|
804
|
+
if (!_stackprof.ignore_gc && rb_during_gc()) {
|
|
614
805
|
VALUE mode = rb_gc_latest_gc_info(sym_state);
|
|
615
806
|
if (mode == sym_marking) {
|
|
616
807
|
_stackprof.unrecorded_gc_marking_samples++;
|
|
617
808
|
} else if (mode == sym_sweeping) {
|
|
618
809
|
_stackprof.unrecorded_gc_sweeping_samples++;
|
|
619
810
|
}
|
|
811
|
+
if(!_stackprof.unrecorded_gc_samples) {
|
|
812
|
+
// record start
|
|
813
|
+
capture_timestamp(&_stackprof.gc_start_timestamp);
|
|
814
|
+
}
|
|
620
815
|
_stackprof.unrecorded_gc_samples++;
|
|
621
|
-
rb_postponed_job_register_one(0,
|
|
816
|
+
rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
|
|
622
817
|
} else {
|
|
623
|
-
|
|
818
|
+
if (stackprof_use_postponed_job) {
|
|
819
|
+
rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
|
|
820
|
+
} else {
|
|
821
|
+
// Buffer a sample immediately, if an existing sample exists this will
|
|
822
|
+
// return immediately
|
|
823
|
+
stackprof_buffer_sample();
|
|
824
|
+
// Enqueue a job to record the sample
|
|
825
|
+
rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
|
|
826
|
+
}
|
|
624
827
|
}
|
|
828
|
+
pthread_mutex_unlock(&lock);
|
|
625
829
|
}
|
|
626
830
|
|
|
627
831
|
static void
|
|
@@ -630,17 +834,17 @@ stackprof_newobj_handler(VALUE tpval, void *data)
|
|
|
630
834
|
_stackprof.overall_signals++;
|
|
631
835
|
if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
|
|
632
836
|
return;
|
|
633
|
-
|
|
837
|
+
stackprof_sample_and_record();
|
|
634
838
|
}
|
|
635
839
|
|
|
636
840
|
static VALUE
|
|
637
841
|
stackprof_sample(VALUE self)
|
|
638
842
|
{
|
|
639
|
-
if (!
|
|
843
|
+
if (!STACKPROF_RUNNING())
|
|
640
844
|
return Qfalse;
|
|
641
845
|
|
|
642
846
|
_stackprof.overall_signals++;
|
|
643
|
-
|
|
847
|
+
stackprof_sample_and_record();
|
|
644
848
|
return Qtrue;
|
|
645
849
|
}
|
|
646
850
|
|
|
@@ -663,13 +867,24 @@ stackprof_gc_mark(void *data)
|
|
|
663
867
|
|
|
664
868
|
if (_stackprof.frames)
|
|
665
869
|
st_foreach(_stackprof.frames, frame_mark_i, 0);
|
|
870
|
+
|
|
871
|
+
int i;
|
|
872
|
+
for (i = 0; i < _stackprof.buffer_count; i++) {
|
|
873
|
+
rb_gc_mark(_stackprof.frames_buffer[i]);
|
|
874
|
+
}
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
/* Size callback referenced by stackprof_type. Reports only the size of the
 * static _stackprof struct; buffers reached through its pointers (for example
 * raw_samples and raw_sample_times) are not included. `data` is unused. */
static size_t
|
|
878
|
+
stackprof_memsize(const void *data)
|
|
879
|
+
{
|
|
880
|
+
return sizeof(_stackprof);
|
|
666
881
|
}
|
|
667
882
|
|
|
668
883
|
static void
|
|
669
884
|
stackprof_atfork_prepare(void)
|
|
670
885
|
{
|
|
671
886
|
struct itimerval timer;
|
|
672
|
-
if (
|
|
887
|
+
if (STACKPROF_RUNNING()) {
|
|
673
888
|
if (_stackprof.mode == sym_wall || _stackprof.mode == sym_cpu) {
|
|
674
889
|
memset(&timer, 0, sizeof(timer));
|
|
675
890
|
setitimer(_stackprof.mode == sym_wall ? ITIMER_REAL : ITIMER_PROF, &timer, 0);
|
|
@@ -681,7 +896,7 @@ static void
|
|
|
681
896
|
stackprof_atfork_parent(void)
|
|
682
897
|
{
|
|
683
898
|
struct itimerval timer;
|
|
684
|
-
if (
|
|
899
|
+
if (STACKPROF_RUNNING()) {
|
|
685
900
|
if (_stackprof.mode == sym_wall || _stackprof.mode == sym_cpu) {
|
|
686
901
|
timer.it_interval.tv_sec = 0;
|
|
687
902
|
timer.it_interval.tv_usec = NUM2LONG(_stackprof.interval);
|
|
@@ -697,10 +912,41 @@ stackprof_atfork_child(void)
|
|
|
697
912
|
stackprof_stop(rb_mStackProf);
|
|
698
913
|
}
|
|
699
914
|
|
|
915
|
+
/* Implementation of StackProf.use_postponed_job!: force the signal handler to
 * defer both sampling and recording to a postponed job, instead of buffering
 * the frames inside the handler. Always returns nil. */
static VALUE
|
|
916
|
+
stackprof_use_postponed_job_l(VALUE self)
|
|
917
|
+
{
|
|
918
|
+
stackprof_use_postponed_job = 1;
|
|
919
|
+
return Qnil;
|
|
920
|
+
}
|
|
921
|
+
|
|
922
|
+
/* VM exit hook, registered with ruby_vm_at_exit in Init_stackprof. Clears
 * ruby_vm_running so the signal handler returns before touching VM state.
 * `vm` is unused. */
static void
|
|
923
|
+
stackprof_at_exit(ruby_vm_t* vm)
|
|
924
|
+
{
|
|
925
|
+
ruby_vm_running = 0;
|
|
926
|
+
}
|
|
927
|
+
|
|
928
|
+
/* Typed-data descriptor used by Init_stackprof to wrap &_stackprof (via
 * TypedData_Wrap_Struct) as gc_hook. Fields are given positionally; members
 * not listed here are zero-initialized. */
static const rb_data_type_t stackprof_type = {
|
|
929
|
+
/* type name */
"StackProf",
|
|
930
|
+
{
|
|
931
|
+
/* mark callback */
stackprof_gc_mark,
|
|
932
|
+
/* no free callback: _stackprof is a static object */
NULL,
|
|
933
|
+
/* size callback */
stackprof_memsize,
|
|
934
|
+
}
|
|
935
|
+
};
|
|
936
|
+
|
|
700
937
|
void
|
|
701
938
|
Init_stackprof(void)
|
|
702
939
|
{
|
|
703
940
|
size_t i;
|
|
941
|
+
/*
|
|
942
|
+
* As of Ruby 3.0, it should be safe to read stack frames at any time, unless YJIT is enabled
|
|
943
|
+
* See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
|
|
944
|
+
*/
|
|
945
|
+
stackprof_use_postponed_job = RUBY_API_VERSION_MAJOR < 3;
|
|
946
|
+
|
|
947
|
+
ruby_vm_running = 1;
|
|
948
|
+
ruby_vm_at_exit(stackprof_at_exit);
|
|
949
|
+
|
|
704
950
|
#define S(name) sym_##name = ID2SYM(rb_intern(#name));
|
|
705
951
|
S(object);
|
|
706
952
|
S(custom);
|
|
@@ -719,9 +965,12 @@ Init_stackprof(void)
|
|
|
719
965
|
S(mode);
|
|
720
966
|
S(interval);
|
|
721
967
|
S(raw);
|
|
968
|
+
S(raw_lines);
|
|
969
|
+
S(raw_sample_timestamps);
|
|
722
970
|
S(raw_timestamp_deltas);
|
|
723
971
|
S(out);
|
|
724
972
|
S(metadata);
|
|
973
|
+
S(ignore_gc);
|
|
725
974
|
S(frames);
|
|
726
975
|
S(aggregate);
|
|
727
976
|
S(state);
|
|
@@ -732,17 +981,17 @@ Init_stackprof(void)
|
|
|
732
981
|
/* Need to run this to warm the symbol table before we call this during GC */
|
|
733
982
|
rb_gc_latest_gc_info(sym_state);
|
|
734
983
|
|
|
735
|
-
gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, &_stackprof);
|
|
736
984
|
rb_global_variable(&gc_hook);
|
|
985
|
+
gc_hook = TypedData_Wrap_Struct(rb_cObject, &stackprof_type, &_stackprof);
|
|
737
986
|
|
|
738
987
|
_stackprof.raw_samples = NULL;
|
|
739
988
|
_stackprof.raw_samples_len = 0;
|
|
740
989
|
_stackprof.raw_samples_capa = 0;
|
|
741
990
|
_stackprof.raw_sample_index = 0;
|
|
742
991
|
|
|
743
|
-
_stackprof.
|
|
744
|
-
_stackprof.
|
|
745
|
-
_stackprof.
|
|
992
|
+
_stackprof.raw_sample_times = NULL;
|
|
993
|
+
_stackprof.raw_sample_times_len = 0;
|
|
994
|
+
_stackprof.raw_sample_times_capa = 0;
|
|
746
995
|
|
|
747
996
|
_stackprof.empty_string = rb_str_new_cstr("");
|
|
748
997
|
rb_global_variable(&_stackprof.empty_string);
|
|
@@ -759,6 +1008,7 @@ Init_stackprof(void)
|
|
|
759
1008
|
rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
|
|
760
1009
|
rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
|
|
761
1010
|
rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
|
|
1011
|
+
rb_define_singleton_method(rb_mStackProf, "use_postponed_job!", stackprof_use_postponed_job_l, 0);
|
|
762
1012
|
|
|
763
1013
|
pthread_atfork(stackprof_atfork_prepare, stackprof_atfork_parent, stackprof_atfork_child);
|
|
764
1014
|
}
|