stackprof 0.2.17 → 0.2.20

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b55691b8d1966ba4b2b2458a4908b2a2d5b65f2074dfe3b3b1b6350f752704ec
4
- data.tar.gz: 79e2a0508a1c722f39cc61d39b0577cfb5520669a7a2db4cadac6c49dcb1267a
3
+ metadata.gz: e627bf7fbeca0cb94e8be863b0a1db9160534d5172c1409ba4306e0f16a57ae0
4
+ data.tar.gz: f881ec5ab3267f3b48f57794c3bce910b39f890db32817655b4e5253733fc466
5
5
  SHA512:
6
- metadata.gz: 2fa22779f03c332a3680f526bf1df29553588773fabeb00da327af3525018e535e973bafd990254c6ad50516faf5e8b1d087bb7c208c99d0b512d99ccdef53bb
7
- data.tar.gz: 73ba1328c793b0c0c4657e7826f4bf2cd52102c61a2ca2e3e0b1c5240ffe96ee0ec328ea831b1592c10b4e13c6aec2bb9d28fd05e93ddef999d5131e55124362
6
+ metadata.gz: 514c6697e4465fbce7990fffc2a71ca5aa544e1f526b338fde343c14bc801bb5056534e371e4d8fd5642f1b73b92126b2a5e476eb5292dde23bd0da734d7e236
7
+ data.tar.gz: bed130e196f1004b3e74d2db466732fa8b23e582df3fcf944161112036053e95a5a05a3ed2ac175a6603e1f3b28b6062491aef89f4d30b764be3886aaf586b19
@@ -8,7 +8,7 @@ jobs:
8
8
  strategy:
9
9
  fail-fast: false
10
10
  matrix:
11
- ruby: [ ruby-head, '3.0', '2.7', '2.6', '2.5', '2.4', '2.3', '2.2' ]
11
+ ruby: [ ruby-head, '3.1', '3.0', '2.7', '2.6', '2.5', '2.4', '2.3', '2.2' ]
12
12
  steps:
13
13
  - name: Checkout
14
14
  uses: actions/checkout@v2
data/bin/stackprof CHANGED
@@ -42,7 +42,7 @@ reports = []
42
42
  while ARGV.size > 0
43
43
  begin
44
44
  file = ARGV.pop
45
- reports << StackProf::Report.new(Marshal.load(IO.binread(file)))
45
+ reports << StackProf::Report.from_file(file)
46
46
  rescue TypeError => e
47
47
  STDERR.puts "** error parsing #{file}: #{e.inspect}"
48
48
  end
@@ -7,37 +7,75 @@
7
7
  **********************************************************************/
8
8
 
9
9
  #include <ruby/ruby.h>
10
+ #include <ruby/version.h>
10
11
  #include <ruby/debug.h>
11
12
  #include <ruby/st.h>
12
13
  #include <ruby/io.h>
13
14
  #include <ruby/intern.h>
14
15
  #include <signal.h>
15
16
  #include <sys/time.h>
17
+ #include <time.h>
16
18
  #include <pthread.h>
17
19
 
18
20
  #define BUF_SIZE 2048
19
21
  #define MICROSECONDS_IN_SECOND 1000000
22
+ #define NANOSECONDS_IN_SECOND 1000000000
20
23
 
21
24
  #define FAKE_FRAME_GC INT2FIX(0)
22
25
  #define FAKE_FRAME_MARK INT2FIX(1)
23
26
  #define FAKE_FRAME_SWEEP INT2FIX(2)
24
27
 
25
- /*
26
- * As of Ruby 3.0, it should be safe to read stack frames at any time
27
- * See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
28
- */
29
- #if RUBY_API_VERSION_MAJOR < 3
30
- #define USE_POSTPONED_JOB
31
- #endif
32
-
33
28
  static const char *fake_frame_cstrs[] = {
34
29
  "(garbage collection)",
35
30
  "(marking)",
36
31
  "(sweeping)",
37
32
  };
38
33
 
34
+ static int stackprof_use_postponed_job = 1;
35
+
39
36
  #define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
40
37
 
38
+ #ifdef _POSIX_MONOTONIC_CLOCK
39
+ #define timestamp_t timespec
40
+ typedef struct timestamp_t timestamp_t;
41
+
42
+ static void capture_timestamp(timestamp_t *ts) {
43
+ clock_gettime(CLOCK_MONOTONIC, ts);
44
+ }
45
+
46
+ static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
47
+ int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
48
+ if (end->tv_nsec < start->tv_nsec) {
49
+ result -= MICROSECONDS_IN_SECOND;
50
+ result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
51
+ } else {
52
+ result += (end->tv_nsec - start->tv_nsec) / 1000;
53
+ }
54
+ return result;
55
+ }
56
+
57
+ static uint64_t timestamp_usec(timestamp_t *ts) {
58
+ return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000);
59
+ }
60
+ #else
61
+ #define timestamp_t timeval
62
+ typedef struct timestamp_t timestamp_t;
63
+
64
+ static void capture_timestamp(timestamp_t *ts) {
65
+ gettimeofday(ts, NULL);
66
+ }
67
+
68
+ static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
69
+ struct timeval diff;
70
+ timersub(end, start, &diff);
71
+ return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec;
72
+ }
73
+
74
+ static uint64_t timestamp_usec(timestamp_t *ts) {
75
+ return (MICROSECONDS_IN_SECOND * ts.tv_sec) + diff.tv_usec
76
+ }
77
+ #endif
78
+
41
79
  typedef struct {
42
80
  size_t total_samples;
43
81
  size_t caller_samples;
@@ -46,6 +84,11 @@ typedef struct {
46
84
  st_table *lines;
47
85
  } frame_data_t;
48
86
 
87
+ typedef struct {
88
+ uint64_t timestamp_usec;
89
+ int64_t delta_usec;
90
+ } sample_time_t;
91
+
49
92
  static struct {
50
93
  int running;
51
94
  int raw;
@@ -62,10 +105,10 @@ static struct {
62
105
  size_t raw_samples_capa;
63
106
  size_t raw_sample_index;
64
107
 
65
- struct timeval last_sample_at;
66
- int *raw_timestamp_deltas;
67
- size_t raw_timestamp_deltas_len;
68
- size_t raw_timestamp_deltas_capa;
108
+ struct timestamp_t last_sample_at;
109
+ sample_time_t *raw_sample_times;
110
+ size_t raw_sample_times_len;
111
+ size_t raw_sample_times_capa;
69
112
 
70
113
  size_t overall_signals;
71
114
  size_t overall_samples;
@@ -77,6 +120,9 @@ static struct {
77
120
 
78
121
  VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
79
122
  VALUE empty_string;
123
+
124
+ int buffer_count;
125
+ sample_time_t buffer_time;
80
126
  VALUE frames_buffer[BUF_SIZE];
81
127
  int lines_buffer[BUF_SIZE];
82
128
  } _stackprof;
@@ -84,7 +130,7 @@ static struct {
84
130
  static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
85
131
  static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
86
132
  static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
87
- static VALUE sym_aggregate, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
133
+ static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
88
134
  static VALUE sym_gc_samples, objtracer;
89
135
  static VALUE gc_hook;
90
136
  static VALUE rb_mStackProf;
@@ -174,7 +220,7 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
174
220
  _stackprof.out = out;
175
221
 
176
222
  if (raw) {
177
- gettimeofday(&_stackprof.last_sample_at, NULL);
223
+ capture_timestamp(&_stackprof.last_sample_at);
178
224
  }
179
225
 
180
226
  return Qtrue;
@@ -209,13 +255,19 @@ stackprof_stop(VALUE self)
209
255
  return Qtrue;
210
256
  }
211
257
 
258
+ #if SIZEOF_VOIDP == SIZEOF_LONG
259
+ # define PTR2NUM(x) (LONG2NUM((long)(x)))
260
+ #else
261
+ # define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
262
+ #endif
263
+
212
264
  static int
213
265
  frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
214
266
  {
215
267
  VALUE edges = (VALUE)arg;
216
268
 
217
269
  intptr_t weight = (intptr_t)val;
218
- rb_hash_aset(edges, rb_obj_id((VALUE)key), INT2FIX(weight));
270
+ rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
219
271
  return ST_CONTINUE;
220
272
  }
221
273
 
@@ -242,7 +294,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
242
294
  VALUE name, file, edges, lines;
243
295
  VALUE line;
244
296
 
245
- rb_hash_aset(results, rb_obj_id(frame), details);
297
+ rb_hash_aset(results, PTR2NUM(frame), details);
246
298
 
247
299
  if (FIXNUM_P(frame)) {
248
300
  name = _stackprof.fake_frame_names[FIX2INT(frame)];
@@ -314,7 +366,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
314
366
 
315
367
  if (_stackprof.raw && _stackprof.raw_samples_len) {
316
368
  size_t len, n, o;
317
- VALUE raw_timestamp_deltas;
369
+ VALUE raw_sample_timestamps, raw_timestamp_deltas;
318
370
  VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
319
371
 
320
372
  for (n = 0; n < _stackprof.raw_samples_len; n++) {
@@ -322,7 +374,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
322
374
  rb_ary_push(raw_samples, SIZET2NUM(len));
323
375
 
324
376
  for (o = 0, n++; o < len; n++, o++)
325
- rb_ary_push(raw_samples, rb_obj_id(_stackprof.raw_samples[n]));
377
+ rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n]));
326
378
  rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
327
379
  }
328
380
 
@@ -334,17 +386,20 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
334
386
 
335
387
  rb_hash_aset(results, sym_raw, raw_samples);
336
388
 
337
- raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len);
389
+ raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
390
+ raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
338
391
 
339
- for (n = 0; n < _stackprof.raw_timestamp_deltas_len; n++) {
340
- rb_ary_push(raw_timestamp_deltas, INT2FIX(_stackprof.raw_timestamp_deltas[n]));
392
+ for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
393
+ rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
394
+ rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
341
395
  }
342
396
 
343
- free(_stackprof.raw_timestamp_deltas);
344
- _stackprof.raw_timestamp_deltas = NULL;
345
- _stackprof.raw_timestamp_deltas_len = 0;
346
- _stackprof.raw_timestamp_deltas_capa = 0;
397
+ free(_stackprof.raw_sample_times);
398
+ _stackprof.raw_sample_times = NULL;
399
+ _stackprof.raw_sample_times_len = 0;
400
+ _stackprof.raw_sample_times_capa = 0;
347
401
 
402
+ rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
348
403
  rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
349
404
 
350
405
  _stackprof.raw = 0;
@@ -424,14 +479,14 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
424
479
  }
425
480
 
426
481
  void
427
- stackprof_record_sample_for_stack(int num, int timestamp_delta)
482
+ stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
428
483
  {
429
484
  int i, n;
430
485
  VALUE prev_frame = Qnil;
431
486
 
432
487
  _stackprof.overall_samples++;
433
488
 
434
- if (_stackprof.raw) {
489
+ if (_stackprof.raw && num > 0) {
435
490
  int found = 0;
436
491
 
437
492
  /* If there's no sample buffer allocated, then allocate one. The buffer
@@ -483,20 +538,23 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
483
538
  }
484
539
 
485
540
  /* If there's no timestamp delta buffer, allocate one */
486
- if (!_stackprof.raw_timestamp_deltas) {
487
- _stackprof.raw_timestamp_deltas_capa = 100;
488
- _stackprof.raw_timestamp_deltas = malloc(sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
489
- _stackprof.raw_timestamp_deltas_len = 0;
541
+ if (!_stackprof.raw_sample_times) {
542
+ _stackprof.raw_sample_times_capa = 100;
543
+ _stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
544
+ _stackprof.raw_sample_times_len = 0;
490
545
  }
491
546
 
492
547
  /* Double the buffer size if it's too small */
493
- while (_stackprof.raw_timestamp_deltas_capa <= _stackprof.raw_timestamp_deltas_len + 1) {
494
- _stackprof.raw_timestamp_deltas_capa *= 2;
495
- _stackprof.raw_timestamp_deltas = realloc(_stackprof.raw_timestamp_deltas, sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
548
+ while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
549
+ _stackprof.raw_sample_times_capa *= 2;
550
+ _stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
496
551
  }
497
552
 
498
- /* Store the time delta (which is the amount of time between samples) */
499
- _stackprof.raw_timestamp_deltas[_stackprof.raw_timestamp_deltas_len++] = timestamp_delta;
553
+ /* Store the sample's timestamp and its time delta (the number of microseconds since the previous sample). */
554
+ _stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
555
+ .timestamp_usec = sample_timestamp,
556
+ .delta_usec = timestamp_delta,
557
+ };
500
558
  }
501
559
 
502
560
  for (i = 0; i < num; i++) {
@@ -529,48 +587,59 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
529
587
  }
530
588
 
531
589
  if (_stackprof.raw) {
532
- gettimeofday(&_stackprof.last_sample_at, NULL);
590
+ capture_timestamp(&_stackprof.last_sample_at);
533
591
  }
534
592
  }
535
593
 
594
+ // buffer the current profile frames
595
+ // This must be async-signal-safe
596
+ // Returns immediately if another set of frames is already in the buffer
536
597
  void
537
- stackprof_record_sample()
598
+ stackprof_buffer_sample(void)
538
599
  {
539
- int timestamp_delta = 0;
600
+ if (_stackprof.buffer_count > 0) {
601
+ // Another sample is already pending
602
+ return;
603
+ }
604
+
605
+ uint64_t start_timestamp = 0;
606
+ int64_t timestamp_delta = 0;
540
607
  int num;
541
608
  if (_stackprof.raw) {
542
- struct timeval t;
543
- struct timeval diff;
544
- gettimeofday(&t, NULL);
545
- timersub(&t, &_stackprof.last_sample_at, &diff);
546
- timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
609
+ struct timestamp_t t;
610
+ capture_timestamp(&t);
611
+ start_timestamp = timestamp_usec(&t);
612
+ timestamp_delta = delta_usec(&_stackprof.last_sample_at, &t);
547
613
  }
614
+
548
615
  num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
549
- stackprof_record_sample_for_stack(num, timestamp_delta);
616
+
617
+ _stackprof.buffer_count = num;
618
+ _stackprof.buffer_time.timestamp_usec = start_timestamp;
619
+ _stackprof.buffer_time.delta_usec = timestamp_delta;
550
620
  }
551
621
 
552
622
  void
553
- stackprof_record_gc_samples()
623
+ stackprof_record_gc_samples(void)
554
624
  {
555
- int delta_to_first_unrecorded_gc_sample = 0;
556
- int i;
625
+ int64_t delta_to_first_unrecorded_gc_sample = 0;
626
+ uint64_t start_timestamp = 0;
627
+ size_t i;
557
628
  if (_stackprof.raw) {
558
- struct timeval t;
559
- struct timeval diff;
560
- gettimeofday(&t, NULL);
561
- timersub(&t, &_stackprof.last_sample_at, &diff);
629
+ struct timestamp_t t;
630
+ capture_timestamp(&t);
631
+ start_timestamp = timestamp_usec(&t);
562
632
 
563
633
  // We don't know when the GC samples were actually marked, so let's
564
634
  // assume that they were marked at a perfectly regular interval.
565
- delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
635
+ delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
566
636
  if (delta_to_first_unrecorded_gc_sample < 0) {
567
637
  delta_to_first_unrecorded_gc_sample = 0;
568
638
  }
569
639
  }
570
640
 
571
-
572
641
  for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
573
- int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
642
+ int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
574
643
 
575
644
  if (_stackprof.unrecorded_gc_marking_samples) {
576
645
  _stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
@@ -579,7 +648,7 @@ stackprof_record_gc_samples()
579
648
  _stackprof.lines_buffer[1] = 0;
580
649
  _stackprof.unrecorded_gc_marking_samples--;
581
650
 
582
- stackprof_record_sample_for_stack(2, timestamp_delta);
651
+ stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
583
652
  } else if (_stackprof.unrecorded_gc_sweeping_samples) {
584
653
  _stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
585
654
  _stackprof.lines_buffer[0] = 0;
@@ -588,11 +657,11 @@ stackprof_record_gc_samples()
588
657
 
589
658
  _stackprof.unrecorded_gc_sweeping_samples--;
590
659
 
591
- stackprof_record_sample_for_stack(2, timestamp_delta);
660
+ stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
592
661
  } else {
593
662
  _stackprof.frames_buffer[0] = FAKE_FRAME_GC;
594
663
  _stackprof.lines_buffer[0] = 0;
595
- stackprof_record_sample_for_stack(1, timestamp_delta);
664
+ stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
596
665
  }
597
666
  }
598
667
  _stackprof.during_gc += _stackprof.unrecorded_gc_samples;
@@ -601,8 +670,25 @@ stackprof_record_gc_samples()
601
670
  _stackprof.unrecorded_gc_sweeping_samples = 0;
602
671
  }
603
672
 
673
+ // record the sample previously buffered by stackprof_buffer_sample
674
+ static void
675
+ stackprof_record_buffer(void)
676
+ {
677
+ stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
678
+
679
+ // reset the buffer
680
+ _stackprof.buffer_count = 0;
681
+ }
682
+
604
683
  static void
605
- stackprof_gc_job_handler(void *data)
684
+ stackprof_sample_and_record(void)
685
+ {
686
+ stackprof_buffer_sample();
687
+ stackprof_record_buffer();
688
+ }
689
+
690
+ static void
691
+ stackprof_job_record_gc(void *data)
606
692
  {
607
693
  if (!_stackprof.running) return;
608
694
 
@@ -610,11 +696,19 @@ stackprof_gc_job_handler(void *data)
610
696
  }
611
697
 
612
698
  static void
613
- stackprof_job_handler(void *data)
699
+ stackprof_job_sample_and_record(void *data)
614
700
  {
615
701
  if (!_stackprof.running) return;
616
702
 
617
- stackprof_record_sample();
703
+ stackprof_sample_and_record();
704
+ }
705
+
706
+ static void
707
+ stackprof_job_record_buffer(void *data)
708
+ {
709
+ if (!_stackprof.running) return;
710
+
711
+ stackprof_record_buffer();
618
712
  }
619
713
 
620
714
  static void
@@ -636,13 +730,17 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
636
730
  _stackprof.unrecorded_gc_sweeping_samples++;
637
731
  }
638
732
  _stackprof.unrecorded_gc_samples++;
639
- rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0);
733
+ rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
640
734
  } else {
641
- #ifdef USE_POSTPONED_JOB
642
- rb_postponed_job_register_one(0, stackprof_job_handler, (void*)0);
643
- #else
644
- stackprof_job_handler(0);
645
- #endif
735
+ if (stackprof_use_postponed_job) {
736
+ rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
737
+ } else {
738
+ // Buffer a sample immediately. If a sample is already buffered, this
739
+ // returns without doing anything
740
+ stackprof_buffer_sample();
741
+ // Enqueue a job to record the sample
742
+ rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
743
+ }
646
744
  }
647
745
  pthread_mutex_unlock(&lock);
648
746
  }
@@ -653,7 +751,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
653
751
  _stackprof.overall_signals++;
654
752
  if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
655
753
  return;
656
- stackprof_job_handler(0);
754
+ stackprof_sample_and_record();
657
755
  }
658
756
 
659
757
  static VALUE
@@ -663,7 +761,7 @@ stackprof_sample(VALUE self)
663
761
  return Qfalse;
664
762
 
665
763
  _stackprof.overall_signals++;
666
- stackprof_job_handler(0);
764
+ stackprof_sample_and_record();
667
765
  return Qtrue;
668
766
  }
669
767
 
@@ -720,9 +818,24 @@ stackprof_atfork_child(void)
720
818
  stackprof_stop(rb_mStackProf);
721
819
  }
722
820
 
821
+ static VALUE
822
+ stackprof_use_postponed_job_l(VALUE self)
823
+ {
824
+ stackprof_use_postponed_job = 1;
825
+ return Qnil;
826
+ }
827
+
723
828
  void
724
829
  Init_stackprof(void)
725
830
  {
831
+ /*
832
+ * As of Ruby 3.0, it should be safe to read stack frames at any time, unless YJIT is enabled
833
+ * See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
834
+ */
835
+ #if RUBY_API_VERSION_MAJOR < 3
836
+ stackprof_use_postponed_job = 0;
837
+ #endif
838
+
726
839
  size_t i;
727
840
  #define S(name) sym_##name = ID2SYM(rb_intern(#name));
728
841
  S(object);
@@ -742,6 +855,7 @@ Init_stackprof(void)
742
855
  S(mode);
743
856
  S(interval);
744
857
  S(raw);
858
+ S(raw_sample_timestamps);
745
859
  S(raw_timestamp_deltas);
746
860
  S(out);
747
861
  S(metadata);
@@ -764,9 +878,9 @@ Init_stackprof(void)
764
878
  _stackprof.raw_samples_capa = 0;
765
879
  _stackprof.raw_sample_index = 0;
766
880
 
767
- _stackprof.raw_timestamp_deltas = NULL;
768
- _stackprof.raw_timestamp_deltas_len = 0;
769
- _stackprof.raw_timestamp_deltas_capa = 0;
881
+ _stackprof.raw_sample_times = NULL;
882
+ _stackprof.raw_sample_times_len = 0;
883
+ _stackprof.raw_sample_times_capa = 0;
770
884
 
771
885
  _stackprof.empty_string = rb_str_new_cstr("");
772
886
  rb_global_variable(&_stackprof.empty_string);
@@ -783,6 +897,7 @@ Init_stackprof(void)
783
897
  rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
784
898
  rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
785
899
  rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
900
+ rb_define_singleton_method(rb_mStackProf, "use_postponed_job!", stackprof_use_postponed_job_l, 0);
786
901
 
787
902
  pthread_atfork(stackprof_atfork_prepare, stackprof_atfork_parent, stackprof_atfork_child);
788
903
  }
@@ -2,9 +2,43 @@
2
2
 
3
3
  require 'pp'
4
4
  require 'digest/md5'
5
+ require 'json'
5
6
 
6
7
  module StackProf
7
8
  class Report
9
+ MARSHAL_SIGNATURE = "\x04\x08"
10
+
11
+ class << self
12
+ def from_file(file)
13
+ if (content = IO.binread(file)).start_with?(MARSHAL_SIGNATURE)
14
+ new(Marshal.load(content))
15
+ else
16
+ from_json(JSON.parse(content))
17
+ end
18
+ end
19
+
20
+ def from_json(json)
21
+ new(parse_json(json))
22
+ end
23
+
24
+ def parse_json(json)
25
+ json.keys.each do |key|
26
+ value = json.delete(key)
27
+ from_json(value) if value.is_a?(Hash)
28
+
29
+ new_key = case key
30
+ when /\A[0-9]*\z/
31
+ key.to_i
32
+ else
33
+ key.to_sym
34
+ end
35
+
36
+ json[new_key] = value
37
+ end
38
+ json
39
+ end
40
+ end
41
+
8
42
  def initialize(data)
9
43
  @data = data
10
44
  end
@@ -95,51 +129,10 @@ module StackProf
95
129
  print_flamegraph(f, skip_common, true)
96
130
  end
97
131
 
98
- StackCursor = Struct.new(:raw, :idx, :length) do
99
- def weight
100
- @weight ||= raw[1 + idx + length]
101
- end
102
-
103
- def [](i)
104
- if i >= length
105
- nil
106
- else
107
- raw[1 + idx + i]
108
- end
109
- end
110
-
111
- def <=>(other)
112
- i = 0
113
- while i < length && i < other.length
114
- if self[i] != other[i]
115
- return self[i] <=> other[i]
116
- end
117
- i += 1
118
- end
119
-
120
- return length <=> other.length
121
- end
122
- end
123
-
124
132
  def print_flamegraph(f, skip_common, alphabetical=false)
125
133
  raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
126
134
 
127
- stacks = []
128
- max_x = 0
129
- max_y = 0
130
-
131
- idx = 0
132
- loop do
133
- len = raw[idx]
134
- break unless len
135
- max_y = len if len > max_y
136
-
137
- stack = StackCursor.new(raw, idx, len)
138
- stacks << stack
139
- max_x += stack.weight
140
-
141
- idx += len + 2
142
- end
135
+ stacks, max_x, max_y = flamegraph_stacks(raw)
143
136
 
144
137
  stacks.sort! if alphabetical
145
138
 
@@ -150,7 +143,7 @@ module StackProf
150
143
  x = 0
151
144
 
152
145
  stacks.each do |stack|
153
- weight = stack.weight
146
+ weight = stack.last
154
147
  cell = stack[y] unless y == stack.length-1
155
148
 
156
149
  if cell.nil?
@@ -191,6 +184,24 @@ module StackProf
191
184
  f.puts '])'
192
185
  end
193
186
 
187
+ def flamegraph_stacks(raw)
188
+ stacks = []
189
+ max_x = 0
190
+ max_y = 0
191
+ idx = 0
192
+
193
+ while len = raw[idx]
194
+ idx += 1
195
+ max_y = len if len > max_y
196
+ stack = raw.slice(idx, len+1)
197
+ idx += len+1
198
+ stacks << stack
199
+ max_x += stack.last
200
+ end
201
+
202
+ return stacks, max_x, max_y
203
+ end
204
+
194
205
  def flamegraph_row(f, x, y, weight, addr)
195
206
  frame = @data[:frames][addr]
196
207
  f.print ',' if @rows_started
@@ -231,15 +242,7 @@ module StackProf
231
242
  def print_d3_flamegraph(f=STDOUT, skip_common=true)
232
243
  raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
233
244
 
234
- stacks = []
235
- max_x = 0
236
- max_y = 0
237
- while len = raw.shift
238
- max_y = len if len > max_y
239
- stack = raw.slice!(0, len+1)
240
- stacks << stack
241
- max_x += stack.last
242
- end
245
+ stacks, * = flamegraph_stacks(raw)
243
246
 
244
247
  # d3-flame-graph supports only alphabetical flamegraphs
245
248
  stacks.sort!
data/lib/stackprof.rb CHANGED
@@ -1,7 +1,11 @@
1
1
  require "stackprof/stackprof"
2
2
 
3
+ if defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled?
4
+ StackProf.use_postponed_job!
5
+ end
6
+
3
7
  module StackProf
4
- VERSION = '0.2.17'
8
+ VERSION = '0.2.20'
5
9
  end
6
10
 
7
11
  StackProf.autoload :Report, "stackprof/report.rb"
data/stackprof.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'stackprof'
3
- s.version = '0.2.17'
3
+ s.version = '0.2.20'
4
4
  s.homepage = 'http://github.com/tmm1/stackprof'
5
5
 
6
6
  s.authors = 'Aman Gupta'
@@ -0,0 +1 @@
1
+ {: modeI"cpu:ET
@@ -0,0 +1 @@
1
+ { "mode": "cpu" }
data/test/test_report.rb CHANGED
@@ -32,3 +32,27 @@ class ReportDumpTest < MiniTest::Test
32
32
  assert_equal expected, Marshal.load(marshal_data)
33
33
  end
34
34
  end
35
+
36
+ class ReportReadTest < MiniTest::Test
37
+ require 'pathname'
38
+
39
+ def test_from_file_read_json
40
+ file = fixture("profile.json")
41
+ report = StackProf::Report.from_file(file)
42
+
43
+ assert_equal({ mode: "cpu" }, report.data)
44
+ end
45
+
46
+ def test_from_file_read_marshal
47
+ file = fixture("profile.dump")
48
+ report = StackProf::Report.from_file(file)
49
+
50
+ assert_equal({ mode: "cpu" }, report.data)
51
+ end
52
+
53
+ private
54
+
55
+ def fixture(name)
56
+ Pathname.new(__dir__).join("fixtures", name)
57
+ end
58
+ end
@@ -5,6 +5,10 @@ require 'tempfile'
5
5
  require 'pathname'
6
6
 
7
7
  class StackProfTest < MiniTest::Test
8
+ def setup
9
+ Object.new # warm some caches to avoid flakiness
10
+ end
11
+
8
12
  def test_info
9
13
  profile = StackProf.run{}
10
14
  assert_equal 1.2, profile[:version]
@@ -78,9 +82,14 @@ class StackProfTest < MiniTest::Test
78
82
  end
79
83
 
80
84
  assert_operator profile[:samples], :>=, 1
81
- offset = RUBY_VERSION >= '3' ? 1 : 0
82
- frame = profile[:frames].values[offset]
83
- assert_includes frame[:name], "StackProfTest#math"
85
+ if RUBY_VERSION >= '3'
86
+ assert profile[:frames].values.take(2).map { |f|
87
+ f[:name].include? "StackProfTest#math"
88
+ }.any?
89
+ else
90
+ frame = profile[:frames].values.first
91
+ assert_includes frame[:name], "StackProfTest#math"
92
+ end
84
93
  end
85
94
 
86
95
  def test_walltime
@@ -121,19 +130,38 @@ class StackProfTest < MiniTest::Test
121
130
  end
122
131
 
123
132
  def test_raw
133
+ before_monotonic = Process.clock_gettime(Process::CLOCK_MONOTONIC, :microsecond)
134
+
124
135
  profile = StackProf.run(mode: :custom, raw: true) do
125
136
  10.times do
126
137
  StackProf.sample
138
+ sleep 0.0001
127
139
  end
128
140
  end
129
141
 
142
+ after_monotonic = Process.clock_gettime(Process::CLOCK_MONOTONIC, :microsecond)
143
+
130
144
  raw = profile[:raw]
131
145
  assert_equal 10, raw[-1]
132
146
  assert_equal raw[0] + 2, raw.size
133
147
 
134
148
  offset = RUBY_VERSION >= '3' ? -3 : -2
135
149
  assert_includes profile[:frames][raw[offset]][:name], 'StackProfTest#test_raw'
150
+
151
+ assert_equal 10, profile[:raw_sample_timestamps].size
152
+ profile[:raw_sample_timestamps].each_cons(2) do |t1, t2|
153
+ assert_operator t1, :>, before_monotonic
154
+ assert_operator t2, :>=, t1
155
+ assert_operator t2, :<, after_monotonic
156
+ end
157
+
136
158
  assert_equal 10, profile[:raw_timestamp_deltas].size
159
+ total_duration = after_monotonic - before_monotonic
160
+ assert_operator profile[:raw_timestamp_deltas].inject(&:+), :<, total_duration
161
+
162
+ profile[:raw_timestamp_deltas].each do |delta|
163
+ assert_operator delta, :>, 0
164
+ end
137
165
  end
138
166
 
139
167
  def test_metadata
@@ -205,7 +233,6 @@ class StackProfTest < MiniTest::Test
205
233
  end
206
234
  end
207
235
 
208
- raw = profile[:raw]
209
236
  gc_frame = profile[:frames].values.find{ |f| f[:name] == "(garbage collection)" }
210
237
  marking_frame = profile[:frames].values.find{ |f| f[:name] == "(marking)" }
211
238
  sweeping_frame = profile[:frames].values.find{ |f| f[:name] == "(sweeping)" }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stackprof
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.17
4
+ version: 0.2.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aman Gupta
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-03 00:00:00.000000000 Z
11
+ date: 2022-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -82,6 +82,8 @@ files:
82
82
  - lib/stackprof/report.rb
83
83
  - sample.rb
84
84
  - stackprof.gemspec
85
+ - test/fixtures/profile.dump
86
+ - test/fixtures/profile.json
85
87
  - test/test_middleware.rb
86
88
  - test/test_report.rb
87
89
  - test/test_stackprof.rb
@@ -94,10 +96,10 @@ licenses:
94
96
  - MIT
95
97
  metadata:
96
98
  bug_tracker_uri: https://github.com/tmm1/stackprof/issues
97
- changelog_uri: https://github.com/tmm1/stackprof/blob/v0.2.17/CHANGELOG.md
98
- documentation_uri: https://www.rubydoc.info/gems/stackprof/0.2.17
99
- source_code_uri: https://github.com/tmm1/stackprof/tree/v0.2.17
100
- post_install_message:
99
+ changelog_uri: https://github.com/tmm1/stackprof/blob/v0.2.20/CHANGELOG.md
100
+ documentation_uri: https://www.rubydoc.info/gems/stackprof/0.2.20
101
+ source_code_uri: https://github.com/tmm1/stackprof/tree/v0.2.20
102
+ post_install_message:
101
103
  rdoc_options: []
102
104
  require_paths:
103
105
  - lib
@@ -112,8 +114,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
114
  - !ruby/object:Gem::Version
113
115
  version: '0'
114
116
  requirements: []
115
- rubygems_version: 3.1.2
116
- signing_key:
117
+ rubygems_version: 3.0.3.1
118
+ signing_key:
117
119
  specification_version: 4
118
120
  summary: sampling callstack-profiler for ruby 2.2+
119
121
  test_files: []