stackprof 0.2.17 → 0.2.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b55691b8d1966ba4b2b2458a4908b2a2d5b65f2074dfe3b3b1b6350f752704ec
4
- data.tar.gz: 79e2a0508a1c722f39cc61d39b0577cfb5520669a7a2db4cadac6c49dcb1267a
3
+ metadata.gz: e627bf7fbeca0cb94e8be863b0a1db9160534d5172c1409ba4306e0f16a57ae0
4
+ data.tar.gz: f881ec5ab3267f3b48f57794c3bce910b39f890db32817655b4e5253733fc466
5
5
  SHA512:
6
- metadata.gz: 2fa22779f03c332a3680f526bf1df29553588773fabeb00da327af3525018e535e973bafd990254c6ad50516faf5e8b1d087bb7c208c99d0b512d99ccdef53bb
7
- data.tar.gz: 73ba1328c793b0c0c4657e7826f4bf2cd52102c61a2ca2e3e0b1c5240ffe96ee0ec328ea831b1592c10b4e13c6aec2bb9d28fd05e93ddef999d5131e55124362
6
+ metadata.gz: 514c6697e4465fbce7990fffc2a71ca5aa544e1f526b338fde343c14bc801bb5056534e371e4d8fd5642f1b73b92126b2a5e476eb5292dde23bd0da734d7e236
7
+ data.tar.gz: bed130e196f1004b3e74d2db466732fa8b23e582df3fcf944161112036053e95a5a05a3ed2ac175a6603e1f3b28b6062491aef89f4d30b764be3886aaf586b19
@@ -8,7 +8,7 @@ jobs:
8
8
  strategy:
9
9
  fail-fast: false
10
10
  matrix:
11
- ruby: [ ruby-head, '3.0', '2.7', '2.6', '2.5', '2.4', '2.3', '2.2' ]
11
+ ruby: [ ruby-head, '3.1', '3.0', '2.7', '2.6', '2.5', '2.4', '2.3', '2.2' ]
12
12
  steps:
13
13
  - name: Checkout
14
14
  uses: actions/checkout@v2
data/bin/stackprof CHANGED
@@ -42,7 +42,7 @@ reports = []
42
42
  while ARGV.size > 0
43
43
  begin
44
44
  file = ARGV.pop
45
- reports << StackProf::Report.new(Marshal.load(IO.binread(file)))
45
+ reports << StackProf::Report.from_file(file)
46
46
  rescue TypeError => e
47
47
  STDERR.puts "** error parsing #{file}: #{e.inspect}"
48
48
  end
@@ -7,37 +7,75 @@
7
7
  **********************************************************************/
8
8
 
9
9
  #include <ruby/ruby.h>
10
+ #include <ruby/version.h>
10
11
  #include <ruby/debug.h>
11
12
  #include <ruby/st.h>
12
13
  #include <ruby/io.h>
13
14
  #include <ruby/intern.h>
14
15
  #include <signal.h>
15
16
  #include <sys/time.h>
17
+ #include <time.h>
16
18
  #include <pthread.h>
17
19
 
18
20
  #define BUF_SIZE 2048
19
21
  #define MICROSECONDS_IN_SECOND 1000000
22
+ #define NANOSECONDS_IN_SECOND 1000000000
20
23
 
21
24
  #define FAKE_FRAME_GC INT2FIX(0)
22
25
  #define FAKE_FRAME_MARK INT2FIX(1)
23
26
  #define FAKE_FRAME_SWEEP INT2FIX(2)
24
27
 
25
- /*
26
- * As of Ruby 3.0, it should be safe to read stack frames at any time
27
- * See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
28
- */
29
- #if RUBY_API_VERSION_MAJOR < 3
30
- #define USE_POSTPONED_JOB
31
- #endif
32
-
33
28
  static const char *fake_frame_cstrs[] = {
34
29
  "(garbage collection)",
35
30
  "(marking)",
36
31
  "(sweeping)",
37
32
  };
38
33
 
34
+ static int stackprof_use_postponed_job = 1;
35
+
39
36
  #define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
40
37
 
38
+ #ifdef _POSIX_MONOTONIC_CLOCK
39
+ #define timestamp_t timespec
40
+ typedef struct timestamp_t timestamp_t;
41
+
42
+ static void capture_timestamp(timestamp_t *ts) {
43
+ clock_gettime(CLOCK_MONOTONIC, ts);
44
+ }
45
+
46
+ static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
47
+ int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
48
+ if (end->tv_nsec < start->tv_nsec) {
49
+ result -= MICROSECONDS_IN_SECOND;
50
+ result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
51
+ } else {
52
+ result += (end->tv_nsec - start->tv_nsec) / 1000;
53
+ }
54
+ return result;
55
+ }
56
+
57
+ static uint64_t timestamp_usec(timestamp_t *ts) {
58
+ return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000);
59
+ }
60
+ #else
61
+ #define timestamp_t timeval
62
+ typedef struct timestamp_t timestamp_t;
63
+
64
+ static void capture_timestamp(timestamp_t *ts) {
65
+ gettimeofday(ts, NULL);
66
+ }
67
+
68
+ static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
69
+ struct timeval diff;
70
+ timersub(end, start, &diff);
71
+ return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec;
72
+ }
73
+
74
+ static uint64_t timestamp_usec(timestamp_t *ts) {
75
+ return (MICROSECONDS_IN_SECOND * ts.tv_sec) + diff.tv_usec
76
+ }
77
+ #endif
78
+
41
79
  typedef struct {
42
80
  size_t total_samples;
43
81
  size_t caller_samples;
@@ -46,6 +84,11 @@ typedef struct {
46
84
  st_table *lines;
47
85
  } frame_data_t;
48
86
 
87
+ typedef struct {
88
+ uint64_t timestamp_usec;
89
+ int64_t delta_usec;
90
+ } sample_time_t;
91
+
49
92
  static struct {
50
93
  int running;
51
94
  int raw;
@@ -62,10 +105,10 @@ static struct {
62
105
  size_t raw_samples_capa;
63
106
  size_t raw_sample_index;
64
107
 
65
- struct timeval last_sample_at;
66
- int *raw_timestamp_deltas;
67
- size_t raw_timestamp_deltas_len;
68
- size_t raw_timestamp_deltas_capa;
108
+ struct timestamp_t last_sample_at;
109
+ sample_time_t *raw_sample_times;
110
+ size_t raw_sample_times_len;
111
+ size_t raw_sample_times_capa;
69
112
 
70
113
  size_t overall_signals;
71
114
  size_t overall_samples;
@@ -77,6 +120,9 @@ static struct {
77
120
 
78
121
  VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
79
122
  VALUE empty_string;
123
+
124
+ int buffer_count;
125
+ sample_time_t buffer_time;
80
126
  VALUE frames_buffer[BUF_SIZE];
81
127
  int lines_buffer[BUF_SIZE];
82
128
  } _stackprof;
@@ -84,7 +130,7 @@ static struct {
84
130
  static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
85
131
  static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
86
132
  static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
87
- static VALUE sym_aggregate, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
133
+ static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
88
134
  static VALUE sym_gc_samples, objtracer;
89
135
  static VALUE gc_hook;
90
136
  static VALUE rb_mStackProf;
@@ -174,7 +220,7 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
174
220
  _stackprof.out = out;
175
221
 
176
222
  if (raw) {
177
- gettimeofday(&_stackprof.last_sample_at, NULL);
223
+ capture_timestamp(&_stackprof.last_sample_at);
178
224
  }
179
225
 
180
226
  return Qtrue;
@@ -209,13 +255,19 @@ stackprof_stop(VALUE self)
209
255
  return Qtrue;
210
256
  }
211
257
 
258
+ #if SIZEOF_VOIDP == SIZEOF_LONG
259
+ # define PTR2NUM(x) (LONG2NUM((long)(x)))
260
+ #else
261
+ # define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
262
+ #endif
263
+
212
264
  static int
213
265
  frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
214
266
  {
215
267
  VALUE edges = (VALUE)arg;
216
268
 
217
269
  intptr_t weight = (intptr_t)val;
218
- rb_hash_aset(edges, rb_obj_id((VALUE)key), INT2FIX(weight));
270
+ rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
219
271
  return ST_CONTINUE;
220
272
  }
221
273
 
@@ -242,7 +294,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
242
294
  VALUE name, file, edges, lines;
243
295
  VALUE line;
244
296
 
245
- rb_hash_aset(results, rb_obj_id(frame), details);
297
+ rb_hash_aset(results, PTR2NUM(frame), details);
246
298
 
247
299
  if (FIXNUM_P(frame)) {
248
300
  name = _stackprof.fake_frame_names[FIX2INT(frame)];
@@ -314,7 +366,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
314
366
 
315
367
  if (_stackprof.raw && _stackprof.raw_samples_len) {
316
368
  size_t len, n, o;
317
- VALUE raw_timestamp_deltas;
369
+ VALUE raw_sample_timestamps, raw_timestamp_deltas;
318
370
  VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
319
371
 
320
372
  for (n = 0; n < _stackprof.raw_samples_len; n++) {
@@ -322,7 +374,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
322
374
  rb_ary_push(raw_samples, SIZET2NUM(len));
323
375
 
324
376
  for (o = 0, n++; o < len; n++, o++)
325
- rb_ary_push(raw_samples, rb_obj_id(_stackprof.raw_samples[n]));
377
+ rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n]));
326
378
  rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
327
379
  }
328
380
 
@@ -334,17 +386,20 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
334
386
 
335
387
  rb_hash_aset(results, sym_raw, raw_samples);
336
388
 
337
- raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len);
389
+ raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
390
+ raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
338
391
 
339
- for (n = 0; n < _stackprof.raw_timestamp_deltas_len; n++) {
340
- rb_ary_push(raw_timestamp_deltas, INT2FIX(_stackprof.raw_timestamp_deltas[n]));
392
+ for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
393
+ rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
394
+ rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
341
395
  }
342
396
 
343
- free(_stackprof.raw_timestamp_deltas);
344
- _stackprof.raw_timestamp_deltas = NULL;
345
- _stackprof.raw_timestamp_deltas_len = 0;
346
- _stackprof.raw_timestamp_deltas_capa = 0;
397
+ free(_stackprof.raw_sample_times);
398
+ _stackprof.raw_sample_times = NULL;
399
+ _stackprof.raw_sample_times_len = 0;
400
+ _stackprof.raw_sample_times_capa = 0;
347
401
 
402
+ rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
348
403
  rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
349
404
 
350
405
  _stackprof.raw = 0;
@@ -424,14 +479,14 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
424
479
  }
425
480
 
426
481
  void
427
- stackprof_record_sample_for_stack(int num, int timestamp_delta)
482
+ stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
428
483
  {
429
484
  int i, n;
430
485
  VALUE prev_frame = Qnil;
431
486
 
432
487
  _stackprof.overall_samples++;
433
488
 
434
- if (_stackprof.raw) {
489
+ if (_stackprof.raw && num > 0) {
435
490
  int found = 0;
436
491
 
437
492
  /* If there's no sample buffer allocated, then allocate one. The buffer
@@ -483,20 +538,23 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
483
538
  }
484
539
 
485
540
  /* If there's no timestamp delta buffer, allocate one */
486
- if (!_stackprof.raw_timestamp_deltas) {
487
- _stackprof.raw_timestamp_deltas_capa = 100;
488
- _stackprof.raw_timestamp_deltas = malloc(sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
489
- _stackprof.raw_timestamp_deltas_len = 0;
541
+ if (!_stackprof.raw_sample_times) {
542
+ _stackprof.raw_sample_times_capa = 100;
543
+ _stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
544
+ _stackprof.raw_sample_times_len = 0;
490
545
  }
491
546
 
492
547
  /* Double the buffer size if it's too small */
493
- while (_stackprof.raw_timestamp_deltas_capa <= _stackprof.raw_timestamp_deltas_len + 1) {
494
- _stackprof.raw_timestamp_deltas_capa *= 2;
495
- _stackprof.raw_timestamp_deltas = realloc(_stackprof.raw_timestamp_deltas, sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
548
+ while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
549
+ _stackprof.raw_sample_times_capa *= 2;
550
+ _stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
496
551
  }
497
552
 
498
- /* Store the time delta (which is the amount of time between samples) */
499
- _stackprof.raw_timestamp_deltas[_stackprof.raw_timestamp_deltas_len++] = timestamp_delta;
553
+ /* Store the time delta (which is the amount of microseconds between samples). */
554
+ _stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
555
+ .timestamp_usec = sample_timestamp,
556
+ .delta_usec = timestamp_delta,
557
+ };
500
558
  }
501
559
 
502
560
  for (i = 0; i < num; i++) {
@@ -529,48 +587,59 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
529
587
  }
530
588
 
531
589
  if (_stackprof.raw) {
532
- gettimeofday(&_stackprof.last_sample_at, NULL);
590
+ capture_timestamp(&_stackprof.last_sample_at);
533
591
  }
534
592
  }
535
593
 
594
+ // buffer the current profile frames
595
+ // This must be async-signal-safe
596
+ // Returns immediately if another set of frames are already in the buffer
536
597
  void
537
- stackprof_record_sample()
598
+ stackprof_buffer_sample(void)
538
599
  {
539
- int timestamp_delta = 0;
600
+ if (_stackprof.buffer_count > 0) {
601
+ // Another sample is already pending
602
+ return;
603
+ }
604
+
605
+ uint64_t start_timestamp = 0;
606
+ int64_t timestamp_delta = 0;
540
607
  int num;
541
608
  if (_stackprof.raw) {
542
- struct timeval t;
543
- struct timeval diff;
544
- gettimeofday(&t, NULL);
545
- timersub(&t, &_stackprof.last_sample_at, &diff);
546
- timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
609
+ struct timestamp_t t;
610
+ capture_timestamp(&t);
611
+ start_timestamp = timestamp_usec(&t);
612
+ timestamp_delta = delta_usec(&_stackprof.last_sample_at, &t);
547
613
  }
614
+
548
615
  num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
549
- stackprof_record_sample_for_stack(num, timestamp_delta);
616
+
617
+ _stackprof.buffer_count = num;
618
+ _stackprof.buffer_time.timestamp_usec = start_timestamp;
619
+ _stackprof.buffer_time.delta_usec = timestamp_delta;
550
620
  }
551
621
 
552
622
  void
553
- stackprof_record_gc_samples()
623
+ stackprof_record_gc_samples(void)
554
624
  {
555
- int delta_to_first_unrecorded_gc_sample = 0;
556
- int i;
625
+ int64_t delta_to_first_unrecorded_gc_sample = 0;
626
+ uint64_t start_timestamp = 0;
627
+ size_t i;
557
628
  if (_stackprof.raw) {
558
- struct timeval t;
559
- struct timeval diff;
560
- gettimeofday(&t, NULL);
561
- timersub(&t, &_stackprof.last_sample_at, &diff);
629
+ struct timestamp_t t;
630
+ capture_timestamp(&t);
631
+ start_timestamp = timestamp_usec(&t);
562
632
 
563
633
  // We don't know when the GC samples were actually marked, so let's
564
634
  // assume that they were marked at a perfectly regular interval.
565
- delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
635
+ delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
566
636
  if (delta_to_first_unrecorded_gc_sample < 0) {
567
637
  delta_to_first_unrecorded_gc_sample = 0;
568
638
  }
569
639
  }
570
640
 
571
-
572
641
  for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
573
- int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
642
+ int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
574
643
 
575
644
  if (_stackprof.unrecorded_gc_marking_samples) {
576
645
  _stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
@@ -579,7 +648,7 @@ stackprof_record_gc_samples()
579
648
  _stackprof.lines_buffer[1] = 0;
580
649
  _stackprof.unrecorded_gc_marking_samples--;
581
650
 
582
- stackprof_record_sample_for_stack(2, timestamp_delta);
651
+ stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
583
652
  } else if (_stackprof.unrecorded_gc_sweeping_samples) {
584
653
  _stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
585
654
  _stackprof.lines_buffer[0] = 0;
@@ -588,11 +657,11 @@ stackprof_record_gc_samples()
588
657
 
589
658
  _stackprof.unrecorded_gc_sweeping_samples--;
590
659
 
591
- stackprof_record_sample_for_stack(2, timestamp_delta);
660
+ stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
592
661
  } else {
593
662
  _stackprof.frames_buffer[0] = FAKE_FRAME_GC;
594
663
  _stackprof.lines_buffer[0] = 0;
595
- stackprof_record_sample_for_stack(1, timestamp_delta);
664
+ stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
596
665
  }
597
666
  }
598
667
  _stackprof.during_gc += _stackprof.unrecorded_gc_samples;
@@ -601,8 +670,25 @@ stackprof_record_gc_samples()
601
670
  _stackprof.unrecorded_gc_sweeping_samples = 0;
602
671
  }
603
672
 
673
+ // record the sample previously buffered by stackprof_buffer_sample
674
+ static void
675
+ stackprof_record_buffer(void)
676
+ {
677
+ stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
678
+
679
+ // reset the buffer
680
+ _stackprof.buffer_count = 0;
681
+ }
682
+
604
683
  static void
605
- stackprof_gc_job_handler(void *data)
684
+ stackprof_sample_and_record(void)
685
+ {
686
+ stackprof_buffer_sample();
687
+ stackprof_record_buffer();
688
+ }
689
+
690
+ static void
691
+ stackprof_job_record_gc(void *data)
606
692
  {
607
693
  if (!_stackprof.running) return;
608
694
 
@@ -610,11 +696,19 @@ stackprof_gc_job_handler(void *data)
610
696
  }
611
697
 
612
698
  static void
613
- stackprof_job_handler(void *data)
699
+ stackprof_job_sample_and_record(void *data)
614
700
  {
615
701
  if (!_stackprof.running) return;
616
702
 
617
- stackprof_record_sample();
703
+ stackprof_sample_and_record();
704
+ }
705
+
706
+ static void
707
+ stackprof_job_record_buffer(void *data)
708
+ {
709
+ if (!_stackprof.running) return;
710
+
711
+ stackprof_record_buffer();
618
712
  }
619
713
 
620
714
  static void
@@ -636,13 +730,17 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
636
730
  _stackprof.unrecorded_gc_sweeping_samples++;
637
731
  }
638
732
  _stackprof.unrecorded_gc_samples++;
639
- rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0);
733
+ rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
640
734
  } else {
641
- #ifdef USE_POSTPONED_JOB
642
- rb_postponed_job_register_one(0, stackprof_job_handler, (void*)0);
643
- #else
644
- stackprof_job_handler(0);
645
- #endif
735
+ if (stackprof_use_postponed_job) {
736
+ rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
737
+ } else {
738
+ // Buffer a sample immediately, if an existing sample exists this will
739
+ // return immediately
740
+ stackprof_buffer_sample();
741
+ // Enqueue a job to record the sample
742
+ rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
743
+ }
646
744
  }
647
745
  pthread_mutex_unlock(&lock);
648
746
  }
@@ -653,7 +751,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
653
751
  _stackprof.overall_signals++;
654
752
  if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
655
753
  return;
656
- stackprof_job_handler(0);
754
+ stackprof_sample_and_record();
657
755
  }
658
756
 
659
757
  static VALUE
@@ -663,7 +761,7 @@ stackprof_sample(VALUE self)
663
761
  return Qfalse;
664
762
 
665
763
  _stackprof.overall_signals++;
666
- stackprof_job_handler(0);
764
+ stackprof_sample_and_record();
667
765
  return Qtrue;
668
766
  }
669
767
 
@@ -720,9 +818,24 @@ stackprof_atfork_child(void)
720
818
  stackprof_stop(rb_mStackProf);
721
819
  }
722
820
 
821
+ static VALUE
822
+ stackprof_use_postponed_job_l(VALUE self)
823
+ {
824
+ stackprof_use_postponed_job = 1;
825
+ return Qnil;
826
+ }
827
+
723
828
  void
724
829
  Init_stackprof(void)
725
830
  {
831
+ /*
832
+ * As of Ruby 3.0, it should be safe to read stack frames at any time, unless YJIT is enabled
833
+ * See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
834
+ */
835
+ #if RUBY_API_VERSION_MAJOR < 3
836
+ stackprof_use_postponed_job = 0;
837
+ #endif
838
+
726
839
  size_t i;
727
840
  #define S(name) sym_##name = ID2SYM(rb_intern(#name));
728
841
  S(object);
@@ -742,6 +855,7 @@ Init_stackprof(void)
742
855
  S(mode);
743
856
  S(interval);
744
857
  S(raw);
858
+ S(raw_sample_timestamps);
745
859
  S(raw_timestamp_deltas);
746
860
  S(out);
747
861
  S(metadata);
@@ -764,9 +878,9 @@ Init_stackprof(void)
764
878
  _stackprof.raw_samples_capa = 0;
765
879
  _stackprof.raw_sample_index = 0;
766
880
 
767
- _stackprof.raw_timestamp_deltas = NULL;
768
- _stackprof.raw_timestamp_deltas_len = 0;
769
- _stackprof.raw_timestamp_deltas_capa = 0;
881
+ _stackprof.raw_sample_times = NULL;
882
+ _stackprof.raw_sample_times_len = 0;
883
+ _stackprof.raw_sample_times_capa = 0;
770
884
 
771
885
  _stackprof.empty_string = rb_str_new_cstr("");
772
886
  rb_global_variable(&_stackprof.empty_string);
@@ -783,6 +897,7 @@ Init_stackprof(void)
783
897
  rb_define_singleton_method(rb_mStackProf, "stop", stackprof_stop, 0);
784
898
  rb_define_singleton_method(rb_mStackProf, "results", stackprof_results, -1);
785
899
  rb_define_singleton_method(rb_mStackProf, "sample", stackprof_sample, 0);
900
+ rb_define_singleton_method(rb_mStackProf, "use_postponed_job!", stackprof_use_postponed_job_l, 0);
786
901
 
787
902
  pthread_atfork(stackprof_atfork_prepare, stackprof_atfork_parent, stackprof_atfork_child);
788
903
  }
@@ -2,9 +2,43 @@
2
2
 
3
3
  require 'pp'
4
4
  require 'digest/md5'
5
+ require 'json'
5
6
 
6
7
  module StackProf
7
8
  class Report
9
+ MARSHAL_SIGNATURE = "\x04\x08"
10
+
11
class << self
  # Build a Report from a profile dump on disk.
  #
  # Content starting with MARSHAL_SIGNATURE is decoded with Marshal.load;
  # anything else is parsed as JSON and normalised via from_json.
  #
  # NOTE(review): Marshal.load can instantiate arbitrary objects, so only
  # dumps from trusted sources should be passed here.
  def from_file(file)
    content = IO.binread(file)

    if content.start_with?(MARSHAL_SIGNATURE)
      new(Marshal.load(content))
    else
      from_json(JSON.parse(content))
    end
  end

  # Build a Report from an already-parsed JSON hash (string keys).
  def from_json(json)
    new(parse_json(json))
  end

  # Rewrite the keys of +json+ in place, recursing into nested hashes:
  # keys made up solely of digits become Integers, every other key becomes
  # a Symbol. Returns the same (mutated) hash.
  def parse_json(json)
    # Iterate over a snapshot of the keys because the hash is modified
    # inside the loop.
    json.keys.each do |key|
      value = json.delete(key)
      # Recurse directly; wrapping the nested hash in a Report is unnecessary.
      parse_json(value) if value.is_a?(Hash)

      new_key = case key
                when /\A[0-9]+\z/ # at least one digit, so "" is not turned into 0
                  key.to_i
                else
                  key.to_sym
                end

      json[new_key] = value
    end
    json
  end
end
41
+
8
42
  def initialize(data)
9
43
  @data = data
10
44
  end
@@ -95,51 +129,10 @@ module StackProf
95
129
  print_flamegraph(f, skip_common, true)
96
130
  end
97
131
 
98
- StackCursor = Struct.new(:raw, :idx, :length) do
99
- def weight
100
- @weight ||= raw[1 + idx + length]
101
- end
102
-
103
- def [](i)
104
- if i >= length
105
- nil
106
- else
107
- raw[1 + idx + i]
108
- end
109
- end
110
-
111
- def <=>(other)
112
- i = 0
113
- while i < length && i < other.length
114
- if self[i] != other[i]
115
- return self[i] <=> other[i]
116
- end
117
- i += 1
118
- end
119
-
120
- return length <=> other.length
121
- end
122
- end
123
-
124
132
  def print_flamegraph(f, skip_common, alphabetical=false)
125
133
  raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
126
134
 
127
- stacks = []
128
- max_x = 0
129
- max_y = 0
130
-
131
- idx = 0
132
- loop do
133
- len = raw[idx]
134
- break unless len
135
- max_y = len if len > max_y
136
-
137
- stack = StackCursor.new(raw, idx, len)
138
- stacks << stack
139
- max_x += stack.weight
140
-
141
- idx += len + 2
142
- end
135
+ stacks, max_x, max_y = flamegraph_stacks(raw)
143
136
 
144
137
  stacks.sort! if alphabetical
145
138
 
@@ -150,7 +143,7 @@ module StackProf
150
143
  x = 0
151
144
 
152
145
  stacks.each do |stack|
153
- weight = stack.weight
146
+ weight = stack.last
154
147
  cell = stack[y] unless y == stack.length-1
155
148
 
156
149
  if cell.nil?
@@ -191,6 +184,24 @@ module StackProf
191
184
  f.puts '])'
192
185
  end
193
186
 
187
# Split the flat +raw+ sample array into individual stacks without
# modifying it.
#
# +raw+ is read as repeated records of the form
#   [frame_count, frame_1 .. frame_n, weight].
# Each returned stack is the slice [frame_1 .. frame_n, weight].
#
# Returns [stacks, max_x, max_y] where max_x is the sum of all weights and
# max_y is the largest frame_count seen.
def flamegraph_stacks(raw)
  collected = []
  total_weight = 0
  deepest = 0
  cursor = 0

  loop do
    depth = raw[cursor]
    break unless depth

    deepest = [deepest, depth].max
    # Skip the length cell, then take the frames plus the trailing weight.
    entry = raw.slice(cursor + 1, depth + 1)
    cursor += depth + 2

    collected << entry
    total_weight += entry.last
  end

  [collected, total_weight, deepest]
end
204
+
194
205
  def flamegraph_row(f, x, y, weight, addr)
195
206
  frame = @data[:frames][addr]
196
207
  f.print ',' if @rows_started
@@ -231,15 +242,7 @@ module StackProf
231
242
  def print_d3_flamegraph(f=STDOUT, skip_common=true)
232
243
  raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
233
244
 
234
- stacks = []
235
- max_x = 0
236
- max_y = 0
237
- while len = raw.shift
238
- max_y = len if len > max_y
239
- stack = raw.slice!(0, len+1)
240
- stacks << stack
241
- max_x += stack.last
242
- end
245
+ stacks, * = flamegraph_stacks(raw)
243
246
 
244
247
  # d3-flame-graph supports only alphabetical flamegraph
245
248
  stacks.sort!
data/lib/stackprof.rb CHANGED
@@ -1,7 +1,11 @@
1
1
  require "stackprof/stackprof"
2
2
 
3
+ if defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled?
4
+ StackProf.use_postponed_job!
5
+ end
6
+
3
7
  module StackProf
4
- VERSION = '0.2.17'
8
+ VERSION = '0.2.20'
5
9
  end
6
10
 
7
11
  StackProf.autoload :Report, "stackprof/report.rb"
data/stackprof.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'stackprof'
3
- s.version = '0.2.17'
3
+ s.version = '0.2.20'
4
4
  s.homepage = 'http://github.com/tmm1/stackprof'
5
5
 
6
6
  s.authors = 'Aman Gupta'
@@ -0,0 +1 @@
1
+ {: modeI"cpu:ET
@@ -0,0 +1 @@
1
+ { "mode": "cpu" }
data/test/test_report.rb CHANGED
@@ -32,3 +32,27 @@ class ReportDumpTest < MiniTest::Test
32
32
  assert_equal expected, Marshal.load(marshal_data)
33
33
  end
34
34
  end
35
+
36
# Verifies that StackProf::Report.from_file can load both fixture formats
# (JSON and Marshal) and yields the same data for each.
class ReportReadTest < MiniTest::Test
  require 'pathname'

  def test_from_file_read_json
    assert_cpu_profile fixture("profile.json")
  end

  def test_from_file_read_marshal
    assert_cpu_profile fixture("profile.dump")
  end

  private

  # Loads +file+ through Report.from_file and checks the decoded payload.
  def assert_cpu_profile(file)
    report = StackProf::Report.from_file(file)

    assert_equal({ mode: "cpu" }, report.data)
  end

  # Path of a file inside the fixtures directory next to this test.
  def fixture(name)
    Pathname.new(__dir__).join("fixtures", name)
  end
end
+ end
@@ -5,6 +5,10 @@ require 'tempfile'
5
5
  require 'pathname'
6
6
 
7
7
  class StackProfTest < MiniTest::Test
8
+ def setup
9
+ Object.new # warm some caches to avoid flakiness
10
+ end
11
+
8
12
  def test_info
9
13
  profile = StackProf.run{}
10
14
  assert_equal 1.2, profile[:version]
@@ -78,9 +82,14 @@ class StackProfTest < MiniTest::Test
78
82
  end
79
83
 
80
84
  assert_operator profile[:samples], :>=, 1
81
- offset = RUBY_VERSION >= '3' ? 1 : 0
82
- frame = profile[:frames].values[offset]
83
- assert_includes frame[:name], "StackProfTest#math"
85
+ if RUBY_VERSION >= '3'
86
+ assert profile[:frames].values.take(2).map { |f|
87
+ f[:name].include? "StackProfTest#math"
88
+ }.any?
89
+ else
90
+ frame = profile[:frames].values.first
91
+ assert_includes frame[:name], "StackProfTest#math"
92
+ end
84
93
  end
85
94
 
86
95
  def test_walltime
@@ -121,19 +130,38 @@ class StackProfTest < MiniTest::Test
121
130
  end
122
131
 
123
132
  def test_raw
133
+ before_monotonic = Process.clock_gettime(Process::CLOCK_MONOTONIC, :microsecond)
134
+
124
135
  profile = StackProf.run(mode: :custom, raw: true) do
125
136
  10.times do
126
137
  StackProf.sample
138
+ sleep 0.0001
127
139
  end
128
140
  end
129
141
 
142
+ after_monotonic = Process.clock_gettime(Process::CLOCK_MONOTONIC, :microsecond)
143
+
130
144
  raw = profile[:raw]
131
145
  assert_equal 10, raw[-1]
132
146
  assert_equal raw[0] + 2, raw.size
133
147
 
134
148
  offset = RUBY_VERSION >= '3' ? -3 : -2
135
149
  assert_includes profile[:frames][raw[offset]][:name], 'StackProfTest#test_raw'
150
+
151
+ assert_equal 10, profile[:raw_sample_timestamps].size
152
+ profile[:raw_sample_timestamps].each_cons(2) do |t1, t2|
153
+ assert_operator t1, :>, before_monotonic
154
+ assert_operator t2, :>=, t1
155
+ assert_operator t2, :<, after_monotonic
156
+ end
157
+
136
158
  assert_equal 10, profile[:raw_timestamp_deltas].size
159
+ total_duration = after_monotonic - before_monotonic
160
+ assert_operator profile[:raw_timestamp_deltas].inject(&:+), :<, total_duration
161
+
162
+ profile[:raw_timestamp_deltas].each do |delta|
163
+ assert_operator delta, :>, 0
164
+ end
137
165
  end
138
166
 
139
167
  def test_metadata
@@ -205,7 +233,6 @@ class StackProfTest < MiniTest::Test
205
233
  end
206
234
  end
207
235
 
208
- raw = profile[:raw]
209
236
  gc_frame = profile[:frames].values.find{ |f| f[:name] == "(garbage collection)" }
210
237
  marking_frame = profile[:frames].values.find{ |f| f[:name] == "(marking)" }
211
238
  sweeping_frame = profile[:frames].values.find{ |f| f[:name] == "(sweeping)" }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stackprof
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.17
4
+ version: 0.2.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aman Gupta
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-03 00:00:00.000000000 Z
11
+ date: 2022-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -82,6 +82,8 @@ files:
82
82
  - lib/stackprof/report.rb
83
83
  - sample.rb
84
84
  - stackprof.gemspec
85
+ - test/fixtures/profile.dump
86
+ - test/fixtures/profile.json
85
87
  - test/test_middleware.rb
86
88
  - test/test_report.rb
87
89
  - test/test_stackprof.rb
@@ -94,10 +96,10 @@ licenses:
94
96
  - MIT
95
97
  metadata:
96
98
  bug_tracker_uri: https://github.com/tmm1/stackprof/issues
97
- changelog_uri: https://github.com/tmm1/stackprof/blob/v0.2.17/CHANGELOG.md
98
- documentation_uri: https://www.rubydoc.info/gems/stackprof/0.2.17
99
- source_code_uri: https://github.com/tmm1/stackprof/tree/v0.2.17
100
- post_install_message:
99
+ changelog_uri: https://github.com/tmm1/stackprof/blob/v0.2.20/CHANGELOG.md
100
+ documentation_uri: https://www.rubydoc.info/gems/stackprof/0.2.20
101
+ source_code_uri: https://github.com/tmm1/stackprof/tree/v0.2.20
102
+ post_install_message:
101
103
  rdoc_options: []
102
104
  require_paths:
103
105
  - lib
@@ -112,8 +114,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
114
  - !ruby/object:Gem::Version
113
115
  version: '0'
114
116
  requirements: []
115
- rubygems_version: 3.1.2
116
- signing_key:
117
+ rubygems_version: 3.0.3.1
118
+ signing_key:
117
119
  specification_version: 4
118
120
  summary: sampling callstack-profiler for ruby 2.2+
119
121
  test_files: []