stackprof 0.2.14 → 0.2.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,20 +7,32 @@
7
7
  **********************************************************************/
8
8
 
9
9
  #include <ruby/ruby.h>
10
+ #include <ruby/version.h>
10
11
  #include <ruby/debug.h>
11
12
  #include <ruby/st.h>
12
13
  #include <ruby/io.h>
13
14
  #include <ruby/intern.h>
14
15
  #include <signal.h>
15
16
  #include <sys/time.h>
17
+ #include <time.h>
16
18
  #include <pthread.h>
17
19
 
18
20
  #define BUF_SIZE 2048
21
+ #define MICROSECONDS_IN_SECOND 1000000
22
+ #define NANOSECONDS_IN_SECOND 1000000000
19
23
 
20
24
  #define FAKE_FRAME_GC INT2FIX(0)
21
25
  #define FAKE_FRAME_MARK INT2FIX(1)
22
26
  #define FAKE_FRAME_SWEEP INT2FIX(2)
23
27
 
28
+ /*
29
+ * As of Ruby 3.0, it should be safe to read stack frames at any time
30
+ * See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
31
+ */
32
+ #if RUBY_API_VERSION_MAJOR < 3
33
+ #define USE_POSTPONED_JOB
34
+ #endif
35
+
24
36
  static const char *fake_frame_cstrs[] = {
25
37
  "(garbage collection)",
26
38
  "(marking)",
@@ -29,6 +41,47 @@ static const char *fake_frame_cstrs[] = {
29
41
 
30
42
  #define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
31
43
 
44
+ #ifdef _POSIX_MONOTONIC_CLOCK
45
+ #define timestamp_t timespec
46
+ typedef struct timestamp_t timestamp_t;
47
+
48
+ static void capture_timestamp(timestamp_t *ts) {
49
+ clock_gettime(CLOCK_MONOTONIC, ts);
50
+ }
51
+
52
+ static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
53
+ int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
54
+ if (end->tv_nsec < start->tv_nsec) {
55
+ result -= MICROSECONDS_IN_SECOND;
56
+ result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
57
+ } else {
58
+ result += (end->tv_nsec - start->tv_nsec) / 1000;
59
+ }
60
+ return result;
61
+ }
62
+
63
+ static uint64_t timestamp_usec(timestamp_t *ts) {
64
+ return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000);
65
+ }
66
+ #else
67
+ #define timestamp_t timeval
68
+ typedef struct timestamp_t timestamp_t;
69
+
70
+ static void capture_timestamp(timestamp_t *ts) {
71
+ gettimeofday(ts, NULL);
72
+ }
73
+
74
+ static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
75
+ struct timeval diff;
76
+ timersub(end, start, &diff);
77
+ return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec;
78
+ }
79
+
80
+ static uint64_t timestamp_usec(timestamp_t *ts) {
81
+ return (MICROSECONDS_IN_SECOND * ts.tv_sec) + diff.tv_usec
82
+ }
83
+ #endif
84
+
32
85
  typedef struct {
33
86
  size_t total_samples;
34
87
  size_t caller_samples;
@@ -37,6 +90,11 @@ typedef struct {
37
90
  st_table *lines;
38
91
  } frame_data_t;
39
92
 
93
/* Timing information kept for one raw sample. */
typedef struct {
    /* When the sample was captured, in microseconds (see timestamp_usec()). */
    uint64_t timestamp_usec;
    /* Microseconds between samples. */
    int64_t delta_usec;
} sample_time_t;
97
+
40
98
  static struct {
41
99
  int running;
42
100
  int raw;
@@ -46,16 +104,17 @@ static struct {
46
104
  VALUE interval;
47
105
  VALUE out;
48
106
  VALUE metadata;
107
+ int ignore_gc;
49
108
 
50
109
  VALUE *raw_samples;
51
110
  size_t raw_samples_len;
52
111
  size_t raw_samples_capa;
53
112
  size_t raw_sample_index;
54
113
 
55
- struct timeval last_sample_at;
56
- int *raw_timestamp_deltas;
57
- size_t raw_timestamp_deltas_len;
58
- size_t raw_timestamp_deltas_capa;
114
+ struct timestamp_t last_sample_at;
115
+ sample_time_t *raw_sample_times;
116
+ size_t raw_sample_times_len;
117
+ size_t raw_sample_times_capa;
59
118
 
60
119
  size_t overall_signals;
61
120
  size_t overall_samples;
@@ -67,14 +126,17 @@ static struct {
67
126
 
68
127
  VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
69
128
  VALUE empty_string;
129
+
130
+ int buffer_count;
131
+ sample_time_t buffer_time;
70
132
  VALUE frames_buffer[BUF_SIZE];
71
133
  int lines_buffer[BUF_SIZE];
72
134
  } _stackprof;
73
135
 
74
136
  static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
75
137
  static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
76
- static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_out, sym_aggregate, sym_raw_timestamp_deltas;
77
- static VALUE sym_state, sym_marking, sym_sweeping;
138
+ static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
139
+ static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
78
140
  static VALUE sym_gc_samples, objtracer;
79
141
  static VALUE gc_hook;
80
142
  static VALUE rb_mStackProf;
@@ -88,6 +150,7 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
88
150
  struct sigaction sa;
89
151
  struct itimerval timer;
90
152
  VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
153
+ int ignore_gc = 0;
91
154
  int raw = 0, aggregate = 1;
92
155
 
93
156
  if (_stackprof.running)
@@ -99,6 +162,9 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
99
162
  mode = rb_hash_aref(opts, sym_mode);
100
163
  interval = rb_hash_aref(opts, sym_interval);
101
164
  out = rb_hash_aref(opts, sym_out);
165
+ if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
166
+ ignore_gc = 1;
167
+ }
102
168
 
103
169
  VALUE metadata_val = rb_hash_aref(opts, sym_metadata);
104
170
  if (RTEST(metadata_val)) {
@@ -115,6 +181,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
115
181
  }
116
182
  if (!RTEST(mode)) mode = sym_wall;
117
183
 
184
+ if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
185
+ rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
186
+ }
187
+
118
188
  if (!_stackprof.frames) {
119
189
  _stackprof.frames = st_init_numtable();
120
190
  _stackprof.overall_signals = 0;
@@ -151,11 +221,12 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
151
221
  _stackprof.aggregate = aggregate;
152
222
  _stackprof.mode = mode;
153
223
  _stackprof.interval = interval;
224
+ _stackprof.ignore_gc = ignore_gc;
154
225
  _stackprof.metadata = metadata;
155
226
  _stackprof.out = out;
156
227
 
157
228
  if (raw) {
158
- gettimeofday(&_stackprof.last_sample_at, NULL);
229
+ capture_timestamp(&_stackprof.last_sample_at);
159
230
  }
160
231
 
161
232
  return Qtrue;
@@ -190,13 +261,19 @@ stackprof_stop(VALUE self)
190
261
  return Qtrue;
191
262
  }
192
263
 
264
/*
 * PTR2NUM(x): convert a pointer-sized value into a Ruby Integer. The value
 * goes through `long` when a long is as wide as a pointer, and through
 * LONG_LONG otherwise.
 */
#if SIZEOF_VOIDP != SIZEOF_LONG
# define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
#else
# define PTR2NUM(x) (LONG2NUM((long)(x)))
#endif
269
+
193
270
  static int
194
271
  frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
195
272
  {
196
273
  VALUE edges = (VALUE)arg;
197
274
 
198
275
  intptr_t weight = (intptr_t)val;
199
- rb_hash_aset(edges, rb_obj_id((VALUE)key), INT2FIX(weight));
276
+ rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
200
277
  return ST_CONTINUE;
201
278
  }
202
279
 
@@ -223,7 +300,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
223
300
  VALUE name, file, edges, lines;
224
301
  VALUE line;
225
302
 
226
- rb_hash_aset(results, rb_obj_id(frame), details);
303
+ rb_hash_aset(results, PTR2NUM(frame), details);
227
304
 
228
305
  if (FIXNUM_P(frame)) {
229
306
  name = _stackprof.fake_frame_names[FIX2INT(frame)];
@@ -284,6 +361,8 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
284
361
  rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples));
285
362
  rb_hash_aset(results, sym_metadata, _stackprof.metadata);
286
363
 
364
+ _stackprof.metadata = Qnil;
365
+
287
366
  frames = rb_hash_new();
288
367
  rb_hash_aset(results, sym_frames, frames);
289
368
  st_foreach(_stackprof.frames, frame_i, (st_data_t)frames);
@@ -293,7 +372,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
293
372
 
294
373
  if (_stackprof.raw && _stackprof.raw_samples_len) {
295
374
  size_t len, n, o;
296
- VALUE raw_timestamp_deltas;
375
+ VALUE raw_sample_timestamps, raw_timestamp_deltas;
297
376
  VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
298
377
 
299
378
  for (n = 0; n < _stackprof.raw_samples_len; n++) {
@@ -301,7 +380,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
301
380
  rb_ary_push(raw_samples, SIZET2NUM(len));
302
381
 
303
382
  for (o = 0, n++; o < len; n++, o++)
304
- rb_ary_push(raw_samples, rb_obj_id(_stackprof.raw_samples[n]));
383
+ rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n]));
305
384
  rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
306
385
  }
307
386
 
@@ -313,17 +392,20 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
313
392
 
314
393
  rb_hash_aset(results, sym_raw, raw_samples);
315
394
 
316
- raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len);
395
+ raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
396
+ raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
317
397
 
318
- for (n = 0; n < _stackprof.raw_timestamp_deltas_len; n++) {
319
- rb_ary_push(raw_timestamp_deltas, INT2FIX(_stackprof.raw_timestamp_deltas[n]));
398
+ for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
399
+ rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
400
+ rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
320
401
  }
321
402
 
322
- free(_stackprof.raw_timestamp_deltas);
323
- _stackprof.raw_timestamp_deltas = NULL;
324
- _stackprof.raw_timestamp_deltas_len = 0;
325
- _stackprof.raw_timestamp_deltas_capa = 0;
403
+ free(_stackprof.raw_sample_times);
404
+ _stackprof.raw_sample_times = NULL;
405
+ _stackprof.raw_sample_times_len = 0;
406
+ _stackprof.raw_sample_times_capa = 0;
326
407
 
408
+ rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
327
409
  rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
328
410
 
329
411
  _stackprof.raw = 0;
@@ -403,14 +485,14 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
403
485
  }
404
486
 
405
487
  void
406
- stackprof_record_sample_for_stack(int num, int timestamp_delta)
488
+ stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
407
489
  {
408
490
  int i, n;
409
491
  VALUE prev_frame = Qnil;
410
492
 
411
493
  _stackprof.overall_samples++;
412
494
 
413
- if (_stackprof.raw) {
495
+ if (_stackprof.raw && num > 0) {
414
496
  int found = 0;
415
497
 
416
498
  /* If there's no sample buffer allocated, then allocate one. The buffer
@@ -462,20 +544,23 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
462
544
  }
463
545
 
464
546
  /* If there's no sample time buffer, allocate one */
465
- if (!_stackprof.raw_timestamp_deltas) {
466
- _stackprof.raw_timestamp_deltas_capa = 100;
467
- _stackprof.raw_timestamp_deltas = malloc(sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
468
- _stackprof.raw_timestamp_deltas_len = 0;
547
+ if (!_stackprof.raw_sample_times) {
548
+ _stackprof.raw_sample_times_capa = 100;
549
+ _stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
550
+ _stackprof.raw_sample_times_len = 0;
469
551
  }
470
552
 
471
553
  /* Double the buffer size if it's too small */
472
- while (_stackprof.raw_timestamp_deltas_capa <= _stackprof.raw_timestamp_deltas_len + 1) {
473
- _stackprof.raw_timestamp_deltas_capa *= 2;
474
- _stackprof.raw_timestamp_deltas = realloc(_stackprof.raw_timestamp_deltas, sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
554
+ while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
555
+ _stackprof.raw_sample_times_capa *= 2;
556
+ _stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
475
557
  }
476
558
 
477
- /* Store the time delta (which is the amount of time between samples) */
478
- _stackprof.raw_timestamp_deltas[_stackprof.raw_timestamp_deltas_len++] = timestamp_delta;
559
+ /* Store the time delta (which is the amount of microseconds between samples). */
560
+ _stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
561
+ .timestamp_usec = sample_timestamp,
562
+ .delta_usec = timestamp_delta,
563
+ };
479
564
  }
480
565
 
481
566
  for (i = 0; i < num; i++) {
@@ -508,48 +593,59 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
508
593
  }
509
594
 
510
595
  if (_stackprof.raw) {
511
- gettimeofday(&_stackprof.last_sample_at, NULL);
596
+ capture_timestamp(&_stackprof.last_sample_at);
512
597
  }
513
598
  }
514
599
 
600
+ // buffer the current profile frames
601
+ // This must be async-signal-safe
602
+ // Returns immediately if another set of frames are already in the buffer
515
603
  void
516
- stackprof_record_sample()
604
+ stackprof_buffer_sample(void)
517
605
  {
518
- int timestamp_delta = 0;
606
+ if (_stackprof.buffer_count > 0) {
607
+ // Another sample is already pending
608
+ return;
609
+ }
610
+
611
+ uint64_t start_timestamp = 0;
612
+ int64_t timestamp_delta = 0;
519
613
  int num;
520
614
  if (_stackprof.raw) {
521
- struct timeval t;
522
- struct timeval diff;
523
- gettimeofday(&t, NULL);
524
- timersub(&t, &_stackprof.last_sample_at, &diff);
525
- timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
615
+ struct timestamp_t t;
616
+ capture_timestamp(&t);
617
+ start_timestamp = timestamp_usec(&t);
618
+ timestamp_delta = delta_usec(&t, &_stackprof.last_sample_at);
526
619
  }
620
+
527
621
  num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
528
- stackprof_record_sample_for_stack(num, timestamp_delta);
622
+
623
+ _stackprof.buffer_count = num;
624
+ _stackprof.buffer_time.timestamp_usec = start_timestamp;
625
+ _stackprof.buffer_time.delta_usec = timestamp_delta;
529
626
  }
530
627
 
531
628
  void
532
- stackprof_record_gc_samples()
629
+ stackprof_record_gc_samples(void)
533
630
  {
534
- int delta_to_first_unrecorded_gc_sample = 0;
535
- int i;
631
+ int64_t delta_to_first_unrecorded_gc_sample = 0;
632
+ uint64_t start_timestamp = 0;
633
+ size_t i;
536
634
  if (_stackprof.raw) {
537
- struct timeval t;
538
- struct timeval diff;
539
- gettimeofday(&t, NULL);
540
- timersub(&t, &_stackprof.last_sample_at, &diff);
635
+ struct timestamp_t t;
636
+ capture_timestamp(&t);
637
+ start_timestamp = timestamp_usec(&t);
541
638
 
542
639
  // We don't know when the GC samples were actually marked, so let's
543
640
  // assume that they were marked at a perfectly regular interval.
544
- delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
641
+ delta_to_first_unrecorded_gc_sample = delta_usec(&t, &_stackprof.last_sample_at) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
545
642
  if (delta_to_first_unrecorded_gc_sample < 0) {
546
643
  delta_to_first_unrecorded_gc_sample = 0;
547
644
  }
548
645
  }
549
646
 
550
-
551
647
  for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
552
- int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
648
+ int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
553
649
 
554
650
  if (_stackprof.unrecorded_gc_marking_samples) {
555
651
  _stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
@@ -558,7 +654,7 @@ stackprof_record_gc_samples()
558
654
  _stackprof.lines_buffer[1] = 0;
559
655
  _stackprof.unrecorded_gc_marking_samples--;
560
656
 
561
- stackprof_record_sample_for_stack(2, timestamp_delta);
657
+ stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
562
658
  } else if (_stackprof.unrecorded_gc_sweeping_samples) {
563
659
  _stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
564
660
  _stackprof.lines_buffer[0] = 0;
@@ -567,11 +663,11 @@ stackprof_record_gc_samples()
567
663
 
568
664
  _stackprof.unrecorded_gc_sweeping_samples--;
569
665
 
570
- stackprof_record_sample_for_stack(2, timestamp_delta);
666
+ stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
571
667
  } else {
572
668
  _stackprof.frames_buffer[0] = FAKE_FRAME_GC;
573
669
  _stackprof.lines_buffer[0] = 0;
574
- stackprof_record_sample_for_stack(1, timestamp_delta);
670
+ stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
575
671
  }
576
672
  }
577
673
  _stackprof.during_gc += _stackprof.unrecorded_gc_samples;
@@ -580,35 +676,61 @@ stackprof_record_gc_samples()
580
676
  _stackprof.unrecorded_gc_sweeping_samples = 0;
581
677
  }
582
678
 
679
+ // record the sample previously buffered by stackprof_buffer_sample
680
+ static void
681
+ stackprof_record_buffer(void)
682
+ {
683
+ stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
684
+
685
+ // reset the buffer
686
+ _stackprof.buffer_count = 0;
687
+ }
688
+
583
689
  static void
584
- stackprof_gc_job_handler(void *data)
690
+ stackprof_sample_and_record(void)
691
+ {
692
+ stackprof_buffer_sample();
693
+ stackprof_record_buffer();
694
+ }
695
+
696
+ static void
697
+ stackprof_job_record_gc(void *data)
585
698
  {
586
- static int in_signal_handler = 0;
587
- if (in_signal_handler) return;
588
699
  if (!_stackprof.running) return;
589
700
 
590
- in_signal_handler++;
591
701
  stackprof_record_gc_samples();
592
- in_signal_handler--;
593
702
  }
594
703
 
704
#ifdef USE_POSTPONED_JOB
/*
 * Postponed-job callback used when USE_POSTPONED_JOB is defined: samples
 * and records the current stack while profiling is still running.
 * `data` is unused.
 */
static void
stackprof_job_sample_and_record(void *data)
{
    if (_stackprof.running) {
        stackprof_sample_and_record();
    }
}
#endif
713
+
714
+ static void
715
+ stackprof_job_record_buffer(void *data)
716
+ {
717
+ if (!_stackprof.running) return;
718
+
719
+ stackprof_record_buffer();
605
720
  }
606
721
 
607
722
  static void
608
723
  stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
609
724
  {
725
+ static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
726
+
610
727
  _stackprof.overall_signals++;
611
- if (rb_during_gc()) {
728
+
729
+ if (!_stackprof.running) return;
730
+ if (!ruby_native_thread_p()) return;
731
+ if (pthread_mutex_trylock(&lock)) return;
732
+
733
+ if (!_stackprof.ignore_gc && rb_during_gc()) {
612
734
  VALUE mode = rb_gc_latest_gc_info(sym_state);
613
735
  if (mode == sym_marking) {
614
736
  _stackprof.unrecorded_gc_marking_samples++;
@@ -616,10 +738,19 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
616
738
  _stackprof.unrecorded_gc_sweeping_samples++;
617
739
  }
618
740
  _stackprof.unrecorded_gc_samples++;
619
- rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0);
741
+ rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
620
742
  } else {
621
- rb_postponed_job_register_one(0, stackprof_job_handler, (void*)0);
743
+ #ifdef USE_POSTPONED_JOB
744
+ rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
745
+ #else
746
+ // Buffer a sample immediately, if an existing sample exists this will
747
+ // return immediately
748
+ stackprof_buffer_sample();
749
+ // Enqueue a job to record the sample
750
+ rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
751
+ #endif
622
752
  }
753
+ pthread_mutex_unlock(&lock);
623
754
  }
624
755
 
625
756
  static void
@@ -628,7 +759,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
628
759
  _stackprof.overall_signals++;
629
760
  if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
630
761
  return;
631
- stackprof_job_handler(0);
762
+ stackprof_sample_and_record();
632
763
  }
633
764
 
634
765
  static VALUE
@@ -638,7 +769,7 @@ stackprof_sample(VALUE self)
638
769
  return Qfalse;
639
770
 
640
771
  _stackprof.overall_signals++;
641
- stackprof_job_handler(0);
772
+ stackprof_sample_and_record();
642
773
  return Qtrue;
643
774
  }
644
775
 
@@ -653,6 +784,9 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
653
784
  static void
654
785
  stackprof_gc_mark(void *data)
655
786
  {
787
+ if (RTEST(_stackprof.metadata))
788
+ rb_gc_mark(_stackprof.metadata);
789
+
656
790
  if (RTEST(_stackprof.out))
657
791
  rb_gc_mark(_stackprof.out);
658
792
 
@@ -714,9 +848,11 @@ Init_stackprof(void)
714
848
  S(mode);
715
849
  S(interval);
716
850
  S(raw);
851
+ S(raw_sample_timestamps);
717
852
  S(raw_timestamp_deltas);
718
853
  S(out);
719
854
  S(metadata);
855
+ S(ignore_gc);
720
856
  S(frames);
721
857
  S(aggregate);
722
858
  S(state);
@@ -735,9 +871,9 @@ Init_stackprof(void)
735
871
  _stackprof.raw_samples_capa = 0;
736
872
  _stackprof.raw_sample_index = 0;
737
873
 
738
- _stackprof.raw_timestamp_deltas = NULL;
739
- _stackprof.raw_timestamp_deltas_len = 0;
740
- _stackprof.raw_timestamp_deltas_capa = 0;
874
+ _stackprof.raw_sample_times = NULL;
875
+ _stackprof.raw_sample_times_len = 0;
876
+ _stackprof.raw_sample_times_capa = 0;
741
877
 
742
878
  _stackprof.empty_string = rb_str_new_cstr("");
743
879
  rb_global_variable(&_stackprof.empty_string);
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'pp'
2
4
  require 'digest/md5'
3
5
 
@@ -38,7 +40,7 @@ module StackProf
38
40
  end
39
41
 
40
42
  def max_samples
41
- @data[:max_samples] ||= frames.max_by{ |addr, frame| frame[:samples] }.last[:samples]
43
+ @data[:max_samples] ||= @data[:frames].values.max_by{ |frame| frame[:samples] }[:samples]
42
44
  end
43
45
 
44
46
  def files
@@ -96,15 +98,7 @@ module StackProf
96
98
  def print_flamegraph(f, skip_common, alphabetical=false)
97
99
  raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
98
100
 
99
- stacks = []
100
- max_x = 0
101
- max_y = 0
102
- while len = raw.shift
103
- max_y = len if len > max_y
104
- stack = raw.slice!(0, len+1)
105
- stacks << stack
106
- max_x += stack.last
107
- end
101
+ stacks, max_x, max_y = flamegraph_stacks(raw)
108
102
 
109
103
  stacks.sort! if alphabetical
110
104
 
@@ -156,8 +150,26 @@ module StackProf
156
150
  f.puts '])'
157
151
  end
158
152
 
153
# Split the flat raw-sample array into one array per recorded stack without
# modifying `raw`.
#
# `raw` is read as consecutive records: a length, followed by length + 1
# further entries, the last of which is added to the running total.
#
# Returns [stacks, max_x, max_y]: the per-record arrays, the sum of each
# record's last entry, and the largest length seen.
def flamegraph_stacks(raw)
  stacks = []
  total_weight = 0
  deepest = 0
  cursor = 0

  loop do
    depth = raw[cursor]
    break unless depth

    cursor += 1
    deepest = depth if depth > deepest
    record = raw.slice(cursor, depth + 1)
    cursor += depth + 1
    stacks << record
    total_weight += record.last
  end

  [stacks, total_weight, deepest]
end
170
+
159
171
  def flamegraph_row(f, x, y, weight, addr)
160
- frame = frames[addr]
172
+ frame = @data[:frames][addr]
161
173
  f.print ',' if @rows_started
162
174
  @rows_started = true
163
175
  f.puts %{{"x":#{x},"y":#{y},"width":#{weight},"frame_id":#{addr},"frame":#{frame[:name].dump},"file":#{frame[:file].dump}}}
@@ -178,7 +190,7 @@ module StackProf
178
190
  weight += stack.last
179
191
  end
180
192
  else
181
- frame = frames[val]
193
+ frame = @data[:frames][val]
182
194
  child_name = "#{ frame[:name] } : #{ frame[:file] }"
183
195
  child_data = convert_to_d3_flame_graph_format(child_name, child_stacks, depth + 1)
184
196
  weight += child_data["value"]
@@ -196,15 +208,7 @@ module StackProf
196
208
  def print_d3_flamegraph(f=STDOUT, skip_common=true)
197
209
  raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
198
210
 
199
- stacks = []
200
- max_x = 0
201
- max_y = 0
202
- while len = raw.shift
203
- max_y = len if len > max_y
204
- stack = raw.slice!(0, len+1)
205
- stacks << stack
206
- max_x += stack.last
207
- end
211
+ stacks, * = flamegraph_stacks(raw)
208
212
 
209
213
  # d3-flame-graph supports only alphabetical flamegraphs
210
214
  stacks.sort!
@@ -410,7 +414,7 @@ module StackProf
410
414
  call, total = info.values_at(:samples, :total_samples)
411
415
  break if total < node_minimum || (limit && index >= limit)
412
416
 
413
- sample = ''
417
+ sample = ''.dup
414
418
  sample << "#{call} (%2.1f%%)\\rof " % (call*100.0/overall_samples) if call < total
415
419
  sample << "#{total} (%2.1f%%)\\r" % (total*100.0/overall_samples)
416
420
  fontsize = (1.0 * call / max_samples) * 28 + 10
@@ -654,7 +658,8 @@ module StackProf
654
658
  end
655
659
  end
656
660
  end
661
+ rescue SystemCallError
662
+ f.puts " SOURCE UNAVAILABLE"
657
663
  end
658
-
659
664
  end
660
665
  end
data/lib/stackprof.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require "stackprof/stackprof"
2
2
 
3
3
  module StackProf
4
- VERSION = '0.2.14'
4
+ VERSION = '0.2.18'
5
5
  end
6
6
 
7
7
  StackProf.autoload :Report, "stackprof/report.rb"
data/stackprof.gemspec CHANGED
@@ -1,11 +1,18 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'stackprof'
3
- s.version = '0.2.14'
3
+ s.version = '0.2.18'
4
4
  s.homepage = 'http://github.com/tmm1/stackprof'
5
5
 
6
6
  s.authors = 'Aman Gupta'
7
7
  s.email = 'aman@tmm1.net'
8
8
 
9
+ s.metadata = {
10
+ 'bug_tracker_uri' => 'https://github.com/tmm1/stackprof/issues',
11
+ 'changelog_uri' => "https://github.com/tmm1/stackprof/blob/v#{s.version}/CHANGELOG.md",
12
+ 'documentation_uri' => "https://www.rubydoc.info/gems/stackprof/#{s.version}",
13
+ 'source_code_uri' => "https://github.com/tmm1/stackprof/tree/v#{s.version}"
14
+ }
15
+
9
16
  s.files = `git ls-files`.split("\n")
10
17
  s.extensions = 'ext/stackprof/extconf.rb'
11
18