stackprof 0.2.14 → 0.2.18

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,20 +7,32 @@
7
7
  **********************************************************************/
8
8
 
9
9
  #include <ruby/ruby.h>
10
+ #include <ruby/version.h>
10
11
  #include <ruby/debug.h>
11
12
  #include <ruby/st.h>
12
13
  #include <ruby/io.h>
13
14
  #include <ruby/intern.h>
14
15
  #include <signal.h>
15
16
  #include <sys/time.h>
17
+ #include <time.h>
16
18
  #include <pthread.h>
17
19
 
18
20
  #define BUF_SIZE 2048
21
+ #define MICROSECONDS_IN_SECOND 1000000
22
+ #define NANOSECONDS_IN_SECOND 1000000000
19
23
 
20
24
  #define FAKE_FRAME_GC INT2FIX(0)
21
25
  #define FAKE_FRAME_MARK INT2FIX(1)
22
26
  #define FAKE_FRAME_SWEEP INT2FIX(2)
23
27
 
28
+ /*
29
+ * As of Ruby 3.0, it should be safe to read stack frames at any time
30
+ * See https://github.com/ruby/ruby/commit/0e276dc458f94d9d79a0f7c7669bde84abe80f21
31
+ */
32
+ #if RUBY_API_VERSION_MAJOR < 3
33
+ #define USE_POSTPONED_JOB
34
+ #endif
35
+
24
36
  static const char *fake_frame_cstrs[] = {
25
37
  "(garbage collection)",
26
38
  "(marking)",
@@ -29,6 +41,47 @@ static const char *fake_frame_cstrs[] = {
29
41
 
30
42
  #define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
31
43
 
44
+ #ifdef _POSIX_MONOTONIC_CLOCK
45
+ #define timestamp_t timespec
46
+ typedef struct timestamp_t timestamp_t;
47
+
48
+ static void capture_timestamp(timestamp_t *ts) {
49
+ clock_gettime(CLOCK_MONOTONIC, ts);
50
+ }
51
+
52
+ static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
53
+ int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
54
+ if (end->tv_nsec < start->tv_nsec) {
55
+ result -= MICROSECONDS_IN_SECOND;
56
+ result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
57
+ } else {
58
+ result += (end->tv_nsec - start->tv_nsec) / 1000;
59
+ }
60
+ return result;
61
+ }
62
+
63
+ static uint64_t timestamp_usec(timestamp_t *ts) {
64
+ return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000);
65
+ }
66
+ #else
67
+ #define timestamp_t timeval
68
+ typedef struct timestamp_t timestamp_t;
69
+
70
+ static void capture_timestamp(timestamp_t *ts) {
71
+ gettimeofday(ts, NULL);
72
+ }
73
+
74
+ static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
75
+ struct timeval diff;
76
+ timersub(end, start, &diff);
77
+ return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec;
78
+ }
79
+
80
+ static uint64_t timestamp_usec(timestamp_t *ts) {
81
+ return (MICROSECONDS_IN_SECOND * ts.tv_sec) + diff.tv_usec
82
+ }
83
+ #endif
84
+
32
85
  typedef struct {
33
86
  size_t total_samples;
34
87
  size_t caller_samples;
@@ -37,6 +90,11 @@ typedef struct {
37
90
  st_table *lines;
38
91
  } frame_data_t;
39
92
 
93
/* Timing information kept for one raw sample. */
typedef struct {
    /* When the sample was taken, in microseconds (from timestamp_usec()). */
    uint64_t timestamp_usec;
    /* Microseconds elapsed between the previous sample and this one. */
    int64_t delta_usec;
} sample_time_t;
97
+
40
98
  static struct {
41
99
  int running;
42
100
  int raw;
@@ -46,16 +104,17 @@ static struct {
46
104
  VALUE interval;
47
105
  VALUE out;
48
106
  VALUE metadata;
107
+ int ignore_gc;
49
108
 
50
109
  VALUE *raw_samples;
51
110
  size_t raw_samples_len;
52
111
  size_t raw_samples_capa;
53
112
  size_t raw_sample_index;
54
113
 
55
- struct timeval last_sample_at;
56
- int *raw_timestamp_deltas;
57
- size_t raw_timestamp_deltas_len;
58
- size_t raw_timestamp_deltas_capa;
114
+ struct timestamp_t last_sample_at;
115
+ sample_time_t *raw_sample_times;
116
+ size_t raw_sample_times_len;
117
+ size_t raw_sample_times_capa;
59
118
 
60
119
  size_t overall_signals;
61
120
  size_t overall_samples;
@@ -67,14 +126,17 @@ static struct {
67
126
 
68
127
  VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
69
128
  VALUE empty_string;
129
+
130
+ int buffer_count;
131
+ sample_time_t buffer_time;
70
132
  VALUE frames_buffer[BUF_SIZE];
71
133
  int lines_buffer[BUF_SIZE];
72
134
  } _stackprof;
73
135
 
74
136
  static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
75
137
  static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
76
- static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_out, sym_aggregate, sym_raw_timestamp_deltas;
77
- static VALUE sym_state, sym_marking, sym_sweeping;
138
+ static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
139
+ static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
78
140
  static VALUE sym_gc_samples, objtracer;
79
141
  static VALUE gc_hook;
80
142
  static VALUE rb_mStackProf;
@@ -88,6 +150,7 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
88
150
  struct sigaction sa;
89
151
  struct itimerval timer;
90
152
  VALUE opts = Qnil, mode = Qnil, interval = Qnil, metadata = rb_hash_new(), out = Qfalse;
153
+ int ignore_gc = 0;
91
154
  int raw = 0, aggregate = 1;
92
155
 
93
156
  if (_stackprof.running)
@@ -99,6 +162,9 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
99
162
  mode = rb_hash_aref(opts, sym_mode);
100
163
  interval = rb_hash_aref(opts, sym_interval);
101
164
  out = rb_hash_aref(opts, sym_out);
165
+ if (RTEST(rb_hash_aref(opts, sym_ignore_gc))) {
166
+ ignore_gc = 1;
167
+ }
102
168
 
103
169
  VALUE metadata_val = rb_hash_aref(opts, sym_metadata);
104
170
  if (RTEST(metadata_val)) {
@@ -115,6 +181,10 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
115
181
  }
116
182
  if (!RTEST(mode)) mode = sym_wall;
117
183
 
184
+ if (!NIL_P(interval) && (NUM2INT(interval) < 1 || NUM2INT(interval) >= MICROSECONDS_IN_SECOND)) {
185
+ rb_raise(rb_eArgError, "interval is a number of microseconds between 1 and 1 million");
186
+ }
187
+
118
188
  if (!_stackprof.frames) {
119
189
  _stackprof.frames = st_init_numtable();
120
190
  _stackprof.overall_signals = 0;
@@ -151,11 +221,12 @@ stackprof_start(int argc, VALUE *argv, VALUE self)
151
221
  _stackprof.aggregate = aggregate;
152
222
  _stackprof.mode = mode;
153
223
  _stackprof.interval = interval;
224
+ _stackprof.ignore_gc = ignore_gc;
154
225
  _stackprof.metadata = metadata;
155
226
  _stackprof.out = out;
156
227
 
157
228
  if (raw) {
158
- gettimeofday(&_stackprof.last_sample_at, NULL);
229
+ capture_timestamp(&_stackprof.last_sample_at);
159
230
  }
160
231
 
161
232
  return Qtrue;
@@ -190,13 +261,19 @@ stackprof_stop(VALUE self)
190
261
  return Qtrue;
191
262
  }
192
263
 
264
/*
 * PTR2NUM(x): convert a pointer-sized value to a Ruby Integer, choosing the
 * conversion whose C integer type has the same width as void *.
 */
#if SIZEOF_VOIDP != SIZEOF_LONG
#define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
#else
#define PTR2NUM(x) (LONG2NUM((long)(x)))
#endif
269
+
193
270
  static int
194
271
  frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
195
272
  {
196
273
  VALUE edges = (VALUE)arg;
197
274
 
198
275
  intptr_t weight = (intptr_t)val;
199
- rb_hash_aset(edges, rb_obj_id((VALUE)key), INT2FIX(weight));
276
+ rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
200
277
  return ST_CONTINUE;
201
278
  }
202
279
 
@@ -223,7 +300,7 @@ frame_i(st_data_t key, st_data_t val, st_data_t arg)
223
300
  VALUE name, file, edges, lines;
224
301
  VALUE line;
225
302
 
226
- rb_hash_aset(results, rb_obj_id(frame), details);
303
+ rb_hash_aset(results, PTR2NUM(frame), details);
227
304
 
228
305
  if (FIXNUM_P(frame)) {
229
306
  name = _stackprof.fake_frame_names[FIX2INT(frame)];
@@ -284,6 +361,8 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
284
361
  rb_hash_aset(results, sym_missed_samples, SIZET2NUM(_stackprof.overall_signals - _stackprof.overall_samples));
285
362
  rb_hash_aset(results, sym_metadata, _stackprof.metadata);
286
363
 
364
+ _stackprof.metadata = Qnil;
365
+
287
366
  frames = rb_hash_new();
288
367
  rb_hash_aset(results, sym_frames, frames);
289
368
  st_foreach(_stackprof.frames, frame_i, (st_data_t)frames);
@@ -293,7 +372,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
293
372
 
294
373
  if (_stackprof.raw && _stackprof.raw_samples_len) {
295
374
  size_t len, n, o;
296
- VALUE raw_timestamp_deltas;
375
+ VALUE raw_sample_timestamps, raw_timestamp_deltas;
297
376
  VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
298
377
 
299
378
  for (n = 0; n < _stackprof.raw_samples_len; n++) {
@@ -301,7 +380,7 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
301
380
  rb_ary_push(raw_samples, SIZET2NUM(len));
302
381
 
303
382
  for (o = 0, n++; o < len; n++, o++)
304
- rb_ary_push(raw_samples, rb_obj_id(_stackprof.raw_samples[n]));
383
+ rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n]));
305
384
  rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
306
385
  }
307
386
 
@@ -313,17 +392,20 @@ stackprof_results(int argc, VALUE *argv, VALUE self)
313
392
 
314
393
  rb_hash_aset(results, sym_raw, raw_samples);
315
394
 
316
- raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len);
395
+ raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
396
+ raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
317
397
 
318
- for (n = 0; n < _stackprof.raw_timestamp_deltas_len; n++) {
319
- rb_ary_push(raw_timestamp_deltas, INT2FIX(_stackprof.raw_timestamp_deltas[n]));
398
+ for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
399
+ rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
400
+ rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
320
401
  }
321
402
 
322
- free(_stackprof.raw_timestamp_deltas);
323
- _stackprof.raw_timestamp_deltas = NULL;
324
- _stackprof.raw_timestamp_deltas_len = 0;
325
- _stackprof.raw_timestamp_deltas_capa = 0;
403
+ free(_stackprof.raw_sample_times);
404
+ _stackprof.raw_sample_times = NULL;
405
+ _stackprof.raw_sample_times_len = 0;
406
+ _stackprof.raw_sample_times_capa = 0;
326
407
 
408
+ rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
327
409
  rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
328
410
 
329
411
  _stackprof.raw = 0;
@@ -403,14 +485,14 @@ st_numtable_increment(st_table *table, st_data_t key, size_t increment)
403
485
  }
404
486
 
405
487
  void
406
- stackprof_record_sample_for_stack(int num, int timestamp_delta)
488
+ stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
407
489
  {
408
490
  int i, n;
409
491
  VALUE prev_frame = Qnil;
410
492
 
411
493
  _stackprof.overall_samples++;
412
494
 
413
- if (_stackprof.raw) {
495
+ if (_stackprof.raw && num > 0) {
414
496
  int found = 0;
415
497
 
416
498
  /* If there's no sample buffer allocated, then allocate one. The buffer
@@ -462,20 +544,23 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
462
544
  }
463
545
 
464
546
  /* If there's no timestamp delta buffer, allocate one */
465
- if (!_stackprof.raw_timestamp_deltas) {
466
- _stackprof.raw_timestamp_deltas_capa = 100;
467
- _stackprof.raw_timestamp_deltas = malloc(sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
468
- _stackprof.raw_timestamp_deltas_len = 0;
547
+ if (!_stackprof.raw_sample_times) {
548
+ _stackprof.raw_sample_times_capa = 100;
549
+ _stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
550
+ _stackprof.raw_sample_times_len = 0;
469
551
  }
470
552
 
471
553
  /* Double the buffer size if it's too small */
472
- while (_stackprof.raw_timestamp_deltas_capa <= _stackprof.raw_timestamp_deltas_len + 1) {
473
- _stackprof.raw_timestamp_deltas_capa *= 2;
474
- _stackprof.raw_timestamp_deltas = realloc(_stackprof.raw_timestamp_deltas, sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
554
+ while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
555
+ _stackprof.raw_sample_times_capa *= 2;
556
+ _stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
475
557
  }
476
558
 
477
- /* Store the time delta (which is the amount of time between samples) */
478
- _stackprof.raw_timestamp_deltas[_stackprof.raw_timestamp_deltas_len++] = timestamp_delta;
559
+ /* Store the time delta (which is the amount of microseconds between samples). */
560
+ _stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
561
+ .timestamp_usec = sample_timestamp,
562
+ .delta_usec = timestamp_delta,
563
+ };
479
564
  }
480
565
 
481
566
  for (i = 0; i < num; i++) {
@@ -508,48 +593,59 @@ stackprof_record_sample_for_stack(int num, int timestamp_delta)
508
593
  }
509
594
 
510
595
  if (_stackprof.raw) {
511
- gettimeofday(&_stackprof.last_sample_at, NULL);
596
+ capture_timestamp(&_stackprof.last_sample_at);
512
597
  }
513
598
  }
514
599
 
600
+ // buffer the current profile frames
601
+ // This must be async-signal-safe
602
+ // Returns immediately if another set of frames are already in the buffer
515
603
  void
516
- stackprof_record_sample()
604
+ stackprof_buffer_sample(void)
517
605
  {
518
- int timestamp_delta = 0;
606
+ if (_stackprof.buffer_count > 0) {
607
+ // Another sample is already pending
608
+ return;
609
+ }
610
+
611
+ uint64_t start_timestamp = 0;
612
+ int64_t timestamp_delta = 0;
519
613
  int num;
520
614
  if (_stackprof.raw) {
521
- struct timeval t;
522
- struct timeval diff;
523
- gettimeofday(&t, NULL);
524
- timersub(&t, &_stackprof.last_sample_at, &diff);
525
- timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
615
+ struct timestamp_t t;
616
+ capture_timestamp(&t);
617
+ start_timestamp = timestamp_usec(&t);
618
+ timestamp_delta = delta_usec(&t, &_stackprof.last_sample_at);
526
619
  }
620
+
527
621
  num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
528
- stackprof_record_sample_for_stack(num, timestamp_delta);
622
+
623
+ _stackprof.buffer_count = num;
624
+ _stackprof.buffer_time.timestamp_usec = start_timestamp;
625
+ _stackprof.buffer_time.delta_usec = timestamp_delta;
529
626
  }
530
627
 
531
628
  void
532
- stackprof_record_gc_samples()
629
+ stackprof_record_gc_samples(void)
533
630
  {
534
- int delta_to_first_unrecorded_gc_sample = 0;
535
- int i;
631
+ int64_t delta_to_first_unrecorded_gc_sample = 0;
632
+ uint64_t start_timestamp = 0;
633
+ size_t i;
536
634
  if (_stackprof.raw) {
537
- struct timeval t;
538
- struct timeval diff;
539
- gettimeofday(&t, NULL);
540
- timersub(&t, &_stackprof.last_sample_at, &diff);
635
+ struct timestamp_t t;
636
+ capture_timestamp(&t);
637
+ start_timestamp = timestamp_usec(&t);
541
638
 
542
639
  // We don't know when the GC samples were actually marked, so let's
543
640
  // assume that they were marked at a perfectly regular interval.
544
- delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
641
+ delta_to_first_unrecorded_gc_sample = delta_usec(&t, &_stackprof.last_sample_at) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
545
642
  if (delta_to_first_unrecorded_gc_sample < 0) {
546
643
  delta_to_first_unrecorded_gc_sample = 0;
547
644
  }
548
645
  }
549
646
 
550
-
551
647
  for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
552
- int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
648
+ int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
553
649
 
554
650
  if (_stackprof.unrecorded_gc_marking_samples) {
555
651
  _stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
@@ -558,7 +654,7 @@ stackprof_record_gc_samples()
558
654
  _stackprof.lines_buffer[1] = 0;
559
655
  _stackprof.unrecorded_gc_marking_samples--;
560
656
 
561
- stackprof_record_sample_for_stack(2, timestamp_delta);
657
+ stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
562
658
  } else if (_stackprof.unrecorded_gc_sweeping_samples) {
563
659
  _stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
564
660
  _stackprof.lines_buffer[0] = 0;
@@ -567,11 +663,11 @@ stackprof_record_gc_samples()
567
663
 
568
664
  _stackprof.unrecorded_gc_sweeping_samples--;
569
665
 
570
- stackprof_record_sample_for_stack(2, timestamp_delta);
666
+ stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
571
667
  } else {
572
668
  _stackprof.frames_buffer[0] = FAKE_FRAME_GC;
573
669
  _stackprof.lines_buffer[0] = 0;
574
- stackprof_record_sample_for_stack(1, timestamp_delta);
670
+ stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
575
671
  }
576
672
  }
577
673
  _stackprof.during_gc += _stackprof.unrecorded_gc_samples;
@@ -580,35 +676,61 @@ stackprof_record_gc_samples()
580
676
  _stackprof.unrecorded_gc_sweeping_samples = 0;
581
677
  }
582
678
 
679
+ // record the sample previously buffered by stackprof_buffer_sample
680
+ static void
681
+ stackprof_record_buffer(void)
682
+ {
683
+ stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
684
+
685
+ // reset the buffer
686
+ _stackprof.buffer_count = 0;
687
+ }
688
+
583
689
  static void
584
- stackprof_gc_job_handler(void *data)
690
+ stackprof_sample_and_record(void)
691
+ {
692
+ stackprof_buffer_sample();
693
+ stackprof_record_buffer();
694
+ }
695
+
696
+ static void
697
+ stackprof_job_record_gc(void *data)
585
698
  {
586
- static int in_signal_handler = 0;
587
- if (in_signal_handler) return;
588
699
  if (!_stackprof.running) return;
589
700
 
590
- in_signal_handler++;
591
701
  stackprof_record_gc_samples();
592
- in_signal_handler--;
593
702
  }
594
703
 
704
#ifdef USE_POSTPONED_JOB
/*
 * Postponed-job callback: sample the current stack and record it.
 * Does nothing when the profiler is not running. `data` is unused.
 */
static void
stackprof_job_sample_and_record(void *data)
{
    if (_stackprof.running) {
        stackprof_sample_and_record();
    }
}
#endif
713
+
714
+ static void
715
+ stackprof_job_record_buffer(void *data)
716
+ {
717
+ if (!_stackprof.running) return;
718
+
719
+ stackprof_record_buffer();
605
720
  }
606
721
 
607
722
  static void
608
723
  stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
609
724
  {
725
+ static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
726
+
610
727
  _stackprof.overall_signals++;
611
- if (rb_during_gc()) {
728
+
729
+ if (!_stackprof.running) return;
730
+ if (!ruby_native_thread_p()) return;
731
+ if (pthread_mutex_trylock(&lock)) return;
732
+
733
+ if (!_stackprof.ignore_gc && rb_during_gc()) {
612
734
  VALUE mode = rb_gc_latest_gc_info(sym_state);
613
735
  if (mode == sym_marking) {
614
736
  _stackprof.unrecorded_gc_marking_samples++;
@@ -616,10 +738,19 @@ stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
616
738
  _stackprof.unrecorded_gc_sweeping_samples++;
617
739
  }
618
740
  _stackprof.unrecorded_gc_samples++;
619
- rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0);
741
+ rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
620
742
  } else {
621
- rb_postponed_job_register_one(0, stackprof_job_handler, (void*)0);
743
+ #ifdef USE_POSTPONED_JOB
744
+ rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
745
+ #else
746
+ // Buffer a sample immediately, if an existing sample exists this will
747
+ // return immediately
748
+ stackprof_buffer_sample();
749
+ // Enqueue a job to record the sample
750
+ rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
751
+ #endif
622
752
  }
753
+ pthread_mutex_unlock(&lock);
623
754
  }
624
755
 
625
756
  static void
@@ -628,7 +759,7 @@ stackprof_newobj_handler(VALUE tpval, void *data)
628
759
  _stackprof.overall_signals++;
629
760
  if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
630
761
  return;
631
- stackprof_job_handler(0);
762
+ stackprof_sample_and_record();
632
763
  }
633
764
 
634
765
  static VALUE
@@ -638,7 +769,7 @@ stackprof_sample(VALUE self)
638
769
  return Qfalse;
639
770
 
640
771
  _stackprof.overall_signals++;
641
- stackprof_job_handler(0);
772
+ stackprof_sample_and_record();
642
773
  return Qtrue;
643
774
  }
644
775
 
@@ -653,6 +784,9 @@ frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
653
784
  static void
654
785
  stackprof_gc_mark(void *data)
655
786
  {
787
+ if (RTEST(_stackprof.metadata))
788
+ rb_gc_mark(_stackprof.metadata);
789
+
656
790
  if (RTEST(_stackprof.out))
657
791
  rb_gc_mark(_stackprof.out);
658
792
 
@@ -714,9 +848,11 @@ Init_stackprof(void)
714
848
  S(mode);
715
849
  S(interval);
716
850
  S(raw);
851
+ S(raw_sample_timestamps);
717
852
  S(raw_timestamp_deltas);
718
853
  S(out);
719
854
  S(metadata);
855
+ S(ignore_gc);
720
856
  S(frames);
721
857
  S(aggregate);
722
858
  S(state);
@@ -735,9 +871,9 @@ Init_stackprof(void)
735
871
  _stackprof.raw_samples_capa = 0;
736
872
  _stackprof.raw_sample_index = 0;
737
873
 
738
- _stackprof.raw_timestamp_deltas = NULL;
739
- _stackprof.raw_timestamp_deltas_len = 0;
740
- _stackprof.raw_timestamp_deltas_capa = 0;
874
+ _stackprof.raw_sample_times = NULL;
875
+ _stackprof.raw_sample_times_len = 0;
876
+ _stackprof.raw_sample_times_capa = 0;
741
877
 
742
878
  _stackprof.empty_string = rb_str_new_cstr("");
743
879
  rb_global_variable(&_stackprof.empty_string);
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'pp'
2
4
  require 'digest/md5'
3
5
 
@@ -38,7 +40,7 @@ module StackProf
38
40
  end
39
41
 
40
42
  def max_samples
41
- @data[:max_samples] ||= frames.max_by{ |addr, frame| frame[:samples] }.last[:samples]
43
+ @data[:max_samples] ||= @data[:frames].values.max_by{ |frame| frame[:samples] }[:samples]
42
44
  end
43
45
 
44
46
  def files
@@ -96,15 +98,7 @@ module StackProf
96
98
  def print_flamegraph(f, skip_common, alphabetical=false)
97
99
  raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
98
100
 
99
- stacks = []
100
- max_x = 0
101
- max_y = 0
102
- while len = raw.shift
103
- max_y = len if len > max_y
104
- stack = raw.slice!(0, len+1)
105
- stacks << stack
106
- max_x += stack.last
107
- end
101
+ stacks, max_x, max_y = flamegraph_stacks(raw)
108
102
 
109
103
  stacks.sort! if alphabetical
110
104
 
@@ -156,8 +150,26 @@ module StackProf
156
150
  f.puts '])'
157
151
  end
158
152
 
153
# Split the flat +raw+ sample array into individual stacks without
# modifying +raw+.
#
# +raw+ is a sequence of records, each laid out as
# [length, frame_1, ..., frame_length, weight].
#
# Returns [stacks, max_x, max_y]: each stack is the record minus its leading
# length (frames followed by the weight), max_x is the sum of all weights and
# max_y is the largest length seen.
def flamegraph_stacks(raw)
  stacks = []
  total_weight = 0
  deepest = 0
  pos = 0

  while (depth = raw[pos])
    deepest = depth if depth > deepest
    entry = raw[pos + 1, depth + 1]
    stacks << entry
    total_weight += entry.last
    pos += depth + 2
  end

  [stacks, total_weight, deepest]
end
170
+
159
171
  def flamegraph_row(f, x, y, weight, addr)
160
- frame = frames[addr]
172
+ frame = @data[:frames][addr]
161
173
  f.print ',' if @rows_started
162
174
  @rows_started = true
163
175
  f.puts %{{"x":#{x},"y":#{y},"width":#{weight},"frame_id":#{addr},"frame":#{frame[:name].dump},"file":#{frame[:file].dump}}}
@@ -178,7 +190,7 @@ module StackProf
178
190
  weight += stack.last
179
191
  end
180
192
  else
181
- frame = frames[val]
193
+ frame = @data[:frames][val]
182
194
  child_name = "#{ frame[:name] } : #{ frame[:file] }"
183
195
  child_data = convert_to_d3_flame_graph_format(child_name, child_stacks, depth + 1)
184
196
  weight += child_data["value"]
@@ -196,15 +208,7 @@ module StackProf
196
208
  def print_d3_flamegraph(f=STDOUT, skip_common=true)
197
209
  raise "profile does not include raw samples (add `raw: true` to collecting StackProf.run)" unless raw = data[:raw]
198
210
 
199
- stacks = []
200
- max_x = 0
201
- max_y = 0
202
- while len = raw.shift
203
- max_y = len if len > max_y
204
- stack = raw.slice!(0, len+1)
205
- stacks << stack
206
- max_x += stack.last
207
- end
211
+ stacks, * = flamegraph_stacks(raw)
208
212
 
209
213
  # d3-flame-graph supports only alphabetical flamegraphs
210
214
  stacks.sort!
@@ -410,7 +414,7 @@ module StackProf
410
414
  call, total = info.values_at(:samples, :total_samples)
411
415
  break if total < node_minimum || (limit && index >= limit)
412
416
 
413
- sample = ''
417
+ sample = ''.dup
414
418
  sample << "#{call} (%2.1f%%)\\rof " % (call*100.0/overall_samples) if call < total
415
419
  sample << "#{total} (%2.1f%%)\\r" % (total*100.0/overall_samples)
416
420
  fontsize = (1.0 * call / max_samples) * 28 + 10
@@ -654,7 +658,8 @@ module StackProf
654
658
  end
655
659
  end
656
660
  end
661
+ rescue SystemCallError
662
+ f.puts " SOURCE UNAVAILABLE"
657
663
  end
658
-
659
664
  end
660
665
  end
data/lib/stackprof.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require "stackprof/stackprof"
2
2
 
3
3
  module StackProf
4
- VERSION = '0.2.14'
4
+ VERSION = '0.2.18'
5
5
  end
6
6
 
7
7
  StackProf.autoload :Report, "stackprof/report.rb"
data/stackprof.gemspec CHANGED
@@ -1,11 +1,18 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'stackprof'
3
- s.version = '0.2.14'
3
+ s.version = '0.2.18'
4
4
  s.homepage = 'http://github.com/tmm1/stackprof'
5
5
 
6
6
  s.authors = 'Aman Gupta'
7
7
  s.email = 'aman@tmm1.net'
8
8
 
9
+ s.metadata = {
10
+ 'bug_tracker_uri' => 'https://github.com/tmm1/stackprof/issues',
11
+ 'changelog_uri' => "https://github.com/tmm1/stackprof/blob/v#{s.version}/CHANGELOG.md",
12
+ 'documentation_uri' => "https://www.rubydoc.info/gems/stackprof/#{s.version}",
13
+ 'source_code_uri' => "https://github.com/tmm1/stackprof/tree/v#{s.version}"
14
+ }
15
+
9
16
  s.files = `git ls-files`.split("\n")
10
17
  s.extensions = 'ext/stackprof/extconf.rb'
11
18