vernier 0.5.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,6 +57,7 @@ static VALUE rb_mVernier;
57
57
  static VALUE rb_cVernierResult;
58
58
  static VALUE rb_mVernierMarkerType;
59
59
  static VALUE rb_cVernierCollector;
60
+ static VALUE rb_cStackTable;
60
61
 
61
62
  static const char *gvl_event_name(rb_event_flag_t event) {
62
63
  switch (event) {
@@ -197,6 +198,7 @@ std::ostream& operator<<(std::ostream& os, const TimeStamp& info) {
197
198
  return os;
198
199
  }
199
200
 
201
+ // TODO: Rename FuncInfo
200
202
  struct FrameInfo {
201
203
  static const char *label_cstr(VALUE frame) {
202
204
  VALUE label = rb_profile_frame_full_label(frame);
@@ -239,10 +241,6 @@ bool operator==(const FrameInfo& lhs, const FrameInfo& rhs) noexcept {
239
241
  struct Frame {
240
242
  VALUE frame;
241
243
  int line;
242
-
243
- FrameInfo info() const {
244
- return FrameInfo(frame);
245
- }
246
244
  };
247
245
 
248
246
  bool operator==(const Frame& lhs, const Frame& rhs) noexcept {
@@ -266,7 +264,7 @@ namespace std {
266
264
 
267
265
  // A basic semaphore built on sem_wait/sem_post
268
266
  // post() is guaranteed to be async-signal-safe
269
- class SamplerSemaphore {
267
+ class SignalSafeSemaphore {
270
268
  #ifdef __APPLE__
271
269
  dispatch_semaphore_t sem;
272
270
  #else
@@ -275,7 +273,7 @@ class SamplerSemaphore {
275
273
 
276
274
  public:
277
275
 
278
- SamplerSemaphore(unsigned int value = 0) {
276
+ SignalSafeSemaphore(unsigned int value = 0) {
279
277
  #ifdef __APPLE__
280
278
  sem = dispatch_semaphore_create(value);
281
279
  #else
@@ -283,7 +281,7 @@ class SamplerSemaphore {
283
281
  #endif
284
282
  };
285
283
 
286
- ~SamplerSemaphore() {
284
+ ~SignalSafeSemaphore() {
287
285
  #ifdef __APPLE__
288
286
  dispatch_release(sem);
289
287
  #else
@@ -316,17 +314,25 @@ class SamplerSemaphore {
316
314
  }
317
315
  };
318
316
 
319
- struct RawSample {
317
+ class RawSample {
318
+ public:
319
+
320
320
  constexpr static int MAX_LEN = 2048;
321
+
322
+ private:
323
+
321
324
  VALUE frames[MAX_LEN];
322
325
  int lines[MAX_LEN];
323
326
  int len;
327
+ int offset;
324
328
  bool gc;
325
329
 
326
- RawSample() : len(0), gc(false) { }
330
+ public:
331
+
332
+ RawSample() : len(0), gc(false), offset(0) { }
327
333
 
328
334
  int size() const {
329
- return len;
335
+ return len - offset;
330
336
  }
331
337
 
332
338
  Frame frame(int i) const {
@@ -336,7 +342,7 @@ struct RawSample {
336
342
  return frame;
337
343
  }
338
344
 
339
- void sample() {
345
+ void sample(int offset = 0) {
340
346
  clear();
341
347
 
342
348
  if (!ruby_native_thread_p()) {
@@ -347,16 +353,18 @@ struct RawSample {
347
353
  gc = true;
348
354
  } else {
349
355
  len = rb_profile_frames(0, MAX_LEN, frames, lines);
356
+ this->offset = std::min(offset, len);
350
357
  }
351
358
  }
352
359
 
353
360
  void clear() {
354
361
  len = 0;
362
+ offset = 0;
355
363
  gc = false;
356
364
  }
357
365
 
358
366
  bool empty() const {
359
- return len == 0;
367
+ return len <= offset;
360
368
  }
361
369
  };
362
370
 
@@ -366,7 +374,7 @@ struct RawSample {
366
374
  struct LiveSample {
367
375
  RawSample sample;
368
376
 
369
- SamplerSemaphore sem_complete;
377
+ SignalSafeSemaphore sem_complete;
370
378
 
371
379
  // Wait for a sample to be collected by the signal handler on another thread
372
380
  void wait() {
@@ -393,41 +401,51 @@ struct LiveSample {
393
401
  }
394
402
  };
395
403
 
396
- struct FrameList {
397
- std::unordered_map<std::string, int> string_to_idx;
398
- std::vector<std::string> string_list;
404
+ template <typename K>
405
+ class IndexMap {
406
+ public:
407
+ std::unordered_map<K, int> to_idx;
408
+ std::vector<K> list;
399
409
 
400
- int string_index(const std::string str) {
401
- auto it = string_to_idx.find(str);
402
- if (it == string_to_idx.end()) {
403
- int idx = string_list.size();
404
- string_list.push_back(str);
410
+ const K& operator[](int i) const noexcept {
411
+ return list[i];
412
+ }
405
413
 
406
- auto result = string_to_idx.insert({str, idx});
407
- it = result.first;
414
+ size_t size() const noexcept {
415
+ return list.size();
408
416
  }
409
417
 
410
- return it->second;
411
- }
418
+ int index(const K key) {
419
+ auto it = to_idx.find(key);
420
+ if (it == to_idx.end()) {
421
+ int idx = list.size();
422
+ list.push_back(key);
423
+
424
+ auto result = to_idx.insert({key, idx});
425
+ it = result.first;
426
+ }
427
+
428
+ return it->second;
429
+ }
430
+
431
+ void clear() {
432
+ list.clear();
433
+ to_idx.clear();
434
+ }
435
+ };
436
+
437
+ struct StackTable {
438
+ private:
412
439
 
413
440
  struct FrameWithInfo {
414
441
  Frame frame;
415
442
  FrameInfo info;
416
443
  };
417
444
 
418
- std::unordered_map<Frame, int> frame_to_idx;
419
- std::vector<Frame> frame_list;
420
- std::vector<FrameWithInfo> frame_with_info_list;
421
- int frame_index(const Frame frame) {
422
- auto it = frame_to_idx.find(frame);
423
- if (it == frame_to_idx.end()) {
424
- int idx = frame_list.size();
425
- frame_list.push_back(frame);
426
- auto result = frame_to_idx.insert({frame, idx});
427
- it = result.first;
428
- }
429
- return it->second;
430
- }
445
+ IndexMap<Frame> frame_map;
446
+
447
+ IndexMap<VALUE> func_map;
448
+ std::vector<FrameInfo> func_info_list;
431
449
 
432
450
  struct StackNode {
433
451
  std::unordered_map<Frame, int> children;
@@ -441,21 +459,13 @@ struct FrameList {
441
459
  StackNode() : frame(Frame{0, 0}), index(-1), parent(-1) {}
442
460
  };
443
461
 
462
+ // This mutex guards the StackNodes only. The rest of the maps and vectors
463
+ // should be guarded by the GVL
464
+ std::mutex stack_mutex;
465
+
444
466
  StackNode root_stack_node;
445
467
  vector<StackNode> stack_node_list;
446
-
447
- int stack_index(const RawSample &stack) {
448
- if (stack.empty()) {
449
- throw std::runtime_error("VERNIER BUG: empty stack");
450
- }
451
-
452
- StackNode *node = &root_stack_node;
453
- for (int i = 0; i < stack.size(); i++) {
454
- Frame frame = stack.frame(i);
455
- node = next_stack_node(node, frame);
456
- }
457
- return node->index;
458
- }
468
+ int stack_node_list_finalized_idx = 0;
459
469
 
460
470
  StackNode *next_stack_node(StackNode *node, Frame frame) {
461
471
  auto search = node->children.find(frame);
@@ -475,83 +485,267 @@ struct FrameList {
475
485
  }
476
486
  }
477
487
 
488
+ public:
489
+
490
+ int stack_index(const RawSample &stack) {
491
+ if (stack.empty()) {
492
+ throw std::runtime_error("VERNIER BUG: empty stack");
493
+ }
494
+
495
+ const std::lock_guard<std::mutex> lock(stack_mutex);
496
+
497
+ StackNode *node = &root_stack_node;
498
+ for (int i = 0; i < stack.size(); i++) {
499
+ Frame frame = stack.frame(i);
500
+ node = next_stack_node(node, frame);
501
+ }
502
+ return node->index;
503
+ }
504
+
505
+ int stack_parent(int stack_idx) {
506
+ const std::lock_guard<std::mutex> lock(stack_mutex);
507
+ if (stack_idx < 0 || stack_idx >= stack_node_list.size()) {
508
+ return -1;
509
+ } else {
510
+ return stack_node_list[stack_idx].parent;
511
+ }
512
+ }
513
+
514
+ int stack_frame(int stack_idx) {
515
+ const std::lock_guard<std::mutex> lock(stack_mutex);
516
+ if (stack_idx < 0 || stack_idx >= stack_node_list.size()) {
517
+ return -1;
518
+ } else {
519
+ return frame_map.index(stack_node_list[stack_idx].frame);
520
+ }
521
+ }
522
+
478
523
  // Converts Frames from stacks other tables. "Symbolicates" the frames
479
524
  // which allocates.
480
525
  void finalize() {
481
- for (const auto &stack_node : stack_node_list) {
482
- frame_index(stack_node.frame);
526
+ {
527
+ const std::lock_guard<std::mutex> lock(stack_mutex);
528
+ for (int i = stack_node_list_finalized_idx; i < stack_node_list.size(); i++) {
529
+ const auto &stack_node = stack_node_list[i];
530
+ frame_map.index(stack_node.frame);
531
+ func_map.index(stack_node.frame.frame);
532
+ stack_node_list_finalized_idx = i;
533
+ }
483
534
  }
484
- for (const auto &frame : frame_list) {
485
- frame_with_info_list.push_back(FrameWithInfo{frame, frame.info()});
535
+
536
+ for (int i = func_info_list.size(); i < func_map.size(); i++) {
537
+ const auto &func = func_map[i];
538
+ // must not hold a mutex here
539
+ func_info_list.push_back(FrameInfo(func));
486
540
  }
487
541
  }
488
542
 
489
543
  void mark_frames() {
544
+ const std::lock_guard<std::mutex> lock(stack_mutex);
545
+
490
546
  for (auto stack_node: stack_node_list) {
491
547
  rb_gc_mark(stack_node.frame.frame);
492
548
  }
493
549
  }
494
550
 
551
+ // FIXME: probably should remove
495
552
  void clear() {
496
- string_list.clear();
497
- frame_list.clear();
498
- stack_node_list.clear();
499
- frame_with_info_list.clear();
500
-
501
- string_to_idx.clear();
502
- frame_to_idx.clear();
503
- root_stack_node.children.clear();
504
- }
505
-
506
- void write_result(VALUE result) {
507
- FrameList &frame_list = *this;
508
-
509
- VALUE stack_table = rb_hash_new();
510
- rb_ivar_set(result, rb_intern("@stack_table"), stack_table);
511
- VALUE stack_table_parent = rb_ary_new();
512
- VALUE stack_table_frame = rb_ary_new();
513
- rb_hash_aset(stack_table, sym("parent"), stack_table_parent);
514
- rb_hash_aset(stack_table, sym("frame"), stack_table_frame);
515
- for (const auto &stack : frame_list.stack_node_list) {
516
- VALUE parent_val = stack.parent == -1 ? Qnil : INT2NUM(stack.parent);
517
- rb_ary_push(stack_table_parent, parent_val);
518
- rb_ary_push(stack_table_frame, INT2NUM(frame_list.frame_index(stack.frame)));
519
- }
520
-
521
- VALUE frame_table = rb_hash_new();
522
- rb_ivar_set(result, rb_intern("@frame_table"), frame_table);
523
- VALUE frame_table_func = rb_ary_new();
524
- VALUE frame_table_line = rb_ary_new();
525
- rb_hash_aset(frame_table, sym("func"), frame_table_func);
526
- rb_hash_aset(frame_table, sym("line"), frame_table_line);
527
- //for (const auto &frame : frame_list.frame_list) {
528
- for (int i = 0; i < frame_list.frame_with_info_list.size(); i++) {
529
- const auto &frame = frame_list.frame_with_info_list[i];
530
- rb_ary_push(frame_table_func, INT2NUM(i));
531
- rb_ary_push(frame_table_line, INT2NUM(frame.frame.line));
532
- }
533
-
534
- // TODO: dedup funcs before this step
535
- VALUE func_table = rb_hash_new();
536
- rb_ivar_set(result, rb_intern("@func_table"), func_table);
537
- VALUE func_table_name = rb_ary_new();
538
- VALUE func_table_filename = rb_ary_new();
539
- VALUE func_table_first_line = rb_ary_new();
540
- rb_hash_aset(func_table, sym("name"), func_table_name);
541
- rb_hash_aset(func_table, sym("filename"), func_table_filename);
542
- rb_hash_aset(func_table, sym("first_line"), func_table_first_line);
543
- for (const auto &frame : frame_list.frame_with_info_list) {
544
- const std::string label = frame.info.label;
545
- const std::string filename = frame.info.file;
546
- const int first_line = frame.info.first_lineno;
547
-
548
- rb_ary_push(func_table_name, rb_str_new(label.c_str(), label.length()));
549
- rb_ary_push(func_table_filename, rb_str_new(filename.c_str(), filename.length()));
550
- rb_ary_push(func_table_first_line, INT2NUM(first_line));
553
+ frame_map.clear();
554
+ func_map.clear();
555
+ func_info_list.clear();
556
+
557
+ {
558
+ const std::lock_guard<std::mutex> lock(stack_mutex);
559
+ stack_node_list.clear();
560
+ root_stack_node.children.clear();
551
561
  }
552
562
  }
563
+
564
+ static VALUE stack_table_stack_count(VALUE self);
565
+ static VALUE stack_table_frame_count(VALUE self);
566
+ static VALUE stack_table_func_count(VALUE self);
567
+
568
+ static VALUE stack_table_frame_line_no(VALUE self, VALUE idxval);
569
+ static VALUE stack_table_frame_func_idx(VALUE self, VALUE idxval);
570
+ static VALUE stack_table_func_name(VALUE self, VALUE idxval);
571
+ static VALUE stack_table_func_filename(VALUE self, VALUE idxval);
572
+ static VALUE stack_table_func_first_lineno(VALUE self, VALUE idxval);
573
+
574
+ friend class SampleTranslator;
575
+ };
576
+
577
+ static void
578
+ stack_table_mark(void *data) {
579
+ StackTable *stack_table = static_cast<StackTable *>(data);
580
+ stack_table->mark_frames();
581
+ }
582
+
583
+ static void
584
+ stack_table_free(void *data) {
585
+ StackTable *stack_table = static_cast<StackTable *>(data);
586
+ delete stack_table;
587
+ }
588
+
589
+ static const rb_data_type_t rb_stack_table_type = {
590
+ .wrap_struct_name = "vernier/stack_table",
591
+ .function = {
592
+ //.dmemsize = rb_collector_memsize,
593
+ .dmark = stack_table_mark,
594
+ .dfree = stack_table_free,
595
+ },
553
596
  };
554
597
 
598
+ static VALUE
599
+ stack_table_new(VALUE self) {
600
+ StackTable *stack_table = new StackTable();
601
+ VALUE obj = TypedData_Wrap_Struct(self, &rb_stack_table_type, stack_table);
602
+ return obj;
603
+ }
604
+
605
+ static StackTable *get_stack_table(VALUE obj) {
606
+ StackTable *stack_table;
607
+ TypedData_Get_Struct(obj, StackTable, &rb_stack_table_type, stack_table);
608
+ return stack_table;
609
+ }
610
+
611
+ static VALUE
612
+ stack_table_current_stack(int argc, VALUE *argv, VALUE self) {
613
+ int offset;
614
+ VALUE offset_v;
615
+
616
+ rb_scan_args(argc, argv, "01", &offset_v);
617
+ if (argc > 0) {
618
+ offset = NUM2INT(offset_v) + 1;
619
+ } else {
620
+ offset = 1;
621
+ }
622
+
623
+ StackTable *stack_table = get_stack_table(self);
624
+ RawSample stack;
625
+ stack.sample(offset);
626
+ int stack_index = stack_table->stack_index(stack);
627
+ return INT2NUM(stack_index);
628
+ }
629
+
630
+ static VALUE
631
+ stack_table_stack_parent_idx(VALUE self, VALUE idxval) {
632
+ StackTable *stack_table = get_stack_table(self);
633
+ int idx = NUM2INT(idxval);
634
+ int parent_idx = stack_table->stack_parent(idx);
635
+ if (parent_idx < 0) {
636
+ return Qnil;
637
+ } else {
638
+ return INT2NUM(parent_idx);
639
+ }
640
+ }
641
+
642
+ static VALUE
643
+ stack_table_stack_frame_idx(VALUE self, VALUE idxval) {
644
+ StackTable *stack_table = get_stack_table(self);
645
+ //stack_table->finalize();
646
+ int idx = NUM2INT(idxval);
647
+ int frame_idx = stack_table->stack_frame(idx);
648
+ return frame_idx < 0 ? Qnil : INT2NUM(frame_idx);
649
+ }
650
+
651
+ VALUE
652
+ StackTable::stack_table_stack_count(VALUE self) {
653
+ StackTable *stack_table = get_stack_table(self);
654
+ int count;
655
+ {
656
+ const std::lock_guard<std::mutex> lock(stack_table->stack_mutex);
657
+ count = stack_table->stack_node_list.size();
658
+ }
659
+ return INT2NUM(count);
660
+ }
661
+
662
+ VALUE
663
+ StackTable::stack_table_frame_count(VALUE self) {
664
+ StackTable *stack_table = get_stack_table(self);
665
+ stack_table->finalize();
666
+ int count = stack_table->frame_map.size();
667
+ return INT2NUM(count);
668
+ }
669
+
670
+ VALUE
671
+ StackTable::stack_table_func_count(VALUE self) {
672
+ StackTable *stack_table = get_stack_table(self);
673
+ stack_table->finalize();
674
+ int count = stack_table->func_map.size();
675
+ return INT2NUM(count);
676
+ }
677
+
678
+ VALUE
679
+ StackTable::stack_table_frame_line_no(VALUE self, VALUE idxval) {
680
+ StackTable *stack_table = get_stack_table(self);
681
+ stack_table->finalize();
682
+ int idx = NUM2INT(idxval);
683
+ if (idx < 0 || idx >= stack_table->frame_map.size()) {
684
+ return Qnil;
685
+ } else {
686
+ const auto &frame = stack_table->frame_map[idx];
687
+ return INT2NUM(frame.line);
688
+ }
689
+ }
690
+
691
+ VALUE
692
+ StackTable::stack_table_frame_func_idx(VALUE self, VALUE idxval) {
693
+ StackTable *stack_table = get_stack_table(self);
694
+ stack_table->finalize();
695
+ int idx = NUM2INT(idxval);
696
+ if (idx < 0 || idx >= stack_table->frame_map.size()) {
697
+ return Qnil;
698
+ } else {
699
+ const auto &frame = stack_table->frame_map[idx];
700
+ int func_idx = stack_table->func_map.index(frame.frame);
701
+ return INT2NUM(func_idx);
702
+ }
703
+ }
704
+
705
+ VALUE
706
+ StackTable::stack_table_func_name(VALUE self, VALUE idxval) {
707
+ StackTable *stack_table = get_stack_table(self);
708
+ stack_table->finalize();
709
+ int idx = NUM2INT(idxval);
710
+ auto &table = stack_table->func_info_list;
711
+ if (idx < 0 || idx >= table.size()) {
712
+ return Qnil;
713
+ } else {
714
+ const auto &func_info = table[idx];
715
+ const std::string &label = func_info.label;
716
+ return rb_interned_str(label.c_str(), label.length());
717
+ }
718
+ }
719
+
720
+ VALUE
721
+ StackTable::stack_table_func_filename(VALUE self, VALUE idxval) {
722
+ StackTable *stack_table = get_stack_table(self);
723
+ stack_table->finalize();
724
+ int idx = NUM2INT(idxval);
725
+ auto &table = stack_table->func_info_list;
726
+ if (idx < 0 || idx >= table.size()) {
727
+ return Qnil;
728
+ } else {
729
+ const auto &func_info = table[idx];
730
+ const std::string &filename = func_info.file;
731
+ return rb_interned_str(filename.c_str(), filename.length());
732
+ }
733
+ }
734
+
735
+ VALUE
736
+ StackTable::stack_table_func_first_lineno(VALUE self, VALUE idxval) {
737
+ StackTable *stack_table = get_stack_table(self);
738
+ stack_table->finalize();
739
+ int idx = NUM2INT(idxval);
740
+ auto &table = stack_table->func_info_list;
741
+ if (idx < 0 || idx >= table.size()) {
742
+ return Qnil;
743
+ } else {
744
+ const auto &func_info = table[idx];
745
+ return INT2NUM(func_info.first_lineno);
746
+ }
747
+ }
748
+
555
749
  class SampleTranslator {
556
750
  public:
557
751
  int last_stack_index;
@@ -563,7 +757,7 @@ class SampleTranslator {
563
757
  SampleTranslator() : len(0), last_stack_index(-1) {
564
758
  }
565
759
 
566
- int translate(FrameList &frame_list, const RawSample &sample) {
760
+ int translate(StackTable &frame_list, const RawSample &sample) {
567
761
  int i = 0;
568
762
  for (; i < len && i < sample.size(); i++) {
569
763
  if (frames[i] != sample.frame(i)) {
@@ -571,7 +765,8 @@ class SampleTranslator {
571
765
  }
572
766
  }
573
767
 
574
- FrameList::StackNode *node = i == 0 ? &frame_list.root_stack_node : &frame_list.stack_node_list[frame_indexes[i - 1]];
768
+ const std::lock_guard<std::mutex> lock(frame_list.stack_mutex);
769
+ StackTable::StackNode *node = i == 0 ? &frame_list.root_stack_node : &frame_list.stack_node_list[frame_indexes[i - 1]];
575
770
 
576
771
  for (; i < sample.size(); i++) {
577
772
  Frame frame = sample.frame(i);
@@ -693,12 +888,56 @@ enum Category{
693
888
  CATEGORY_IDLE
694
889
  };
695
890
 
891
+ class ObjectSampleList {
892
+ public:
893
+
894
+ std::vector<int> stacks;
895
+ std::vector<TimeStamp> timestamps;
896
+ std::vector<int> weights;
897
+
898
+ size_t size() {
899
+ return stacks.size();
900
+ }
901
+
902
+ bool empty() {
903
+ return size() == 0;
904
+ }
905
+
906
+ void record_sample(int stack_index, TimeStamp time, int weight) {
907
+ stacks.push_back(stack_index);
908
+ timestamps.push_back(time);
909
+ weights.push_back(1);
910
+ }
911
+
912
+ void write_result(VALUE result) const {
913
+ VALUE allocations = rb_hash_new();
914
+ rb_hash_aset(result, sym("allocations"), allocations);
915
+
916
+ VALUE samples = rb_ary_new();
917
+ rb_hash_aset(allocations, sym("samples"), samples);
918
+ for (auto& stack_index: this->stacks) {
919
+ rb_ary_push(samples, INT2NUM(stack_index));
920
+ }
921
+
922
+ VALUE weights = rb_ary_new();
923
+ rb_hash_aset(allocations, sym("weights"), weights);
924
+ for (auto& weight: this->weights) {
925
+ rb_ary_push(weights, INT2NUM(weight));
926
+ }
927
+
928
+ VALUE timestamps = rb_ary_new();
929
+ rb_hash_aset(allocations, sym("timestamps"), timestamps);
930
+ for (auto& timestamp: this->timestamps) {
931
+ rb_ary_push(timestamps, ULL2NUM(timestamp.nanoseconds()));
932
+ }
933
+ }
934
+ };
935
+
696
936
  class SampleList {
697
937
  public:
698
938
 
699
939
  std::vector<int> stacks;
700
940
  std::vector<TimeStamp> timestamps;
701
- std::vector<native_thread_id_t> threads;
702
941
  std::vector<Category> categories;
703
942
  std::vector<int> weights;
704
943
 
@@ -710,11 +949,10 @@ class SampleList {
710
949
  return size() == 0;
711
950
  }
712
951
 
713
- void record_sample(int stack_index, TimeStamp time, native_thread_id_t thread_id, Category category) {
952
+ void record_sample(int stack_index, TimeStamp time, Category category) {
714
953
  if (
715
954
  !empty() &&
716
955
  stacks.back() == stack_index &&
717
- threads.back() == thread_id &&
718
956
  categories.back() == category)
719
957
  {
720
958
  // We don't compare timestamps for de-duplication
@@ -722,7 +960,6 @@ class SampleList {
722
960
  } else {
723
961
  stacks.push_back(stack_index);
724
962
  timestamps.push_back(time);
725
- threads.push_back(thread_id);
726
963
  categories.push_back(category);
727
964
  weights.push_back(1);
728
965
  }
@@ -758,6 +995,7 @@ class SampleList {
758
995
  class Thread {
759
996
  public:
760
997
  SampleList samples;
998
+ ObjectSampleList allocation_samples;
761
999
 
762
1000
  enum State {
763
1001
  STARTED,
@@ -782,15 +1020,12 @@ class Thread {
782
1020
 
783
1021
  unique_ptr<MarkerTable> markers;
784
1022
 
785
- std::string name;
786
-
787
1023
  // FIXME: don't use pthread at start
788
1024
  Thread(State state, pthread_t pthread_id, VALUE ruby_thread) : pthread_id(pthread_id), ruby_thread(ruby_thread), state(state), stack_on_suspend_idx(-1) {
789
1025
  ruby_thread_id = rb_obj_id(ruby_thread);
790
1026
  //ruby_thread_id = ULL2NUM(ruby_thread);
791
1027
  native_tid = get_native_thread_id();
792
1028
  started_at = state_changed_at = TimeStamp::Now();
793
- name = "";
794
1029
  markers = std::make_unique<MarkerTable>();
795
1030
 
796
1031
  if (state == State::STARTED) {
@@ -798,6 +1033,14 @@ class Thread {
798
1033
  }
799
1034
  }
800
1035
 
1036
+ void record_newobj(VALUE obj, StackTable &frame_list) {
1037
+ RawSample sample;
1038
+ sample.sample();
1039
+
1040
+ int stack_idx = translator.translate(frame_list, sample);
1041
+ allocation_samples.record_sample(stack_idx, TimeStamp::Now(), 1);
1042
+ }
1043
+
801
1044
  void set_state(State new_state) {
802
1045
  if (state == Thread::State::STOPPED) {
803
1046
  return;
@@ -866,6 +1109,10 @@ class Thread {
866
1109
  state_changed_at = now;
867
1110
  }
868
1111
 
1112
+ bool is_main() {
1113
+ return rb_thread_main() == ruby_thread;
1114
+ }
1115
+
869
1116
  bool running() {
870
1117
  return state != State::STOPPED;
871
1118
  }
@@ -876,12 +1123,12 @@ class Thread {
876
1123
 
877
1124
  class ThreadTable {
878
1125
  public:
879
- FrameList &frame_list;
1126
+ StackTable &frame_list;
880
1127
 
881
1128
  std::vector<std::unique_ptr<Thread> > list;
882
1129
  std::mutex mutex;
883
1130
 
884
- ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
1131
+ ThreadTable(StackTable &frame_list) : frame_list(frame_list) {
885
1132
  }
886
1133
 
887
1134
  void mark() {
@@ -962,15 +1209,17 @@ class BaseCollector {
962
1209
  protected:
963
1210
 
964
1211
  virtual void reset() {
965
- frame_list.clear();
966
1212
  }
967
1213
 
968
1214
  public:
969
1215
  bool running = false;
970
- FrameList frame_list;
1216
+ StackTable *stack_table;
1217
+ VALUE stack_table_value;
971
1218
 
972
1219
  TimeStamp started_at;
973
1220
 
1221
+ BaseCollector(VALUE stack_table_value) : stack_table_value(stack_table_value), stack_table(get_stack_table(stack_table_value)) {
1222
+ }
974
1223
  virtual ~BaseCollector() {}
975
1224
 
976
1225
  virtual bool start() {
@@ -1013,7 +1262,8 @@ class BaseCollector {
1013
1262
  };
1014
1263
 
1015
1264
  virtual void mark() {
1016
- frame_list.mark_frames();
1265
+ //frame_list.mark_frames();
1266
+ rb_gc_mark(stack_table_value);
1017
1267
  };
1018
1268
 
1019
1269
  virtual VALUE get_markers() {
@@ -1027,16 +1277,15 @@ class CustomCollector : public BaseCollector {
1027
1277
  void sample() {
1028
1278
  RawSample sample;
1029
1279
  sample.sample();
1030
- int stack_index = frame_list.stack_index(sample);
1280
+ int stack_index = stack_table->stack_index(sample);
1031
1281
 
1032
- native_thread_id_t thread_id = 0;
1033
- samples.record_sample(stack_index, TimeStamp::Now(), thread_id, CATEGORY_NORMAL);
1282
+ samples.record_sample(stack_index, TimeStamp::Now(), CATEGORY_NORMAL);
1034
1283
  }
1035
1284
 
1036
1285
  VALUE stop() {
1037
1286
  BaseCollector::stop();
1038
1287
 
1039
- frame_list.finalize();
1288
+ stack_table->finalize();
1040
1289
 
1041
1290
  VALUE result = build_collector_result();
1042
1291
 
@@ -1057,10 +1306,12 @@ class CustomCollector : public BaseCollector {
1057
1306
  rb_hash_aset(threads, ULL2NUM(0), thread_hash);
1058
1307
  rb_hash_aset(thread_hash, sym("tid"), ULL2NUM(0));
1059
1308
 
1060
- frame_list.write_result(result);
1061
-
1062
1309
  return result;
1063
1310
  }
1311
+
1312
+ public:
1313
+
1314
+ CustomCollector(VALUE stack_table) : BaseCollector(stack_table) { }
1064
1315
  };
1065
1316
 
1066
1317
  class RetainedCollector : public BaseCollector {
@@ -1080,7 +1331,7 @@ class RetainedCollector : public BaseCollector {
1080
1331
  // Ideally we'd allow empty samples to be represented
1081
1332
  return;
1082
1333
  }
1083
- int stack_index = frame_list.stack_index(sample);
1334
+ int stack_index = stack_table->stack_index(sample);
1084
1335
 
1085
1336
  object_list.push_back(obj);
1086
1337
  object_frames.emplace(obj, stack_index);
@@ -1110,6 +1361,8 @@ class RetainedCollector : public BaseCollector {
1110
1361
 
1111
1362
  public:
1112
1363
 
1364
+ RetainedCollector(VALUE stack_table) : BaseCollector(stack_table) { }
1365
+
1113
1366
  bool start() {
1114
1367
  if (!BaseCollector::start()) {
1115
1368
  return false;
@@ -1136,7 +1389,7 @@ class RetainedCollector : public BaseCollector {
1136
1389
  rb_tracepoint_disable(tp_newobj);
1137
1390
  tp_newobj = Qnil;
1138
1391
 
1139
- frame_list.finalize();
1392
+ stack_table->finalize();
1140
1393
 
1141
1394
  // We should have collected info for all our frames, so no need to continue
1142
1395
  // marking them
@@ -1158,7 +1411,7 @@ class RetainedCollector : public BaseCollector {
1158
1411
 
1159
1412
  VALUE build_collector_result() {
1160
1413
  RetainedCollector *collector = this;
1161
- FrameList &frame_list = collector->frame_list;
1414
+ StackTable &frame_list = *collector->stack_table;
1162
1415
 
1163
1416
  VALUE result = BaseCollector::build_collector_result();
1164
1417
 
@@ -1186,8 +1439,6 @@ class RetainedCollector : public BaseCollector {
1186
1439
  }
1187
1440
  }
1188
1441
 
1189
- frame_list.write_result(result);
1190
-
1191
1442
  return result;
1192
1443
  }
1193
1444
 
@@ -1196,7 +1447,8 @@ class RetainedCollector : public BaseCollector {
1196
1447
  // can be garbage collected.
1197
1448
  // When we stop collection we will stringify the remaining frames, and then
1198
1449
  // clear them from the set, allowing them to be removed from out output.
1199
- frame_list.mark_frames();
1450
+ stack_table->mark_frames();
1451
+ rb_gc_mark(stack_table_value);
1200
1452
 
1201
1453
  rb_gc_mark(tp_newobj);
1202
1454
  rb_gc_mark(tp_freeobj);
@@ -1278,23 +1530,53 @@ class TimeCollector : public BaseCollector {
1278
1530
  pthread_t sample_thread;
1279
1531
 
1280
1532
  atomic_bool running;
1281
- SamplerSemaphore thread_stopped;
1533
+ SignalSafeSemaphore thread_stopped;
1282
1534
 
1283
1535
  TimeStamp interval;
1536
+ unsigned int allocation_sample_rate;
1537
+ unsigned int allocation_sample_tick = 0;
1538
+
1539
+ VALUE tp_newobj = Qnil;
1540
+
1541
+ static void newobj_i(VALUE tpval, void *data) {
1542
+ TimeCollector *collector = static_cast<TimeCollector *>(data);
1543
+ rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval);
1544
+ VALUE obj = rb_tracearg_object(tparg);
1545
+
1546
+ collector->record_newobj(obj);
1547
+ }
1284
1548
 
1285
1549
  public:
1286
- TimeCollector(TimeStamp interval) : interval(interval), threads(frame_list) {
1550
+ TimeCollector(VALUE stack_table, TimeStamp interval, unsigned int allocation_sample_rate) : BaseCollector(stack_table), interval(interval), allocation_sample_rate(allocation_sample_rate), threads(*get_stack_table(stack_table)) {
1551
+ }
1552
+
1553
+ void record_newobj(VALUE obj) {
1554
+ if (++allocation_sample_tick < allocation_sample_rate) {
1555
+ return;
1556
+ }
1557
+ allocation_sample_tick = 0;
1558
+
1559
+ VALUE current_thread = rb_thread_current();
1560
+ threads.mutex.lock();
1561
+ for (auto &threadptr : threads.list) {
1562
+ auto &thread = *threadptr;
1563
+ if (current_thread == thread.ruby_thread) {
1564
+ thread.record_newobj(obj, threads.frame_list);
1565
+ break;
1566
+ }
1567
+ }
1568
+ threads.mutex.unlock();
1569
+
1287
1570
  }
1288
1571
 
1289
1572
  private:
1290
1573
 
1291
1574
  void record_sample(const RawSample &sample, TimeStamp time, Thread &thread, Category category) {
1292
1575
  if (!sample.empty()) {
1293
- int stack_index = thread.translator.translate(frame_list, sample);
1576
+ int stack_index = thread.translator.translate(*stack_table, sample);
1294
1577
  thread.samples.record_sample(
1295
1578
  stack_index,
1296
1579
  time,
1297
- thread.native_tid,
1298
1580
  category
1299
1581
  );
1300
1582
  }
@@ -1344,7 +1626,7 @@ class TimeCollector : public BaseCollector {
1344
1626
  // that by the GVL instrumentation, but let's try to get
1345
1627
  // it to a consistent state and stop profiling it.
1346
1628
  thread.set_state(Thread::State::STOPPED);
1347
- } else if (sample.sample.gc) {
1629
+ } else if (sample.sample.empty()) {
1348
1630
  // fprintf(stderr, "skipping GC sample\n");
1349
1631
  } else {
1350
1632
  record_sample(sample.sample, sample_start, thread, CATEGORY_NORMAL);
@@ -1353,7 +1635,6 @@ class TimeCollector : public BaseCollector {
1353
1635
  thread.samples.record_sample(
1354
1636
  thread.stack_on_suspend_idx,
1355
1637
  sample_start,
1356
- thread.native_tid,
1357
1638
  CATEGORY_IDLE);
1358
1639
  } else {
1359
1640
  }
@@ -1470,6 +1751,11 @@ class TimeCollector : public BaseCollector {
1470
1751
  return false;
1471
1752
  }
1472
1753
 
1754
+ if (allocation_sample_rate > 0) {
1755
+ tp_newobj = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, newobj_i, this);
1756
+ rb_tracepoint_enable(tp_newobj);
1757
+ }
1758
+
1473
1759
  GlobalSignalHandler::get_instance()->install();
1474
1760
 
1475
1761
  running = true;
@@ -1502,11 +1788,16 @@ class TimeCollector : public BaseCollector {
1502
1788
 
1503
1789
  GlobalSignalHandler::get_instance()->uninstall();
1504
1790
 
1791
+ if (RTEST(tp_newobj)) {
1792
+ rb_tracepoint_disable(tp_newobj);
1793
+ tp_newobj = Qnil;
1794
+ }
1795
+
1505
1796
  rb_internal_thread_remove_event_hook(thread_hook);
1506
1797
  rb_remove_event_hook(internal_gc_event_cb);
1507
1798
  rb_remove_event_hook(internal_thread_event_cb);
1508
1799
 
1509
- frame_list.finalize();
1800
+ stack_table->finalize();
1510
1801
 
1511
1802
  VALUE result = build_collector_result();
1512
1803
 
@@ -1524,6 +1815,7 @@ class TimeCollector : public BaseCollector {
1524
1815
  for (const auto& thread: this->threads.list) {
1525
1816
  VALUE hash = rb_hash_new();
1526
1817
  thread->samples.write_result(hash);
1818
+ thread->allocation_samples.write_result(hash);
1527
1819
 
1528
1820
  rb_hash_aset(threads, thread->ruby_thread_id, hash);
1529
1821
  rb_hash_aset(hash, sym("tid"), ULL2NUM(thread->native_tid));
@@ -1531,17 +1823,16 @@ class TimeCollector : public BaseCollector {
1531
1823
  if (!thread->stopped_at.zero()) {
1532
1824
  rb_hash_aset(hash, sym("stopped_at"), ULL2NUM(thread->stopped_at.nanoseconds()));
1533
1825
  }
1534
- rb_hash_aset(hash, sym("name"), rb_str_new(thread->name.data(), thread->name.length()));
1826
+ rb_hash_aset(hash, sym("is_main"), thread->is_main() ? Qtrue : Qfalse);
1535
1827
 
1536
1828
  }
1537
1829
 
1538
- frame_list.write_result(result);
1539
-
1540
1830
  return result;
1541
1831
  }
1542
1832
 
1543
1833
  void mark() {
1544
- frame_list.mark_frames();
1834
+ stack_table->mark_frames();
1835
+ rb_gc_mark(stack_table_value);
1545
1836
  threads.mark();
1546
1837
 
1547
1838
  //for (int i = 0; i < queued_length; i++) {
@@ -1613,12 +1904,21 @@ collector_sample(VALUE self) {
1613
1904
  return Qtrue;
1614
1905
  }
1615
1906
 
1907
+ static VALUE
1908
+ collector_stack_table(VALUE self) {
1909
+ auto *collector = get_collector(self);
1910
+
1911
+ return collector->stack_table_value;
1912
+ }
1913
+
1616
1914
  static VALUE collector_new(VALUE self, VALUE mode, VALUE options) {
1617
1915
  BaseCollector *collector;
1916
+
1917
+ VALUE stack_table = stack_table_new(rb_cStackTable);
1618
1918
  if (mode == sym("retained")) {
1619
- collector = new RetainedCollector();
1919
+ collector = new RetainedCollector(stack_table);
1620
1920
  } else if (mode == sym("custom")) {
1621
- collector = new CustomCollector();
1921
+ collector = new CustomCollector(stack_table);
1622
1922
  } else if (mode == sym("wall")) {
1623
1923
  VALUE intervalv = rb_hash_aref(options, sym("interval"));
1624
1924
  TimeStamp interval;
@@ -1627,12 +1927,20 @@ static VALUE collector_new(VALUE self, VALUE mode, VALUE options) {
1627
1927
  } else {
1628
1928
  interval = TimeStamp::from_microseconds(NUM2UINT(intervalv));
1629
1929
  }
1630
- collector = new TimeCollector(interval);
1930
+
1931
+ VALUE allocation_sample_ratev = rb_hash_aref(options, sym("allocation_sample_rate"));
1932
+ unsigned int allocation_sample_rate;
1933
+ if (NIL_P(allocation_sample_ratev)) {
1934
+ allocation_sample_rate = 0;
1935
+ } else {
1936
+ allocation_sample_rate = NUM2UINT(allocation_sample_ratev);
1937
+ }
1938
+ collector = new TimeCollector(stack_table, interval, allocation_sample_rate);
1631
1939
  } else {
1632
1940
  rb_raise(rb_eArgError, "invalid mode");
1633
1941
  }
1634
1942
  VALUE obj = TypedData_Wrap_Struct(self, &rb_collector_type, collector);
1635
- rb_funcall(obj, rb_intern("initialize"), 1, mode);
1943
+ rb_funcall(obj, rb_intern("initialize"), 2, mode, options);
1636
1944
  return obj;
1637
1945
  }
1638
1946
 
@@ -1681,9 +1989,25 @@ Init_vernier(void)
1681
1989
  rb_define_singleton_method(rb_cVernierCollector, "_new", collector_new, 2);
1682
1990
  rb_define_method(rb_cVernierCollector, "start", collector_start, 0);
1683
1991
  rb_define_method(rb_cVernierCollector, "sample", collector_sample, 0);
1992
+ rb_define_method(rb_cVernierCollector, "stack_table", collector_stack_table, 0);
1684
1993
  rb_define_private_method(rb_cVernierCollector, "finish", collector_stop, 0);
1685
1994
  rb_define_private_method(rb_cVernierCollector, "markers", markers, 0);
1686
1995
 
1996
+ rb_cStackTable = rb_define_class_under(rb_mVernier, "StackTable", rb_cObject);
1997
+ rb_undef_alloc_func(rb_cStackTable);
1998
+ rb_define_singleton_method(rb_cStackTable, "new", stack_table_new, 0);
1999
+ rb_define_method(rb_cStackTable, "current_stack", stack_table_current_stack, -1);
2000
+ rb_define_method(rb_cStackTable, "stack_parent_idx", stack_table_stack_parent_idx, 1);
2001
+ rb_define_method(rb_cStackTable, "stack_frame_idx", stack_table_stack_frame_idx, 1);
2002
+ rb_define_method(rb_cStackTable, "frame_line_no", StackTable::stack_table_frame_line_no, 1);
2003
+ rb_define_method(rb_cStackTable, "frame_func_idx", StackTable::stack_table_frame_func_idx, 1);
2004
+ rb_define_method(rb_cStackTable, "func_name", StackTable::stack_table_func_name, 1);
2005
+ rb_define_method(rb_cStackTable, "func_filename", StackTable::stack_table_func_filename, 1);
2006
+ rb_define_method(rb_cStackTable, "func_first_lineno", StackTable::stack_table_func_first_lineno, 1);
2007
+ rb_define_method(rb_cStackTable, "stack_count", StackTable::stack_table_stack_count, 0);
2008
+ rb_define_method(rb_cStackTable, "frame_count", StackTable::stack_table_frame_count, 0);
2009
+ rb_define_method(rb_cStackTable, "func_count", StackTable::stack_table_func_count, 0);
2010
+
1687
2011
  Init_consts(rb_mVernierMarkerPhase);
1688
2012
 
1689
2013
  //static VALUE gc_hook = Data_Wrap_Struct(rb_cObject, collector_mark, NULL, &_collector);