vernier 0.5.1 → 0.7.0

@@ -57,6 +57,7 @@ static VALUE rb_mVernier;
  static VALUE rb_cVernierResult;
  static VALUE rb_mVernierMarkerType;
  static VALUE rb_cVernierCollector;
+ static VALUE rb_cStackTable;

  static const char *gvl_event_name(rb_event_flag_t event) {
  switch (event) {
@@ -197,6 +198,7 @@ std::ostream& operator<<(std::ostream& os, const TimeStamp& info) {
  return os;
  }

+ // TODO: Rename FuncInfo
  struct FrameInfo {
  static const char *label_cstr(VALUE frame) {
  VALUE label = rb_profile_frame_full_label(frame);
@@ -239,10 +241,6 @@ bool operator==(const FrameInfo& lhs, const FrameInfo& rhs) noexcept {
  struct Frame {
  VALUE frame;
  int line;
-
- FrameInfo info() const {
- return FrameInfo(frame);
- }
  };

  bool operator==(const Frame& lhs, const Frame& rhs) noexcept {
@@ -266,7 +264,7 @@ namespace std {

  // A basic semaphore built on sem_wait/sem_post
  // post() is guaranteed to be async-signal-safe
- class SamplerSemaphore {
+ class SignalSafeSemaphore {
  #ifdef __APPLE__
  dispatch_semaphore_t sem;
  #else
@@ -275,7 +273,7 @@ class SamplerSemaphore {

  public:

- SamplerSemaphore(unsigned int value = 0) {
+ SignalSafeSemaphore(unsigned int value = 0) {
  #ifdef __APPLE__
  sem = dispatch_semaphore_create(value);
  #else
@@ -283,7 +281,7 @@ class SamplerSemaphore {
  #endif
  };

- ~SamplerSemaphore() {
+ ~SignalSafeSemaphore() {
  #ifdef __APPLE__
  dispatch_release(sem);
  #else
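
The rename reflects the one guarantee the class provides: post() may run inside a signal handler. A minimal standalone sketch of that pattern (assuming the POSIX sem_t branch above; not the gem's exact code): sem_post is on POSIX's async-signal-safe list, while the waiting side retries sem_wait on EINTR.

```c++
#include <semaphore.h>
#include <csignal>
#include <cerrno>

static sem_t sample_done;

// Runs inside a signal handler: sem_post is async-signal-safe per POSIX.
static void on_sigprof(int) {
    sem_post(&sample_done);
}

// Runs on a normal thread: sem_wait may be interrupted by signals, so retry on EINTR.
static void wait_for_sample() {
    int ret;
    do {
        ret = sem_wait(&sample_done);
    } while (ret == -1 && errno == EINTR);
}

int install() {
    std::signal(SIGPROF, on_sigprof);
    return sem_init(&sample_done, 0, 0);  // initial count 0: wait() blocks until post()
}
```
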
@@ -316,17 +314,25 @@ class SamplerSemaphore {
  }
  };

- struct RawSample {
+ class RawSample {
+ public:
+
  constexpr static int MAX_LEN = 2048;
+
+ private:
+
  VALUE frames[MAX_LEN];
  int lines[MAX_LEN];
  int len;
+ int offset;
  bool gc;

- RawSample() : len(0), gc(false) { }
+ public:
+
+ RawSample() : len(0), gc(false), offset(0) { }

  int size() const {
- return len;
+ return len - offset;
  }

  Frame frame(int i) const {
@@ -336,7 +342,7 @@ struct RawSample {
  return frame;
  }

- void sample() {
+ void sample(int offset = 0) {
  clear();

  if (!ruby_native_thread_p()) {
@@ -347,16 +353,18 @@ struct RawSample {
  gc = true;
  } else {
  len = rb_profile_frames(0, MAX_LEN, frames, lines);
+ this->offset = std::min(offset, len);
  }
  }

  void clear() {
  len = 0;
+ offset = 0;
  gc = false;
  }

  bool empty() const {
- return len == 0;
+ return len <= offset;
  }
  };
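
The offset arithmetic above is worth spelling out: the buffer keeps everything rb_profile_frames returned, while size() and empty() expose a view that hides the topmost offset frames, so callers such as the new current_stack can skip their own sampling frames. A standalone sketch of just that arithmetic, not the gem's code:

```c++
#include <algorithm>
#include <cassert>

// Mirrors RawSample's len/offset bookkeeping, nothing else.
struct OffsetView {
    int len = 0;     // frames actually captured
    int offset = 0;  // topmost frames hidden from callers

    void capture(int captured, int skip) {
        len = captured;
        offset = std::min(skip, len);  // clamp: never skip past the end
    }
    int size() const { return len - offset; }
    bool empty() const { return len <= offset; }
};

int main() {
    OffsetView v;
    v.capture(10, 1);        // 10 frames captured, hide 1 helper frame
    assert(v.size() == 9);
    v.capture(2, 5);         // more skips requested than frames exist
    assert(v.empty());       // clamped to empty, never negative-sized
    return 0;
}
```
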
@@ -366,7 +374,7 @@ struct RawSample {
  struct LiveSample {
  RawSample sample;

- SamplerSemaphore sem_complete;
+ SignalSafeSemaphore sem_complete;

  // Wait for a sample to be collected by the signal handler on another thread
  void wait() {
@@ -393,41 +401,51 @@ struct LiveSample {
  }
  };

- struct FrameList {
- std::unordered_map<std::string, int> string_to_idx;
- std::vector<std::string> string_list;
+ template <typename K>
+ class IndexMap {
+ public:
+ std::unordered_map<K, int> to_idx;
+ std::vector<K> list;

- int string_index(const std::string str) {
- auto it = string_to_idx.find(str);
- if (it == string_to_idx.end()) {
- int idx = string_list.size();
- string_list.push_back(str);
+ const K& operator[](int i) const noexcept {
+ return list[i];
+ }

- auto result = string_to_idx.insert({str, idx});
- it = result.first;
+ size_t size() const noexcept {
+ return list.size();
  }

- return it->second;
- }
+ int index(const K key) {
+ auto it = to_idx.find(key);
+ if (it == to_idx.end()) {
+ int idx = list.size();
+ list.push_back(key);
+
+ auto result = to_idx.insert({key, idx});
+ it = result.first;
+ }
+
+ return it->second;
+ }
+
+ void clear() {
+ list.clear();
+ to_idx.clear();
+ }
+ };
+
+ struct StackTable {
+ private:

  struct FrameWithInfo {
  Frame frame;
  FrameInfo info;
  };

- std::unordered_map<Frame, int> frame_to_idx;
- std::vector<Frame> frame_list;
- std::vector<FrameWithInfo> frame_with_info_list;
- int frame_index(const Frame frame) {
- auto it = frame_to_idx.find(frame);
- if (it == frame_to_idx.end()) {
- int idx = frame_list.size();
- frame_list.push_back(frame);
- auto result = frame_to_idx.insert({frame, idx});
- it = result.first;
- }
- return it->second;
- }
+ IndexMap<Frame> frame_map;
+
+ IndexMap<VALUE> func_map;
+ std::vector<FrameInfo> func_info_list;

  struct StackNode {
  std::unordered_map<Frame, int> children;
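
IndexMap generalizes the old string_index/frame_index helpers into one interning table: index() hands out a dense, stable id per distinct key, and operator[] maps ids back. A small self-contained usage sketch (int keys for brevity; same shape as the template above):

```c++
#include <unordered_map>
#include <vector>
#include <cassert>

// Same shape as the IndexMap in the diff, reduced to what the sketch needs.
template <typename K>
struct Interner {
    std::unordered_map<K, int> to_idx;
    std::vector<K> list;

    int index(const K key) {
        auto it = to_idx.find(key);
        if (it == to_idx.end()) {
            it = to_idx.insert({key, (int)list.size()}).first;
            list.push_back(key);
        }
        return it->second;
    }
    const K& operator[](int i) const { return list[i]; }
};

int main() {
    Interner<int> m;
    assert(m.index(42) == 0);   // first key gets id 0
    assert(m.index(7) == 1);    // next distinct key gets id 1
    assert(m.index(42) == 0);   // repeated key returns the same id
    assert(m[1] == 7);          // ids map back to keys
    return 0;
}
```
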
@@ -441,21 +459,13 @@ struct FrameList {
  StackNode() : frame(Frame{0, 0}), index(-1), parent(-1) {}
  };

+ // This mutex guards the StackNodes only. The rest of the maps and vectors
+ // should be guarded by the GVL
+ std::mutex stack_mutex;
+
  StackNode root_stack_node;
  vector<StackNode> stack_node_list;
-
- int stack_index(const RawSample &stack) {
- if (stack.empty()) {
- throw std::runtime_error("VERNIER BUG: empty stack");
- }
-
- StackNode *node = &root_stack_node;
- for (int i = 0; i < stack.size(); i++) {
- Frame frame = stack.frame(i);
- node = next_stack_node(node, frame);
- }
- return node->index;
- }
+ int stack_node_list_finalized_idx = 0;

  StackNode *next_stack_node(StackNode *node, Frame frame) {
  auto search = node->children.find(frame);
@@ -475,83 +485,267 @@ struct FrameList {
  }
  }

+ public:
+
+ int stack_index(const RawSample &stack) {
+ if (stack.empty()) {
+ throw std::runtime_error("VERNIER BUG: empty stack");
+ }
+
+ const std::lock_guard<std::mutex> lock(stack_mutex);
+
+ StackNode *node = &root_stack_node;
+ for (int i = 0; i < stack.size(); i++) {
+ Frame frame = stack.frame(i);
+ node = next_stack_node(node, frame);
+ }
+ return node->index;
+ }
+
+ int stack_parent(int stack_idx) {
+ const std::lock_guard<std::mutex> lock(stack_mutex);
+ if (stack_idx < 0 || stack_idx >= stack_node_list.size()) {
+ return -1;
+ } else {
+ return stack_node_list[stack_idx].parent;
+ }
+ }
+
+ int stack_frame(int stack_idx) {
+ const std::lock_guard<std::mutex> lock(stack_mutex);
+ if (stack_idx < 0 || stack_idx >= stack_node_list.size()) {
+ return -1;
+ } else {
+ return frame_map.index(stack_node_list[stack_idx].frame);
+ }
+ }
+
  // Converts Frames from stacks other tables. "Symbolicates" the frames
  // which allocates.
  void finalize() {
- for (const auto &stack_node : stack_node_list) {
- frame_index(stack_node.frame);
+ {
+ const std::lock_guard<std::mutex> lock(stack_mutex);
+ for (int i = stack_node_list_finalized_idx; i < stack_node_list.size(); i++) {
+ const auto &stack_node = stack_node_list[i];
+ frame_map.index(stack_node.frame);
+ func_map.index(stack_node.frame.frame);
+ stack_node_list_finalized_idx = i;
+ }
  }
- for (const auto &frame : frame_list) {
- frame_with_info_list.push_back(FrameWithInfo{frame, frame.info()});
+
+ for (int i = func_info_list.size(); i < func_map.size(); i++) {
+ const auto &func = func_map[i];
+ // must not hold a mutex here
+ func_info_list.push_back(FrameInfo(func));
  }
  }

  void mark_frames() {
+ const std::lock_guard<std::mutex> lock(stack_mutex);
+
  for (auto stack_node: stack_node_list) {
  rb_gc_mark(stack_node.frame.frame);
  }
  }

+ // FIXME: probably should remove
  void clear() {
- string_list.clear();
- frame_list.clear();
- stack_node_list.clear();
- frame_with_info_list.clear();
-
- string_to_idx.clear();
- frame_to_idx.clear();
- root_stack_node.children.clear();
- }
-
- void write_result(VALUE result) {
- FrameList &frame_list = *this;
-
- VALUE stack_table = rb_hash_new();
- rb_ivar_set(result, rb_intern("@stack_table"), stack_table);
- VALUE stack_table_parent = rb_ary_new();
- VALUE stack_table_frame = rb_ary_new();
- rb_hash_aset(stack_table, sym("parent"), stack_table_parent);
- rb_hash_aset(stack_table, sym("frame"), stack_table_frame);
- for (const auto &stack : frame_list.stack_node_list) {
- VALUE parent_val = stack.parent == -1 ? Qnil : INT2NUM(stack.parent);
- rb_ary_push(stack_table_parent, parent_val);
- rb_ary_push(stack_table_frame, INT2NUM(frame_list.frame_index(stack.frame)));
- }
-
- VALUE frame_table = rb_hash_new();
- rb_ivar_set(result, rb_intern("@frame_table"), frame_table);
- VALUE frame_table_func = rb_ary_new();
- VALUE frame_table_line = rb_ary_new();
- rb_hash_aset(frame_table, sym("func"), frame_table_func);
- rb_hash_aset(frame_table, sym("line"), frame_table_line);
- //for (const auto &frame : frame_list.frame_list) {
- for (int i = 0; i < frame_list.frame_with_info_list.size(); i++) {
- const auto &frame = frame_list.frame_with_info_list[i];
- rb_ary_push(frame_table_func, INT2NUM(i));
- rb_ary_push(frame_table_line, INT2NUM(frame.frame.line));
- }
-
- // TODO: dedup funcs before this step
- VALUE func_table = rb_hash_new();
- rb_ivar_set(result, rb_intern("@func_table"), func_table);
- VALUE func_table_name = rb_ary_new();
- VALUE func_table_filename = rb_ary_new();
- VALUE func_table_first_line = rb_ary_new();
- rb_hash_aset(func_table, sym("name"), func_table_name);
- rb_hash_aset(func_table, sym("filename"), func_table_filename);
- rb_hash_aset(func_table, sym("first_line"), func_table_first_line);
- for (const auto &frame : frame_list.frame_with_info_list) {
- const std::string label = frame.info.label;
- const std::string filename = frame.info.file;
- const int first_line = frame.info.first_lineno;
-
- rb_ary_push(func_table_name, rb_str_new(label.c_str(), label.length()));
- rb_ary_push(func_table_filename, rb_str_new(filename.c_str(), filename.length()));
- rb_ary_push(func_table_first_line, INT2NUM(first_line));
+ frame_map.clear();
+ func_map.clear();
+ func_info_list.clear();
+
+ {
+ const std::lock_guard<std::mutex> lock(stack_mutex);
+ stack_node_list.clear();
+ root_stack_node.children.clear();
  }
  }
+
+ static VALUE stack_table_stack_count(VALUE self);
+ static VALUE stack_table_frame_count(VALUE self);
+ static VALUE stack_table_func_count(VALUE self);
+
+ static VALUE stack_table_frame_line_no(VALUE self, VALUE idxval);
+ static VALUE stack_table_frame_func_idx(VALUE self, VALUE idxval);
+ static VALUE stack_table_func_name(VALUE self, VALUE idxval);
+ static VALUE stack_table_func_filename(VALUE self, VALUE idxval);
+ static VALUE stack_table_func_first_lineno(VALUE self, VALUE idxval);
+
+ friend class SampleTranslator;
+ };
+
+ static void
+ stack_table_mark(void *data) {
+ StackTable *stack_table = static_cast<StackTable *>(data);
+ stack_table->mark_frames();
+ }
+
+ static void
+ stack_table_free(void *data) {
+ StackTable *stack_table = static_cast<StackTable *>(data);
+ delete stack_table;
+ }
+
+ static const rb_data_type_t rb_stack_table_type = {
+ .wrap_struct_name = "vernier/stack_table",
+ .function = {
+ //.dmemsize = rb_collector_memsize,
+ .dmark = stack_table_mark,
+ .dfree = stack_table_free,
+ },
  };

+ static VALUE
+ stack_table_new(VALUE self) {
+ StackTable *stack_table = new StackTable();
+ VALUE obj = TypedData_Wrap_Struct(self, &rb_stack_table_type, stack_table);
+ return obj;
+ }
+
+ static StackTable *get_stack_table(VALUE obj) {
+ StackTable *stack_table;
+ TypedData_Get_Struct(obj, StackTable, &rb_stack_table_type, stack_table);
+ return stack_table;
+ }
+
+ static VALUE
+ stack_table_current_stack(int argc, VALUE *argv, VALUE self) {
+ int offset;
+ VALUE offset_v;
+
+ rb_scan_args(argc, argv, "01", &offset_v);
+ if (argc > 0) {
+ offset = NUM2INT(offset_v) + 1;
+ } else {
+ offset = 1;
+ }
+
+ StackTable *stack_table = get_stack_table(self);
+ RawSample stack;
+ stack.sample(offset);
+ int stack_index = stack_table->stack_index(stack);
+ return INT2NUM(stack_index);
+ }
+
+ static VALUE
+ stack_table_stack_parent_idx(VALUE self, VALUE idxval) {
+ StackTable *stack_table = get_stack_table(self);
+ int idx = NUM2INT(idxval);
+ int parent_idx = stack_table->stack_parent(idx);
+ if (parent_idx < 0) {
+ return Qnil;
+ } else {
+ return INT2NUM(parent_idx);
+ }
+ }
+
+ static VALUE
+ stack_table_stack_frame_idx(VALUE self, VALUE idxval) {
+ StackTable *stack_table = get_stack_table(self);
+ //stack_table->finalize();
+ int idx = NUM2INT(idxval);
+ int frame_idx = stack_table->stack_frame(idx);
+ return frame_idx < 0 ? Qnil : INT2NUM(frame_idx);
+ }
+
+ VALUE
+ StackTable::stack_table_stack_count(VALUE self) {
+ StackTable *stack_table = get_stack_table(self);
+ int count;
+ {
+ const std::lock_guard<std::mutex> lock(stack_table->stack_mutex);
+ count = stack_table->stack_node_list.size();
+ }
+ return INT2NUM(count);
+ }
+
+ VALUE
+ StackTable::stack_table_frame_count(VALUE self) {
+ StackTable *stack_table = get_stack_table(self);
+ stack_table->finalize();
+ int count = stack_table->frame_map.size();
+ return INT2NUM(count);
+ }
+
+ VALUE
+ StackTable::stack_table_func_count(VALUE self) {
+ StackTable *stack_table = get_stack_table(self);
+ stack_table->finalize();
+ int count = stack_table->func_map.size();
+ return INT2NUM(count);
+ }
+
+ VALUE
+ StackTable::stack_table_frame_line_no(VALUE self, VALUE idxval) {
+ StackTable *stack_table = get_stack_table(self);
+ stack_table->finalize();
+ int idx = NUM2INT(idxval);
+ if (idx < 0 || idx >= stack_table->frame_map.size()) {
+ return Qnil;
+ } else {
+ const auto &frame = stack_table->frame_map[idx];
+ return INT2NUM(frame.line);
+ }
+ }
+
+ VALUE
+ StackTable::stack_table_frame_func_idx(VALUE self, VALUE idxval) {
+ StackTable *stack_table = get_stack_table(self);
+ stack_table->finalize();
+ int idx = NUM2INT(idxval);
+ if (idx < 0 || idx >= stack_table->frame_map.size()) {
+ return Qnil;
+ } else {
+ const auto &frame = stack_table->frame_map[idx];
+ int func_idx = stack_table->func_map.index(frame.frame);
+ return INT2NUM(func_idx);
+ }
+ }
+
+ VALUE
+ StackTable::stack_table_func_name(VALUE self, VALUE idxval) {
+ StackTable *stack_table = get_stack_table(self);
+ stack_table->finalize();
+ int idx = NUM2INT(idxval);
+ auto &table = stack_table->func_info_list;
+ if (idx < 0 || idx >= table.size()) {
+ return Qnil;
+ } else {
+ const auto &func_info = table[idx];
+ const std::string &label = func_info.label;
+ return rb_interned_str(label.c_str(), label.length());
+ }
+ }
+
+ VALUE
+ StackTable::stack_table_func_filename(VALUE self, VALUE idxval) {
+ StackTable *stack_table = get_stack_table(self);
+ stack_table->finalize();
+ int idx = NUM2INT(idxval);
+ auto &table = stack_table->func_info_list;
+ if (idx < 0 || idx >= table.size()) {
+ return Qnil;
+ } else {
+ const auto &func_info = table[idx];
+ const std::string &filename = func_info.file;
+ return rb_interned_str(filename.c_str(), filename.length());
+ }
+ }
+
+ VALUE
+ StackTable::stack_table_func_first_lineno(VALUE self, VALUE idxval) {
+ StackTable *stack_table = get_stack_table(self);
+ stack_table->finalize();
+ int idx = NUM2INT(idxval);
+ auto &table = stack_table->func_info_list;
+ if (idx < 0 || idx >= table.size()) {
+ return Qnil;
+ } else {
+ const auto &func_info = table[idx];
+ return INT2NUM(func_info.first_lineno);
+ }
+ }
+
  class SampleTranslator {
  public:
  int last_stack_index;
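
Because stacks are stored as a prefix tree, one leaf index plus the parent links reconstructs the entire stack; that is exactly what the stack_parent/stack_frame pair above enables. A hedged sketch of the walk a caller would perform (hypothetical stand-in node type, with -1 marking the root, as in StackNode):

```c++
#include <vector>

// Hypothetical mirror of the StackNode parent links in the diff.
struct Node { int parent; int frame_idx; };

// Walk from a leaf stack index to the root, collecting frame indexes
// leaf-first; the same loop a Ruby caller would write with
// stack_parent_idx/stack_frame_idx.
std::vector<int> unwind(const std::vector<Node>& nodes, int stack_idx) {
    std::vector<int> frames;
    while (stack_idx >= 0 && stack_idx < (int)nodes.size()) {
        frames.push_back(nodes[stack_idx].frame_idx);
        stack_idx = nodes[stack_idx].parent;  // -1 terminates at the root
    }
    return frames;
}
```
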
@@ -563,7 +757,7 @@ class SampleTranslator {
  SampleTranslator() : len(0), last_stack_index(-1) {
  }

- int translate(FrameList &frame_list, const RawSample &sample) {
+ int translate(StackTable &frame_list, const RawSample &sample) {
  int i = 0;
  for (; i < len && i < sample.size(); i++) {
  if (frames[i] != sample.frame(i)) {
@@ -571,7 +765,8 @@ class SampleTranslator {
  }
  }

- FrameList::StackNode *node = i == 0 ? &frame_list.root_stack_node : &frame_list.stack_node_list[frame_indexes[i - 1]];
+ const std::lock_guard<std::mutex> lock(frame_list.stack_mutex);
+ StackTable::StackNode *node = i == 0 ? &frame_list.root_stack_node : &frame_list.stack_node_list[frame_indexes[i - 1]];

  for (; i < sample.size(); i++) {
  Frame frame = sample.frame(i);
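
The loop above is the interesting part of SampleTranslator: it keeps the previous sample's frames cached, finds the longest prefix shared with the new sample, and resumes the tree insertion from the last shared node rather than the root. A simplified standalone sketch of the prefix comparison (plain ints standing in for frames):

```c++
#include <vector>

// Simplified: cached previous sample vs. the new one. Returns the depth at
// which they diverge; insertion can resume from the cached node at depth-1.
int shared_prefix_len(const std::vector<int>& cached,
                      const std::vector<int>& sample) {
    size_t i = 0;
    while (i < cached.size() && i < sample.size() && cached[i] == sample[i]) {
        i++;
    }
    return (int)i;  // frames [0, i) reuse previously interned nodes
}
```
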
@@ -693,12 +888,56 @@ enum Category{
  CATEGORY_IDLE
  };

+ class ObjectSampleList {
+ public:
+
+ std::vector<int> stacks;
+ std::vector<TimeStamp> timestamps;
+ std::vector<int> weights;
+
+ size_t size() {
+ return stacks.size();
+ }
+
+ bool empty() {
+ return size() == 0;
+ }
+
+ void record_sample(int stack_index, TimeStamp time, int weight) {
+ stacks.push_back(stack_index);
+ timestamps.push_back(time);
+ weights.push_back(1);
+ }
+
+ void write_result(VALUE result) const {
+ VALUE allocations = rb_hash_new();
+ rb_hash_aset(result, sym("allocations"), allocations);
+
+ VALUE samples = rb_ary_new();
+ rb_hash_aset(allocations, sym("samples"), samples);
+ for (auto& stack_index: this->stacks) {
+ rb_ary_push(samples, INT2NUM(stack_index));
+ }
+
+ VALUE weights = rb_ary_new();
+ rb_hash_aset(allocations, sym("weights"), weights);
+ for (auto& weight: this->weights) {
+ rb_ary_push(weights, INT2NUM(weight));
+ }
+
+ VALUE timestamps = rb_ary_new();
+ rb_hash_aset(allocations, sym("timestamps"), timestamps);
+ for (auto& timestamp: this->timestamps) {
+ rb_ary_push(timestamps, ULL2NUM(timestamp.nanoseconds()));
+ }
+ }
+ };
+
  class SampleList {
  public:

  std::vector<int> stacks;
  std::vector<TimeStamp> timestamps;
- std::vector<native_thread_id_t> threads;
  std::vector<Category> categories;
  std::vector<int> weights;

@@ -710,11 +949,10 @@ class SampleList {
  return size() == 0;
  }

- void record_sample(int stack_index, TimeStamp time, native_thread_id_t thread_id, Category category) {
+ void record_sample(int stack_index, TimeStamp time, Category category) {
  if (
  !empty() &&
  stacks.back() == stack_index &&
- threads.back() == thread_id &&
  categories.back() == category)
  {
  // We don't compare timestamps for de-duplication
@@ -722,7 +960,6 @@ class SampleList {
  } else {
  stacks.push_back(stack_index);
  timestamps.push_back(time);
- threads.push_back(thread_id);
  categories.push_back(category);
  weights.push_back(1);
  }
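
For context on the de-duplication branch that the hunk elides: when the incoming sample matches the previous row, the list presumably extends the previous run instead of appending (the comment notes timestamps are deliberately ignored). A standalone sketch of that structure-of-arrays pattern, under the assumption that the hidden branch increments weights.back():

```c++
#include <vector>

// Structure-of-arrays sample list with run-length de-duplication.
struct Samples {
    std::vector<int> stacks;
    std::vector<int> weights;

    void record(int stack_index) {
        if (!stacks.empty() && stacks.back() == stack_index) {
            weights.back() += 1;  // assumed: a repeat extends the previous run
        } else {
            stacks.push_back(stack_index);
            weights.push_back(1);
        }
    }
};
```
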
@@ -758,6 +995,7 @@ class SampleList {
  class Thread {
  public:
  SampleList samples;
+ ObjectSampleList allocation_samples;

  enum State {
  STARTED,
@@ -782,15 +1020,12 @@ class Thread {

  unique_ptr<MarkerTable> markers;

- std::string name;
-
  // FIXME: don't use pthread at start
  Thread(State state, pthread_t pthread_id, VALUE ruby_thread) : pthread_id(pthread_id), ruby_thread(ruby_thread), state(state), stack_on_suspend_idx(-1) {
  ruby_thread_id = rb_obj_id(ruby_thread);
  //ruby_thread_id = ULL2NUM(ruby_thread);
  native_tid = get_native_thread_id();
  started_at = state_changed_at = TimeStamp::Now();
- name = "";
  markers = std::make_unique<MarkerTable>();

  if (state == State::STARTED) {
@@ -798,6 +1033,14 @@ class Thread {
  }
  }

+ void record_newobj(VALUE obj, StackTable &frame_list) {
+ RawSample sample;
+ sample.sample();
+
+ int stack_idx = translator.translate(frame_list, sample);
+ allocation_samples.record_sample(stack_idx, TimeStamp::Now(), 1);
+ }
+
  void set_state(State new_state) {
  if (state == Thread::State::STOPPED) {
  return;
@@ -866,6 +1109,10 @@ class Thread {
  state_changed_at = now;
  }

+ bool is_main() {
+ return rb_thread_main() == ruby_thread;
+ }
+
  bool running() {
  return state != State::STOPPED;
  }
@@ -876,12 +1123,12 @@ class Thread {

  class ThreadTable {
  public:
- FrameList &frame_list;
+ StackTable &frame_list;

  std::vector<std::unique_ptr<Thread> > list;
  std::mutex mutex;

- ThreadTable(FrameList &frame_list) : frame_list(frame_list) {
+ ThreadTable(StackTable &frame_list) : frame_list(frame_list) {
  }

  void mark() {
@@ -962,15 +1209,17 @@ class BaseCollector {
  protected:

  virtual void reset() {
- frame_list.clear();
  }

  public:
  bool running = false;
- FrameList frame_list;
+ StackTable *stack_table;
+ VALUE stack_table_value;

  TimeStamp started_at;

+ BaseCollector(VALUE stack_table_value) : stack_table_value(stack_table_value), stack_table(get_stack_table(stack_table_value)) {
+ }
  virtual ~BaseCollector() {}

  virtual bool start() {
@@ -1013,7 +1262,8 @@ class BaseCollector {
  };

  virtual void mark() {
- frame_list.mark_frames();
+ //frame_list.mark_frames();
+ rb_gc_mark(stack_table_value);
  };

  virtual VALUE get_markers() {
@@ -1027,16 +1277,15 @@ class CustomCollector : public BaseCollector {
  void sample() {
  RawSample sample;
  sample.sample();
- int stack_index = frame_list.stack_index(sample);
+ int stack_index = stack_table->stack_index(sample);

- native_thread_id_t thread_id = 0;
- samples.record_sample(stack_index, TimeStamp::Now(), thread_id, CATEGORY_NORMAL);
+ samples.record_sample(stack_index, TimeStamp::Now(), CATEGORY_NORMAL);
  }

  VALUE stop() {
  BaseCollector::stop();

- frame_list.finalize();
+ stack_table->finalize();

  VALUE result = build_collector_result();

@@ -1057,10 +1306,12 @@ class CustomCollector : public BaseCollector {
  rb_hash_aset(threads, ULL2NUM(0), thread_hash);
  rb_hash_aset(thread_hash, sym("tid"), ULL2NUM(0));

- frame_list.write_result(result);
-
  return result;
  }
+
+ public:
+
+ CustomCollector(VALUE stack_table) : BaseCollector(stack_table) { }
  };

  class RetainedCollector : public BaseCollector {
@@ -1080,7 +1331,7 @@ class RetainedCollector : public BaseCollector {
  // Ideally we'd allow empty samples to be represented
  return;
  }
- int stack_index = frame_list.stack_index(sample);
+ int stack_index = stack_table->stack_index(sample);

  object_list.push_back(obj);
  object_frames.emplace(obj, stack_index);
@@ -1110,6 +1361,8 @@ class RetainedCollector : public BaseCollector {

  public:

+ RetainedCollector(VALUE stack_table) : BaseCollector(stack_table) { }
+
  bool start() {
  if (!BaseCollector::start()) {
  return false;
@@ -1136,7 +1389,7 @@ class RetainedCollector : public BaseCollector {
  rb_tracepoint_disable(tp_newobj);
  tp_newobj = Qnil;

- frame_list.finalize();
+ stack_table->finalize();

  // We should have collected info for all our frames, so no need to continue
  // marking them
@@ -1158,7 +1411,7 @@ class RetainedCollector : public BaseCollector {

  VALUE build_collector_result() {
  RetainedCollector *collector = this;
- FrameList &frame_list = collector->frame_list;
+ StackTable &frame_list = *collector->stack_table;

  VALUE result = BaseCollector::build_collector_result();

@@ -1186,8 +1439,6 @@ class RetainedCollector : public BaseCollector {
  }
  }

- frame_list.write_result(result);
-
  return result;
  }

@@ -1196,7 +1447,8 @@ class RetainedCollector : public BaseCollector {
  // can be garbage collected.
  // When we stop collection we will stringify the remaining frames, and then
  // clear them from the set, allowing them to be removed from out output.
- frame_list.mark_frames();
+ stack_table->mark_frames();
+ rb_gc_mark(stack_table_value);

  rb_gc_mark(tp_newobj);
  rb_gc_mark(tp_freeobj);
@@ -1278,23 +1530,53 @@ class TimeCollector : public BaseCollector {
  pthread_t sample_thread;

  atomic_bool running;
- SamplerSemaphore thread_stopped;
+ SignalSafeSemaphore thread_stopped;

  TimeStamp interval;
+ unsigned int allocation_sample_rate;
+ unsigned int allocation_sample_tick = 0;
+
+ VALUE tp_newobj = Qnil;
+
+ static void newobj_i(VALUE tpval, void *data) {
+ TimeCollector *collector = static_cast<TimeCollector *>(data);
+ rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval);
+ VALUE obj = rb_tracearg_object(tparg);
+
+ collector->record_newobj(obj);
+ }

  public:
- TimeCollector(TimeStamp interval) : interval(interval), threads(frame_list) {
+ TimeCollector(VALUE stack_table, TimeStamp interval, unsigned int allocation_sample_rate) : BaseCollector(stack_table), interval(interval), allocation_sample_rate(allocation_sample_rate), threads(*get_stack_table(stack_table)) {
+ }
+
+ void record_newobj(VALUE obj) {
+ if (++allocation_sample_tick < allocation_sample_rate) {
+ return;
+ }
+ allocation_sample_tick = 0;
+
+ VALUE current_thread = rb_thread_current();
+ threads.mutex.lock();
+ for (auto &threadptr : threads.list) {
+ auto &thread = *threadptr;
+ if (current_thread == thread.ruby_thread) {
+ thread.record_newobj(obj, threads.frame_list);
+ break;
+ }
+ }
+ threads.mutex.unlock();
+
  }

  private:

  void record_sample(const RawSample &sample, TimeStamp time, Thread &thread, Category category) {
  if (!sample.empty()) {
- int stack_index = thread.translator.translate(frame_list, sample);
+ int stack_index = thread.translator.translate(*stack_table, sample);
  thread.samples.record_sample(
  stack_index,
  time,
- thread.native_tid,
  category
  );
  }
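
The allocation gate added above is plain 1-in-N counting: the tick increments first and only falls through when it reaches allocation_sample_rate, then resets. A standalone sketch of the same gate:

```c++
// 1-in-N counting sampler, mirroring record_newobj's gate.
struct AllocationGate {
    unsigned int rate;      // sample every `rate` allocations
    unsigned int tick = 0;

    bool should_sample() {
        if (++tick < rate) return false;
        tick = 0;
        return true;        // fires on the rate-th, 2*rate-th, ... call
    }
};
```
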
@@ -1344,7 +1626,7 @@ class TimeCollector : public BaseCollector {
  // that by the GVL instrumentation, but let's try to get
  // it to a consistent state and stop profiling it.
  thread.set_state(Thread::State::STOPPED);
- } else if (sample.sample.gc) {
+ } else if (sample.sample.empty()) {
  // fprintf(stderr, "skipping GC sample\n");
  } else {
  record_sample(sample.sample, sample_start, thread, CATEGORY_NORMAL);
@@ -1353,7 +1635,6 @@ class TimeCollector : public BaseCollector {
  thread.samples.record_sample(
  thread.stack_on_suspend_idx,
  sample_start,
- thread.native_tid,
  CATEGORY_IDLE);
  } else {
  }
@@ -1470,6 +1751,11 @@ class TimeCollector : public BaseCollector {
  return false;
  }

+ if (allocation_sample_rate > 0) {
+ tp_newobj = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, newobj_i, this);
+ rb_tracepoint_enable(tp_newobj);
+ }
+
  GlobalSignalHandler::get_instance()->install();

  running = true;
@@ -1502,11 +1788,16 @@ class TimeCollector : public BaseCollector {

  GlobalSignalHandler::get_instance()->uninstall();

+ if (RTEST(tp_newobj)) {
+ rb_tracepoint_disable(tp_newobj);
+ tp_newobj = Qnil;
+ }
+
  rb_internal_thread_remove_event_hook(thread_hook);
  rb_remove_event_hook(internal_gc_event_cb);
  rb_remove_event_hook(internal_thread_event_cb);

- frame_list.finalize();
+ stack_table->finalize();

  VALUE result = build_collector_result();

@@ -1524,6 +1815,7 @@ class TimeCollector : public BaseCollector {
  for (const auto& thread: this->threads.list) {
  VALUE hash = rb_hash_new();
  thread->samples.write_result(hash);
+ thread->allocation_samples.write_result(hash);

  rb_hash_aset(threads, thread->ruby_thread_id, hash);
  rb_hash_aset(hash, sym("tid"), ULL2NUM(thread->native_tid));
@@ -1531,17 +1823,16 @@ class TimeCollector : public BaseCollector {
  if (!thread->stopped_at.zero()) {
  rb_hash_aset(hash, sym("stopped_at"), ULL2NUM(thread->stopped_at.nanoseconds()));
  }
- rb_hash_aset(hash, sym("name"), rb_str_new(thread->name.data(), thread->name.length()));
+ rb_hash_aset(hash, sym("is_main"), thread->is_main() ? Qtrue : Qfalse);

  }

- frame_list.write_result(result);
-
  return result;
  }

  void mark() {
- frame_list.mark_frames();
+ stack_table->mark_frames();
+ rb_gc_mark(stack_table_value);
  threads.mark();

  //for (int i = 0; i < queued_length; i++) {
@@ -1613,12 +1904,21 @@ collector_sample(VALUE self) {
  return Qtrue;
  }

+ static VALUE
+ collector_stack_table(VALUE self) {
+ auto *collector = get_collector(self);
+
+ return collector->stack_table_value;
+ }
+
  static VALUE collector_new(VALUE self, VALUE mode, VALUE options) {
  BaseCollector *collector;
+
+ VALUE stack_table = stack_table_new(rb_cStackTable);
  if (mode == sym("retained")) {
- collector = new RetainedCollector();
+ collector = new RetainedCollector(stack_table);
  } else if (mode == sym("custom")) {
- collector = new CustomCollector();
+ collector = new CustomCollector(stack_table);
  } else if (mode == sym("wall")) {
  VALUE intervalv = rb_hash_aref(options, sym("interval"));
  TimeStamp interval;
@@ -1627,12 +1927,20 @@ static VALUE collector_new(VALUE self, VALUE mode, VALUE options) {
  } else {
  interval = TimeStamp::from_microseconds(NUM2UINT(intervalv));
  }
- collector = new TimeCollector(interval);
+
+ VALUE allocation_sample_ratev = rb_hash_aref(options, sym("allocation_sample_rate"));
+ unsigned int allocation_sample_rate;
+ if (NIL_P(allocation_sample_ratev)) {
+ allocation_sample_rate = 0;
+ } else {
+ allocation_sample_rate = NUM2UINT(allocation_sample_ratev);
+ }
+ collector = new TimeCollector(stack_table, interval, allocation_sample_rate);
  } else {
  rb_raise(rb_eArgError, "invalid mode");
  }
  VALUE obj = TypedData_Wrap_Struct(self, &rb_collector_type, collector);
- rb_funcall(obj, rb_intern("initialize"), 1, mode);
+ rb_funcall(obj, rb_intern("initialize"), 2, mode, options);
  return obj;
  }

@@ -1681,9 +1989,25 @@ Init_vernier(void)
  rb_define_singleton_method(rb_cVernierCollector, "_new", collector_new, 2);
  rb_define_method(rb_cVernierCollector, "start", collector_start, 0);
  rb_define_method(rb_cVernierCollector, "sample", collector_sample, 0);
+ rb_define_method(rb_cVernierCollector, "stack_table", collector_stack_table, 0);
  rb_define_private_method(rb_cVernierCollector, "finish", collector_stop, 0);
  rb_define_private_method(rb_cVernierCollector, "markers", markers, 0);

+ rb_cStackTable = rb_define_class_under(rb_mVernier, "StackTable", rb_cObject);
+ rb_undef_alloc_func(rb_cStackTable);
+ rb_define_singleton_method(rb_cStackTable, "new", stack_table_new, 0);
+ rb_define_method(rb_cStackTable, "current_stack", stack_table_current_stack, -1);
+ rb_define_method(rb_cStackTable, "stack_parent_idx", stack_table_stack_parent_idx, 1);
+ rb_define_method(rb_cStackTable, "stack_frame_idx", stack_table_stack_frame_idx, 1);
+ rb_define_method(rb_cStackTable, "frame_line_no", StackTable::stack_table_frame_line_no, 1);
+ rb_define_method(rb_cStackTable, "frame_func_idx", StackTable::stack_table_frame_func_idx, 1);
+ rb_define_method(rb_cStackTable, "func_name", StackTable::stack_table_func_name, 1);
+ rb_define_method(rb_cStackTable, "func_filename", StackTable::stack_table_func_filename, 1);
+ rb_define_method(rb_cStackTable, "func_first_lineno", StackTable::stack_table_func_first_lineno, 1);
+ rb_define_method(rb_cStackTable, "stack_count", StackTable::stack_table_stack_count, 0);
+ rb_define_method(rb_cStackTable, "frame_count", StackTable::stack_table_frame_count, 0);
+ rb_define_method(rb_cStackTable, "func_count", StackTable::stack_table_func_count, 0);
+
  Init_consts(rb_mVernierMarkerPhase);

  //static VALUE gc_hook = Data_Wrap_Struct(rb_cObject, collector_mark, NULL, &_collector);
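
With these registrations in place, the table is queryable wherever the wrapper VALUE is visible. A hedged sketch (standard Ruby C API calls only; method names taken from the registrations above, nil checks omitted) of chaining the new accessors from C:

```c++
#include <ruby.h>

// Sketch: capture the current stack into a StackTable and read the topmost
// function's label back through the newly registered methods.
static void demo_stack_table(VALUE stack_table) {
    VALUE stack_idx = rb_funcall(stack_table, rb_intern("current_stack"), 0);
    VALUE frame_idx = rb_funcall(stack_table, rb_intern("stack_frame_idx"), 1, stack_idx);
    VALUE func_idx  = rb_funcall(stack_table, rb_intern("frame_func_idx"), 1, frame_idx);
    VALUE name      = rb_funcall(stack_table, rb_intern("func_name"), 1, func_idx);
    rb_p(name);  // prints the label of the stack's leaf frame
}
```
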