vernier 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,27 +1,340 @@
  #include <iostream>
+ #include <iomanip>
  #include <vector>
  #include <memory>
  #include <algorithm>
  #include <sstream>
  #include <unordered_map>
  #include <unordered_set>
+ #include <cassert>
+ #include <atomic>
+ #include <mutex>
+
+ #include <sys/time.h>
+ #include <signal.h>
+ #ifdef __APPLE__
+ /* macOS */
+ #include <dispatch/dispatch.h>
+ #else
+ /* Linux */
+ #include <semaphore.h>
+ #include <sys/syscall.h> /* for SYS_gettid */
+ #endif

  #include "vernier.hh"
- #include "stack.hh"
+
+ #include "ruby/ruby.h"
  #include "ruby/debug.h"
+ #include "ruby/thread.h"

- using namespace std;
+ // GC events we'll monitor during profiling
+ #define RUBY_GC_PHASE_EVENTS \
+     RUBY_INTERNAL_EVENT_GC_START | \
+     RUBY_INTERNAL_EVENT_GC_END_MARK | \
+     RUBY_INTERNAL_EVENT_GC_END_SWEEP | \
+     RUBY_INTERNAL_EVENT_GC_ENTER | \
+     RUBY_INTERNAL_EVENT_GC_EXIT
+
+ #define sym(name) ID2SYM(rb_intern_const(name))
+
+ // HACK: This isn't public, but the objspace ext uses it
+ extern "C" size_t rb_obj_memsize_of(VALUE);

- #define numberof(array) ((int)(sizeof(array) / sizeof((array)[0])))
+ using namespace std;

  static VALUE rb_mVernier;
+ static VALUE rb_cVernierResult;
+ static VALUE rb_mVernierMarkerType;
+ static VALUE rb_cVernierCollector;
+
+ class TimeStamp {
+     static const uint64_t nanoseconds_per_second = 1000000000;
+     uint64_t value_ns;
+
+     TimeStamp(uint64_t value_ns) : value_ns(value_ns) {}
+
+ public:
+     TimeStamp() : value_ns(0) {}
+
+     static TimeStamp Now() {
+         struct timespec ts;
+         clock_gettime(CLOCK_MONOTONIC, &ts);
+         return TimeStamp(ts.tv_sec * nanoseconds_per_second + ts.tv_nsec);
+     }
+
+     static TimeStamp Zero() {
+         return TimeStamp(0);
+     }
+
+     static void Sleep(const TimeStamp &time) {
+         struct timespec ts = time.timespec();
+
+         int res;
+         do {
+             res = nanosleep(&ts, &ts);
+         } while (res && errno == EINTR);
+     }
+
+     static TimeStamp from_microseconds(uint64_t us) {
+         return TimeStamp(us * 1000);
+     }
+
+     static TimeStamp from_nanoseconds(uint64_t ns) {
+         return TimeStamp(ns);
+     }
+
+     TimeStamp operator-(const TimeStamp &other) const {
+         TimeStamp result = *this;
+         return result -= other;
+     }
+
+     TimeStamp &operator-=(const TimeStamp &other) {
+         if (value_ns > other.value_ns) {
+             value_ns = value_ns - other.value_ns;
+         } else {
+             // underflow
+             value_ns = 0;
+         }
+         return *this;
+     }
+
+     TimeStamp operator+(const TimeStamp &other) const {
+         TimeStamp result = *this;
+         return result += other;
+     }
+
+     TimeStamp &operator+=(const TimeStamp &other) {
+         uint64_t new_value = value_ns + other.value_ns;
+         value_ns = new_value;
+         return *this;
+     }
+
+     bool operator<(const TimeStamp &other) const {
+         return value_ns < other.value_ns;
+     }
+
+     bool operator<=(const TimeStamp &other) const {
+         return value_ns <= other.value_ns;
+     }
+
+     bool operator>(const TimeStamp &other) const {
+         return value_ns > other.value_ns;
+     }
+
+     bool operator>=(const TimeStamp &other) const {
+         return value_ns >= other.value_ns;
+     }
+
+     uint64_t nanoseconds() const {
+         return value_ns;
+     }
+
+     uint64_t microseconds() const {
+         return value_ns / 1000;
+     }
+
+     bool zero() const {
+         return value_ns == 0;
+     }
+
+     struct timespec timespec() const {
+         struct timespec ts;
+         ts.tv_sec = nanoseconds() / nanoseconds_per_second;
+         ts.tv_nsec = (nanoseconds() % nanoseconds_per_second);
+         return ts;
+     }
+ };
+
+ std::ostream& operator<<(std::ostream& os, const TimeStamp& info) {
+     os << info.nanoseconds() << "ns";
+     return os;
+ }
+
+ struct FrameInfo {
+     static const char *label_cstr(VALUE frame) {
+         VALUE label = rb_profile_frame_full_label(frame);
+         return StringValueCStr(label);
+     }
+
+     static const char *file_cstr(VALUE frame) {
+         VALUE file = rb_profile_frame_absolute_path(frame);
+         if (NIL_P(file))
+             file = rb_profile_frame_path(frame);
+         if (NIL_P(file)) {
+             return "";
+         } else {
+             return StringValueCStr(file);
+         }
+     }
+
+     static int first_lineno_int(VALUE frame) {
+         VALUE first_lineno = rb_profile_frame_first_lineno(frame);
+         return NIL_P(first_lineno) ? 0 : FIX2INT(first_lineno);
+     }
+
+     FrameInfo(VALUE frame) :
+         label(label_cstr(frame)),
+         file(file_cstr(frame)),
+         first_lineno(first_lineno_int(frame)) { }
+
+     std::string label;
+     std::string file;
+     int first_lineno;
+ };
+
+ bool operator==(const FrameInfo& lhs, const FrameInfo& rhs) noexcept {
+     return
+         lhs.label == rhs.label &&
+         lhs.file == rhs.file &&
+         lhs.first_lineno == rhs.first_lineno;
+ }
+
+ struct Frame {
+     VALUE frame;
+     int line;
+
+     FrameInfo info() const {
+         return FrameInfo(frame);
+     }
+ };
+
+ bool operator==(const Frame& lhs, const Frame& rhs) noexcept {
+     return lhs.frame == rhs.frame && lhs.line == rhs.line;
+ }
+
+ namespace std {
+     template<>
+     struct hash<Frame>
+     {
+         std::size_t operator()(Frame const& s) const noexcept
+         {
+             return s.frame ^ s.line;
+         }
+     };
+ }

- struct retained_collector {
-     int allocated_objects = 0;
-     int freed_objects = 0;
+ // A basic semaphore built on sem_wait/sem_post
+ // post() is guaranteed to be async-signal-safe
+ class SamplerSemaphore {
+ #ifdef __APPLE__
+     dispatch_semaphore_t sem;
+ #else
+     sem_t sem;
+ #endif

-     std::unordered_set<VALUE> unique_frames;
-     std::unordered_map<VALUE, std::unique_ptr<Stack>> object_frames;
+ public:
+
+     SamplerSemaphore(unsigned int value = 0) {
+ #ifdef __APPLE__
+         sem = dispatch_semaphore_create(value);
+ #else
+         sem_init(&sem, 0, value);
+ #endif
+     };
+
+     ~SamplerSemaphore() {
+ #ifdef __APPLE__
+         dispatch_release(sem);
+ #else
+         sem_destroy(&sem);
+ #endif
+     };
+
+     void wait() {
+ #ifdef __APPLE__
+         dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
+ #else
+         int ret;
+         do {
+             ret = sem_wait(&sem);
+         } while (ret && errno == EINTR);
+         assert(ret == 0);
+ #endif
+     }
+
+     void post() {
+ #ifdef __APPLE__
+         dispatch_semaphore_signal(sem);
+ #else
+         sem_post(&sem);
+ #endif
+     }
+ };
+
+ struct RawSample {
+     constexpr static int MAX_LEN = 2048;
+     VALUE frames[MAX_LEN];
+     int lines[MAX_LEN];
+     int len;
+     bool gc;
+
+     RawSample() : len(0), gc(false) { }
+
+     int size() const {
+         return len;
+     }
+
+     Frame frame(int i) const {
+         const Frame frame = {frames[i], lines[i]};
+         return frame;
+     }
+
+     void sample() {
+         if (!ruby_native_thread_p()) {
+             clear();
+             return;
+         }
+
+         if (rb_during_gc()) {
+             gc = true;
+             len = 0;
+         } else {
+             gc = false;
+             len = rb_profile_frames(0, MAX_LEN, frames, lines);
+         }
+     }
+
+     void clear() {
+         len = 0;
+         gc = false;
+     }
+
+     bool empty() const {
+         return len == 0;
+     }
+ };
+
+ // Based very loosely on the design of Gecko's SigHandlerCoordinator.
+ // This is used for communication between the profiler thread and the signal
+ // handlers in the observed thread.
+ struct LiveSample {
+     RawSample sample;
+
+     SamplerSemaphore sem_complete;
+
+     // Wait for a sample to be collected by the signal handler on another thread
+     void wait() {
+         sem_complete.wait();
+     }
+
+     int size() const {
+         return sample.size();
+     }
+
+     Frame frame(int i) const {
+         return sample.frame(i);
+     }
+
+     // Called from a signal handler in the observed thread in order to take a
+     // sample and signal to the profiler thread that the sample is ready.
+     //
+     // CRuby doesn't guarantee that rb_profile_frames can be used as
+     // async-signal-safe but in practice it seems to be.
+     // sem_post is safe in an async-signal-safe context.
+     void sample_current_thread() {
+         sample.sample();
+         sem_complete.post();
+     }
  };

  struct TraceArg {
@@ -42,149 +355,940 @@ struct TraceArg {
  }
  };

- static retained_collector _collector;
+ struct FrameList {
+     std::unordered_map<std::string, int> string_to_idx;
+     std::vector<std::string> string_list;

- static VALUE tp_newobj;
- static VALUE tp_freeobj;
- static void
- newobj_i(VALUE tpval, void *data) {
-     retained_collector *collector = static_cast<retained_collector *>(data);
-     TraceArg tp(tpval);
-     collector->allocated_objects++;
+     int string_index(const std::string str) {
+         auto it = string_to_idx.find(str);
+         if (it == string_to_idx.end()) {
+             int idx = string_list.size();
+             string_list.push_back(str);

-     VALUE frames_buffer[2048];
-     int lines_buffer[2048];
-     int n = rb_profile_frames(0, 2048, frames_buffer, lines_buffer);
+             auto result = string_to_idx.insert({str, idx});
+             it = result.first;
+         }

-     for (int i = 0; i < n; i++) {
-         collector->unique_frames.insert(frames_buffer[i]);
+         return it->second;
  }

-     collector->object_frames.emplace(
-         tp.obj,
-         make_unique<Stack>(frames_buffer, lines_buffer, n)
-     );
- }
+     struct FrameWithInfo {
+         Frame frame;
+         FrameInfo info;
+     };
+
+     std::unordered_map<Frame, int> frame_to_idx;
+     std::vector<Frame> frame_list;
+     std::vector<FrameWithInfo> frame_with_info_list;
+     int frame_index(const Frame frame) {
+         auto it = frame_to_idx.find(frame);
+         if (it == frame_to_idx.end()) {
+             int idx = frame_list.size();
+             frame_list.push_back(frame);
+             auto result = frame_to_idx.insert({frame, idx});
+             it = result.first;
+         }
+         return it->second;
+     }
+
+     struct StackNode {
+         std::unordered_map<Frame, int> children;
+         Frame frame;
+         int parent;
+         int index;
+
+         StackNode(Frame frame, int index, int parent) : frame(frame), index(index), parent(parent) {}
+
+         // root
+         StackNode() : frame(Frame{0, 0}), index(-1), parent(-1) {}
+     };
+
+     StackNode root_stack_node;
+     vector<StackNode> stack_node_list;
+
+     int stack_index(const RawSample &stack) {
+         if (stack.empty()) {
+             throw std::runtime_error("empty stack");
+         }
+
+         StackNode *node = &root_stack_node;
+         for (int i = stack.size() - 1; i >= 0; i--) {
+             const Frame &frame = stack.frame(i);
+             node = next_stack_node(node, frame);
+         }
+         return node->index;
+     }
+
+     StackNode *next_stack_node(StackNode *node, const Frame &frame) {
+         int next_node_idx = node->children[frame];
+         if (next_node_idx == 0) {
+             // insert a new node
+             next_node_idx = stack_node_list.size();
+             node->children[frame] = next_node_idx;
+             stack_node_list.emplace_back(
+                 frame,
+                 next_node_idx,
+                 node->index
+             );
+         }
+
+         return &stack_node_list[next_node_idx];
+     }
+
+     // Converts Frames from stacks into the other tables. "Symbolicates" the
+     // frames, which allocates.
+     void finalize() {
+         for (const auto &stack_node : stack_node_list) {
+             frame_index(stack_node.frame);
+         }
+         for (const auto &frame : frame_list) {
+             frame_with_info_list.push_back(FrameWithInfo{frame, frame.info()});
+         }
+     }
+
+     void mark_frames() {
+         for (auto stack_node: stack_node_list) {
+             rb_gc_mark(stack_node.frame.frame);
+         }
+     }
+
+     void clear() {
+         string_list.clear();
+         frame_list.clear();
+         stack_node_list.clear();
+         frame_with_info_list.clear();
+
+         string_to_idx.clear();
+         frame_to_idx.clear();
+         root_stack_node.children.clear();
+     }
+
+     void write_result(VALUE result) {
+         FrameList &frame_list = *this;
+
+         VALUE stack_table = rb_hash_new();
+         rb_ivar_set(result, rb_intern("@stack_table"), stack_table);
+         VALUE stack_table_parent = rb_ary_new();
+         VALUE stack_table_frame = rb_ary_new();
+         rb_hash_aset(stack_table, sym("parent"), stack_table_parent);
+         rb_hash_aset(stack_table, sym("frame"), stack_table_frame);
+         for (const auto &stack : frame_list.stack_node_list) {
+             VALUE parent_val = stack.parent == -1 ? Qnil : INT2NUM(stack.parent);
+             rb_ary_push(stack_table_parent, parent_val);
+             rb_ary_push(stack_table_frame, INT2NUM(frame_list.frame_index(stack.frame)));
+         }
+
+         VALUE frame_table = rb_hash_new();
+         rb_ivar_set(result, rb_intern("@frame_table"), frame_table);
+         VALUE frame_table_func = rb_ary_new();
+         VALUE frame_table_line = rb_ary_new();
+         rb_hash_aset(frame_table, sym("func"), frame_table_func);
+         rb_hash_aset(frame_table, sym("line"), frame_table_line);
+         //for (const auto &frame : frame_list.frame_list) {
+         for (int i = 0; i < frame_list.frame_with_info_list.size(); i++) {
+             const auto &frame = frame_list.frame_with_info_list[i];
+             rb_ary_push(frame_table_func, INT2NUM(i));
+             rb_ary_push(frame_table_line, INT2NUM(frame.frame.line));
+         }
+
+         // TODO: dedup funcs before this step
+         VALUE func_table = rb_hash_new();
+         rb_ivar_set(result, rb_intern("@func_table"), func_table);
+         VALUE func_table_name = rb_ary_new();
+         VALUE func_table_filename = rb_ary_new();
+         VALUE func_table_first_line = rb_ary_new();
+         rb_hash_aset(func_table, sym("name"), func_table_name);
+         rb_hash_aset(func_table, sym("filename"), func_table_filename);
+         rb_hash_aset(func_table, sym("first_line"), func_table_first_line);
+         for (const auto &frame : frame_list.frame_with_info_list) {
+             const std::string label = frame.info.label;
+             const std::string filename = frame.info.file;
+             const int first_line = frame.info.first_lineno;
+
+             rb_ary_push(func_table_name, rb_str_new(label.c_str(), label.length()));
+             rb_ary_push(func_table_filename, rb_str_new(filename.c_str(), filename.length()));
+             rb_ary_push(func_table_first_line, INT2NUM(first_line));
+         }
+     }
+ };
+
+ class BaseCollector {
+ protected:
+
+     virtual void reset() {
+         frame_list.clear();
+     }
+
+ public:
+     bool running = false;
+     FrameList frame_list;
+
+     virtual ~BaseCollector() {}
+
+     virtual bool start() {
+         if (running) {
+             return false;
+         } else {
+             running = true;
+             return true;
+         }
+     }
+
+     virtual VALUE stop() {
+         if (!running) {
+             rb_raise(rb_eRuntimeError, "collector not running");
+         }
+         running = false;
+
+         return Qnil;
+     }
+
+     virtual void sample() {
+         rb_raise(rb_eRuntimeError, "collector doesn't support manual sampling");
+     };
+
+     virtual void mark() {
+         frame_list.mark_frames();
+     };
+
+     virtual VALUE get_markers() {
+         return rb_ary_new();
+     };
+ };
+
+ class CustomCollector : public BaseCollector {
+     std::vector<int> samples;
+
+     void sample() {
+         RawSample sample;
+         sample.sample();
+         int stack_index = frame_list.stack_index(sample);
+
+         samples.push_back(stack_index);
+     }
+
+     VALUE stop() {
+         BaseCollector::stop();
+
+         frame_list.finalize();
+
+         VALUE result = build_collector_result();
+
+         reset();
+
+         return result;
+     }
+
+     VALUE build_collector_result() {
+         VALUE result = rb_obj_alloc(rb_cVernierResult);
+
+         VALUE samples = rb_ary_new();
+         rb_ivar_set(result, rb_intern("@samples"), samples);
+         VALUE weights = rb_ary_new();
+         rb_ivar_set(result, rb_intern("@weights"), weights);
+
+         for (auto& stack_index: this->samples) {
+             rb_ary_push(samples, INT2NUM(stack_index));
+             rb_ary_push(weights, INT2NUM(1));
+         }
+
+         frame_list.write_result(result);
+
+         return result;
+     }
+ };
+
+ class RetainedCollector : public BaseCollector {
+     void reset() {
+         object_frames.clear();
+         object_list.clear();
+
+         BaseCollector::reset();
+     }
+
+     void record(VALUE obj) {
+         RawSample sample;
+         sample.sample();
+         int stack_index = frame_list.stack_index(sample);
+
+         object_list.push_back(obj);
+         object_frames.emplace(obj, stack_index);
+     }
+
+     std::unordered_map<VALUE, int> object_frames;
+     std::vector<VALUE> object_list;
+
+     VALUE tp_newobj = Qnil;
+     VALUE tp_freeobj = Qnil;
+
+     static void newobj_i(VALUE tpval, void *data) {
+         RetainedCollector *collector = static_cast<RetainedCollector *>(data);
+         TraceArg tp(tpval);
+
+         collector->record(tp.obj);
+     }
+
+     static void freeobj_i(VALUE tpval, void *data) {
+         RetainedCollector *collector = static_cast<RetainedCollector *>(data);
+         TraceArg tp(tpval);
+
+         collector->object_frames.erase(tp.obj);
+     }
+
+ public:
+
+     bool start() {
+         if (!BaseCollector::start()) {
+             return false;
+         }
+
+         tp_newobj = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, newobj_i, this);
+         tp_freeobj = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_FREEOBJ, freeobj_i, this);
+
+         rb_tracepoint_enable(tp_newobj);
+         rb_tracepoint_enable(tp_freeobj);
+
+         return true;
+     }
+
+     VALUE stop() {
+         BaseCollector::stop();
+
+         // GC before we start turning stacks into strings
+         rb_gc();
+
+         // Stop tracking any more new objects, but we'll continue tracking freed
+         // objects, as we may be able to free some as we remove our own references
+         // to stack frames.
+         rb_tracepoint_disable(tp_newobj);
+         tp_newobj = Qnil;
+
+         frame_list.finalize();
+
+         // We should have collected info for all our frames, so no need to continue
+         // marking them
+         // FIXME: previously here we cleared the list of frames so we would stop
+         // marking them. Maybe now we should set a flag so that we stop marking them
+
+         // GC again
+         rb_gc();
+
+         rb_tracepoint_disable(tp_freeobj);
+         tp_freeobj = Qnil;
+
+         VALUE result = build_collector_result();
+
+         reset();
+
+         return result;
+     }
+
+     VALUE build_collector_result() {
+         RetainedCollector *collector = this;
+         FrameList &frame_list = collector->frame_list;
+
+         VALUE result = rb_obj_alloc(rb_cVernierResult);
+
+         VALUE samples = rb_ary_new();
+         rb_ivar_set(result, rb_intern("@samples"), samples);
+         VALUE weights = rb_ary_new();
+         rb_ivar_set(result, rb_intern("@weights"), weights);
+
+         for (auto& obj: collector->object_list) {
+             const auto search = collector->object_frames.find(obj);
+             if (search != collector->object_frames.end()) {
+                 int stack_index = search->second;
+
+                 rb_ary_push(samples, INT2NUM(stack_index));
+                 rb_ary_push(weights, INT2NUM(rb_obj_memsize_of(obj)));
+             }
+         }
+
+         frame_list.write_result(result);
+
+         return result;
+     }
+
+     void mark() {
+         // We don't mark the objects, but we MUST mark the frames, otherwise they
+         // can be garbage collected.
+         // When we stop collection we will stringify the remaining frames, and then
+         // clear them from the set, allowing them to be removed from our output.
+         frame_list.mark_frames();
+
+         rb_gc_mark(tp_newobj);
+         rb_gc_mark(tp_freeobj);
+     }
+ };
+
+ typedef uint64_t native_thread_id_t;
+
+ class Thread {
+ public:
+     static native_thread_id_t get_native_thread_id() {
+ #ifdef __APPLE__
+         uint64_t thread_id;
+         int e = pthread_threadid_np(pthread_self(), &thread_id);
+         if (e != 0) rb_syserr_fail(e, "pthread_threadid_np");
+         return thread_id;
+ #else
+         // gettid() is only available as of glibc 2.30
+         pid_t tid = syscall(SYS_gettid);
+         return tid;
+ #endif
+     }
+
+     enum State {
+         STARTED,
+         RUNNING,
+         SUSPENDED,
+         STOPPED
+     };
+
+     pthread_t pthread_id;
+     native_thread_id_t native_tid;
+     State state;
+
+     TimeStamp state_changed_at;
+     TimeStamp started_at;
+     TimeStamp stopped_at;
+
+     RawSample stack_on_suspend;
+
+     std::string name;
+
+     Thread(State state) : state(state) {
+         pthread_id = pthread_self();
+         native_tid = get_native_thread_id();
+         started_at = state_changed_at = TimeStamp::Now();
+     }
+
+     void set_state(State new_state) {
+         if (state == Thread::State::STOPPED) {
+             return;
+         }
+
+         auto now = TimeStamp::Now();
+
+         state = new_state;
+         state_changed_at = now;
+         if (new_state == State::STARTED) {
+             if (started_at.zero()) {
+                 started_at = now;
+             }
+         } else if (new_state == State::STOPPED) {
+             stopped_at = now;
+
+             capture_name();
+         }
+     }
+
+     bool running() {
+         return state != State::STOPPED;
+     }
+
+     void capture_name() {
+         char buf[128];
+         int rc = pthread_getname_np(pthread_id, buf, sizeof(buf));
+         if (rc == 0)
+             name = std::string(buf);
+     }
+ };
+
+ class Marker {
+ public:
+     enum Type {
+         MARKER_GVL_THREAD_STARTED,
+         MARKER_GVL_THREAD_READY,
+         MARKER_GVL_THREAD_RESUMED,
+         MARKER_GVL_THREAD_SUSPENDED,
+         MARKER_GVL_THREAD_EXITED,
+
+         MARKER_GC_START,
+         MARKER_GC_END_MARK,
+         MARKER_GC_END_SWEEP,
+         MARKER_GC_ENTER,
+         MARKER_GC_EXIT,
+
+         MARKER_MAX,
+     };
+     Type type;
+     TimeStamp timestamp;
+     native_thread_id_t thread_id;
+ };
+
+ class MarkerTable {
+ public:
+     std::vector<Marker> list;
+     std::mutex mutex;
+
+     void record(Marker::Type type) {
+         const std::lock_guard<std::mutex> lock(mutex);
+
+         list.push_back({ type, TimeStamp::Now(), Thread::get_native_thread_id() });
+     }
+ };
+
+ extern "C" int ruby_thread_has_gvl_p(void);
+
+ class ThreadTable {
+ public:
+     std::vector<Thread> list;
+     std::mutex mutex;
+
+     void started() {
+         //const std::lock_guard<std::mutex> lock(mutex);
+
+         //list.push_back(Thread{pthread_self(), Thread::State::SUSPENDED});
+         set_state(Thread::State::STARTED);
+     }
+
+     void set_state(Thread::State new_state) {
+         const std::lock_guard<std::mutex> lock(mutex);
+
+         pthread_t current_thread = pthread_self();
+         //cerr << "set state=" << new_state << " thread=" << gettid() << endl;
+
+         for (auto &thread : list) {
+             if (pthread_equal(current_thread, thread.pthread_id)) {
+                 thread.set_state(new_state);
+
+                 if (new_state == Thread::State::SUSPENDED) {
+                     thread.stack_on_suspend.sample();
+                     //cerr << gettid() << " suspended! Stack size:" << thread.stack_on_suspend.size() << endl;
+                 }
+                 return;
+             }
+         }
+
+         pid_t native_tid = Thread::get_native_thread_id();
+         list.emplace_back(new_state);
+     }
+ };
+
+ enum Category {
+     CATEGORY_NORMAL,
+     CATEGORY_IDLE
+ };
+
+ class TimeCollector : public BaseCollector {
+     std::vector<int> samples;
+     std::vector<TimeStamp> timestamps;
+     std::vector<native_thread_id_t> sample_threads;
+     std::vector<Category> sample_categories;
+
+     MarkerTable markers;
+     ThreadTable threads;
+
+     pthread_t sample_thread;
+
+     atomic_bool running;
+     SamplerSemaphore thread_stopped;
+
+     static LiveSample *live_sample;
+
+     TimeStamp started_at;
+     TimeStamp interval;
+
+ public:
+     TimeCollector(TimeStamp interval) : interval(interval) {
+     }
+
+ private:
+
+     void record_sample(const RawSample &sample, TimeStamp time, const Thread &thread, Category category) {
+         if (!sample.empty()) {
+             int stack_index = frame_list.stack_index(sample);
+             samples.push_back(stack_index);
+             timestamps.push_back(time);
+             sample_threads.push_back(thread.native_tid);
+             sample_categories.push_back(category);
+         }
+     }
+
+     static void signal_handler(int sig, siginfo_t* sinfo, void* ucontext) {
+         assert(live_sample);
+         live_sample->sample_current_thread();
+     }
+
+     VALUE get_markers() {
+         VALUE list = rb_ary_new();
+
+         for (auto& marker: this->markers.list) {
+             VALUE record[3] = {0};
+             record[0] = ULL2NUM(marker.thread_id);
+             record[1] = INT2NUM(marker.type);
+             record[2] = ULL2NUM(marker.timestamp.nanoseconds());
+             rb_ary_push(list, rb_ary_new_from_values(3, record));
+         }
+
+         return list;
+     }
+
+     void sample_thread_run() {
+         LiveSample sample;
+         live_sample = &sample;
+
+         TimeStamp next_sample_schedule = TimeStamp::Now();
+         while (running) {
+             TimeStamp sample_start = TimeStamp::Now();
+
+             threads.mutex.lock();
+             for (auto thread : threads.list) {
+                 //if (thread.state == Thread::State::RUNNING) {
+                 if (thread.state == Thread::State::RUNNING || (thread.state == Thread::State::SUSPENDED && thread.stack_on_suspend.size() == 0)) {
+                     if (pthread_kill(thread.pthread_id, SIGPROF)) {
+                         rb_bug("pthread_kill failed");
+                     }
+                     sample.wait();
+
+                     if (sample.sample.gc) {
+                         // fprintf(stderr, "skipping GC sample\n");
+                     } else {
+                         record_sample(sample.sample, sample_start, thread, CATEGORY_NORMAL);
+                     }
+                 } else if (thread.state == Thread::State::SUSPENDED) {
+                     record_sample(thread.stack_on_suspend, sample_start, thread, CATEGORY_IDLE);
+                 } else {
+                 }
+             }
+             threads.mutex.unlock();
+
+             TimeStamp sample_complete = TimeStamp::Now();
+
+             next_sample_schedule += interval;
+
+             if (next_sample_schedule < sample_complete) {
+                 //fprintf(stderr, "fell behind by %ius\n", (sample_complete - next_sample_schedule).microseconds());
+                 next_sample_schedule = sample_complete + interval;
+             }
+
+             TimeStamp sleep_time = next_sample_schedule - sample_complete;
+             TimeStamp::Sleep(sleep_time);
+         }
+
+         live_sample = NULL;
+
+         thread_stopped.post();
+     }
+
+     static void *sample_thread_entry(void *arg) {
+         TimeCollector *collector = static_cast<TimeCollector *>(arg);
+         collector->sample_thread_run();
+         return NULL;
+     }
+
+     static void internal_gc_event_cb(VALUE tpval, void *data) {
+         TimeCollector *collector = static_cast<TimeCollector *>(data);
+         rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval);
+         int event = rb_tracearg_event_flag(tparg);
+
+         switch (event) {
+             case RUBY_INTERNAL_EVENT_GC_START:
+                 collector->markers.record(Marker::Type::MARKER_GC_START);
+                 break;
+             case RUBY_INTERNAL_EVENT_GC_END_MARK:
+                 collector->markers.record(Marker::Type::MARKER_GC_END_MARK);
+                 break;
+             case RUBY_INTERNAL_EVENT_GC_END_SWEEP:
+                 collector->markers.record(Marker::Type::MARKER_GC_END_SWEEP);
+                 break;
+             case RUBY_INTERNAL_EVENT_GC_ENTER:
+                 collector->markers.record(Marker::Type::MARKER_GC_ENTER);
+                 break;
+             case RUBY_INTERNAL_EVENT_GC_EXIT:
+                 collector->markers.record(Marker::Type::MARKER_GC_EXIT);
+                 break;
+         }
+     }
+
+     static void internal_thread_event_cb(rb_event_flag_t event, const rb_internal_thread_event_data_t *event_data, void *data) {
+         TimeCollector *collector = static_cast<TimeCollector *>(data);
+         //cerr << "internal thread event" << event << " at " << TimeStamp::Now() << endl;
+
+         switch (event) {
+             case RUBY_INTERNAL_THREAD_EVENT_STARTED:
+                 collector->markers.record(Marker::Type::MARKER_GVL_THREAD_STARTED);
+                 collector->threads.started();
+                 break;
+             case RUBY_INTERNAL_THREAD_EVENT_READY:
+                 collector->markers.record(Marker::Type::MARKER_GVL_THREAD_READY);
+                 break;
+             case RUBY_INTERNAL_THREAD_EVENT_RESUMED:
+                 collector->markers.record(Marker::Type::MARKER_GVL_THREAD_RESUMED);
+                 collector->threads.set_state(Thread::State::RUNNING);
+                 break;
+             case RUBY_INTERNAL_THREAD_EVENT_SUSPENDED:
+                 collector->markers.record(Marker::Type::MARKER_GVL_THREAD_SUSPENDED);
+                 collector->threads.set_state(Thread::State::SUSPENDED);
+                 break;
+             case RUBY_INTERNAL_THREAD_EVENT_EXITED:
+                 collector->markers.record(Marker::Type::MARKER_GVL_THREAD_EXITED);
+                 collector->threads.set_state(Thread::State::STOPPED);
+                 break;
+
+         }
+     }
+
+     rb_internal_thread_event_hook_t *thread_hook;
+     VALUE gc_hook;
+
+     bool start() {
+         if (!BaseCollector::start()) {
+             return false;
+         }
+
+         started_at = TimeStamp::Now();
+
+         struct sigaction sa;
+         sa.sa_sigaction = signal_handler;
+         sa.sa_flags = SA_RESTART | SA_SIGINFO;
+         sigemptyset(&sa.sa_mask);
+         sigaction(SIGPROF, &sa, NULL);
+
+         running = true;
+
+         int ret = pthread_create(&sample_thread, NULL, &sample_thread_entry, this);
+         if (ret != 0) {
+             perror("pthread_create");
+             rb_bug("pthread_create");
+         }
+
+         // Set the state of the current Ruby thread to RUNNING.
+         // We want to have at least one thread in our thread list because it's
+         // possible that the profile might be such that we don't get any
+         // thread switch events and we need at least one
+         this->threads.set_state(Thread::State::RUNNING);
+
+         thread_hook = rb_internal_thread_add_event_hook(internal_thread_event_cb, RUBY_INTERNAL_THREAD_EVENT_MASK, this);
+         gc_hook = rb_tracepoint_new(0, RUBY_GC_PHASE_EVENTS, internal_gc_event_cb, (void *)this);
+         rb_tracepoint_enable(gc_hook);
+
+         return true;
+     }
+
+     VALUE stop() {
+         BaseCollector::stop();
+
+         running = false;
+         thread_stopped.wait();
+
+         struct sigaction sa;
+         sa.sa_handler = SIG_IGN;
+         sa.sa_flags = SA_RESTART;
+         sigemptyset(&sa.sa_mask);
+         sigaction(SIGPROF, &sa, NULL);
+
+         rb_internal_thread_remove_event_hook(thread_hook);
+         rb_tracepoint_disable(gc_hook);
+
+         // capture thread names
+         for (auto& thread: this->threads.list) {
+             if (thread.running()) {
+                 thread.capture_name();
+             }
+         }
+
+         frame_list.finalize();
+
+         VALUE result = build_collector_result();
+
+         reset();
+
+         return result;
+     }
+
+     VALUE build_collector_result() {
+         VALUE result = rb_obj_alloc(rb_cVernierResult);
+
+         VALUE meta = rb_hash_new();
+         rb_ivar_set(result, rb_intern("@meta"), meta);
+         rb_hash_aset(meta, sym("started_at"), ULL2NUM(started_at.nanoseconds()));
+
+         VALUE samples = rb_ary_new();
+         rb_ivar_set(result, rb_intern("@samples"), samples);
+         VALUE weights = rb_ary_new();
+         rb_ivar_set(result, rb_intern("@weights"), weights);
+         for (auto& stack_index: this->samples) {
+             rb_ary_push(samples, INT2NUM(stack_index));
+             rb_ary_push(weights, INT2NUM(1));
+         }
+
+         VALUE timestamps = rb_ary_new();
+         rb_ivar_set(result, rb_intern("@timestamps"), timestamps);
+
+         for (auto& timestamp: this->timestamps) {
+             rb_ary_push(timestamps, ULL2NUM(timestamp.nanoseconds()));
+         }
+
+         VALUE sample_threads = rb_ary_new();
+         rb_ivar_set(result, rb_intern("@sample_threads"), sample_threads);
+         for (auto& thread: this->sample_threads) {
+             rb_ary_push(sample_threads, ULL2NUM(thread));
+         }
+
+         VALUE sample_categories = rb_ary_new();
+         rb_ivar_set(result, rb_intern("@sample_categories"), sample_categories);
+         for (auto& cat: this->sample_categories) {
+             rb_ary_push(sample_categories, INT2NUM(cat));
+         }
+
+         VALUE threads = rb_hash_new();
+         rb_ivar_set(result, rb_intern("@threads"), threads);
+
+         for (const auto& thread: this->threads.list) {
+             VALUE hash = rb_hash_new();
+             rb_hash_aset(threads, ULL2NUM(thread.native_tid), hash);
+             rb_hash_aset(hash, sym("tid"), ULL2NUM(thread.native_tid));
+             rb_hash_aset(hash, sym("started_at"), ULL2NUM(thread.started_at.nanoseconds()));
+             if (!thread.stopped_at.zero()) {
+                 rb_hash_aset(hash, sym("stopped_at"), ULL2NUM(thread.stopped_at.nanoseconds()));
+             }
+             rb_hash_aset(hash, sym("name"), rb_str_new(thread.name.data(), thread.name.length()));

+         }
+
+         frame_list.write_result(result);
+
+         return result;
+     }
+
+     void mark() {
+         frame_list.mark_frames();
+         rb_gc_mark(gc_hook);
+
+         //for (int i = 0; i < queued_length; i++) {
+         //    rb_gc_mark(queued_frames[i]);
+         //}
+
+         // FIXME: How can we best mark buffered or pending frames?
+     }
+ };
+
+ LiveSample *TimeCollector::live_sample;

  static void
- freeobj_i(VALUE tpval, void *data) {
-     retained_collector *collector = static_cast<retained_collector *>(data);
-     TraceArg tp(tpval);
-     collector->freed_objects++;
+ collector_mark(void *data) {
+     BaseCollector *collector = static_cast<BaseCollector *>(data);
+     collector->mark();
+ }

-     collector->object_frames.erase(tp.obj);
+ static void
+ collector_free(void *data) {
+     BaseCollector *collector = static_cast<BaseCollector *>(data);
+     delete collector;
  }

+ static const rb_data_type_t rb_collector_type = {
+     .wrap_struct_name = "vernier/collector",
+     .function = {
+         //.dmemsize = rb_collector_memsize,
+         .dmark = collector_mark,
+         .dfree = collector_free,
+     },
+ };

- static VALUE
- trace_retained_start(VALUE self) {
-     retained_collector *collector = &_collector;
+ static BaseCollector *get_collector(VALUE obj) {
+     BaseCollector *collector;
+     TypedData_Get_Struct(obj, BaseCollector, &rb_collector_type, collector);
+     return collector;
+ }

-     tp_newobj = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, newobj_i, collector);
-     tp_freeobj = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_FREEOBJ, freeobj_i, collector);
+ static VALUE
+ collector_start(VALUE self) {
+     auto *collector = get_collector(self);

-     rb_tracepoint_enable(tp_newobj);
-     rb_tracepoint_enable(tp_freeobj);
+     if (!collector->start()) {
+         rb_raise(rb_eRuntimeError, "already running");
+     }

  return Qtrue;
  }

- #define sym(name) ID2SYM(rb_intern_const(name))
-
- // HACK: This isn't public, but the objspace ext uses it
- extern "C" size_t rb_obj_memsize_of(VALUE);
+ static VALUE
+ collector_stop(VALUE self) {
+     auto *collector = get_collector(self);

- static const char *
- ruby_object_type_name(VALUE obj) {
-     enum ruby_value_type type = rb_type(obj);
-
- #define TYPE_CASE(x) case (x): return (#x)
-
-     // Many of these are impossible, but it's easier to just include them
-     switch (type) {
-         TYPE_CASE(T_OBJECT);
-         TYPE_CASE(T_CLASS);
-         TYPE_CASE(T_MODULE);
-         TYPE_CASE(T_FLOAT);
-         TYPE_CASE(T_STRING);
-         TYPE_CASE(T_REGEXP);
-         TYPE_CASE(T_ARRAY);
-         TYPE_CASE(T_HASH);
-         TYPE_CASE(T_STRUCT);
-         TYPE_CASE(T_BIGNUM);
-         TYPE_CASE(T_FILE);
-         TYPE_CASE(T_DATA);
-         TYPE_CASE(T_MATCH);
-         TYPE_CASE(T_COMPLEX);
-         TYPE_CASE(T_RATIONAL);
-
-         TYPE_CASE(T_NIL);
-         TYPE_CASE(T_TRUE);
-         TYPE_CASE(T_FALSE);
-         TYPE_CASE(T_SYMBOL);
-         TYPE_CASE(T_FIXNUM);
-         TYPE_CASE(T_UNDEF);
-
-         TYPE_CASE(T_IMEMO);
-         TYPE_CASE(T_NODE);
-         TYPE_CASE(T_ICLASS);
-         TYPE_CASE(T_ZOMBIE);
-         TYPE_CASE(T_MOVED);
-
-         default:
-             return "unknown type";
-     }
- #undef TYPE_CASE
+     VALUE result = collector->stop();
+     return result;
  }

  static VALUE
- trace_retained_stop(VALUE self) {
-     rb_tracepoint_disable(tp_newobj);
-     rb_tracepoint_disable(tp_freeobj);
+ markers(VALUE self) {
+     auto *collector = get_collector(self);

-     retained_collector *collector = &_collector;
+     return collector->get_markers();
+ }

-     std::stringstream ss;
+ static VALUE
+ collector_sample(VALUE self) {
+     auto *collector = get_collector(self);

-     for (auto& it: collector->object_frames) {
-         VALUE obj = it.first;
-         const Stack &stack = *it.second;
+     collector->sample();
+     return Qtrue;
+ }

-         for (int i = stack.size() - 1; i >= 0; i--) {
-             const Frame &frame = stack.frame(i);
-             ss << frame;
-             if (i > 0) ss << ";";
+ static VALUE collector_new(VALUE self, VALUE mode, VALUE options) {
+     BaseCollector *collector;
+     if (mode == sym("retained")) {
+         collector = new RetainedCollector();
+     } else if (mode == sym("custom")) {
+         collector = new CustomCollector();
+     } else if (mode == sym("wall")) {
+         VALUE intervalv = rb_hash_aref(options, sym("interval"));
+         TimeStamp interval;
+         if (NIL_P(intervalv)) {
+             interval = TimeStamp::from_microseconds(500);
+         } else {
+             interval = TimeStamp::from_microseconds(NUM2UINT(intervalv));
  }
-         ss << ";" << ruby_object_type_name(obj);
-         ss << " " << rb_obj_memsize_of(obj) << endl;
+         collector = new TimeCollector(interval);
+     } else {
+         rb_raise(rb_eArgError, "invalid mode");
  }
-
-     std::string s = ss.str();
-     VALUE str = rb_str_new(s.c_str(), s.size());
-
-     return str;
+     VALUE obj = TypedData_Wrap_Struct(self, &rb_collector_type, collector);
+     rb_funcall(obj, rb_intern("initialize"), 1, mode);
+     return obj;
  }

  static void
- retained_collector_mark(void *data) {
-     retained_collector *collector = static_cast<retained_collector *>(data);
+ Init_consts() {
+ #define MARKER_CONST(name) \
+     rb_define_const(rb_mVernierMarkerType, #name, INT2NUM(Marker::Type::MARKER_##name))

-     // We don't mark the objects, but we MUST mark the frames, otherwise they
-     // can be garbage collected.
-     // This may lead to method entries being unnecessarily retained.
-     for (VALUE frame: collector->unique_frames) {
-         rb_gc_mark(frame);
-     }
+     MARKER_CONST(GVL_THREAD_STARTED);
+     MARKER_CONST(GVL_THREAD_READY);
+     MARKER_CONST(GVL_THREAD_RESUMED);
+     MARKER_CONST(GVL_THREAD_SUSPENDED);
+     MARKER_CONST(GVL_THREAD_EXITED);
+
+     MARKER_CONST(GC_START);
+     MARKER_CONST(GC_END_MARK);
+     MARKER_CONST(GC_END_SWEEP);
+     MARKER_CONST(GC_ENTER);
+     MARKER_CONST(GC_EXIT);
+
+ #undef MARKER_CONST
  }

  extern "C" void
  Init_vernier(void)
  {
  rb_mVernier = rb_define_module("Vernier");
+     rb_cVernierResult = rb_define_class_under(rb_mVernier, "Result", rb_cObject);
+     VALUE rb_mVernierMarker = rb_define_module_under(rb_mVernier, "Marker");
+     rb_mVernierMarkerType = rb_define_module_under(rb_mVernierMarker, "Type");
+
+     rb_cVernierCollector = rb_define_class_under(rb_mVernier, "Collector", rb_cObject);
+     rb_undef_alloc_func(rb_cVernierCollector);
+     rb_define_singleton_method(rb_cVernierCollector, "_new", collector_new, 2);
+     rb_define_method(rb_cVernierCollector, "start", collector_start, 0);
+     rb_define_method(rb_cVernierCollector, "sample", collector_sample, 0);
+     rb_define_private_method(rb_cVernierCollector, "finish", collector_stop, 0);
+     rb_define_private_method(rb_cVernierCollector, "markers", markers, 0);

-     rb_define_module_function(rb_mVernier, "trace_retained_start", trace_retained_start, 0);
-     rb_define_module_function(rb_mVernier, "trace_retained_stop", trace_retained_stop, 0);
+     Init_consts();

-     static VALUE gc_hook = Data_Wrap_Struct(rb_cObject, retained_collector_mark, NULL, &_collector);
-     rb_global_variable(&gc_hook);
+     //static VALUE gc_hook = Data_Wrap_Struct(rb_cObject, collector_mark, NULL, &_collector);
+     //rb_global_variable(&gc_hook);
  }
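
For orientation, the central new mechanism in this release is the handshake the LiveSample comments above describe: the profiler thread interrupts an observed thread with pthread_kill(SIGPROF), the signal handler takes the sample on the observed thread itself, and an async-signal-safe sem_post tells the profiler the sample is ready to read. Below is a minimal standalone sketch of that pattern, assuming the Linux (POSIX semaphore) branch of SamplerSemaphore; fake_sample, observed_thread, and the fixed three-sample loop are illustrative stand-ins, not code from the package.

// Sketch of the SIGPROF + semaphore handshake (build: g++ -std=c++17 -pthread).
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <unistd.h>
#include <cstdio>

static sem_t sample_ready;                 // posted by the signal handler
static volatile sig_atomic_t fake_sample;  // stand-in for RawSample

// Runs on the *observed* thread, inside the signal handler. Only
// async-signal-safe calls are allowed here; sem_post is one of them.
static void sigprof_handler(int, siginfo_t *, void *) {
    fake_sample = 42;            // "take a sample"
    sem_post(&sample_ready);     // tell the profiler the sample is ready
}

static void *observed_thread(void *) {
    for (;;) pause();            // idle; samples arrive via SIGPROF
    return nullptr;
}

int main() {
    sem_init(&sample_ready, 0, 0);

    struct sigaction sa = {};
    sa.sa_sigaction = sigprof_handler;
    sa.sa_flags = SA_RESTART | SA_SIGINFO;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGPROF, &sa, nullptr);

    pthread_t target;
    pthread_create(&target, nullptr, observed_thread, nullptr);

    for (int i = 0; i < 3; i++) {
        pthread_kill(target, SIGPROF);  // interrupt the observed thread
        sem_wait(&sample_ready);        // block until its handler posts
        printf("sample %d: %d\n", i, (int)fake_sample);
        usleep(500);                    // 500us, the default wall-mode interval
    }
    return 0;
}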
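
Similarly, FrameList::stack_index above interns whole stacks by walking a tree of StackNodes from the root, so each unique stack collapses to a single integer index and stacks sharing a prefix share nodes; write_result then serializes that tree as parallel parent/frame arrays. Here is a rough sketch of the same idea with plain int frame ids in place of VALUE/line pairs; StackTable and Node are hypothetical names used only for illustration.

// Sketch of prefix-tree stack interning, as in FrameList/StackNode.
#include <cstdio>
#include <unordered_map>
#include <vector>

struct Node {
    std::unordered_map<int, int> children;  // frame id -> node index
    int frame = -1;                         // frame id stored at this node
    int parent = -1;                        // parent node index, -1 at the top
};

struct StackTable {
    Node root;                 // sentinel; real nodes live in `nodes`
    std::vector<Node> nodes;

    // Intern a stack, given outermost-to-innermost frame ids.
    int index_of(const std::vector<int> &frames) {
        Node *node = &root;
        int idx = -1;
        for (int frame : frames) {
            auto it = node->children.find(frame);
            if (it == node->children.end()) {
                int new_idx = (int)nodes.size();
                node->children[frame] = new_idx;
                nodes.push_back(Node{{}, frame, idx});
                idx = new_idx;
            } else {
                idx = it->second;
            }
            node = &nodes[idx];
        }
        return idx;
    }
};

int main() {
    StackTable table;
    int a = table.index_of({1, 2, 3});
    int b = table.index_of({1, 2, 4});
    int c = table.index_of({1, 2, 3});
    printf("%d %d %d\n", a, b, c);  // a == c; b shares the {1, 2} prefix
}

The sketch looks children up with find() so that "no child yet" stays distinct from "child stored at index 0"; that is a deliberate departure from the operator[] default-to-zero shortcut in the diff, not a claim about how the package must behave.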