pf2 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Cargo.lock +481 -0
- data/Cargo.toml +3 -0
- data/README.md +99 -13
- data/ext/pf2/Cargo.toml +24 -0
- data/ext/pf2/build.rs +3 -0
- data/ext/pf2/extconf.rb +6 -1
- data/ext/pf2/src/lib.rs +14 -0
- data/ext/pf2/src/profile.rs +50 -0
- data/ext/pf2/src/profile_serializer.rs +130 -0
- data/ext/pf2/src/ringbuffer.rs +145 -0
- data/ext/pf2/src/ruby_init.rs +62 -0
- data/ext/pf2/src/sample.rs +45 -0
- data/ext/pf2/src/siginfo_t.c +5 -0
- data/ext/pf2/src/signal_scheduler/configuration.rs +24 -0
- data/ext/pf2/src/signal_scheduler/timer_installer.rs +192 -0
- data/ext/pf2/src/signal_scheduler.rs +242 -0
- data/ext/pf2/src/timer_thread_scheduler.rs +243 -0
- data/ext/pf2/src/util.rs +30 -0
- data/lib/pf2/cli.rb +1 -1
- data/lib/pf2/reporter.rb +36 -11
- data/lib/pf2/version.rb +1 -1
- data/lib/pf2.rb +23 -5
- metadata +34 -5
- data/ext/pf2/pf2.c +0 -246
data/ext/pf2/src/profile.rs
@@ -0,0 +1,50 @@
+use std::collections::HashSet;
+use std::time::Instant;
+
+use rb_sys::*;
+
+use super::ringbuffer::Ringbuffer;
+use super::sample::Sample;
+
+// Capacity large enough to hold 1 second worth of samples for 16 threads
+// 16 threads * 20 samples per second * 1 second = 320
+const DEFAULT_RINGBUFFER_CAPACITY: usize = 320;
+
+#[derive(Debug)]
+pub struct Profile {
+    pub start_timestamp: Instant,
+    pub samples: Vec<Sample>,
+    pub temporary_sample_buffer: Ringbuffer,
+    known_values: HashSet<VALUE>,
+}
+
+impl Profile {
+    pub fn new() -> Self {
+        Self {
+            start_timestamp: Instant::now(),
+            samples: vec![],
+            temporary_sample_buffer: Ringbuffer::new(DEFAULT_RINGBUFFER_CAPACITY),
+            known_values: HashSet::new(),
+        }
+    }
+
+    pub fn flush_temporary_sample_buffer(&mut self) {
+        while let Some(sample) = self.temporary_sample_buffer.pop() {
+            self.known_values.insert(sample.ruby_thread);
+            for frame in sample.frames.iter() {
+                if frame == &0 {
+                    break;
+                }
+                self.known_values.insert(*frame);
+            }
+            self.samples.push(sample);
+        }
+    }
+
+    pub unsafe fn dmark(&self) {
+        for value in self.known_values.iter() {
+            rb_gc_mark(*value);
+        }
+        self.temporary_sample_buffer.dmark();
+    }
+}
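For context on flush_temporary_sample_buffer() above: samples leave the signal handler via the ring buffer, and every VALUE they carry is interned into known_values so that dmark() can keep those objects GC-reachable afterwards. A minimal standalone sketch of that drain-and-intern pattern follows (u64 stands in for VALUE; FlatSample and flush are hypothetical names invented for this sketch):

use std::collections::HashSet;

struct FlatSample {
    thread: u64,
    frames: [u64; 8], // 0 marks the end of the captured stack
}

fn flush(pending: &mut Vec<FlatSample>, kept: &mut Vec<FlatSample>, known: &mut HashSet<u64>) {
    while let Some(sample) = pending.pop() {
        // Remember the thread and every frame so they can be kept alive later
        known.insert(sample.thread);
        for &frame in sample.frames.iter() {
            if frame == 0 {
                break; // the rest of the fixed-size buffer is unused
            }
            known.insert(frame);
        }
        kept.push(sample);
    }
}

fn main() {
    let mut pending = vec![FlatSample { thread: 7, frames: [1, 2, 0, 0, 0, 0, 0, 0] }];
    let (mut kept, mut known) = (Vec::new(), HashSet::new());
    flush(&mut pending, &mut kept, &mut known);
    assert_eq!(known.len(), 3); // the thread plus two frames
    assert_eq!(kept.len(), 1);
}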
data/ext/pf2/src/profile_serializer.rs
@@ -0,0 +1,130 @@
+use std::{collections::HashMap, ffi::CStr};
+
+use rb_sys::*;
+
+use crate::profile::Profile;
+
+#[derive(Debug, Deserialize, Serialize)]
+pub struct ProfileSerializer {
+    threads: HashMap<ThreadId, ThreadProfile>,
+}
+
+type ThreadId = VALUE;
+
+#[derive(Debug, Deserialize, Serialize)]
+struct ThreadProfile {
+    thread_id: ThreadId,
+    stack_tree: StackTreeNode,
+    #[serde(rename = "frames")]
+    frame_table: HashMap<FrameTableId, FrameTableEntry>,
+    samples: Vec<ProfileSample>,
+}
+
+impl ThreadProfile {
+    fn new(thread_id: ThreadId) -> ThreadProfile {
+        ThreadProfile {
+            thread_id,
+            // The root node
+            stack_tree: StackTreeNode {
+                children: HashMap::new(),
+                node_id: 0,
+                frame_id: 0,
+            },
+            frame_table: HashMap::new(),
+            samples: vec![],
+        }
+    }
+}
+
+type StackTreeNodeId = i32;
+
+// Arbitrary value which is used inside StackTreeNode.
+// This VALUE should not be dereferenced as a pointer; we're merely using its pointer as a unique value.
+// (Probably should be reconsidered)
+type FrameTableId = VALUE;
+
+#[derive(Debug, Deserialize, Serialize)]
+struct StackTreeNode {
+    // TODO: Maybe a Vec<StackTreeNode> is enough?
+    // There's no particular meaning in using FrameTableId as key
+    children: HashMap<FrameTableId, StackTreeNode>,
+    // An arbitrary ID (no particular meaning)
+    node_id: StackTreeNodeId,
+    // ?
+    frame_id: FrameTableId,
+}
+
+#[derive(Debug, Deserialize, Serialize)]
+struct FrameTableEntry {
+    full_label: String,
+}
+
+// Represents a leaf (terminal node)
+#[derive(Debug, Deserialize, Serialize)]
+struct ProfileSample {
+    elapsed_ns: u128,
+    stack_tree_id: StackTreeNodeId,
+}
+
+impl ProfileSerializer {
+    pub fn serialize(profile: &Profile) -> String {
+        let mut sequence = 1;
+
+        let mut serializer = ProfileSerializer {
+            threads: HashMap::new(),
+        };
+
+        unsafe {
+            // Process each sample
+            for sample in profile.samples.iter() {
+                // Find the Thread profile for this sample
+                let thread_serializer = serializer
+                    .threads
+                    .entry(sample.ruby_thread)
+                    .or_insert(ThreadProfile::new(sample.ruby_thread));
+
+                // Stack frames, shallow to deep
+                let mut stack_tree = &mut thread_serializer.stack_tree;
+
+                for i in (0..(sample.line_count - 1)).rev() {
+                    let frame = sample.frames[i as usize];
+
+                    // Register frame metadata to frame table, if not registered yet
+                    let frame_table_id: FrameTableId = frame;
+                    thread_serializer
+                        .frame_table
+                        .entry(frame_table_id)
+                        .or_insert(FrameTableEntry {
+                            full_label: CStr::from_ptr(rb_string_value_cstr(
+                                &mut rb_profile_frame_full_label(frame),
+                            ))
+                            .to_str()
+                            .unwrap()
+                            .to_string(),
+                        });
+
+                    stack_tree = stack_tree.children.entry(frame_table_id).or_insert({
+                        let node = StackTreeNode {
+                            children: HashMap::new(),
+                            node_id: sequence,
+                            frame_id: frame_table_id,
+                        };
+                        sequence += 1;
+                        node
+                    });
+
+                    if i == 0 {
+                        // This is the leaf node, record a Sample
+                        let elapsed_ns = (sample.timestamp - profile.start_timestamp).as_nanos();
+                        thread_serializer.samples.push(ProfileSample {
+                            elapsed_ns,
+                            stack_tree_id: stack_tree.node_id,
+                        });
+                    }
+                }
+            }
+        }
+
+        serde_json::to_string(&serializer).unwrap()
+    }
+}
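The heart of serialize() is the tree walk: frames are replayed shallow to deep, each frame either finds or creates a child node, and the node reached last identifies the whole stack, so identical stacks collapse to the same stack_tree_id. A standalone sketch of that interning walk (strings stand in for frame VALUEs; Node and intern are hypothetical names; it uses or_insert_with, so unlike the eagerly evaluated or_insert({ ... }) block above, sequence numbers are only consumed when a node is actually inserted):

use std::collections::HashMap;

#[derive(Debug)]
struct Node {
    children: HashMap<String, Node>,
    node_id: i32,
}

fn intern(root: &mut Node, stack: &[&str], sequence: &mut i32) -> i32 {
    let mut node = root;
    for frame in stack {
        // Find or create the child node for this frame
        node = node.children.entry(frame.to_string()).or_insert_with(|| {
            *sequence += 1;
            Node { children: HashMap::new(), node_id: *sequence }
        });
    }
    node.node_id // the leaf node identifies the full stack
}

fn main() {
    let mut root = Node { children: HashMap::new(), node_id: 0 };
    let mut seq = 0;
    let a = intern(&mut root, &["main", "foo"], &mut seq);
    let b = intern(&mut root, &["main", "foo"], &mut seq);
    let c = intern(&mut root, &["main", "bar"], &mut seq);
    assert_eq!(a, b); // identical stacks share a node
    assert_ne!(a, c);
}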
data/ext/pf2/src/ringbuffer.rs
@@ -0,0 +1,145 @@
+use crate::sample::Sample;
+
+#[derive(Debug)]
+pub struct Ringbuffer {
+    capacity: usize,
+    buffer: Vec<Option<Sample>>,
+    read_index: usize,
+    write_index: usize,
+}
+
+#[derive(Debug, PartialEq)]
+pub enum RingbufferError {
+    Full,
+}
+
+impl Ringbuffer {
+    pub fn new(capacity: usize) -> Self {
+        Self {
+            capacity,
+            buffer: std::iter::repeat_with(|| None)
+                .take(capacity + 1)
+                .collect::<Vec<_>>(),
+            read_index: 0,
+            write_index: 0,
+        }
+    }
+
+    // async-signal-safe
+    pub fn push(&mut self, sample: Sample) -> Result<(), RingbufferError> {
+        let next = (self.write_index + 1) % (self.capacity + 1);
+        if next == self.read_index {
+            return Err(RingbufferError::Full);
+        }
+        self.buffer[self.write_index] = Some(sample);
+        self.write_index = next;
+        Ok(())
+    }
+
+    pub fn pop(&mut self) -> Option<Sample> {
+        if self.read_index == self.write_index {
+            return None;
+        }
+        let sample = self.buffer[self.read_index].take();
+        self.read_index = (self.read_index + 1) % (self.capacity + 1);
+        sample
+    }
+
+    // This will call rb_gc_mark() for capacity * Sample::MAX_STACK_DEPTH * 2 times, which is a lot!
+    pub fn dmark(&self) {
+        for sample in self.buffer.iter().flatten() {
+            unsafe {
+                sample.dmark();
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::Instant;
+
+    #[test]
+    fn test_ringbuffer() {
+        let mut ringbuffer = Ringbuffer::new(2);
+        assert_eq!(ringbuffer.pop(), None);
+
+        let sample1 = Sample {
+            ruby_thread: 1,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+        let sample2 = Sample {
+            ruby_thread: 2,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+
+        ringbuffer.push(sample1).unwrap();
+        ringbuffer.push(sample2).unwrap();
+
+        assert_eq!(ringbuffer.pop().unwrap().ruby_thread, 1);
+        assert_eq!(ringbuffer.pop().unwrap().ruby_thread, 2);
+        assert_eq!(ringbuffer.pop(), None);
+    }
+
+    #[test]
+    fn test_ringbuffer_full() {
+        let mut ringbuffer = Ringbuffer::new(1);
+        let sample1 = Sample {
+            ruby_thread: 1,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+        let sample2 = Sample {
+            ruby_thread: 2,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+
+        ringbuffer.push(sample1).unwrap();
+        assert_eq!(ringbuffer.push(sample2), Err(RingbufferError::Full));
+    }
+
+    #[test]
+    fn test_ringbuffer_write_a_lot() {
+        let mut ringbuffer = Ringbuffer::new(2);
+        let sample1 = Sample {
+            ruby_thread: 1,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+        let sample2 = Sample {
+            ruby_thread: 2,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+        let sample3 = Sample {
+            ruby_thread: 3,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+
+        ringbuffer.push(sample1).unwrap();
+        ringbuffer.pop().unwrap();
+        ringbuffer.push(sample2).unwrap();
+        ringbuffer.pop().unwrap();
+        ringbuffer.push(sample3).unwrap();
+        assert_eq!(ringbuffer.pop().unwrap().ruby_thread, 3);
+    }
+}
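The buffer allocates capacity + 1 slots so that "empty" (read_index == write_index) and "full" (the write cursor one step behind the read cursor) stay distinguishable without a separate element counter, which keeps push() and pop() simple enough to call from a signal handler. The same index arithmetic in a self-contained sketch (u64 payloads; Ring is a hypothetical name):

struct Ring {
    slots: Vec<Option<u64>>, // capacity + 1 entries; one is always kept free
    read: usize,
    write: usize,
}

impl Ring {
    fn new(capacity: usize) -> Self {
        Ring { slots: vec![None; capacity + 1], read: 0, write: 0 }
    }

    fn push(&mut self, v: u64) -> Result<(), ()> {
        let next = (self.write + 1) % self.slots.len();
        if next == self.read {
            return Err(()); // full: writing would make "full" look like "empty"
        }
        self.slots[self.write] = Some(v);
        self.write = next;
        Ok(())
    }

    fn pop(&mut self) -> Option<u64> {
        if self.read == self.write {
            return None; // empty
        }
        let v = self.slots[self.read].take();
        self.read = (self.read + 1) % self.slots.len();
        v
    }
}

fn main() {
    let mut r = Ring::new(1);
    assert!(r.push(1).is_ok());
    assert!(r.push(2).is_err()); // capacity 1: the second push reports Full
    assert_eq!(r.pop(), Some(1));
    assert_eq!(r.pop(), None);
}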
data/ext/pf2/src/ruby_init.rs
@@ -0,0 +1,62 @@
+#![deny(unsafe_op_in_unsafe_fn)]
+
+use rb_sys::*;
+
+#[cfg(target_os = "linux")]
+use crate::signal_scheduler::SignalScheduler;
+use crate::timer_thread_scheduler::TimerThreadScheduler;
+use crate::util::*;
+
+#[allow(non_snake_case)]
+#[no_mangle]
+extern "C" fn Init_pf2() {
+    #[cfg(feature = "debug")]
+    {
+        env_logger::builder()
+            .format_timestamp(None)
+            .format_module_path(false)
+            .init();
+    }
+
+    unsafe {
+        let rb_mPf2: VALUE = rb_define_module(cstr!("Pf2"));
+
+        #[cfg(target_os = "linux")]
+        {
+            let rb_mPf2_SignalScheduler =
+                rb_define_class_under(rb_mPf2, cstr!("SignalScheduler"), rb_cObject);
+            rb_define_alloc_func(rb_mPf2_SignalScheduler, Some(SignalScheduler::rb_alloc));
+            rb_define_method(
+                rb_mPf2_SignalScheduler,
+                cstr!("start"),
+                Some(to_ruby_cfunc3(SignalScheduler::rb_start)),
+                2,
+            );
+            rb_define_method(
+                rb_mPf2_SignalScheduler,
+                cstr!("stop"),
+                Some(to_ruby_cfunc1(SignalScheduler::rb_stop)),
+                0,
+            );
+        }
+
+        let rb_mPf2_TimerThreadScheduler =
+            rb_define_class_under(rb_mPf2, cstr!("TimerThreadScheduler"), rb_cObject);
+        rb_define_alloc_func(
+            rb_mPf2_TimerThreadScheduler,
+            Some(TimerThreadScheduler::rb_alloc),
+        );
+        rb_define_method(
+            rb_mPf2_TimerThreadScheduler,
+            cstr!("start"),
+            Some(to_ruby_cfunc3(TimerThreadScheduler::rb_start)),
+            2,
+        );
+        rb_define_method(
+            rb_mPf2_TimerThreadScheduler,
+            cstr!("stop"),
+            Some(to_ruby_cfunc1(TimerThreadScheduler::rb_stop)),
+            0,
+        );
+    }
+}
data/ext/pf2/src/sample.rs
@@ -0,0 +1,45 @@
+use std::time::Instant;
+
+use rb_sys::*;
+
+const MAX_STACK_DEPTH: usize = 500;
+
+#[derive(Debug, PartialEq)]
+pub struct Sample {
+    pub ruby_thread: VALUE,
+    pub timestamp: Instant,
+    pub line_count: i32,
+    pub frames: [VALUE; MAX_STACK_DEPTH],
+    pub linenos: [i32; MAX_STACK_DEPTH],
+}
+
+impl Sample {
+    // Nearly async-signal-safe
+    // (rb_profile_thread_frames isn't defined as a-s-s)
+    pub fn capture(ruby_thread: VALUE) -> Self {
+        let mut sample = Sample {
+            ruby_thread,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; MAX_STACK_DEPTH],
+            linenos: [0; MAX_STACK_DEPTH],
+        };
+        unsafe {
+            sample.line_count = rb_profile_thread_frames(
+                ruby_thread,
+                0,
+                2000,
+                sample.frames.as_mut_ptr(),
+                sample.linenos.as_mut_ptr(),
+            );
+        };
+        sample
+    }
+
+    pub unsafe fn dmark(&self) {
+        rb_gc_mark(self.ruby_thread);
+        for frame in self.frames.iter() {
+            rb_gc_mark(*frame);
+        }
+    }
+}
data/ext/pf2/src/signal_scheduler/configuration.rs
@@ -0,0 +1,24 @@
+use std::str::FromStr;
+
+#[derive(Clone, Debug)]
+pub struct Configuration {
+    pub time_mode: TimeMode,
+}
+
+#[derive(Clone, Debug)]
+pub enum TimeMode {
+    CpuTime,
+    WallTime,
+}
+
+impl FromStr for TimeMode {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "cpu" => Ok(Self::CpuTime),
+            "wall" => Ok(Self::WallTime),
+            _ => Err(()),
+        }
+    }
+}
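The FromStr impl above lets option strings handed over from the Ruby side be parsed with the standard str::parse machinery; only "cpu" and "wall" are accepted. A self-contained sketch of the same impl plus usage (the enum is reproduced locally, with extra derives, so the example runs on its own):

use std::str::FromStr;

#[derive(Clone, Debug, PartialEq)]
enum TimeMode {
    CpuTime,
    WallTime,
}

impl FromStr for TimeMode {
    type Err = ();

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "cpu" => Ok(Self::CpuTime),
            "wall" => Ok(Self::WallTime),
            _ => Err(()),
        }
    }
}

fn main() {
    // str::parse delegates to the FromStr impl
    assert_eq!("cpu".parse::<TimeMode>(), Ok(TimeMode::CpuTime));
    assert_eq!("wall".parse::<TimeMode>(), Ok(TimeMode::WallTime));
    assert!("gpu".parse::<TimeMode>().is_err());
}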
data/ext/pf2/src/signal_scheduler/timer_installer.rs
@@ -0,0 +1,192 @@
+use std::collections::HashMap;
+use std::ffi::c_void;
+use std::mem;
+use std::mem::ManuallyDrop;
+use std::ptr::null_mut;
+use std::sync::{Mutex, RwLock};
+use std::{collections::HashSet, sync::Arc};
+
+use rb_sys::*;
+
+use crate::signal_scheduler::SignalHandlerArgs;
+
+use super::configuration::Configuration;
+use crate::profile::Profile;
+
+// We could avoid deferring the timer creation by combining pthread_getcpuclockid(3) and timer_create(2) here,
+// but we're not doing so since (1) Ruby does not expose the pthread_self() of a Ruby Thread
+// (which is actually stored in th->nt->thread_id), and (2) pthread_getcpuclockid(3) is not portable
+// in the first place (e.g. not available on macOS).
+pub struct TimerInstaller {
+    internal: Box<Mutex<Internal>>,
+}
+
+struct Internal {
+    configuration: Configuration,
+    target_ruby_threads: HashSet<VALUE>,
+    registered_pthread_ids: HashSet<libc::pthread_t>,
+    kernel_thread_id_to_ruby_thread_map: HashMap<libc::pid_t, VALUE>,
+    profile: Arc<RwLock<Profile>>,
+}
+
+impl TimerInstaller {
+    // Register a callback that gets called when a Ruby Thread is resumed.
+    // The callback should create a timer for the thread.
+    pub fn install_timer_to_ruby_threads(
+        configuration: Configuration,
+        ruby_threads: &HashSet<VALUE>,
+        profile: Arc<RwLock<Profile>>,
+        track_new_threads: bool,
+    ) {
+        let registrar = Self {
+            internal: Box::new(Mutex::new(Internal {
+                configuration,
+                target_ruby_threads: ruby_threads.clone(),
+                registered_pthread_ids: HashSet::new(),
+                kernel_thread_id_to_ruby_thread_map: HashMap::new(),
+                profile,
+            })),
+        };
+
+        let ptr = Box::into_raw(registrar.internal);
+        unsafe {
+            rb_internal_thread_add_event_hook(
+                Some(Self::on_thread_resume),
+                RUBY_INTERNAL_THREAD_EVENT_RESUMED,
+                ptr as *mut c_void,
+            );
+            // Spawn a no-op Thread to fire the event hook
+            // (at least 2 Ruby Threads must be active for the RESUMED hook to be fired)
+            rb_thread_create(Some(Self::do_nothing), null_mut());
+        };
+
+        if track_new_threads {
+            unsafe {
+                rb_internal_thread_add_event_hook(
+                    Some(Self::on_thread_start),
+                    RUBY_INTERNAL_THREAD_EVENT_STARTED,
+                    ptr as *mut c_void,
+                );
+            };
+        }
+    }
+
+    unsafe extern "C" fn do_nothing(_: *mut c_void) -> VALUE {
+        Qnil.into()
+    }
+
+    // Thread resume callback
+    unsafe extern "C" fn on_thread_resume(
+        _flag: rb_event_flag_t,
+        data: *const rb_internal_thread_event_data,
+        custom_data: *mut c_void,
+    ) {
+        // The SignalScheduler (as a Ruby obj) should be passed as custom_data
+        let internal =
+            unsafe { ManuallyDrop::new(Box::from_raw(custom_data as *mut Mutex<Internal>)) };
+        let mut internal = internal.lock().unwrap();
+
+        // Check if the current thread is a target Ruby Thread
+        let current_ruby_thread: VALUE = unsafe { (*data).thread };
+        if !internal.target_ruby_threads.contains(&current_ruby_thread) {
+            return;
+        }
+
+        // Check if the current thread is already registered
+        let current_pthread_id = unsafe { libc::pthread_self() };
+        if internal
+            .registered_pthread_ids
+            .contains(&current_pthread_id)
+        {
+            return;
+        }
+
+        // Record the pthread ID of the current thread
+        internal.registered_pthread_ids.insert(current_pthread_id);
+        // Keep a mapping from kernel thread ID to Ruby Thread
+        internal
+            .kernel_thread_id_to_ruby_thread_map
+            .insert(unsafe { libc::gettid() }, current_ruby_thread);
+
+        Self::register_timer_to_current_thread(
+            &internal.configuration,
+            &internal.profile,
+            &internal.kernel_thread_id_to_ruby_thread_map,
+        );
+
+        // TODO: Remove the hook when all threads have been registered
+    }
+
+    // Thread start callback
+    unsafe extern "C" fn on_thread_start(
+        _flag: rb_event_flag_t,
+        data: *const rb_internal_thread_event_data,
+        custom_data: *mut c_void,
+    ) {
+        // The SignalScheduler (as a Ruby obj) should be passed as custom_data
+        let internal =
+            unsafe { ManuallyDrop::new(Box::from_raw(custom_data as *mut Mutex<Internal>)) };
+        let mut internal = internal.lock().unwrap();
+
+        let current_ruby_thread: VALUE = unsafe { (*data).thread };
+        internal.target_ruby_threads.insert(current_ruby_thread);
+    }
+
+    // Creates a new POSIX timer which invokes sampling for the thread that called this function.
+    fn register_timer_to_current_thread(
+        configuration: &Configuration,
+        profile: &Arc<RwLock<Profile>>,
+        kernel_thread_id_to_ruby_thread_map: &HashMap<libc::pid_t, VALUE>,
+    ) {
+        let current_pthread_id = unsafe { libc::pthread_self() };
+        let context_ruby_thread: VALUE = unsafe {
+            *(kernel_thread_id_to_ruby_thread_map
+                .get(&(libc::gettid()))
+                .unwrap())
+        };
+
+        // NOTE: This Box is never dropped
+        let signal_handler_args = Box::new(SignalHandlerArgs {
+            profile: Arc::clone(profile),
+            context_ruby_thread,
+        });
+
+        // Create a signal event
+        let mut sigevent: libc::sigevent = unsafe { mem::zeroed() };
+        // Note: SIGEV_THREAD_ID is Linux-specific. On other platforms, we would need to
+        // "trampoline" the signal as any pthread can receive the signal.
+        sigevent.sigev_notify = libc::SIGEV_THREAD_ID;
+        sigevent.sigev_notify_thread_id =
+            unsafe { libc::syscall(libc::SYS_gettid).try_into().unwrap() }; // The kernel thread ID
+        sigevent.sigev_signo = libc::SIGALRM;
+        // Pass required args to the signal handler
+        sigevent.sigev_value.sival_ptr = Box::into_raw(signal_handler_args) as *mut c_void;
+
+        // Create and configure timer to fire every 10 ms of CPU time
+        let mut timer: libc::timer_t = unsafe { mem::zeroed() };
+        match configuration.time_mode {
+            crate::signal_scheduler::TimeMode::CpuTime => {
+                let err = unsafe {
+                    libc::timer_create(libc::CLOCK_THREAD_CPUTIME_ID, &mut sigevent, &mut timer)
+                };
+                if err != 0 {
+                    panic!("timer_create failed: {}", err);
+                }
+            }
+            crate::signal_scheduler::TimeMode::WallTime => {
+                todo!("WallTime is not supported yet");
+            }
+        };
+        let mut its: libc::itimerspec = unsafe { mem::zeroed() };
+        its.it_interval.tv_sec = 0;
+        its.it_interval.tv_nsec = 10_000_000; // 10 ms
+        its.it_value.tv_sec = 0;
+        its.it_value.tv_nsec = 10_000_000;
+        let err = unsafe { libc::timer_settime(timer, 0, &its, null_mut()) };
+        if err != 0 {
+            panic!("timer_settime failed: {}", err);
+        }
+
+        log::debug!("timer registered for thread {}", current_pthread_id);
+    }
+}
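register_timer_to_current_thread() combines three Linux pieces: a per-thread CPU clock (CLOCK_THREAD_CPUTIME_ID), a POSIX timer armed at 10 ms intervals, and SIGEV_THREAD_ID so the signal is delivered to the sampled kernel thread itself. A condensed, Linux-only sketch of the same mechanism, assuming the libc crate (the real code routes the signal into Ruby-aware sampling; here the handler just counts ticks):

use std::sync::atomic::{AtomicU64, Ordering};

static TICKS: AtomicU64 = AtomicU64::new(0);

extern "C" fn on_alarm(_sig: libc::c_int) {
    // Only async-signal-safe work belongs here; a relaxed atomic add qualifies.
    TICKS.fetch_add(1, Ordering::Relaxed);
}

fn main() {
    unsafe {
        // Route SIGALRM to our handler
        libc::signal(libc::SIGALRM, on_alarm as libc::sighandler_t);

        // Deliver the signal to this specific kernel thread (Linux-only)
        let mut sigevent: libc::sigevent = std::mem::zeroed();
        sigevent.sigev_notify = libc::SIGEV_THREAD_ID;
        sigevent.sigev_signo = libc::SIGALRM;
        sigevent.sigev_notify_thread_id = libc::syscall(libc::SYS_gettid) as libc::c_int;

        // Timer driven by this thread's CPU time, firing every 10 ms
        let mut timer: libc::timer_t = std::mem::zeroed();
        assert_eq!(
            libc::timer_create(libc::CLOCK_THREAD_CPUTIME_ID, &mut sigevent, &mut timer),
            0
        );
        let mut its: libc::itimerspec = std::mem::zeroed();
        its.it_interval.tv_nsec = 10_000_000; // 10 ms
        its.it_value.tv_nsec = 10_000_000;
        assert_eq!(libc::timer_settime(timer, 0, &its, std::ptr::null_mut()), 0);
    }

    // Burn CPU: sleeping would not advance CLOCK_THREAD_CPUTIME_ID
    let mut acc = 0u64;
    for i in 0..200_000_000u64 {
        acc = acc.wrapping_add(i);
    }
    println!("{} ticks while computing {}", TICKS.load(Ordering::Relaxed), acc);
}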