pf2 0.1.0 → 0.2.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/Cargo.lock +481 -0
- data/Cargo.toml +3 -0
- data/README.md +99 -13
- data/ext/pf2/Cargo.toml +24 -0
- data/ext/pf2/build.rs +3 -0
- data/ext/pf2/extconf.rb +6 -1
- data/ext/pf2/src/lib.rs +14 -0
- data/ext/pf2/src/profile.rs +50 -0
- data/ext/pf2/src/profile_serializer.rs +130 -0
- data/ext/pf2/src/ringbuffer.rs +145 -0
- data/ext/pf2/src/ruby_init.rs +62 -0
- data/ext/pf2/src/sample.rs +45 -0
- data/ext/pf2/src/siginfo_t.c +5 -0
- data/ext/pf2/src/signal_scheduler/configuration.rs +24 -0
- data/ext/pf2/src/signal_scheduler/timer_installer.rs +192 -0
- data/ext/pf2/src/signal_scheduler.rs +242 -0
- data/ext/pf2/src/timer_thread_scheduler.rs +243 -0
- data/ext/pf2/src/util.rs +30 -0
- data/lib/pf2/cli.rb +1 -1
- data/lib/pf2/reporter.rb +36 -11
- data/lib/pf2/version.rb +1 -1
- data/lib/pf2.rb +23 -5
- metadata +34 -5
- data/ext/pf2/pf2.c +0 -246
data/ext/pf2/src/profile.rs
@@ -0,0 +1,50 @@
+use std::collections::HashSet;
+use std::time::Instant;
+
+use rb_sys::*;
+
+use super::ringbuffer::Ringbuffer;
+use super::sample::Sample;
+
+// Capacity large enough to hold 1 second worth of samples for 16 threads
+// 16 threads * 20 samples per second * 1 second = 320
+const DEFAULT_RINGBUFFER_CAPACITY: usize = 320;
+
+#[derive(Debug)]
+pub struct Profile {
+    pub start_timestamp: Instant,
+    pub samples: Vec<Sample>,
+    pub temporary_sample_buffer: Ringbuffer,
+    known_values: HashSet<VALUE>,
+}
+
+impl Profile {
+    pub fn new() -> Self {
+        Self {
+            start_timestamp: Instant::now(),
+            samples: vec![],
+            temporary_sample_buffer: Ringbuffer::new(DEFAULT_RINGBUFFER_CAPACITY),
+            known_values: HashSet::new(),
+        }
+    }
+
+    pub fn flush_temporary_sample_buffer(&mut self) {
+        while let Some(sample) = self.temporary_sample_buffer.pop() {
+            self.known_values.insert(sample.ruby_thread);
+            for frame in sample.frames.iter() {
+                if frame == &0 {
+                    break;
+                }
+                self.known_values.insert(*frame);
+            }
+            self.samples.push(sample);
+        }
+    }
+
+    pub unsafe fn dmark(&self) {
+        for value in self.known_values.iter() {
+            rb_gc_mark(*value);
+        }
+        self.temporary_sample_buffer.dmark();
+    }
+}
data/ext/pf2/src/profile_serializer.rs
@@ -0,0 +1,130 @@
+use std::{collections::HashMap, ffi::CStr};
+
+use rb_sys::*;
+use serde::{Deserialize, Serialize}; // assumed import; the derives below require it unless it is brought in crate-wide
+
+use crate::profile::Profile;
+
+#[derive(Debug, Deserialize, Serialize)]
+pub struct ProfileSerializer {
+    threads: HashMap<ThreadId, ThreadProfile>,
+}
+
+type ThreadId = VALUE;
+
+#[derive(Debug, Deserialize, Serialize)]
+struct ThreadProfile {
+    thread_id: ThreadId,
+    stack_tree: StackTreeNode,
+    #[serde(rename = "frames")]
+    frame_table: HashMap<FrameTableId, FrameTableEntry>,
+    samples: Vec<ProfileSample>,
+}
+
+impl ThreadProfile {
+    fn new(thread_id: ThreadId) -> ThreadProfile {
+        ThreadProfile {
+            thread_id,
+            // The root node
+            stack_tree: StackTreeNode {
+                children: HashMap::new(),
+                node_id: 0,
+                frame_id: 0,
+            },
+            frame_table: HashMap::new(),
+            samples: vec![],
+        }
+    }
+}
+
+type StackTreeNodeId = i32;
+
+// Arbitrary value which is used inside StackTreeNode.
+// This VALUE should not be dereferenced as a pointer; we merely use the
+// pointer value as a unique key. (Probably should be reconsidered)
+type FrameTableId = VALUE;
+
+#[derive(Debug, Deserialize, Serialize)]
+struct StackTreeNode {
+    // TODO: Maybe a Vec<StackTreeNode> is enough?
+    // There's no particular meaning in using FrameTableId as key
+    children: HashMap<FrameTableId, StackTreeNode>,
+    // An arbitrary ID (no particular meaning)
+    node_id: StackTreeNodeId,
+    // The frame table entry this node represents
+    frame_id: FrameTableId,
+}
+
+#[derive(Debug, Deserialize, Serialize)]
+struct FrameTableEntry {
+    full_label: String,
+}
+
+// Represents a leaf of the stack tree
+#[derive(Debug, Deserialize, Serialize)]
+struct ProfileSample {
+    elapsed_ns: u128,
+    stack_tree_id: StackTreeNodeId,
+}
+
+impl ProfileSerializer {
+    pub fn serialize(profile: &Profile) -> String {
+        let mut sequence = 1;
+
+        let mut serializer = ProfileSerializer {
+            threads: HashMap::new(),
+        };
+
+        unsafe {
+            // Process each sample
+            for sample in profile.samples.iter() {
+                // Find the thread profile for this sample
+                let thread_serializer = serializer
+                    .threads
+                    .entry(sample.ruby_thread)
+                    .or_insert(ThreadProfile::new(sample.ruby_thread));
+
+                // Stack frames, shallow to deep
+                let mut stack_tree = &mut thread_serializer.stack_tree;
+
+                for i in (0..(sample.line_count - 1)).rev() {
+                    let frame = sample.frames[i as usize];
+
+                    // Register frame metadata to the frame table, if not registered yet
+                    let frame_table_id: FrameTableId = frame;
+                    thread_serializer
+                        .frame_table
+                        .entry(frame_table_id)
+                        .or_insert(FrameTableEntry {
+                            full_label: CStr::from_ptr(rb_string_value_cstr(
+                                &mut rb_profile_frame_full_label(frame),
+                            ))
+                            .to_str()
+                            .unwrap()
+                            .to_string(),
+                        });
+
+                    stack_tree = stack_tree.children.entry(frame_table_id).or_insert({
+                        let node = StackTreeNode {
+                            children: HashMap::new(),
+                            node_id: sequence,
+                            frame_id: frame_table_id,
+                        };
+                        sequence += 1;
+                        node
+                    });
+
+                    if i == 0 {
+                        // This is the leaf node; record a Sample
+                        let elapsed_ns = (sample.timestamp - profile.start_timestamp).as_nanos();
+                        thread_serializer.samples.push(ProfileSample {
+                            elapsed_ns,
+                            stack_tree_id: stack_tree.node_id,
+                        });
+                    }
+                }
+            }
+        }
+
+        serde_json::to_string(&serializer).unwrap()
+    }
+}
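
(Example, not part of the diff.) ProfileSerializer::serialize is the bridge from the sampled data to the JSON presumably consumed on the Ruby side (data/lib/pf2/reporter.rb). A minimal sketch of how the pieces above fit together, assuming a Profile already populated by one of the schedulers; the dump_profile helper is hypothetical:

    // Hypothetical helper, not part of pf2: drain pending samples, then serialize.
    fn dump_profile(profile: &mut Profile) -> String {
        // Move samples out of the async-signal-safe ring buffer into `samples`,
        // registering their VALUEs for GC marking along the way.
        profile.flush_temporary_sample_buffer();
        // Emit the JSON document: threads -> { stack_tree, frames, samples }.
        ProfileSerializer::serialize(profile)
    }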
data/ext/pf2/src/ringbuffer.rs
@@ -0,0 +1,145 @@
+use crate::sample::Sample;
+
+#[derive(Debug)]
+pub struct Ringbuffer {
+    capacity: usize,
+    buffer: Vec<Option<Sample>>,
+    read_index: usize,
+    write_index: usize,
+}
+
+#[derive(Debug, PartialEq)]
+pub enum RingbufferError {
+    Full,
+}
+
+impl Ringbuffer {
+    pub fn new(capacity: usize) -> Self {
+        Self {
+            capacity,
+            buffer: std::iter::repeat_with(|| None)
+                .take(capacity + 1) // one slot stays empty so read_index == write_index always means "empty"
+                .collect::<Vec<_>>(),
+            read_index: 0,
+            write_index: 0,
+        }
+    }
+
+    // async-signal-safe
+    pub fn push(&mut self, sample: Sample) -> Result<(), RingbufferError> {
+        let next = (self.write_index + 1) % (self.capacity + 1);
+        if next == self.read_index {
+            return Err(RingbufferError::Full);
+        }
+        self.buffer[self.write_index] = Some(sample);
+        self.write_index = next;
+        Ok(())
+    }
+
+    pub fn pop(&mut self) -> Option<Sample> {
+        if self.read_index == self.write_index {
+            return None;
+        }
+        let sample = self.buffer[self.read_index].take();
+        self.read_index = (self.read_index + 1) % (self.capacity + 1);
+        sample
+    }
+
+    // This will call rb_gc_mark() for capacity * Sample::MAX_STACK_DEPTH * 2 times, which is a lot!
+    pub fn dmark(&self) {
+        for sample in self.buffer.iter().flatten() {
+            unsafe {
+                sample.dmark();
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::Instant;
+
+    #[test]
+    fn test_ringbuffer() {
+        let mut ringbuffer = Ringbuffer::new(2);
+        assert_eq!(ringbuffer.pop(), None);
+
+        let sample1 = Sample {
+            ruby_thread: 1,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+        let sample2 = Sample {
+            ruby_thread: 2,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+
+        ringbuffer.push(sample1).unwrap();
+        ringbuffer.push(sample2).unwrap();
+
+        assert_eq!(ringbuffer.pop().unwrap().ruby_thread, 1);
+        assert_eq!(ringbuffer.pop().unwrap().ruby_thread, 2);
+        assert_eq!(ringbuffer.pop(), None);
+    }
+
+    #[test]
+    fn test_ringbuffer_full() {
+        let mut ringbuffer = Ringbuffer::new(1);
+        let sample1 = Sample {
+            ruby_thread: 1,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+        let sample2 = Sample {
+            ruby_thread: 2,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+
+        ringbuffer.push(sample1).unwrap();
+        assert_eq!(ringbuffer.push(sample2), Err(RingbufferError::Full));
+    }
+
+    #[test]
+    fn test_ringbuffer_write_a_lot() {
+        let mut ringbuffer = Ringbuffer::new(2);
+        let sample1 = Sample {
+            ruby_thread: 1,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+        let sample2 = Sample {
+            ruby_thread: 2,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+        let sample3 = Sample {
+            ruby_thread: 3,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; 500],
+            linenos: [0; 500],
+        };
+
+        ringbuffer.push(sample1).unwrap();
+        ringbuffer.pop().unwrap();
+        ringbuffer.push(sample2).unwrap();
+        ringbuffer.pop().unwrap();
+        ringbuffer.push(sample3).unwrap();
+        assert_eq!(ringbuffer.pop().unwrap().ruby_thread, 3);
+    }
+}
data/ext/pf2/src/ruby_init.rs
@@ -0,0 +1,62 @@
+#![deny(unsafe_op_in_unsafe_fn)]
+
+use rb_sys::*;
+
+#[cfg(target_os = "linux")]
+use crate::signal_scheduler::SignalScheduler;
+use crate::timer_thread_scheduler::TimerThreadScheduler;
+use crate::util::*;
+
+#[allow(non_snake_case)]
+#[no_mangle]
+extern "C" fn Init_pf2() {
+    #[cfg(feature = "debug")]
+    {
+        env_logger::builder()
+            .format_timestamp(None)
+            .format_module_path(false)
+            .init();
+    }
+
+    unsafe {
+        let rb_mPf2: VALUE = rb_define_module(cstr!("Pf2"));
+
+        #[cfg(target_os = "linux")]
+        {
+            let rb_mPf2_SignalScheduler =
+                rb_define_class_under(rb_mPf2, cstr!("SignalScheduler"), rb_cObject);
+            rb_define_alloc_func(rb_mPf2_SignalScheduler, Some(SignalScheduler::rb_alloc));
+            rb_define_method(
+                rb_mPf2_SignalScheduler,
+                cstr!("start"),
+                Some(to_ruby_cfunc3(SignalScheduler::rb_start)),
+                2,
+            );
+            rb_define_method(
+                rb_mPf2_SignalScheduler,
+                cstr!("stop"),
+                Some(to_ruby_cfunc1(SignalScheduler::rb_stop)),
+                0,
+            );
+        }
+
+        let rb_mPf2_TimerThreadScheduler =
+            rb_define_class_under(rb_mPf2, cstr!("TimerThreadScheduler"), rb_cObject);
+        rb_define_alloc_func(
+            rb_mPf2_TimerThreadScheduler,
+            Some(TimerThreadScheduler::rb_alloc),
+        );
+        rb_define_method(
+            rb_mPf2_TimerThreadScheduler,
+            cstr!("start"),
+            Some(to_ruby_cfunc3(TimerThreadScheduler::rb_start)),
+            2,
+        );
+        rb_define_method(
+            rb_mPf2_TimerThreadScheduler,
+            cstr!("stop"),
+            Some(to_ruby_cfunc1(TimerThreadScheduler::rb_stop)),
+            0,
+        );
+    }
+}
data/ext/pf2/src/sample.rs
@@ -0,0 +1,45 @@
+use std::time::Instant;
+
+use rb_sys::*;
+
+const MAX_STACK_DEPTH: usize = 500;
+
+#[derive(Debug, PartialEq)]
+pub struct Sample {
+    pub ruby_thread: VALUE,
+    pub timestamp: Instant,
+    pub line_count: i32,
+    pub frames: [VALUE; MAX_STACK_DEPTH],
+    pub linenos: [i32; MAX_STACK_DEPTH],
+}
+
+impl Sample {
+    // Nearly async-signal-safe
+    // (rb_profile_thread_frames isn't defined as a-s-s)
+    pub fn capture(ruby_thread: VALUE) -> Self {
+        let mut sample = Sample {
+            ruby_thread,
+            timestamp: Instant::now(),
+            line_count: 0,
+            frames: [0; MAX_STACK_DEPTH],
+            linenos: [0; MAX_STACK_DEPTH],
+        };
+        unsafe {
+            sample.line_count = rb_profile_thread_frames(
+                ruby_thread,
+                0,
+                MAX_STACK_DEPTH as i32, // was 2000; the limit must not exceed the frames/linenos capacity
+                sample.frames.as_mut_ptr(),
+                sample.linenos.as_mut_ptr(),
+            );
+        };
+        sample
+    }
+
+    pub unsafe fn dmark(&self) {
+        rb_gc_mark(self.ruby_thread);
+        for frame in self.frames.iter() {
+            rb_gc_mark(*frame);
+        }
+    }
+}
data/ext/pf2/src/signal_scheduler/configuration.rs
@@ -0,0 +1,24 @@
+use std::str::FromStr;
+
+#[derive(Clone, Debug)]
+pub struct Configuration {
+    pub time_mode: TimeMode,
+}
+
+#[derive(Clone, Debug)]
+pub enum TimeMode {
+    CpuTime,
+    WallTime,
+}
+
+impl FromStr for TimeMode {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "cpu" => Ok(Self::CpuTime),
+            "wall" => Ok(Self::WallTime),
+            _ => Err(()),
+        }
+    }
+}
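
(Example, not part of the diff.) Because TimeMode implements FromStr, option strings coming from the Ruby side can be parsed with str::parse. A minimal sketch, where the fallback choice is hypothetical:

    // Hypothetical illustration of the FromStr impl above.
    fn parse_time_mode(raw: &str) -> TimeMode {
        // "cpu" -> TimeMode::CpuTime, "wall" -> TimeMode::WallTime;
        // anything else falls back to CPU time (an arbitrary choice for this sketch).
        raw.parse::<TimeMode>().unwrap_or(TimeMode::CpuTime)
    }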
data/ext/pf2/src/signal_scheduler/timer_installer.rs
@@ -0,0 +1,192 @@
+use std::collections::HashMap;
+use std::ffi::c_void;
+use std::mem;
+use std::mem::ManuallyDrop;
+use std::ptr::null_mut;
+use std::sync::{Mutex, RwLock};
+use std::{collections::HashSet, sync::Arc};
+
+use rb_sys::*;
+
+use crate::signal_scheduler::SignalHandlerArgs;
+
+use super::configuration::Configuration;
+use crate::profile::Profile;
+
+// We could avoid deferring the timer creation by combining pthread_getcpuclockid(3) and timer_create(2) here,
+// but we're not doing so since (1) Ruby does not expose the pthread_self() of a Ruby Thread
+// (which is actually stored in th->nt->thread_id), and (2) pthread_getcpuclockid(3) is not portable
+// in the first place (e.g. not available on macOS).
+pub struct TimerInstaller {
+    internal: Box<Mutex<Internal>>,
+}
+
+struct Internal {
+    configuration: Configuration,
+    target_ruby_threads: HashSet<VALUE>,
+    registered_pthread_ids: HashSet<libc::pthread_t>,
+    kernel_thread_id_to_ruby_thread_map: HashMap<libc::pid_t, VALUE>,
+    profile: Arc<RwLock<Profile>>,
+}
+
+impl TimerInstaller {
+    // Registers a callback that gets called when a Ruby Thread is resumed.
+    // The callback creates a timer for the thread.
+    pub fn install_timer_to_ruby_threads(
+        configuration: Configuration,
+        ruby_threads: &HashSet<VALUE>,
+        profile: Arc<RwLock<Profile>>,
+        track_new_threads: bool,
+    ) {
+        let registrar = Self {
+            internal: Box::new(Mutex::new(Internal {
+                configuration,
+                target_ruby_threads: ruby_threads.clone(),
+                registered_pthread_ids: HashSet::new(),
+                kernel_thread_id_to_ruby_thread_map: HashMap::new(),
+                profile,
+            })),
+        };
+
+        let ptr = Box::into_raw(registrar.internal);
+        unsafe {
+            rb_internal_thread_add_event_hook(
+                Some(Self::on_thread_resume),
+                RUBY_INTERNAL_THREAD_EVENT_RESUMED,
+                ptr as *mut c_void,
+            );
+            // Spawn a no-op Thread to fire the event hook
+            // (at least 2 Ruby Threads must be active for the RESUMED hook to be fired)
+            rb_thread_create(Some(Self::do_nothing), null_mut());
+        };
+
+        if track_new_threads {
+            unsafe {
+                rb_internal_thread_add_event_hook(
+                    Some(Self::on_thread_start),
+                    RUBY_INTERNAL_THREAD_EVENT_STARTED,
+                    ptr as *mut c_void,
+                );
+            };
+        }
+    }
+
+    unsafe extern "C" fn do_nothing(_: *mut c_void) -> VALUE {
+        Qnil.into()
+    }
+
+    // Thread resume callback
+    unsafe extern "C" fn on_thread_resume(
+        _flag: rb_event_flag_t,
+        data: *const rb_internal_thread_event_data,
+        custom_data: *mut c_void,
+    ) {
+        // A pointer to the Mutex<Internal> is passed as custom_data
+        let internal =
+            unsafe { ManuallyDrop::new(Box::from_raw(custom_data as *mut Mutex<Internal>)) };
+        let mut internal = internal.lock().unwrap();
+
+        // Check if the current thread is a target Ruby Thread
+        let current_ruby_thread: VALUE = unsafe { (*data).thread };
+        if !internal.target_ruby_threads.contains(&current_ruby_thread) {
+            return;
+        }
+
+        // Check if the current thread is already registered
+        let current_pthread_id = unsafe { libc::pthread_self() };
+        if internal
+            .registered_pthread_ids
+            .contains(&current_pthread_id)
+        {
+            return;
+        }
+
+        // Record the pthread ID of the current thread
+        internal.registered_pthread_ids.insert(current_pthread_id);
+        // Keep a mapping from kernel thread ID to Ruby Thread
+        internal
+            .kernel_thread_id_to_ruby_thread_map
+            .insert(unsafe { libc::gettid() }, current_ruby_thread);
+
+        Self::register_timer_to_current_thread(
+            &internal.configuration,
+            &internal.profile,
+            &internal.kernel_thread_id_to_ruby_thread_map,
+        );
+
+        // TODO: Remove the hook when all threads have been registered
+    }
+
+    // Thread start callback
+    unsafe extern "C" fn on_thread_start(
+        _flag: rb_event_flag_t,
+        data: *const rb_internal_thread_event_data,
+        custom_data: *mut c_void,
+    ) {
+        // A pointer to the Mutex<Internal> is passed as custom_data
+        let internal =
+            unsafe { ManuallyDrop::new(Box::from_raw(custom_data as *mut Mutex<Internal>)) };
+        let mut internal = internal.lock().unwrap();
+
+        let current_ruby_thread: VALUE = unsafe { (*data).thread };
+        internal.target_ruby_threads.insert(current_ruby_thread);
+    }
+
+    // Creates a new POSIX timer which invokes sampling for the thread that called this function.
+    fn register_timer_to_current_thread(
+        configuration: &Configuration,
+        profile: &Arc<RwLock<Profile>>,
+        kernel_thread_id_to_ruby_thread_map: &HashMap<libc::pid_t, VALUE>,
+    ) {
+        let current_pthread_id = unsafe { libc::pthread_self() };
+        let context_ruby_thread: VALUE = unsafe {
+            *(kernel_thread_id_to_ruby_thread_map
+                .get(&(libc::gettid()))
+                .unwrap())
+        };
+
+        // NOTE: This Box is never dropped
+        let signal_handler_args = Box::new(SignalHandlerArgs {
+            profile: Arc::clone(profile),
+            context_ruby_thread,
+        });
+
+        // Create a signal event
+        let mut sigevent: libc::sigevent = unsafe { mem::zeroed() };
+        // Note: SIGEV_THREAD_ID is Linux-specific. On other platforms, we would need to
+        // "trampoline" the signal, as any pthread can receive the signal.
+        sigevent.sigev_notify = libc::SIGEV_THREAD_ID;
+        sigevent.sigev_notify_thread_id =
+            unsafe { libc::syscall(libc::SYS_gettid).try_into().unwrap() }; // The kernel thread ID
+        sigevent.sigev_signo = libc::SIGALRM;
+        // Pass required args to the signal handler
+        sigevent.sigev_value.sival_ptr = Box::into_raw(signal_handler_args) as *mut c_void;
+
+        // Create and configure a timer to fire every 10 ms of CPU time
+        let mut timer: libc::timer_t = unsafe { mem::zeroed() };
+        match configuration.time_mode {
+            crate::signal_scheduler::TimeMode::CpuTime => {
+                let err = unsafe {
+                    libc::timer_create(libc::CLOCK_THREAD_CPUTIME_ID, &mut sigevent, &mut timer)
+                };
+                if err != 0 {
+                    panic!("timer_create failed: {}", err);
+                }
+            }
+            crate::signal_scheduler::TimeMode::WallTime => {
+                todo!("WallTime is not supported yet");
+            }
+        };
+        let mut its: libc::itimerspec = unsafe { mem::zeroed() };
+        its.it_interval.tv_sec = 0;
+        its.it_interval.tv_nsec = 10_000_000; // 10 ms
+        its.it_value.tv_sec = 0;
+        its.it_value.tv_nsec = 10_000_000;
+        let err = unsafe { libc::timer_settime(timer, 0, &its, null_mut()) };
+        if err != 0 {
+            panic!("timer_settime failed: {}", err);
+        }
+
+        log::debug!("timer registered for thread {}", current_pthread_id);
+    }
+}
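
(Example, not part of the diff.) The consuming side of this setup lives in signal_scheduler.rs (+242 in this release, not reproduced in this excerpt): a SIGALRM handler recovers the SignalHandlerArgs smuggled through sigev_value.sival_ptr and pushes a capture into the profile's ring buffer. A rough sketch of that shape; the handler name is assumed, and it uses libc's si_value() accessor where the gem itself ships a small C shim (src/siginfo_t.c):

    // Sketch only; the real handler is in signal_scheduler.rs and may differ.
    unsafe extern "C" fn sample_on_signal(
        _signo: libc::c_int,
        info: *mut libc::siginfo_t,
        _ucontext: *mut libc::c_void,
    ) {
        // Recover the per-thread args passed via sigev_value.sival_ptr.
        let args = &*((*info).si_value().sival_ptr as *const SignalHandlerArgs);
        // try_write: never block inside a signal handler.
        if let Ok(mut profile) = args.profile.try_write() {
            // Ringbuffer::push is async-signal-safe; on Full, the sample is dropped.
            let _ = profile
                .temporary_sample_buffer
                .push(Sample::capture(args.context_ruby_thread));
        }
    }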