rrtrace 0.1.0 → 0.2.0

data/src/main.rs CHANGED
@@ -1,21 +1,29 @@
+ use crate::object_scatter::ObjectScatter;
+ use crate::oneshot_channel::{OneshotReceiver, OneshotSender};
  use crate::renderer::Renderer;
  use crate::ringbuffer::{EventRingBuffer, RRTraceEvent};
- use crate::trace_state::{FastTrace, SlowTrace, VISIBLE_DURATION};
+ use crate::trace_state::{FastTrace, SlowTrace};
+ use crate::universal_notifier::UniversalNotifier;
+ use std::collections::VecDeque;
  use std::ffi::CString;
- use std::sync::atomic::AtomicU64;
- use std::sync::{Arc, atomic};
+ use std::num::NonZeroUsize;
+ use std::sync::{Arc, OnceLock};
+ use std::time::{Duration, Instant};
  use std::{env, mem, thread};
  use winit::application::ApplicationHandler;
  use winit::event::*;
  use winit::event_loop::{ControlFlow, EventLoop};
  use winit::window::Window;

+ mod object_scatter;
+ mod oneshot_channel;
  mod renderer;
  mod ringbuffer;
  #[cfg_attr(unix, path = "shm_unix.rs")]
  #[cfg_attr(windows, path = "shm_windows.rs")]
  mod shm;
  mod trace_state;
+ mod universal_notifier;

  struct App {
  window: Option<Arc<Window>>,
@@ -77,14 +85,17 @@ impl ApplicationHandler for App {
  }

  fn about_to_wait(&mut self, _event_loop: &winit::event_loop::ActiveEventLoop) {
- let updated = self.renderer.sync();
- if updated && let Some(window) = self.window.as_ref() {
+ self.renderer.sync();
+ if let Some(window) = self.window.as_ref() {
  window.request_redraw();
  }
  }
  }

+ static BASE_TIME: OnceLock<Instant> = OnceLock::new();
+
  fn main() {
+ BASE_TIME.set(Instant::now()).unwrap();
  assert_eq!(env::args().len(), 2, "Usage: rrtrace <shm_name>");
  let shm_name = env::args().nth(1).unwrap();

@@ -141,7 +152,7 @@ async fn init_gpu() -> (wgpu::Instance, wgpu::Adapter, wgpu::Device, wgpu::Queue

  fn queue_pipe_thread(
  shm_name: String,
- event_queue: Arc<crossbeam_queue::SegQueue<Vec<RRTraceEvent>>>,
+ event_queue: Arc<crossbeam_queue::SegQueue<Arc<[RRTraceEvent]>>>,
  ) -> impl FnOnce() + Send + 'static {
  move || {
  let shm = unsafe {
@@ -152,46 +163,117 @@ fn queue_pipe_thread(
  };
  let mut ringbuffer = unsafe { EventRingBuffer::new(shm.as_ptr(), move || drop(shm)) };
  let mut buffer = vec![Default::default(); 65536];
+ let mut offset = 0;
+ let mut before_send_time = 0;
  loop {
- let count = ringbuffer.read(&mut buffer);
+ let count = ringbuffer.read(&mut buffer[offset..]);
  if count > 0 {
- buffer.truncate(count);
- event_queue.push(buffer.clone());
- buffer.resize_with(65536, Default::default);
+ offset += count;
+ let chunk = &mut buffer[..offset];
+ chunk.sort_by_key(RRTraceEvent::timestamp);
+ let last_time = chunk.last().unwrap().timestamp();
+ if offset >= 1024 || last_time.saturating_sub(before_send_time) > 1_000_000 {
+ event_queue.push(Arc::from(&chunk[..]));
+ offset = 0;
+ before_send_time = last_time;
+ }
+ } else {
+ thread::sleep(Duration::from_millis(1));
  }
  }
  }
  }
  fn trace_thread(
- event_queue: Arc<crossbeam_queue::SegQueue<Vec<RRTraceEvent>>>,
+ event_queue: Arc<crossbeam_queue::SegQueue<Arc<[RRTraceEvent]>>>,
  result_queue: Arc<crossbeam_queue::SegQueue<SlowTrace>>,
  ) -> impl FnOnce() + Send + 'static {
  move || {
- static LATEST_END_TIME: AtomicU64 = AtomicU64::new(0);
+ let parallel_trace_threads = thread::available_parallelism()
+ .map_or(0, NonZeroUsize::get)
+ .saturating_sub(2)
+ .max(1);
+
+ let universal_notifier = UniversalNotifier::new();
+ let (mut second_stage_sender, second_stage_receivers) =
+ ObjectScatter::<(u64, Arc<FastTrace>, Arc<[RRTraceEvent]>)>::new(
+ parallel_trace_threads,
+ );
+ let first_stage_event_queue = Arc::new(crossbeam_queue::SegQueue::<(
+ Arc<[RRTraceEvent]>,
+ OneshotSender<FastTrace>,
+ )>::new());
+ second_stage_receivers
+ .enumerate()
+ .for_each(|(i, mut second_stage_receiver)| {
+ let universal_notifier = universal_notifier.clone();
+ let first_stage_event_queue = Arc::clone(&first_stage_event_queue);
+ let result_queue = Arc::clone(&result_queue);
+ thread::Builder::new()
+ .name(format!("slow_trace_thread_{}", i))
+ .spawn(move || {
+ loop {
+ let v = universal_notifier.value();
+ if let Some((events, result_slot)) = first_stage_event_queue.pop() {
+ let trace = FastTrace::from_events(&events);
+ result_slot.send(trace);
+ continue;
+ }
+ if let Some(data) = second_stage_receiver.try_receive() {
+ let (start_time, fast_trace, events) = *data;
+ let trace = SlowTrace::trace(start_time, &fast_trace, &events);
+ result_queue.push(trace);
+ continue;
+ }
+ universal_notifier.wait(v);
+ }
+ })
+ .unwrap();
+ });
+ let mut first = true;
  let mut start_time = 0u64;
- let mut fast_trace = FastTrace::new();
+ let mut local_event_queue = VecDeque::new();
+ let mut first_stage_result_queue = VecDeque::new();
+ let mut first_stage_results = VecDeque::new();
+ let mut first_stage_result = None::<OneshotReceiver<FastTrace>>;
+ let mut trace_accumulate = None::<Arc<FastTrace>>;
  loop {
- let Some(events) = event_queue.pop() else {
- continue;
- };
- rayon_core::spawn({
- let fast_trace = fast_trace.clone();
- let events = events.clone();
- let result_queue = result_queue.clone();
- move || {
- if start_time + VISIBLE_DURATION
- < LATEST_END_TIME.load(atomic::Ordering::Relaxed)
- {
- return;
+ if first_stage_result.is_none()
+ && let Some(receiver) = first_stage_result_queue.pop_front()
+ {
+ first_stage_result = Some(receiver);
+ }
+ if let Some(receiver) = first_stage_result.take() {
+ match receiver.try_receive() {
+ Ok(mut trace) => {
+ match trace_accumulate.take() {
+ None => trace.mark_as_first(),
+ Some(acc) => acc.merge_into(&mut trace),
+ }
+ let trace = Arc::new(trace);
+ trace_accumulate = Some(Arc::clone(&trace));
+ first_stage_results.push_back(trace);
+ first_stage_result = first_stage_result_queue.pop_front();
  }
- let slow_trace = SlowTrace::trace(start_time, fast_trace, &events);
- result_queue.push(slow_trace);
+ Err(receiver) => first_stage_result = Some(receiver),
+ };
+ }
+ if !first_stage_results.is_empty() && !local_event_queue.is_empty() {
+ let trace = first_stage_results.pop_front().unwrap();
+ let (start_time, events) = local_event_queue.pop_front().unwrap();
+ second_stage_sender.send((start_time, trace, events));
+ universal_notifier.notify();
+ }
+ if let Some(events) = event_queue.pop() {
+ let end_time = events.last().unwrap().timestamp();
+ let (sender, receiver) = oneshot_channel::channel();
+ first_stage_event_queue.push((Arc::clone(&events), sender));
+ universal_notifier.notify();
+ first_stage_result_queue.push_back(receiver);
+ if !mem::replace(&mut first, false) {
+ local_event_queue.push_back((start_time, events));
  }
- });
- fast_trace.process_events(&events);
- let end_time = events.last().unwrap().timestamp();
- LATEST_END_TIME.store(end_time, atomic::Ordering::Relaxed);
- start_time = end_time;
+ start_time = end_time;
+ }
  }
  }
  }
data/src/object_scatter.rs ADDED
@@ -0,0 +1,110 @@
+ use std::sync::atomic::AtomicPtr;
+ use std::sync::{Arc, atomic};
+ use std::{iter, marker::PhantomData, ptr, rc::Rc};
+
+ #[repr(align(128))]
+ struct Line<T> {
+ array: [AtomicPtr<T>; 16],
+ }
+
+ impl<T> Line<T> {
+ fn new() -> Line<T> {
+ Line {
+ array: [const { AtomicPtr::new(ptr::null_mut()) }; 16],
+ }
+ }
+
+ #[inline(always)]
+ fn get(&self, index: usize) -> &AtomicPtr<T> {
+ &self.array[index]
+ }
+ }
+
+ impl<T> Drop for Line<T> {
+ fn drop(&mut self) {
+ self.array.iter_mut().for_each(|ptr| {
+ let ptr = ptr.get_mut();
+ if !ptr.is_null() {
+ unsafe {
+ drop(Box::from_raw(ptr));
+ }
+ }
+ })
+ }
+ }
+
+ pub struct ObjectScatter<T> {
+ array: Arc<[Line<T>]>,
+ index: usize,
+ _not_send: PhantomData<Rc<()>>,
+ }
+
+ pub struct ObjectScatterReceiver<T> {
+ array: Arc<[Line<T>]>,
+ slot: usize,
+ index: usize,
+ }
+
+ impl<T: Send> ObjectScatter<T> {
+ pub fn new(
+ count: usize,
+ ) -> (
+ ObjectScatter<T>,
+ impl ExactSizeIterator<Item = ObjectScatterReceiver<T>>,
+ ) {
+ assert!(count > 0);
+ let array = Arc::from(Vec::from_iter(
+ iter::from_fn(|| Some(Line::new())).take(count),
+ ));
+ let scatter = ObjectScatter {
+ array: Arc::clone(&array),
+ index: 0,
+ _not_send: PhantomData,
+ };
+ let receiver_iter = (0..count).map(move |slot| ObjectScatterReceiver {
+ array: Arc::clone(&array),
+ slot,
+ index: 0,
+ });
+ (scatter, receiver_iter)
+ }
+
+ pub fn send(&mut self, value: T) {
+ let ptr = Box::new(value);
+ let ptr = Box::into_raw(ptr);
+ let i = self.index;
+ let slot = i % self.array.len();
+ let index = i / self.array.len();
+ let line = &self.array[slot];
+ let old_ptr = line.get(index).swap(ptr, atomic::Ordering::Release);
+ if !old_ptr.is_null() {
+ unsafe {
+ drop(Box::from_raw(old_ptr));
+ }
+ }
+ self.index += 1;
+ if self.index >= self.array.len() * 16 {
+ self.index = 0;
+ }
+ }
+ }
+
+ impl<T: Send> ObjectScatterReceiver<T> {
+ pub fn try_receive(&mut self) -> Option<Box<T>> {
+ let line = &self.array[self.slot];
+ for _ in 0..16 {
+ let index = self.index;
+ let ptr = line
+ .get(index)
+ .swap(ptr::null_mut(), atomic::Ordering::Acquire);
+ self.index += 1;
+ if self.index >= 16 {
+ self.index = 0;
+ }
+ if !ptr.is_null() {
+ return unsafe { Some(Box::from_raw(ptr)) };
+ }
+ }
+ None
+ }
+ }
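For orientation, here is a minimal usage sketch of the ObjectScatter API introduced above, assuming this hunk is the new data/src/object_scatter.rs module; the scatter_demo function, the u64 payload, and the spin loop are illustrative and not part of the package.

    // Illustrative only: a single producer fans boxed values out to
    // per-worker receivers, each polling its own cache-line-aligned slots.
    use crate::object_scatter::ObjectScatter;
    use std::thread;

    fn scatter_demo() {
        let workers = 4;
        let (mut sender, receivers) = ObjectScatter::<u64>::new(workers);

        let handles: Vec<_> = receivers
            .map(|mut receiver| {
                thread::spawn(move || {
                    // Spin until a value lands in one of this receiver's 16 slots.
                    loop {
                        if let Some(value) = receiver.try_receive() {
                            break *value;
                        }
                        thread::yield_now();
                    }
                })
            })
            .collect();

        // send() round-robins across the workers' lines; writing to a slot that
        // still holds an unconsumed value replaces (and frees) it, so the
        // scatter is lossy by design when the producer outruns the consumers.
        for v in 0..workers as u64 {
            sender.send(v);
        }

        for handle in handles {
            let _ = handle.join();
        }
    }

Note that the PhantomData<Rc<()>> field keeps the sending half !Send, so only the receivers cross thread boundaries, matching how trace_thread drives it in main.rs above.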
data/src/oneshot_channel.rs ADDED
@@ -0,0 +1,74 @@
+ use std::cell::UnsafeCell;
+ use std::mem::MaybeUninit;
+ use std::sync::atomic::AtomicBool;
+ use std::sync::{Arc, atomic};
+
+ struct OneshotSlot<T> {
+ value: UnsafeCell<MaybeUninit<T>>,
+ initialized: AtomicBool,
+ }
+
+ unsafe impl<T: Send> Send for OneshotSlot<T> {}
+ unsafe impl<T: Sync> Sync for OneshotSlot<T> {}
+
+ impl<T> Drop for OneshotSlot<T> {
+ fn drop(&mut self) {
+ if *self.initialized.get_mut() {
+ unsafe {
+ self.value.get_mut().assume_init_drop();
+ }
+ }
+ }
+ }
+
+ pub struct OneshotSender<T> {
+ slot: Arc<OneshotSlot<T>>,
+ }
+
+ impl<T> OneshotSender<T> {
+ #[inline(always)]
+ pub fn send(self, value: T) {
+ unsafe {
+ MaybeUninit::write(&mut *self.slot.value.get(), value);
+ }
+ self.slot.initialized.store(true, atomic::Ordering::Release);
+ }
+ }
+
+ pub struct OneshotReceiver<T> {
+ slot: Arc<OneshotSlot<T>>,
+ }
+
+ impl<T> OneshotReceiver<T> {
+ #[inline(always)]
+ pub fn try_receive(self) -> Result<T, OneshotReceiver<T>> {
+ if self
+ .slot
+ .initialized
+ .compare_exchange(
+ true,
+ false,
+ atomic::Ordering::Acquire,
+ atomic::Ordering::Relaxed,
+ )
+ .is_ok()
+ {
+ let value = unsafe { (*self.slot.value.get()).assume_init_read() };
+ Ok(value)
+ } else {
+ Err(self)
+ }
+ }
+ }
+
+ pub fn channel<T>() -> (OneshotSender<T>, OneshotReceiver<T>) {
+ let slot = Arc::new(OneshotSlot {
+ value: UnsafeCell::new(MaybeUninit::uninit()),
+ initialized: AtomicBool::new(false),
+ });
+ let sender = OneshotSender {
+ slot: Arc::clone(&slot),
+ };
+ let receiver = OneshotReceiver { slot };
+ (sender, receiver)
+ }
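Similarly, a small sketch of the single-use channel added here, assuming this hunk is the new data/src/oneshot_channel.rs; the oneshot_demo function, the String payload, and the busy-polling loop are illustrative (in main.rs the receiver is polled from trace_thread's main loop rather than spun on).

    // Illustrative only: the sender publishes exactly one value; try_receive
    // consumes the receiver and hands it back via Err(self) until the value
    // has been written, so the receiver is threaded through the polling loop.
    use crate::oneshot_channel;
    use std::thread;

    fn oneshot_demo() {
        let (sender, receiver) = oneshot_channel::channel::<String>();

        let worker = thread::spawn(move || {
            // send() consumes the sender, so a value can be published at most once.
            sender.send("done".to_string());
        });

        let mut pending = receiver;
        let value = loop {
            match pending.try_receive() {
                Ok(value) => break value,
                Err(receiver) => pending = receiver,
            }
        };

        worker.join().unwrap();
        assert_eq!(value, "done");
    }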
@@ -57,20 +57,20 @@ impl FreeList
  let mut start = range.start;
  let mut end = range.end;

- if let Some((&next_start, &next_end)) = self.by_start.range(end..).next() {
- if next_start == end {
- self.by_size.remove(&(next_end - next_start, next_start));
- self.by_start.remove(&next_start);
- end = next_end;
- }
+ if let Some((&next_start, &next_end)) = self.by_start.range(end..).next()
+ && next_start == end
+ {
+ self.by_size.remove(&(next_end - next_start, next_start));
+ self.by_start.remove(&next_start);
+ end = next_end;
  }

- if let Some((&prev_start, &prev_end)) = self.by_start.range(..start).next_back() {
- if prev_end == start {
- self.by_size.remove(&(prev_end - prev_start, prev_start));
- self.by_start.remove(&prev_start);
- start = prev_start;
- }
+ if let Some((&prev_start, &prev_end)) = self.by_start.range(..start).next_back()
+ && prev_end == start
+ {
+ self.by_size.remove(&(prev_end - prev_start, prev_start));
+ self.by_start.remove(&prev_start);
+ start = prev_start;
  }

  self.by_start.insert(start, end);
@@ -96,6 +96,7 @@ impl<T> VertexArena<T> {
  usage,
  mapped_at_creation: false,
  });
+ #[allow(clippy::reversed_empty_ranges)]
  VertexArena {
  data: Vec::new(),
  device,
@@ -232,7 +233,10 @@ impl<T> VertexArena<T> {
  }
  }

- self.dirty_range = usize::MAX..0;
+ #[allow(clippy::reversed_empty_ranges)]
+ {
+ self.dirty_range = usize::MAX..0;
+ }
  }

  pub fn read_buffers(&self, mut f: impl FnMut(&Buffer, usize)) {