hyperion-rb 2.16.3 → 2.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,645 @@
1
+ //! Hotpath ring — multishot accept + multishot recv (with PBUF_RING
2
+ //! kernel buffers) + send SQEs. One ring per worker; the accept fiber
3
+ //! drains the unified completion queue.
4
+ //!
5
+ //! Per spec §#2: connection state stays in Ruby; this module owns
6
+ //! submission/completion + buffer-ring lifecycle.
7
+ //!
8
+ //! ## Drop-ordering contract
9
+ //!
10
+ //! `HotpathRing` declares fields in the order: `buffer_ring`, `ring`,
11
+ //! `healthy`. Rust drops fields in declaration order, so auto-drop
12
+ //! will free `buffer_ring` (frees backing memory) BEFORE closing the
13
+ //! `ring` fd. The explicit `Drop` impl runs `unregister_buf_ring`
14
+ //! BEFORE either auto-drop runs — while both allocations are still
15
+ //! alive — which is the required sequence:
16
+ //!
17
+ //! 1. `Drop::drop` calls `unregister_buf_ring` ← user code, runs first
18
+ //! 2. auto-drop `buffer_ring` ← frees backing memory
19
+ //! 3. auto-drop `ring` ← closes io_uring fd
20
+ //!
21
+ //! If the kernel retains a buf-ring registration while userspace frees
22
+ //! the backing memory, the next multishot recv CQE can write into freed
23
+ //! memory. This ordering prevents that UAF.
24
+
25
+ use std::os::raw::c_int;
26
+ use std::panic::catch_unwind;
27
+
28
/// POSIX EINVAL (22) used for null-pointer sentinel return values.
/// Hard-coded so we don't depend on `libc` in platform-uniform C ABI code.
const EINVAL: c_int = 22;

/// Op kind for completions delivered to Ruby. The numeric values are
/// packed into the high byte of `user_data` so the Ruby side can
/// dispatch by integer comparison without `rb_intern` lookup.
/// Stable ABI — do not renumber.
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
pub enum OpKind {
    Accept = 1,
    Recv = 2,
    Send = 3,
    // NOTE(review): no submission path in this module tags a SQE with
    // Close — presumably reserved for the Ruby side / a future close
    // opcode. Confirm before removing; the value is part of the stable ABI.
    Close = 4,
}
44
+
45
/// FFI-safe completion record. Returned in batches via `wait_completions`.
/// `#[repr(C)]` so Ruby (via Fiddle) can index it by byte offset.
///
/// Field layout (24 bytes total on 64-bit):
///   u8  op_kind (offset 0,  size 1)
///   pad         (offset 1,  size 3)
///   i32 fd      (offset 4,  size 4)
///   i64 result  (offset 8,  size 8)
///   i32 buf_id  (offset 16, size 4)
///   u32 flags   (offset 20, size 4)
///
/// `buf_id` is `-1` when the CQE is not a recv with a buffer id
/// (`IORING_CQE_F_BUFFER` not set, or `op_kind != Recv`).
#[repr(C)]
pub struct Completion {
    pub op_kind: u8,
    /// Explicit padding; writers keep it zeroed (see `wait_completions`,
    /// which stores `[0; 3]`) so the struct's bytes are deterministic.
    pub _pad: [u8; 3],
    pub fd: i32,
    pub result: i64,
    pub buf_id: i32,
    pub flags: u32,
}

// Compile-time ABI guard: Ruby (via Fiddle) reads Completion by byte
// offset using the size assumed in lib/hyperion/io_uring.rb's
// HotpathRing::COMPLETION_BYTES constant. If a future field/padding
// change drifts this size, build fails here with a clear message
// rather than producing silent garbage at runtime.
const _: () = assert!(
    std::mem::size_of::<Completion>() == 24,
    "Completion ABI size changed — update Ruby Fiddle offsets in lib/hyperion/io_uring.rb"
);
77
+
78
+ // ===== Linux implementation =====
79
+
80
#[cfg(target_os = "linux")]
mod linux_impl {
    use super::*;
    use crate::buffer_ring::BufferRing;
    use io_uring::{cqueue, opcode, squeue, types, IoUring};
    use std::os::unix::io::RawFd;
    use std::sync::atomic::{AtomicBool, Ordering};

    /// Per-worker hotpath ring. Owns one `IoUring` instance and one
    /// `BufferRing` (PBUF_RING kernel buffer pool for multishot recv).
    ///
    /// # Field declaration order
    ///
    /// Fields are declared `buffer_ring` → `ring` → `healthy` so that
    /// Rust's auto-drop (declaration order) frees `buffer_ring` first
    /// (backing memory) and then `ring` (io_uring fd). The explicit
    /// `Drop` impl calls `unregister_buf_ring` BEFORE either auto-drop
    /// runs — see module-level doc for the full ordering proof.
    pub struct HotpathRing {
        /// Kernel-managed receive buffer pool. Dropped FIRST by
        /// auto-drop (frees backing memory). The explicit `Drop` impl
        /// has already unregistered the buf-ring with the kernel, so
        /// this free is safe.
        pub buffer_ring: BufferRing,
        /// The io_uring instance. Dropped SECOND by auto-drop (closes
        /// the ring fd).
        pub ring: IoUring<squeue::Entry, cqueue::Entry>,
        /// Set to `false` on `submit_and_wait` failure. Ruby checks
        /// this after each batch to detect ring corruption.
        pub healthy: AtomicBool,
    }

    impl HotpathRing {
        /// Allocate a ring of `queue_depth` SQE slots + CQE slots, and
        /// register a PBUF_RING of `n_bufs` buffers of `buf_size` bytes.
        ///
        /// `n_bufs` must be a power of two and ≤ 32768 (kernel limit).
        /// Returns `Err` on kernel rejection (ENOSYS < 5.19, EINVAL for
        /// bad params, EPERM in seccomp sandboxes, etc.).
        pub fn new(queue_depth: u32, n_bufs: u16, buf_size: u32)
            -> std::io::Result<Self>
        {
            // Default builder flags — no SQPOLL/TASKRUN variants are
            // requested here; the ring is driven synchronously by
            // `submit_and_wait` below.
            let mut ring: IoUring<squeue::Entry, cqueue::Entry> =
                IoUring::builder().build(queue_depth)?;
            // group_id 0 — one buffer ring per HotpathRing.
            let buffer_ring = BufferRing::new(&mut ring, 0, n_bufs, buf_size)?;
            Ok(Self {
                buffer_ring,
                ring,
                healthy: AtomicBool::new(true),
            })
        }

        /// Post an `AcceptMulti` SQE for `listener_fd`.
        ///
        /// The multishot accept keeps reposting itself after each
        /// accepted connection until the listener is closed or the SQE
        /// is cancelled. Each accepted fd arrives as a separate CQE
        /// drained by `wait_completions`.
        ///
        /// CONTRACT: when `wait_completions` returns -1 (sets
        /// `healthy = false`), the Ruby caller MUST stop issuing any
        /// further `submit_*` calls and engage the per-worker accept4
        /// fallback. This method does NOT guard on `is_healthy()`
        /// itself — it would unconditionally push the SQE onto a
        /// broken ring and fail at submit() with a confusing OS error.
        /// The Ruby side checks `is_healthy()` after each
        /// wait_completions return.
        ///
        /// Available since kernel 5.19.
        pub fn submit_accept_multishot(&mut self, listener_fd: RawFd)
            -> Result<(), i32>
        {
            // user_data encodes: high byte = OpKind, low 32 bits = fd.
            let ud = ((OpKind::Accept as u64) << 56)
                | (listener_fd as u32 as u64);
            let sqe = opcode::AcceptMulti::new(types::Fd(listener_fd))
                .build()
                .user_data(ud);
            unsafe {
                // EAGAIN stands in for "SQ full"; the underlying push
                // error detail is deliberately collapsed.
                self.ring.submission().push(&sqe)
                    .map_err(|_| libc::EAGAIN)?;
            }
            // All submit errors collapse to EIO for the Ruby side.
            self.ring.submit().map_err(|_| libc::EIO)?;
            Ok(())
        }

        /// Post a `RecvMulti` SQE for `fd` backed by `buffer_ring`.
        ///
        /// The multishot recv rearms itself after each CQE unless
        /// `IORING_CQE_F_MORE` is absent, in which case the caller
        /// must reissue. Each CQE carries a buf_id (extracted by
        /// `wait_completions`) that the caller must `release_buffer`
        /// after consuming.
        ///
        /// Available since kernel 6.0.
        pub fn submit_recv_multishot(&mut self, fd: RawFd)
            -> Result<(), i32>
        {
            let group_id = self.buffer_ring.group_id();
            let ud = ((OpKind::Recv as u64) << 56) | (fd as u32 as u64);
            let sqe = opcode::RecvMulti::new(types::Fd(fd), group_id)
                .build()
                .user_data(ud);
            unsafe {
                self.ring.submission().push(&sqe)
                    .map_err(|_| libc::EAGAIN)?;
            }
            self.ring.submit().map_err(|_| libc::EIO)?;
            Ok(())
        }

        /// Post a `Writev` SQE for `fd`.
        ///
        /// The caller is responsible for keeping `iov_ptr` (and the
        /// underlying buffers) alive until the matching send CQE is
        /// returned by `wait_completions`.
        pub fn submit_send(
            &mut self,
            fd: RawFd,
            iov_ptr: *const libc::iovec,
            iov_count: u32,
        ) -> Result<(), i32> {
            let ud = ((OpKind::Send as u64) << 56) | (fd as u32 as u64);
            let sqe = opcode::Writev::new(types::Fd(fd), iov_ptr, iov_count)
                .build()
                .user_data(ud);
            unsafe {
                self.ring.submission().push(&sqe)
                    .map_err(|_| libc::EAGAIN)?;
            }
            self.ring.submit().map_err(|_| libc::EIO)?;
            Ok(())
        }

        /// Submit any pending SQEs and wait for at least `min_complete`
        /// CQEs. Drains up to `out_cap` completions into `out`.
        ///
        /// `_timeout_ms` is reserved for a future
        /// `io_uring_wait_cqe_timeout` path; for now we use
        /// `submit_and_wait` which blocks until `min_complete` CQEs
        /// arrive.
        ///
        /// Returns the number of completions written to `out`, or `-1`
        /// if `submit_and_wait` fails (ring marked unhealthy).
        ///
        /// # Buffer-id extraction
        ///
        /// For `Recv` CQEs with `IORING_CQE_F_BUFFER` set, the
        /// kernel encodes the buffer-id in `cqe.flags >> IORING_CQE_BUFFER_SHIFT`
        /// (upper 16 bits of the flags word). We extract it and store it
        /// in `Completion::buf_id`; all other completions get `buf_id = -1`.
        pub fn wait_completions(
            &mut self,
            min_complete: u32,
            _timeout_ms: u32,
            out: *mut Completion,
            out_cap: u32,
        ) -> i32 {
            if self.ring.submit_and_wait(min_complete as usize).is_err() {
                self.healthy.store(false, Ordering::Release);
                return -1;
            }
            let mut written = 0u32;
            // Dropping this cursor at end of scope advances the CQ head;
            // any CQEs beyond `out_cap` stay queued for the next call.
            let mut completion = self.ring.completion();
            while written < out_cap {
                let cqe = match completion.next() {
                    Some(c) => c,
                    None => break,
                };
                let user = cqe.user_data();
                let op_byte = (user >> 56) as u8;
                let fd = (user & 0xffff_ffff) as i32;
                let result = cqe.result() as i64;
                let flags = cqe.flags();

                // Extract buf_id for recv completions that carry a buffer.
                // Use the public `cqueue::buffer_select(flags)` helper which
                // tests IORING_CQE_F_BUFFER and extracts the buffer-id from
                // the upper 16 bits — avoids the private `io_uring::sys` module.
                let buf_id = if op_byte == (OpKind::Recv as u8) && result >= 0 {
                    io_uring::cqueue::buffer_select(flags)
                        .map(|id| id as i32)
                        .unwrap_or(-1)
                } else {
                    -1
                };

                // SAFETY: `out` is valid for `out_cap` elements (caller
                // contract); `written < out_cap` is checked above.
                unsafe {
                    *out.add(written as usize) = Completion {
                        op_kind: op_byte,
                        _pad: [0; 3],
                        fd,
                        result,
                        buf_id,
                        flags,
                    };
                }
                written += 1;
            }
            written as i32
        }

        /// Return `buf_id` to the kernel buffer pool so it can be
        /// reused for the next multishot recv CQE.
        ///
        /// Must be called once per recv CQE with `buf_id >= 0`. The
        /// caller must NOT read from the buffer after calling this.
        pub fn release_buffer(&self, buf_id: u16) {
            self.buffer_ring.release(buf_id);
        }

        /// Force `is_healthy()` to return `false`. Used by Ruby when
        /// it detects an unrecoverable error outside the ring (e.g. a
        /// connection closure that shouldn't propagate further).
        pub fn force_unhealthy(&self) {
            self.healthy.store(false, Ordering::Release);
        }

        /// Returns `true` while the ring is in a usable state. Set to
        /// `false` by `wait_completions` on `submit_and_wait` failure or
        /// by `force_unhealthy`.
        pub fn is_healthy(&self) -> bool {
            self.healthy.load(Ordering::Acquire)
        }
    }

    impl Drop for HotpathRing {
        fn drop(&mut self) {
            // CRITICAL: unregister the kernel buf-ring BEFORE
            // `buffer_ring`'s auto-drop frees the backing memory.
            //
            // Rust's field-drop runs AFTER this user Drop body, in
            // declaration order: buffer_ring first (backing memory freed),
            // then ring (fd closed). So calling unregister here — while
            // both buffer_ring.ring_ptr and ring.fd are still valid — is
            // the correct sequence:
            //
            //   1. THIS: unregister_buf_ring  ← kernel stops writing
            //   2. auto-drop buffer_ring      ← frees backing memory
            //   3. auto-drop ring             ← closes io_uring fd
            //
            // Best-effort: if the ring fd was already closed by a prior
            // failure the unregister will error — we ignore that.
            let _ = self.ring.submitter()
                .unregister_buf_ring(self.buffer_ring.group_id());
        }
    }

    /// Probe: try to set up a tiny ring + register a tiny PBUF_RING.
    /// Returns 0 on success or -errno on failure (-ENOSYS in sandboxes,
    /// -EINVAL on kernels that don't support PBUF_RING < 5.19).
    pub fn probe() -> c_int {
        // The probe ring is dropped immediately on success — its Drop
        // impl unregisters the buf-ring and closes the fd.
        match HotpathRing::new(8, 4, 256) {
            Ok(_) => 0,
            Err(e) => -(e.raw_os_error().unwrap_or(libc::ENOSYS)),
        }
    }
}
341
+
342
+ // ===== Non-Linux stubs =====
343
+ //
344
+ // On Darwin / BSD the io-uring dep is gated out. We compile zero-cost
345
+ // stubs so the macOS dev build succeeds cleanly. The Ruby layer checks
346
+ // the OS before loading the hotpath path and never reaches these stubs.
347
+
348
#[cfg(not(target_os = "linux"))]
mod stub_impl {
    use super::*;

    /// POSIX ENOSYS — every stub entry point reports "not implemented".
    const ENOSYS: i32 = 38;

    /// Zero-sized stand-in for the Linux hotpath ring on non-Linux
    /// targets. `new` always fails, so in practice no instance ever
    /// reaches the other methods; they exist so the macOS dev build
    /// compiles cleanly. The Ruby layer checks the OS before loading
    /// the hotpath and never calls into these stubs.
    pub struct HotpathRing;

    impl HotpathRing {
        /// Always fails with ENOSYS — the hotpath requires io_uring.
        pub fn new(_queue_depth: u32, _n_bufs: u16, _buf_size: u32) -> std::io::Result<Self> {
            Err(std::io::Error::from_raw_os_error(ENOSYS))
        }

        /// Unreachable in practice; reports ENOSYS.
        pub fn submit_accept_multishot(&mut self, _listener_fd: i32) -> Result<(), i32> {
            Err(ENOSYS)
        }

        /// Unreachable in practice; reports ENOSYS.
        pub fn submit_recv_multishot(&mut self, _conn_fd: i32) -> Result<(), i32> {
            Err(ENOSYS)
        }

        /// The iovec pointer is typed `*const u8` here because `libc`
        /// is a Linux-only dependency of this crate. The pointer is
        /// treated as fully opaque and never dereferenced; the Linux
        /// extern "C" wrapper performs the `*const u8` →
        /// `*const libc::iovec` cast before calling the real impl.
        pub fn submit_send(
            &mut self,
            _conn_fd: i32,
            _iov_opaque: *const u8,
            _iov_count: u32,
        ) -> Result<(), i32> {
            Err(ENOSYS)
        }

        /// Unreachable in practice; reports failure (-1) without
        /// touching the output buffer.
        pub fn wait_completions(
            &mut self,
            _min_complete: u32,
            _timeout_ms: u32,
            _out: *mut Completion,
            _out_cap: u32,
        ) -> i32 {
            -1
        }

        /// No buffer pool exists on this target — nothing to release.
        pub fn release_buffer(&self, _buf_id: u16) {}

        /// No health flag exists on this target — nothing to clear.
        pub fn force_unhealthy(&self) {}

        /// The stub ring is never usable.
        pub fn is_healthy(&self) -> bool {
            false
        }
    }

    /// Hotpath support probe: always unsupported off Linux.
    pub fn probe() -> c_int {
        -ENOSYS
    }
}
396
+
397
+ #[cfg(target_os = "linux")]
398
+ pub use linux_impl::{HotpathRing, probe};
399
+ #[cfg(not(target_os = "linux"))]
400
+ pub use stub_impl::{HotpathRing, probe};
401
+
402
+ // ===== C ABI =====
403
+ //
404
+ // All entry points:
405
+ // - are prefixed `hyperion_io_uring_hotpath_`
406
+ // - null-check their pointer argument before dereferencing
407
+ // - wrap the body in `catch_unwind(AssertUnwindSafe(...))` to prevent
408
+ // panic propagation across the FFI boundary (UB on stable Rust)
409
+ // - return a negative errno sentinel on panic or bad pointer
410
+
411
+ /// Probe whether the hotpath (PBUF_RING + multishot accept/recv) is
412
+ /// supported on this kernel. Returns 0 on success, negative errno
413
+ /// otherwise (e.g. -ENOSYS on kernels < 5.19 or in sandboxes).
414
+ ///
415
+ /// CAVEAT — partial probe coverage:
416
+ /// `probe()` exercises `IORING_REGISTER_PBUF_RING` only (kernel ≥ 5.19).
417
+ /// `IORING_OP_RECV` with `IORING_RECV_MULTISHOT` requires kernel ≥ 6.0
418
+ /// and is NOT exercised here. A 5.19-5.x kernel returns 0 from this
419
+ /// probe but will reject the first `submit_recv_multishot` SQE with
420
+ /// `result < 0` and no `IORING_CQE_F_MORE` bit. The Ruby caller MUST
421
+ /// treat the first recv CQE failure as a feature-unavailable signal
422
+ /// and fall back to the accept4 + read_nonblock path.
423
+ #[no_mangle]
424
+ pub extern "C" fn hyperion_io_uring_hotpath_supported() -> c_int {
425
+ catch_unwind(probe).unwrap_or(-EINVAL)
426
+ }
427
+
428
+ /// Allocate a new `HotpathRing`. Returns an opaque pointer, or NULL
429
+ /// on failure (memory exhaustion, kernel rejection, etc.).
430
+ ///
431
+ /// Caller must free with `hyperion_io_uring_hotpath_ring_free`.
432
+ #[no_mangle]
433
+ pub extern "C" fn hyperion_io_uring_hotpath_ring_new(
434
+ queue_depth: u32,
435
+ n_bufs: u16,
436
+ buf_size: u32,
437
+ ) -> *mut HotpathRing {
438
+ catch_unwind(|| match HotpathRing::new(queue_depth, n_bufs, buf_size) {
439
+ Ok(r) => Box::into_raw(Box::new(r)),
440
+ Err(_) => std::ptr::null_mut(),
441
+ })
442
+ .unwrap_or(std::ptr::null_mut())
443
+ }
444
+
445
/// Free a `HotpathRing` previously allocated by
/// `hyperion_io_uring_hotpath_ring_new`. No-op on NULL.
///
/// Dropping the box runs `HotpathRing`'s `Drop` impl, which
/// unregisters the kernel buf-ring before the backing memory is freed
/// and the ring fd is closed (see the module-level ordering contract).
///
/// SAFETY: `ptr` must be a live pointer returned by `ring_new` and not
/// yet freed. Must be called from the same worker that created it.
#[no_mangle]
pub unsafe extern "C" fn hyperion_io_uring_hotpath_ring_free(
    ptr: *mut HotpathRing,
) {
    // A panic during drop is swallowed — unwinding across the C ABI
    // boundary would be UB.
    let _ = catch_unwind(std::panic::AssertUnwindSafe(|| {
        if !ptr.is_null() {
            drop(Box::from_raw(ptr));
        }
    }));
}
460
+
461
/// Post an `AcceptMulti` SQE for `listener_fd`.
/// Returns 0 on success or a negative errno on failure.
///
/// Possible errnos: -EINVAL (null `ptr` or panic), -EAGAIN (submission
/// queue full), -EIO (submit syscall failed) — see
/// `HotpathRing::submit_accept_multishot` for the mapping.
#[no_mangle]
pub unsafe extern "C" fn hyperion_io_uring_hotpath_submit_accept_multishot(
    ptr: *mut HotpathRing,
    listener_fd: c_int,
) -> c_int {
    if ptr.is_null() {
        return -EINVAL;
    }
    catch_unwind(std::panic::AssertUnwindSafe(|| {
        match (*ptr).submit_accept_multishot(listener_fd) {
            Ok(()) => 0,
            Err(e) => -e,
        }
    }))
    .unwrap_or(-EINVAL)
}
479
+
480
+ /// Post a `RecvMulti` SQE for `fd` backed by the ring's buffer pool.
481
+ /// Returns 0 on success or a negative errno on failure.
482
+ #[no_mangle]
483
+ pub unsafe extern "C" fn hyperion_io_uring_hotpath_submit_recv_multishot(
484
+ ptr: *mut HotpathRing,
485
+ fd: c_int,
486
+ ) -> c_int {
487
+ if ptr.is_null() {
488
+ return -EINVAL;
489
+ }
490
+ catch_unwind(std::panic::AssertUnwindSafe(|| {
491
+ match (*ptr).submit_recv_multishot(fd) {
492
+ Ok(()) => 0,
493
+ Err(e) => -e,
494
+ }
495
+ }))
496
+ .unwrap_or(-EINVAL)
497
+ }
498
+
499
/// Post a `Writev` SQE for `fd`.
///
/// `iov_ptr` must point to `iov_count` valid `iovec` entries (each
/// entry is `{ base: *mut u8, len: usize }` — the layout of POSIX
/// `iovec`) and must remain valid until the matching send CQE arrives.
/// Returns 0 on success or a negative errno on failure.
///
/// The argument is typed as `*const u8` (rather than `*const libc::iovec`)
/// so this extern "C" declaration compiles on all platforms; the Linux
/// impl casts it to the correct `*const libc::iovec` type internally.
#[no_mangle]
pub unsafe extern "C" fn hyperion_io_uring_hotpath_submit_send(
    ptr: *mut HotpathRing,
    fd: c_int,
    iov_ptr: *const u8,
    iov_count: u32,
) -> c_int {
    if ptr.is_null() || iov_ptr.is_null() {
        return -EINVAL;
    }
    // Exactly one of the two cfg blocks below is compiled per target;
    // each is the tail expression of the function for that target.
    #[cfg(target_os = "linux")]
    {
        catch_unwind(std::panic::AssertUnwindSafe(|| {
            // SAFETY: caller guarantees `iov_ptr` points to `iov_count`
            // valid `libc::iovec` entries with the same layout.
            match (*ptr).submit_send(fd, iov_ptr as *const libc::iovec, iov_count) {
                Ok(()) => 0,
                Err(e) => -e,
            }
        }))
        .unwrap_or(-EINVAL)
    }
    #[cfg(not(target_os = "linux"))]
    {
        // Silence unused-argument warnings on the stub target.
        let _ = (ptr, fd, iov_ptr, iov_count);
        -38 // -ENOSYS
    }
}
537
+
538
/// Submit pending SQEs and wait for at least `min_complete` CQEs.
/// Writes up to `out_cap` `Completion` structs into `out`.
///
/// Returns the number of completions written, or `-1` on ring failure
/// (ring is marked unhealthy after this). A caught panic or null
/// arguments return `-EINVAL` instead.
///
/// `out` must point to a buffer of at least `out_cap * 24` bytes
/// (24 = `size_of::<Completion>()` on 64-bit, enforced by the
/// compile-time assert next to the `Completion` definition).
#[no_mangle]
pub unsafe extern "C" fn hyperion_io_uring_hotpath_wait_completions(
    ptr: *mut HotpathRing,
    min_complete: u32,
    timeout_ms: u32,
    out: *mut Completion,
    out_cap: u32,
) -> c_int {
    // A null `out` is tolerated when out_cap == 0 (a pure "submit and
    // drain nothing" call).
    if ptr.is_null() || (out_cap > 0 && out.is_null()) {
        return -EINVAL;
    }
    catch_unwind(std::panic::AssertUnwindSafe(|| {
        (*ptr).wait_completions(min_complete, timeout_ms, out, out_cap)
    }))
    .unwrap_or(-EINVAL)
}
562
+
563
+ /// Release `buf_id` back to the kernel's buffer pool.
564
+ /// Must be called once per recv CQE whose `buf_id >= 0`.
565
+ #[no_mangle]
566
+ pub unsafe extern "C" fn hyperion_io_uring_hotpath_release_buffer(
567
+ ptr: *mut HotpathRing,
568
+ buf_id: u16,
569
+ ) {
570
+ let _ = catch_unwind(std::panic::AssertUnwindSafe(|| {
571
+ if !ptr.is_null() {
572
+ (*ptr).release_buffer(buf_id);
573
+ }
574
+ }));
575
+ }
576
+
577
+ /// Force `is_healthy` to return false.
578
+ #[no_mangle]
579
+ pub unsafe extern "C" fn hyperion_io_uring_hotpath_force_unhealthy(
580
+ ptr: *mut HotpathRing,
581
+ ) {
582
+ let _ = catch_unwind(std::panic::AssertUnwindSafe(|| {
583
+ if !ptr.is_null() {
584
+ (*ptr).force_unhealthy();
585
+ }
586
+ }));
587
+ }
588
+
589
/// Copy `len` bytes from buffer `buf_id` into the caller-supplied output
/// buffer `out_ptr` (capacity `out_cap` bytes). Returns the number of
/// bytes written on success, or a negative errno on failure.
///
/// This is the "one-copy" recv-data path for Task 2.3.4: the kernel
/// has filled the buffer-ring slot; we copy into Ruby's string buffer,
/// then the caller calls `release_buffer` so the kernel can reuse the
/// slot. The copy is one `memcpy`-equivalent via `ptr::copy_nonoverlapping`.
///
/// Returns -22 (`-EINVAL`) on null/invalid arguments.
///
/// NOTE(review): `len as c_int` would wrap for `len > i32::MAX`; in
/// practice `len` is bounded by the ring's `buf_size` so this is
/// unreachable — confirm if buffer sizes ever grow past 2 GiB.
#[no_mangle]
pub unsafe extern "C" fn hyperion_io_uring_hotpath_copy_buffer(
    ptr: *mut HotpathRing,
    buf_id: u16,
    len: u32,
    out_ptr: *mut u8,
    out_cap: u32,
) -> c_int {
    if ptr.is_null() || out_ptr.is_null() {
        return -EINVAL;
    }
    catch_unwind(std::panic::AssertUnwindSafe(|| {
        if len > out_cap {
            return -EINVAL;
        }
        // SAFETY: buf_id is valid (caller's responsibility per BufferRing::borrow
        // contract — buf_id came from a CQE on this ring). len <= buf_size
        // is guaranteed by the kernel (recv never writes more than buf_size).
        // out_ptr is valid for out_cap >= len bytes (caller contract).
        #[cfg(target_os = "linux")]
        {
            let view = (*ptr).buffer_ring.borrow(buf_id, len as usize);
            std::ptr::copy_nonoverlapping(view.as_ptr(), out_ptr, len as usize);
            len as c_int
        }
        #[cfg(not(target_os = "linux"))]
        {
            // Silence unused-argument warnings on the stub target.
            let _ = (buf_id, len, out_ptr, out_cap);
            -38 // -ENOSYS
        }
    }))
    .unwrap_or(-EINVAL)
}
632
+
633
+ /// Returns 1 if the ring is healthy, 0 otherwise.
634
+ #[no_mangle]
635
+ pub unsafe extern "C" fn hyperion_io_uring_hotpath_is_healthy(
636
+ ptr: *mut HotpathRing,
637
+ ) -> c_int {
638
+ if ptr.is_null() {
639
+ return 0;
640
+ }
641
+ catch_unwind(std::panic::AssertUnwindSafe(|| {
642
+ if (*ptr).is_healthy() { 1 } else { 0 }
643
+ }))
644
+ .unwrap_or(0)
645
+ }
@@ -47,9 +47,13 @@
47
47
 
48
48
  #![allow(clippy::missing_safety_doc)]
49
49
 
50
+ mod buffer_ring;
51
+ pub mod hotpath;
52
+ pub use hotpath::Completion as HotpathCompletion;
53
+
50
54
  use std::os::raw::{c_int, c_uchar, c_uint};
51
55
 
52
- const ABI_VERSION: u32 = 1;
56
+ const ABI_VERSION: u32 = 2;
53
57
 
54
58
  // ---------- ABI version + probe ----------
55
59