hyperion-rb 2.16.3 → 2.16.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +75 -0
- data/ext/hyperion_http/extconf.rb +9 -0
- data/ext/hyperion_http/parser.c +93 -21
- data/ext/hyperion_http/response_writer.c +604 -0
- data/ext/hyperion_http/response_writer.h +28 -0
- data/ext/hyperion_io_uring/Cargo.lock +1 -1
- data/ext/hyperion_io_uring/Cargo.toml +1 -1
- data/ext/hyperion_io_uring/src/buffer_ring.rs +319 -0
- data/ext/hyperion_io_uring/src/hotpath.rs +645 -0
- data/ext/hyperion_io_uring/src/lib.rs +5 -1
- data/lib/hyperion/cli.rb +23 -0
- data/lib/hyperion/config.rb +9 -0
- data/lib/hyperion/connection.rb +209 -1
- data/lib/hyperion/http/response_writer.rb +46 -0
- data/lib/hyperion/io_uring.rb +270 -5
- data/lib/hyperion/response_writer.rb +91 -1
- data/lib/hyperion/server.rb +200 -4
- data/lib/hyperion/version.rb +1 -1
- data/lib/hyperion.rb +1 -0
- metadata +6 -1
|
@@ -0,0 +1,645 @@
|
|
|
1
|
+
//! Hotpath ring — multishot accept + multishot recv (with PBUF_RING
|
|
2
|
+
//! kernel buffers) + send SQEs. One ring per worker; the accept fiber
|
|
3
|
+
//! drains the unified completion queue.
|
|
4
|
+
//!
|
|
5
|
+
//! Per spec §#2: connection state stays in Ruby; this module owns
|
|
6
|
+
//! submission/completion + buffer-ring lifecycle.
|
|
7
|
+
//!
|
|
8
|
+
//! ## Drop-ordering contract
|
|
9
|
+
//!
|
|
10
|
+
//! `HotpathRing` declares fields in the order: `buffer_ring`, `ring`,
|
|
11
|
+
//! `healthy`. Rust drops fields in declaration order, so auto-drop
|
|
12
|
+
//! will free `buffer_ring` (frees backing memory) BEFORE closing the
|
|
13
|
+
//! `ring` fd. The explicit `Drop` impl runs `unregister_buf_ring`
|
|
14
|
+
//! BEFORE either auto-drop runs — while both allocations are still
|
|
15
|
+
//! alive — which is the required sequence:
|
|
16
|
+
//!
|
|
17
|
+
//! 1. `Drop::drop` calls `unregister_buf_ring` ← user code, runs first
|
|
18
|
+
//! 2. auto-drop `buffer_ring` ← frees backing memory
|
|
19
|
+
//! 3. auto-drop `ring` ← closes io_uring fd
|
|
20
|
+
//!
|
|
21
|
+
//! If the kernel retains a buf-ring registration while userspace frees
|
|
22
|
+
//! the backing memory, the next multishot recv CQE can write into freed
|
|
23
|
+
//! memory. This ordering prevents that UAF.
|
|
24
|
+
|
|
25
|
+
use std::os::raw::c_int;
|
|
26
|
+
use std::panic::catch_unwind;
|
|
27
|
+
|
|
28
|
+
/// POSIX `EINVAL` (22), used as the sentinel error code for null-pointer
/// arguments. Hard-coded so the platform-uniform C ABI code does not pull
/// in a `libc` dependency for a single constant.
const EINVAL: c_int = 22;

/// Which operation a completion belongs to.
///
/// The discriminant is packed into the high byte of an SQE's `user_data`,
/// so the Ruby side can dispatch with a plain integer comparison instead
/// of an `rb_intern` lookup.
///
/// Stable ABI — never renumber these variants.
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
pub enum OpKind {
    Accept = 1,
    Recv = 2,
    Send = 3,
    Close = 4,
}

/// One FFI-safe completion record, delivered in batches from
/// `wait_completions`. `#[repr(C)]` so Ruby (via Fiddle) can read the
/// fields by byte offset.
///
/// Layout on 64-bit targets (24 bytes total):
///
/// | field   | offset | size |
/// |---------|--------|------|
/// | op_kind | 0      | 1    |
/// | _pad    | 1      | 3    |
/// | fd      | 4      | 4    |
/// | result  | 8      | 8    |
/// | buf_id  | 16     | 4    |
/// | flags   | 20     | 4    |
///
/// `buf_id` is `-1` whenever the CQE is not a recv carrying a buffer id
/// (`IORING_CQE_F_BUFFER` unset, or `op_kind != Recv`).
#[repr(C)]
pub struct Completion {
    pub op_kind: u8,
    pub _pad: [u8; 3],
    pub fd: i32,
    pub result: i64,
    pub buf_id: i32,
    pub flags: u32,
}

// Compile-time ABI guard. Ruby reads `Completion` by byte offset using
// the size assumed by `HotpathRing::COMPLETION_BYTES` in
// lib/hyperion/io_uring.rb; if a field or padding change ever drifts the
// size, the build fails here with a clear message instead of producing
// silent garbage at runtime.
const _: () = assert!(
    std::mem::size_of::<Completion>() == 24,
    "Completion ABI size changed — update Ruby Fiddle offsets in lib/hyperion/io_uring.rb"
);
|
|
77
|
+
|
|
78
|
+
// ===== Linux implementation =====
|
|
79
|
+
|
|
80
|
+
#[cfg(target_os = "linux")]
|
|
81
|
+
mod linux_impl {
|
|
82
|
+
use super::*;
|
|
83
|
+
use crate::buffer_ring::BufferRing;
|
|
84
|
+
use io_uring::{cqueue, opcode, squeue, types, IoUring};
|
|
85
|
+
use std::os::unix::io::RawFd;
|
|
86
|
+
use std::sync::atomic::{AtomicBool, Ordering};
|
|
87
|
+
|
|
88
|
+
/// Per-worker hotpath ring. Owns one `IoUring` instance and one
|
|
89
|
+
/// `BufferRing` (PBUF_RING kernel buffer pool for multishot recv).
|
|
90
|
+
///
|
|
91
|
+
/// # Field declaration order
|
|
92
|
+
///
|
|
93
|
+
/// Fields are declared `buffer_ring` → `ring` → `healthy` so that
|
|
94
|
+
/// Rust's auto-drop (declaration order) frees `buffer_ring` first
|
|
95
|
+
/// (backing memory) and then `ring` (io_uring fd). The explicit
|
|
96
|
+
/// `Drop` impl calls `unregister_buf_ring` BEFORE either auto-drop
|
|
97
|
+
/// runs — see module-level doc for the full ordering proof.
|
|
98
|
+
pub struct HotpathRing {
|
|
99
|
+
/// Kernel-managed receive buffer pool. Dropped FIRST by
|
|
100
|
+
/// auto-drop (frees backing memory). The explicit `Drop` impl
|
|
101
|
+
/// has already unregistered the buf-ring with the kernel, so
|
|
102
|
+
/// this free is safe.
|
|
103
|
+
pub buffer_ring: BufferRing,
|
|
104
|
+
/// The io_uring instance. Dropped SECOND by auto-drop (closes
|
|
105
|
+
/// the ring fd).
|
|
106
|
+
pub ring: IoUring<squeue::Entry, cqueue::Entry>,
|
|
107
|
+
/// Set to `false` on `submit_and_wait` failure. Ruby checks
|
|
108
|
+
/// this after each batch to detect ring corruption.
|
|
109
|
+
pub healthy: AtomicBool,
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
impl HotpathRing {
|
|
113
|
+
/// Allocate a ring of `queue_depth` SQE slots + CQE slots, and
|
|
114
|
+
/// register a PBUF_RING of `n_bufs` buffers of `buf_size` bytes.
|
|
115
|
+
///
|
|
116
|
+
/// `n_bufs` must be a power of two and ≤ 32768 (kernel limit).
|
|
117
|
+
/// Returns `Err` on kernel rejection (ENOSYS < 5.19, EINVAL for
|
|
118
|
+
/// bad params, EPERM in seccomp sandboxes, etc.).
|
|
119
|
+
pub fn new(queue_depth: u32, n_bufs: u16, buf_size: u32)
|
|
120
|
+
-> std::io::Result<Self>
|
|
121
|
+
{
|
|
122
|
+
let mut ring: IoUring<squeue::Entry, cqueue::Entry> =
|
|
123
|
+
IoUring::builder().build(queue_depth)?;
|
|
124
|
+
// group_id 0 — one buffer ring per HotpathRing.
|
|
125
|
+
let buffer_ring = BufferRing::new(&mut ring, 0, n_bufs, buf_size)?;
|
|
126
|
+
Ok(Self {
|
|
127
|
+
buffer_ring,
|
|
128
|
+
ring,
|
|
129
|
+
healthy: AtomicBool::new(true),
|
|
130
|
+
})
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/// Post an `AcceptMulti` SQE for `listener_fd`.
|
|
134
|
+
///
|
|
135
|
+
/// The multishot accept keeps reposting itself after each
|
|
136
|
+
/// accepted connection until the listener is closed or the SQE
|
|
137
|
+
/// is cancelled. Each accepted fd arrives as a separate CQE
|
|
138
|
+
/// drained by `wait_completions`.
|
|
139
|
+
///
|
|
140
|
+
/// CONTRACT: when `wait_completions` returns -1 (sets
|
|
141
|
+
/// `healthy = false`), the Ruby caller MUST stop issuing any
|
|
142
|
+
/// further `submit_*` calls and engage the per-worker accept4
|
|
143
|
+
/// fallback. This method does NOT guard on `is_healthy()`
|
|
144
|
+
/// itself — it would unconditionally push the SQE onto a
|
|
145
|
+
/// broken ring and fail at submit() with a confusing OS error.
|
|
146
|
+
/// The Ruby side checks `is_healthy()` after each
|
|
147
|
+
/// wait_completions return.
|
|
148
|
+
///
|
|
149
|
+
/// Available since kernel 5.19.
|
|
150
|
+
pub fn submit_accept_multishot(&mut self, listener_fd: RawFd)
|
|
151
|
+
-> Result<(), i32>
|
|
152
|
+
{
|
|
153
|
+
// user_data encodes: high byte = OpKind, low 32 bits = fd.
|
|
154
|
+
let ud = ((OpKind::Accept as u64) << 56)
|
|
155
|
+
| (listener_fd as u32 as u64);
|
|
156
|
+
let sqe = opcode::AcceptMulti::new(types::Fd(listener_fd))
|
|
157
|
+
.build()
|
|
158
|
+
.user_data(ud);
|
|
159
|
+
unsafe {
|
|
160
|
+
self.ring.submission().push(&sqe)
|
|
161
|
+
.map_err(|_| libc::EAGAIN)?;
|
|
162
|
+
}
|
|
163
|
+
self.ring.submit().map_err(|_| libc::EIO)?;
|
|
164
|
+
Ok(())
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
/// Post a `RecvMulti` SQE for `fd` backed by `buffer_ring`.
|
|
168
|
+
///
|
|
169
|
+
/// The multishot recv rearms itself after each CQE unless
|
|
170
|
+
/// `IORING_CQE_F_MORE` is absent, in which case the caller
|
|
171
|
+
/// must reissue. Each CQE carries a buf_id (extracted by
|
|
172
|
+
/// `wait_completions`) that the caller must `release_buffer`
|
|
173
|
+
/// after consuming.
|
|
174
|
+
///
|
|
175
|
+
/// Available since kernel 6.0.
|
|
176
|
+
pub fn submit_recv_multishot(&mut self, fd: RawFd)
|
|
177
|
+
-> Result<(), i32>
|
|
178
|
+
{
|
|
179
|
+
let group_id = self.buffer_ring.group_id();
|
|
180
|
+
let ud = ((OpKind::Recv as u64) << 56) | (fd as u32 as u64);
|
|
181
|
+
let sqe = opcode::RecvMulti::new(types::Fd(fd), group_id)
|
|
182
|
+
.build()
|
|
183
|
+
.user_data(ud);
|
|
184
|
+
unsafe {
|
|
185
|
+
self.ring.submission().push(&sqe)
|
|
186
|
+
.map_err(|_| libc::EAGAIN)?;
|
|
187
|
+
}
|
|
188
|
+
self.ring.submit().map_err(|_| libc::EIO)?;
|
|
189
|
+
Ok(())
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
/// Post a `Writev` SQE for `fd`.
|
|
193
|
+
///
|
|
194
|
+
/// The caller is responsible for keeping `iov_ptr` (and the
|
|
195
|
+
/// underlying buffers) alive until the matching send CQE is
|
|
196
|
+
/// returned by `wait_completions`.
|
|
197
|
+
pub fn submit_send(
|
|
198
|
+
&mut self,
|
|
199
|
+
fd: RawFd,
|
|
200
|
+
iov_ptr: *const libc::iovec,
|
|
201
|
+
iov_count: u32,
|
|
202
|
+
) -> Result<(), i32> {
|
|
203
|
+
let ud = ((OpKind::Send as u64) << 56) | (fd as u32 as u64);
|
|
204
|
+
let sqe = opcode::Writev::new(types::Fd(fd), iov_ptr, iov_count)
|
|
205
|
+
.build()
|
|
206
|
+
.user_data(ud);
|
|
207
|
+
unsafe {
|
|
208
|
+
self.ring.submission().push(&sqe)
|
|
209
|
+
.map_err(|_| libc::EAGAIN)?;
|
|
210
|
+
}
|
|
211
|
+
self.ring.submit().map_err(|_| libc::EIO)?;
|
|
212
|
+
Ok(())
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
/// Submit any pending SQEs and wait for at least `min_complete`
|
|
216
|
+
/// CQEs. Drains up to `out_cap` completions into `out`.
|
|
217
|
+
///
|
|
218
|
+
/// `_timeout_ms` is reserved for a future
|
|
219
|
+
/// `io_uring_wait_cqe_timeout` path; for now we use
|
|
220
|
+
/// `submit_and_wait` which blocks until `min_complete` CQEs
|
|
221
|
+
/// arrive.
|
|
222
|
+
///
|
|
223
|
+
/// Returns the number of completions written to `out`, or `-1`
|
|
224
|
+
/// if `submit_and_wait` fails (ring marked unhealthy).
|
|
225
|
+
///
|
|
226
|
+
/// # Buffer-id extraction
|
|
227
|
+
///
|
|
228
|
+
/// For `Recv` CQEs with `IORING_CQE_F_BUFFER` set, the
|
|
229
|
+
/// kernel encodes the buffer-id in `cqe.flags >> IORING_CQE_BUFFER_SHIFT`
|
|
230
|
+
/// (upper 16 bits of the flags word). We extract it and store it
|
|
231
|
+
/// in `Completion::buf_id`; all other completions get `buf_id = -1`.
|
|
232
|
+
pub fn wait_completions(
|
|
233
|
+
&mut self,
|
|
234
|
+
min_complete: u32,
|
|
235
|
+
_timeout_ms: u32,
|
|
236
|
+
out: *mut Completion,
|
|
237
|
+
out_cap: u32,
|
|
238
|
+
) -> i32 {
|
|
239
|
+
if self.ring.submit_and_wait(min_complete as usize).is_err() {
|
|
240
|
+
self.healthy.store(false, Ordering::Release);
|
|
241
|
+
return -1;
|
|
242
|
+
}
|
|
243
|
+
let mut written = 0u32;
|
|
244
|
+
let mut completion = self.ring.completion();
|
|
245
|
+
while written < out_cap {
|
|
246
|
+
let cqe = match completion.next() {
|
|
247
|
+
Some(c) => c,
|
|
248
|
+
None => break,
|
|
249
|
+
};
|
|
250
|
+
let user = cqe.user_data();
|
|
251
|
+
let op_byte = (user >> 56) as u8;
|
|
252
|
+
let fd = (user & 0xffff_ffff) as i32;
|
|
253
|
+
let result = cqe.result() as i64;
|
|
254
|
+
let flags = cqe.flags();
|
|
255
|
+
|
|
256
|
+
// Extract buf_id for recv completions that carry a buffer.
|
|
257
|
+
// Use the public `cqueue::buffer_select(flags)` helper which
|
|
258
|
+
// tests IORING_CQE_F_BUFFER and extracts the buffer-id from
|
|
259
|
+
// the upper 16 bits — avoids the private `io_uring::sys` module.
|
|
260
|
+
let buf_id = if op_byte == (OpKind::Recv as u8) && result >= 0 {
|
|
261
|
+
io_uring::cqueue::buffer_select(flags)
|
|
262
|
+
.map(|id| id as i32)
|
|
263
|
+
.unwrap_or(-1)
|
|
264
|
+
} else {
|
|
265
|
+
-1
|
|
266
|
+
};
|
|
267
|
+
|
|
268
|
+
// SAFETY: `out` is valid for `out_cap` elements (caller
|
|
269
|
+
// contract); `written < out_cap` is checked above.
|
|
270
|
+
unsafe {
|
|
271
|
+
*out.add(written as usize) = Completion {
|
|
272
|
+
op_kind: op_byte,
|
|
273
|
+
_pad: [0; 3],
|
|
274
|
+
fd,
|
|
275
|
+
result,
|
|
276
|
+
buf_id,
|
|
277
|
+
flags,
|
|
278
|
+
};
|
|
279
|
+
}
|
|
280
|
+
written += 1;
|
|
281
|
+
}
|
|
282
|
+
written as i32
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
/// Return `buf_id` to the kernel buffer pool so it can be
|
|
286
|
+
/// reused for the next multishot recv CQE.
|
|
287
|
+
///
|
|
288
|
+
/// Must be called once per recv CQE with `buf_id >= 0`. The
|
|
289
|
+
/// caller must NOT read from the buffer after calling this.
|
|
290
|
+
pub fn release_buffer(&self, buf_id: u16) {
|
|
291
|
+
self.buffer_ring.release(buf_id);
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
/// Force `is_healthy()` to return `false`. Used by Ruby when
|
|
295
|
+
/// it detects an unrecoverable error outside the ring (e.g. a
|
|
296
|
+
/// connection closure that shouldn't propagate further).
|
|
297
|
+
pub fn force_unhealthy(&self) {
|
|
298
|
+
self.healthy.store(false, Ordering::Release);
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
/// Returns `true` while the ring is in a usable state. Set to
|
|
302
|
+
/// `false` by `wait_completions` on `submit_and_wait` failure or
|
|
303
|
+
/// by `force_unhealthy`.
|
|
304
|
+
pub fn is_healthy(&self) -> bool {
|
|
305
|
+
self.healthy.load(Ordering::Acquire)
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
impl Drop for HotpathRing {
|
|
310
|
+
fn drop(&mut self) {
|
|
311
|
+
// CRITICAL: unregister the kernel buf-ring BEFORE
|
|
312
|
+
// `buffer_ring`'s auto-drop frees the backing memory.
|
|
313
|
+
//
|
|
314
|
+
// Rust's field-drop runs AFTER this user Drop body, in
|
|
315
|
+
// declaration order: buffer_ring first (backing memory freed),
|
|
316
|
+
// then ring (fd closed). So calling unregister here — while
|
|
317
|
+
// both buffer_ring.ring_ptr and ring.fd are still valid — is
|
|
318
|
+
// the correct sequence:
|
|
319
|
+
//
|
|
320
|
+
// 1. THIS: unregister_buf_ring ← kernel stops writing
|
|
321
|
+
// 2. auto-drop buffer_ring ← frees backing memory
|
|
322
|
+
// 3. auto-drop ring ← closes io_uring fd
|
|
323
|
+
//
|
|
324
|
+
// Best-effort: if the ring fd was already closed by a prior
|
|
325
|
+
// failure the unregister will error — we ignore that.
|
|
326
|
+
let _ = self.ring.submitter()
|
|
327
|
+
.unregister_buf_ring(self.buffer_ring.group_id());
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
/// Probe: try to set up a tiny ring + register a tiny PBUF_RING.
|
|
332
|
+
/// Returns 0 on success or -errno on failure (-ENOSYS in sandboxes,
|
|
333
|
+
/// -EINVAL on kernels that don't support PBUF_RING < 5.19).
|
|
334
|
+
pub fn probe() -> c_int {
|
|
335
|
+
match HotpathRing::new(8, 4, 256) {
|
|
336
|
+
Ok(_) => 0,
|
|
337
|
+
Err(e) => -(e.raw_os_error().unwrap_or(libc::ENOSYS)),
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
// ===== Non-Linux stubs =====
|
|
343
|
+
//
|
|
344
|
+
// On Darwin / BSD the io-uring dep is gated out. We compile zero-cost
|
|
345
|
+
// stubs so the macOS dev build succeeds cleanly. The Ruby layer checks
|
|
346
|
+
// the OS before loading the hotpath path and never reaches these stubs.
|
|
347
|
+
|
|
348
|
+
#[cfg(not(target_os = "linux"))]
mod stub_impl {
    use super::*;

    /// errno ENOSYS (38) — "function not implemented", the uniform
    /// answer on platforms without io_uring.
    const ENOSYS_ERRNO: i32 = 38;

    /// Zero-sized stand-in for the Linux hotpath ring.
    ///
    /// Exists only so the macOS / BSD dev build compiles cleanly; the
    /// Ruby layer checks the OS before loading the hotpath path and
    /// never actually reaches these methods.
    pub struct HotpathRing;

    impl HotpathRing {
        pub fn new(_qd: u32, _nb: u16, _bs: u32) -> std::io::Result<Self> {
            Err(std::io::Error::from_raw_os_error(ENOSYS_ERRNO))
        }

        pub fn submit_accept_multishot(&mut self, _fd: i32) -> Result<(), i32> {
            Err(ENOSYS_ERRNO)
        }

        pub fn submit_recv_multishot(&mut self, _fd: i32) -> Result<(), i32> {
            Err(ENOSYS_ERRNO)
        }

        // `_p` is typed `*const u8` rather than `*const libc::iovec`
        // because `libc` is a Linux-only Cargo dependency in this
        // crate. The pointer is treated opaquely and never
        // dereferenced here; the Linux extern "C" wrapper performs the
        // `*const u8` → `*const libc::iovec` cast before reaching the
        // real implementation.
        pub fn submit_send(
            &mut self, _fd: i32, _p: *const u8, _n: u32,
        ) -> Result<(), i32> {
            Err(ENOSYS_ERRNO)
        }

        pub fn wait_completions(
            &mut self, _m: u32, _t: u32, _o: *mut Completion, _c: u32,
        ) -> i32 {
            -1
        }

        pub fn release_buffer(&self, _bid: u16) {}
        pub fn force_unhealthy(&self) {}

        pub fn is_healthy(&self) -> bool {
            false
        }
    }

    pub fn probe() -> c_int {
        -ENOSYS_ERRNO
    }
}
|
|
396
|
+
|
|
397
|
+
#[cfg(target_os = "linux")]
|
|
398
|
+
pub use linux_impl::{HotpathRing, probe};
|
|
399
|
+
#[cfg(not(target_os = "linux"))]
|
|
400
|
+
pub use stub_impl::{HotpathRing, probe};
|
|
401
|
+
|
|
402
|
+
// ===== C ABI =====
|
|
403
|
+
//
|
|
404
|
+
// All entry points:
|
|
405
|
+
// - are prefixed `hyperion_io_uring_hotpath_`
|
|
406
|
+
// - null-check their pointer argument before dereferencing
|
|
407
|
+
// - wrap the body in `catch_unwind(AssertUnwindSafe(...))` to prevent
|
|
408
|
+
// panic propagation across the FFI boundary (UB on stable Rust)
|
|
409
|
+
// - return a negative errno sentinel on panic or bad pointer
|
|
410
|
+
|
|
411
|
+
/// Probe whether the hotpath (PBUF_RING + multishot accept/recv) is
|
|
412
|
+
/// supported on this kernel. Returns 0 on success, negative errno
|
|
413
|
+
/// otherwise (e.g. -ENOSYS on kernels < 5.19 or in sandboxes).
|
|
414
|
+
///
|
|
415
|
+
/// CAVEAT — partial probe coverage:
|
|
416
|
+
/// `probe()` exercises `IORING_REGISTER_PBUF_RING` only (kernel ≥ 5.19).
|
|
417
|
+
/// `IORING_OP_RECV` with `IORING_RECV_MULTISHOT` requires kernel ≥ 6.0
|
|
418
|
+
/// and is NOT exercised here. A 5.19-5.x kernel returns 0 from this
|
|
419
|
+
/// probe but will reject the first `submit_recv_multishot` SQE with
|
|
420
|
+
/// `result < 0` and no `IORING_CQE_F_MORE` bit. The Ruby caller MUST
|
|
421
|
+
/// treat the first recv CQE failure as a feature-unavailable signal
|
|
422
|
+
/// and fall back to the accept4 + read_nonblock path.
|
|
423
|
+
#[no_mangle]
|
|
424
|
+
pub extern "C" fn hyperion_io_uring_hotpath_supported() -> c_int {
|
|
425
|
+
catch_unwind(probe).unwrap_or(-EINVAL)
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
/// Allocate a new `HotpathRing`. Returns an opaque pointer, or NULL
|
|
429
|
+
/// on failure (memory exhaustion, kernel rejection, etc.).
|
|
430
|
+
///
|
|
431
|
+
/// Caller must free with `hyperion_io_uring_hotpath_ring_free`.
|
|
432
|
+
#[no_mangle]
|
|
433
|
+
pub extern "C" fn hyperion_io_uring_hotpath_ring_new(
|
|
434
|
+
queue_depth: u32,
|
|
435
|
+
n_bufs: u16,
|
|
436
|
+
buf_size: u32,
|
|
437
|
+
) -> *mut HotpathRing {
|
|
438
|
+
catch_unwind(|| match HotpathRing::new(queue_depth, n_bufs, buf_size) {
|
|
439
|
+
Ok(r) => Box::into_raw(Box::new(r)),
|
|
440
|
+
Err(_) => std::ptr::null_mut(),
|
|
441
|
+
})
|
|
442
|
+
.unwrap_or(std::ptr::null_mut())
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
/// Free a `HotpathRing` previously allocated by
|
|
446
|
+
/// `hyperion_io_uring_hotpath_ring_new`. No-op on NULL.
|
|
447
|
+
///
|
|
448
|
+
/// SAFETY: `ptr` must be a live pointer returned by `ring_new` and not
|
|
449
|
+
/// yet freed. Must be called from the same worker that created it.
|
|
450
|
+
#[no_mangle]
|
|
451
|
+
pub unsafe extern "C" fn hyperion_io_uring_hotpath_ring_free(
|
|
452
|
+
ptr: *mut HotpathRing,
|
|
453
|
+
) {
|
|
454
|
+
let _ = catch_unwind(std::panic::AssertUnwindSafe(|| {
|
|
455
|
+
if !ptr.is_null() {
|
|
456
|
+
drop(Box::from_raw(ptr));
|
|
457
|
+
}
|
|
458
|
+
}));
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
/// Post an `AcceptMulti` SQE for `listener_fd`.
|
|
462
|
+
/// Returns 0 on success or a negative errno on failure.
|
|
463
|
+
#[no_mangle]
|
|
464
|
+
pub unsafe extern "C" fn hyperion_io_uring_hotpath_submit_accept_multishot(
|
|
465
|
+
ptr: *mut HotpathRing,
|
|
466
|
+
listener_fd: c_int,
|
|
467
|
+
) -> c_int {
|
|
468
|
+
if ptr.is_null() {
|
|
469
|
+
return -EINVAL;
|
|
470
|
+
}
|
|
471
|
+
catch_unwind(std::panic::AssertUnwindSafe(|| {
|
|
472
|
+
match (*ptr).submit_accept_multishot(listener_fd) {
|
|
473
|
+
Ok(()) => 0,
|
|
474
|
+
Err(e) => -e,
|
|
475
|
+
}
|
|
476
|
+
}))
|
|
477
|
+
.unwrap_or(-EINVAL)
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
/// Post a `RecvMulti` SQE for `fd` backed by the ring's buffer pool.
|
|
481
|
+
/// Returns 0 on success or a negative errno on failure.
|
|
482
|
+
#[no_mangle]
|
|
483
|
+
pub unsafe extern "C" fn hyperion_io_uring_hotpath_submit_recv_multishot(
|
|
484
|
+
ptr: *mut HotpathRing,
|
|
485
|
+
fd: c_int,
|
|
486
|
+
) -> c_int {
|
|
487
|
+
if ptr.is_null() {
|
|
488
|
+
return -EINVAL;
|
|
489
|
+
}
|
|
490
|
+
catch_unwind(std::panic::AssertUnwindSafe(|| {
|
|
491
|
+
match (*ptr).submit_recv_multishot(fd) {
|
|
492
|
+
Ok(()) => 0,
|
|
493
|
+
Err(e) => -e,
|
|
494
|
+
}
|
|
495
|
+
}))
|
|
496
|
+
.unwrap_or(-EINVAL)
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
/// Post a `Writev` SQE for `fd`.
|
|
500
|
+
///
|
|
501
|
+
/// `iov_ptr` must point to `iov_count` valid `iovec` entries (each
|
|
502
|
+
/// entry is `{ base: *mut u8, len: usize }` — the layout of POSIX
|
|
503
|
+
/// `iovec`) and must remain valid until the matching send CQE arrives.
|
|
504
|
+
/// Returns 0 on success or a negative errno on failure.
|
|
505
|
+
///
|
|
506
|
+
/// The argument is typed as `*const u8` (rather than `*const libc::iovec`)
|
|
507
|
+
/// so this extern "C" declaration compiles on all platforms; the Linux
|
|
508
|
+
/// impl casts it to the correct `*const libc::iovec` type internally.
|
|
509
|
+
#[no_mangle]
|
|
510
|
+
pub unsafe extern "C" fn hyperion_io_uring_hotpath_submit_send(
|
|
511
|
+
ptr: *mut HotpathRing,
|
|
512
|
+
fd: c_int,
|
|
513
|
+
iov_ptr: *const u8,
|
|
514
|
+
iov_count: u32,
|
|
515
|
+
) -> c_int {
|
|
516
|
+
if ptr.is_null() || iov_ptr.is_null() {
|
|
517
|
+
return -EINVAL;
|
|
518
|
+
}
|
|
519
|
+
#[cfg(target_os = "linux")]
|
|
520
|
+
{
|
|
521
|
+
catch_unwind(std::panic::AssertUnwindSafe(|| {
|
|
522
|
+
// SAFETY: caller guarantees `iov_ptr` points to `iov_count`
|
|
523
|
+
// valid `libc::iovec` entries with the same layout.
|
|
524
|
+
match (*ptr).submit_send(fd, iov_ptr as *const libc::iovec, iov_count) {
|
|
525
|
+
Ok(()) => 0,
|
|
526
|
+
Err(e) => -e,
|
|
527
|
+
}
|
|
528
|
+
}))
|
|
529
|
+
.unwrap_or(-EINVAL)
|
|
530
|
+
}
|
|
531
|
+
#[cfg(not(target_os = "linux"))]
|
|
532
|
+
{
|
|
533
|
+
let _ = (ptr, fd, iov_ptr, iov_count);
|
|
534
|
+
-38 // -ENOSYS
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
/// Submit pending SQEs and wait for at least `min_complete` CQEs.
|
|
539
|
+
/// Writes up to `out_cap` `Completion` structs into `out`.
|
|
540
|
+
///
|
|
541
|
+
/// Returns the number of completions written, or `-1` on ring failure
|
|
542
|
+
/// (ring is marked unhealthy after this).
|
|
543
|
+
///
|
|
544
|
+
/// `out` must point to a buffer of at least `out_cap * 24` bytes
|
|
545
|
+
/// (24 = `size_of::<Completion>()` on 64-bit).
|
|
546
|
+
#[no_mangle]
|
|
547
|
+
pub unsafe extern "C" fn hyperion_io_uring_hotpath_wait_completions(
|
|
548
|
+
ptr: *mut HotpathRing,
|
|
549
|
+
min_complete: u32,
|
|
550
|
+
timeout_ms: u32,
|
|
551
|
+
out: *mut Completion,
|
|
552
|
+
out_cap: u32,
|
|
553
|
+
) -> c_int {
|
|
554
|
+
if ptr.is_null() || (out_cap > 0 && out.is_null()) {
|
|
555
|
+
return -EINVAL;
|
|
556
|
+
}
|
|
557
|
+
catch_unwind(std::panic::AssertUnwindSafe(|| {
|
|
558
|
+
(*ptr).wait_completions(min_complete, timeout_ms, out, out_cap)
|
|
559
|
+
}))
|
|
560
|
+
.unwrap_or(-EINVAL)
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
/// Release `buf_id` back to the kernel's buffer pool.
|
|
564
|
+
/// Must be called once per recv CQE whose `buf_id >= 0`.
|
|
565
|
+
#[no_mangle]
|
|
566
|
+
pub unsafe extern "C" fn hyperion_io_uring_hotpath_release_buffer(
|
|
567
|
+
ptr: *mut HotpathRing,
|
|
568
|
+
buf_id: u16,
|
|
569
|
+
) {
|
|
570
|
+
let _ = catch_unwind(std::panic::AssertUnwindSafe(|| {
|
|
571
|
+
if !ptr.is_null() {
|
|
572
|
+
(*ptr).release_buffer(buf_id);
|
|
573
|
+
}
|
|
574
|
+
}));
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
/// Force `is_healthy` to return false.
|
|
578
|
+
#[no_mangle]
|
|
579
|
+
pub unsafe extern "C" fn hyperion_io_uring_hotpath_force_unhealthy(
|
|
580
|
+
ptr: *mut HotpathRing,
|
|
581
|
+
) {
|
|
582
|
+
let _ = catch_unwind(std::panic::AssertUnwindSafe(|| {
|
|
583
|
+
if !ptr.is_null() {
|
|
584
|
+
(*ptr).force_unhealthy();
|
|
585
|
+
}
|
|
586
|
+
}));
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
/// Copy `len` bytes from buffer `buf_id` into the caller-supplied output
|
|
590
|
+
/// buffer `out_ptr` (capacity `out_cap` bytes). Returns the number of
|
|
591
|
+
/// bytes written on success, or a negative errno on failure.
|
|
592
|
+
///
|
|
593
|
+
/// This is the "one-copy" recv-data path for Task 2.3.4: the kernel
|
|
594
|
+
/// has filled the buffer-ring slot; we copy into Ruby's string buffer,
|
|
595
|
+
/// then the caller calls `release_buffer` so the kernel can reuse the
|
|
596
|
+
/// slot. The copy is one `memcpy`-equivalent via `ptr::copy_nonoverlapping`.
|
|
597
|
+
///
|
|
598
|
+
/// Returns -22 (`-EINVAL`) on null/invalid arguments.
|
|
599
|
+
#[no_mangle]
|
|
600
|
+
pub unsafe extern "C" fn hyperion_io_uring_hotpath_copy_buffer(
|
|
601
|
+
ptr: *mut HotpathRing,
|
|
602
|
+
buf_id: u16,
|
|
603
|
+
len: u32,
|
|
604
|
+
out_ptr: *mut u8,
|
|
605
|
+
out_cap: u32,
|
|
606
|
+
) -> c_int {
|
|
607
|
+
if ptr.is_null() || out_ptr.is_null() {
|
|
608
|
+
return -EINVAL;
|
|
609
|
+
}
|
|
610
|
+
catch_unwind(std::panic::AssertUnwindSafe(|| {
|
|
611
|
+
if len > out_cap {
|
|
612
|
+
return -EINVAL;
|
|
613
|
+
}
|
|
614
|
+
// SAFETY: buf_id is valid (caller's responsibility per BufferRing::borrow
|
|
615
|
+
// contract — buf_id came from a CQE on this ring). len <= buf_size
|
|
616
|
+
// is guaranteed by the kernel (recv never writes more than buf_size).
|
|
617
|
+
// out_ptr is valid for out_cap >= len bytes (caller contract).
|
|
618
|
+
#[cfg(target_os = "linux")]
|
|
619
|
+
{
|
|
620
|
+
let view = (*ptr).buffer_ring.borrow(buf_id, len as usize);
|
|
621
|
+
std::ptr::copy_nonoverlapping(view.as_ptr(), out_ptr, len as usize);
|
|
622
|
+
len as c_int
|
|
623
|
+
}
|
|
624
|
+
#[cfg(not(target_os = "linux"))]
|
|
625
|
+
{
|
|
626
|
+
let _ = (buf_id, len, out_ptr, out_cap);
|
|
627
|
+
-38 // -ENOSYS
|
|
628
|
+
}
|
|
629
|
+
}))
|
|
630
|
+
.unwrap_or(-EINVAL)
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
/// Returns 1 if the ring is healthy, 0 otherwise.
|
|
634
|
+
#[no_mangle]
|
|
635
|
+
pub unsafe extern "C" fn hyperion_io_uring_hotpath_is_healthy(
|
|
636
|
+
ptr: *mut HotpathRing,
|
|
637
|
+
) -> c_int {
|
|
638
|
+
if ptr.is_null() {
|
|
639
|
+
return 0;
|
|
640
|
+
}
|
|
641
|
+
catch_unwind(std::panic::AssertUnwindSafe(|| {
|
|
642
|
+
if (*ptr).is_healthy() { 1 } else { 0 }
|
|
643
|
+
}))
|
|
644
|
+
.unwrap_or(0)
|
|
645
|
+
}
|
|
@@ -47,9 +47,13 @@
|
|
|
47
47
|
|
|
48
48
|
#![allow(clippy::missing_safety_doc)]
|
|
49
49
|
|
|
50
|
+
mod buffer_ring;
|
|
51
|
+
pub mod hotpath;
|
|
52
|
+
pub use hotpath::Completion as HotpathCompletion;
|
|
53
|
+
|
|
50
54
|
use std::os::raw::{c_int, c_uchar, c_uint};
|
|
51
55
|
|
|
52
|
-
const ABI_VERSION: u32 =
|
|
56
|
+
const ABI_VERSION: u32 = 2;
|
|
53
57
|
|
|
54
58
|
// ---------- ABI version + probe ----------
|
|
55
59
|
|