hyperion-rb 2.16.3 → 2.16.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +75 -0
- data/ext/hyperion_http/extconf.rb +9 -0
- data/ext/hyperion_http/parser.c +93 -21
- data/ext/hyperion_http/response_writer.c +604 -0
- data/ext/hyperion_http/response_writer.h +28 -0
- data/ext/hyperion_io_uring/Cargo.lock +1 -1
- data/ext/hyperion_io_uring/Cargo.toml +1 -1
- data/ext/hyperion_io_uring/src/buffer_ring.rs +319 -0
- data/ext/hyperion_io_uring/src/hotpath.rs +645 -0
- data/ext/hyperion_io_uring/src/lib.rs +5 -1
- data/lib/hyperion/cli.rb +23 -0
- data/lib/hyperion/config.rb +9 -0
- data/lib/hyperion/connection.rb +209 -1
- data/lib/hyperion/http/response_writer.rb +46 -0
- data/lib/hyperion/io_uring.rb +270 -5
- data/lib/hyperion/response_writer.rb +91 -1
- data/lib/hyperion/server.rb +200 -4
- data/lib/hyperion/version.rb +1 -1
- data/lib/hyperion.rb +1 -0
- metadata +6 -1
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
//! `IORING_REGISTER_PBUF_RING` (Linux 5.19+) — kernel-managed receive
|
|
2
|
+
//! buffer pool. The ring registers N buffers of M bytes each; the
|
|
3
|
+
//! kernel hands back a buffer-id in each recv CQE. Caller borrows the
|
|
4
|
+
//! buffer (zero-copy view), consumes the bytes, then `release`s the
|
|
5
|
+
//! buffer-id back to the kernel so it can be refilled.
|
|
6
|
+
//!
|
|
7
|
+
//! Plan #2 (io_uring hot-path roadmap), Task 2.1.2. Linux-only;
|
|
8
|
+
//! non-Linux builds see `stub_impl` which always returns ENOSYS from
|
|
9
|
+
//! `new()` so the caller can fall through to the accept4 path cleanly.
|
|
10
|
+
//!
|
|
11
|
+
//! ## Memory layout
|
|
12
|
+
//!
|
|
13
|
+
//! PBUF_RING uses a single contiguous, **page-aligned** memory region as
|
|
14
|
+
//! a producer/consumer ring of `io_uring_buf` entries (16 bytes each:
|
|
15
|
+
//! `addr:u64 | len:u32 | bid:u16 | resv:u16`). The kernel treats
|
|
16
|
+
//! `ring[0].resv` as the tail counter it polls to discover newly-released
|
|
17
|
+
//! buffers; userspace increments it (with Release ordering) after writing
|
|
18
|
+
//! the entry's addr/len/bid. The actual receive data lands in a
|
|
19
|
+
//! separate `backing` allocation whose slices are pointed to by the ring
|
|
20
|
+
//! entries.
|
|
21
|
+
//!
|
|
22
|
+
//! The ring and backing allocations are kept alive by this struct.
|
|
23
|
+
//! The kernel's registration holds a reference to the *ring* memory; if
|
|
24
|
+
//! the IoUring is dropped before this BufferRing, the registration
|
|
25
|
+
//! becomes stale — the caller (Task 2.1.3's `HotpathRing`) is
|
|
26
|
+
//! responsible for drop ordering.
|
|
27
|
+
|
|
28
|
+
#[cfg(target_os = "linux")]
mod linux_impl {
    use io_uring::{types::BufRingEntry, IoUring, squeue, cqueue};
    use std::alloc::{alloc_zeroed, dealloc, Layout};
    use std::sync::atomic::{AtomicU16, Ordering};

    /// Kernel-managed receive buffer pool for one io_uring instance.
    ///
    /// `group_id` (`bgid`) identifies the pool; recv SQEs reference it so
    /// the kernel knows which pool to pull a buffer from and return the
    /// buffer-id in `cqe.flags >> IORING_CQE_BUFFER_SHIFT`.
    pub struct BufferRing {
        /// Buffer group id passed to recv SQEs and to `register_buf_ring`.
        pub group_id: u16,
        /// Number of buffers in the ring. Must be a power of two (the
        /// kernel rejects registration otherwise) and the kernel enforces
        /// `ring_entries <= 32768`. NOTE: `release` relies on the
        /// power-of-two invariant for its slot mask; the invariant holds
        /// for every constructed `BufferRing` because `new` only returns
        /// `Ok` after the kernel accepted the registration.
        pub n_bufs: u16,
        /// Size of each individual receive buffer in bytes.
        pub buf_size: u32,

        /// Page-aligned ring memory: N `BufRingEntry` (16 bytes each).
        /// The kernel reads from this to discover available buffers.
        /// Must stay pinned until the ring is unregistered.
        ring_ptr: *mut BufRingEntry,
        /// Layout used for `ring_ptr`'s allocation; needed to `dealloc`.
        ring_layout: Layout,

        /// Backing storage for the actual receive data. Slice `buf_id`
        /// starts at `buf_id as usize * buf_size as usize`.
        backing_ptr: *mut u8,
        /// Layout used for `backing_ptr`'s allocation; needed to `dealloc`.
        backing_layout: Layout,

        /// Shadow of the tail counter. The authoritative tail lives at
        /// `ring[0].resv` — this mirror lets `release` compute the slot
        /// index without re-reading the (volatile) kernel-shared field.
        /// AtomicU16 for forward-compatibility with a future SQPOLL path
        /// that might race; under the GVL today a Cell<u16> would suffice.
        tail: AtomicU16,
    }

    // SAFETY: BufferRing owns its raw allocations and the ring_ptr /
    // backing_ptr are not shared across threads (one ring per worker
    // process; the GVL is held during every call into this struct).
    // Note the type is deliberately Send but NOT Sync: `&self` methods
    // mutate ring memory, so concurrent shared access would be unsound.
    unsafe impl Send for BufferRing {}

    impl BufferRing {
        /// Allocate the ring and backing memory, then register the buffer
        /// ring with the kernel via `IORING_REGISTER_PBUF_RING`.
        ///
        /// Returns `Err` with the OS errno on kernel rejection (e.g.
        /// `EINVAL` if `n_bufs` is not a power of two or exceeds 32768,
        /// `ENOSYS` on kernels < 5.19, `EPERM` under seccomp, etc.).
        /// All allocations made here are freed on every error path.
        ///
        /// # Panics
        ///
        /// Panics if `n_bufs == 0` or `buf_size == 0` (programming error).
        pub fn new(
            ring: &mut IoUring<squeue::Entry, cqueue::Entry>,
            group_id: u16,
            n_bufs: u16,
            buf_size: u32,
        ) -> std::io::Result<Self> {
            assert!(n_bufs > 0, "n_bufs must be > 0");
            assert!(buf_size > 0, "buf_size must be > 0");

            // --- Allocate the page-aligned ring entries ---
            //
            // The kernel requires the ring base address to be page-aligned.
            // Each BufRingEntry is 16 bytes (size_of::<io_uring_buf>()).
            let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize };
            let ring_bytes = (n_bufs as usize) * std::mem::size_of::<BufRingEntry>();
            // Round up to a full page so the allocation is page-aligned.
            let ring_alloc_bytes = round_up(ring_bytes, page_size);
            let ring_layout = Layout::from_size_align(ring_alloc_bytes, page_size)
                .map_err(|_| std::io::Error::from_raw_os_error(libc::EINVAL))?;

            // SAFETY: layout has non-zero size and valid alignment.
            let ring_ptr = unsafe { alloc_zeroed(ring_layout) as *mut BufRingEntry };
            if ring_ptr.is_null() {
                return Err(std::io::Error::from_raw_os_error(libc::ENOMEM));
            }

            // --- Allocate the backing receive buffers ---
            // (u16 * u32 widened to usize cannot overflow on 64-bit.)
            let backing_bytes = (n_bufs as usize) * (buf_size as usize);
            // 64-byte alignment keeps each buffer on a cache line boundary.
            let backing_layout = Layout::from_size_align(backing_bytes, 64)
                .map_err(|_| {
                    // Undo the ring allocation before surfacing the error.
                    unsafe { dealloc(ring_ptr as *mut u8, ring_layout) };
                    std::io::Error::from_raw_os_error(libc::EINVAL)
                })?;
            let backing_ptr = unsafe { alloc_zeroed(backing_layout) };
            if backing_ptr.is_null() {
                unsafe { dealloc(ring_ptr as *mut u8, ring_layout) };
                return Err(std::io::Error::from_raw_os_error(libc::ENOMEM));
            }

            // --- Populate the ring entries before registration ---
            //
            // We must fill addr/len/bid for all N slots and set the initial
            // tail (in ring[0].resv) to N so the kernel sees all buffers as
            // available immediately after registration.
            for i in 0..n_bufs {
                let buf_offset = (i as usize) * (buf_size as usize);
                // SAFETY: ring_ptr is valid for n_bufs entries.
                let entry = unsafe { &mut *ring_ptr.add(i as usize) };
                entry.set_addr(unsafe { backing_ptr.add(buf_offset) } as u64);
                entry.set_len(buf_size);
                entry.set_bid(i);
            }
            // Write the initial tail into ring[0].resv. The kernel begins
            // reading from tail=0, so setting tail=n_bufs makes all N
            // buffers available (the ring wraps modulo n_bufs). No fence is
            // needed here: the register syscall below is a full barrier.
            // SAFETY: ring_ptr is valid; BufRingEntry::tail returns a pointer
            // into the first entry's resv field.
            unsafe {
                let tail_ptr = BufRingEntry::tail(ring_ptr) as *mut u16;
                tail_ptr.write_volatile(n_bufs);
            }

            // --- Register with the kernel ---
            //
            // io-uring 0.6.4: `Submitter::register_buf_ring(ring_addr, ring_entries, bgid)`.
            // The kernel holds the registration until `unregister_buf_ring` or ring close.
            unsafe {
                ring.submitter()
                    .register_buf_ring(ring_ptr as u64, n_bufs, group_id)
                    .map_err(|e| {
                        // Free allocations on registration failure.
                        dealloc(backing_ptr, backing_layout);
                        dealloc(ring_ptr as *mut u8, ring_layout);
                        e
                    })?;
            }

            Ok(BufferRing {
                group_id,
                n_bufs,
                buf_size,
                ring_ptr,
                ring_layout,
                backing_ptr,
                backing_layout,
                // Mirror of the tail we just wrote.
                tail: AtomicU16::new(n_bufs),
            })
        }

        /// Borrow a read-only view into the kernel-filled buffer `buf_id`.
        ///
        /// The slice is valid until the next `release(buf_id)` call — the
        /// kernel may overwrite the memory the moment the buffer is released.
        /// Callers **must not** hold the slice across fiber yield points or
        /// after calling `release`.
        ///
        /// # Safety
        ///
        /// - `buf_id` must be a valid id returned by a recv CQE on this ring.
        /// - `len` must be `<= buf_size` (the kernel writes at most `buf_size`
        ///   bytes).
        pub unsafe fn borrow(&self, buf_id: u16, len: usize) -> &[u8] {
            debug_assert!((buf_id as usize) < (self.n_bufs as usize));
            debug_assert!(len <= self.buf_size as usize);
            let offset = (buf_id as usize) * (self.buf_size as usize);
            // SAFETY: backing_ptr is valid for the full backing allocation;
            // offset is within range by the invariants above.
            std::slice::from_raw_parts(self.backing_ptr.add(offset), len)
        }

        /// Release `buf_id` back to the kernel.
        ///
        /// Re-writes the ring entry (addr/len/bid) and increments the tail
        /// counter. No syscall is required — the kernel polls the tail in
        /// shared memory.
        pub fn release(&self, buf_id: u16) {
            // Shadow tail is purely local state under the GVL; Relaxed is
            // sufficient. The cross-domain ordering with the kernel is
            // enforced by the explicit Release fence below before the
            // tail-pointer store.
            let shadow_tail = self.tail.fetch_add(1, Ordering::Relaxed);
            // The mask is only correct because n_bufs is a power of two —
            // guaranteed, since the kernel would have rejected registration
            // (and `new` would have returned Err) otherwise.
            let slot = (shadow_tail as usize) & (self.n_bufs as usize - 1);

            // Re-publish the buffer at the slot.
            let buf_offset = (buf_id as usize) * (self.buf_size as usize);
            // SAFETY: ring_ptr is valid; slot < n_bufs by the mask above.
            unsafe {
                let entry = &mut *self.ring_ptr.add(slot);
                entry.set_addr(self.backing_ptr.add(buf_offset) as u64);
                entry.set_len(self.buf_size);
                entry.set_bid(buf_id);
            }

            // Store-Release barrier: the slot writes above MUST be visible
            // to the kernel before the tail increment is. write_volatile
            // alone is not a barrier on ARM (DMB ST is needed); on x86 TSO
            // makes this redundant but the fence is free there. Without
            // this fence, ARM kernels could observe the tail increment
            // before the slot writes and pick up stale buffer pointers.
            // Mirrors liburing's io_uring_buf_ring_advance which uses
            // smp_store_release on the tail.
            std::sync::atomic::fence(Ordering::Release);
            // SAFETY: ring_ptr is valid; tail() points to ring[0].resv.
            unsafe {
                let tail_ptr = BufRingEntry::tail(self.ring_ptr) as *mut u16;
                // wrapping_add handles u16 overflow correctly (the kernel
                // also uses wrapping arithmetic on this counter).
                tail_ptr.write_volatile(shadow_tail.wrapping_add(1));
            }
        }

        /// Accessors for callers that need read-only metadata.
        pub fn group_id(&self) -> u16 { self.group_id }
        pub fn n_bufs(&self) -> u16 { self.n_bufs }
        pub fn buf_size(&self) -> u32 { self.buf_size }
    }

    impl Drop for BufferRing {
        fn drop(&mut self) {
            // CRITICAL CONTRACT for HotpathRing (Task 2.1.3):
            //
            // Before this Drop runs, the owner MUST have called
            // `ring.submitter().unregister_buf_ring(self.group_id)` on the
            // associated IoUring, OR have dropped the IoUring (which closes
            // the ring fd and tears down the registration kernel-side).
            //
            // Otherwise the kernel retains a registration pointing to the
            // memory we are about to free, and the next multishot recv CQE
            // can write into freed userspace memory — a kernel-side
            // use-after-free, NOT a benign leak.
            //
            // HotpathRing's own Drop impl must enforce the order:
            //   1. unregister_buf_ring(group_id)
            //   2. drop(BufferRing)   ← this Drop runs here
            //   3. drop(IoUring)
            //
            // SAFETY: backing_ptr / ring_ptr / *_layout are valid; the
            // owner has guaranteed (per contract above) that the kernel
            // is no longer accessing this memory.
            unsafe { dealloc(self.backing_ptr, self.backing_layout) };
            unsafe { dealloc(self.ring_ptr as *mut u8, self.ring_layout) };
        }
    }

    /// Round `n` up to the nearest multiple of `align` (which must be a
    /// power of two).
    #[inline]
    fn round_up(n: usize, align: usize) -> usize {
        (n + align - 1) & !(align - 1)
    }
}
|
|
276
|
+
|
|
277
|
+
// ===== Non-Linux stub =====
|
|
278
|
+
//
|
|
279
|
+
// On Darwin / BSD the entire io-uring dep is gated out; we compile a
|
|
280
|
+
// zero-cost stub that always returns ENOSYS from `new()`. The Ruby
|
|
281
|
+
// caller probes with `IOUring.supported?` before reaching this code
|
|
282
|
+
// in practice, but the stub ensures the macOS cdylib links cleanly.
|
|
283
|
+
|
|
284
|
+
#[cfg(not(target_os = "linux"))]
mod stub_impl {
    /// Non-Linux stub — never instantiated in practice. Exists so the
    /// cdylib links cleanly on Darwin / BSD, where the io-uring dep is
    /// gated out entirely.
    pub struct BufferRing {
        pub group_id: u16,
        pub n_bufs: u16,
        pub buf_size: u32,
    }

    impl BufferRing {
        /// Always fails: `IORING_REGISTER_PBUF_RING` is a Linux-only
        /// (>= 5.19) io_uring feature, so the caller falls through to the
        /// portable accept4 path.
        ///
        /// # Errors
        ///
        /// Always returns `ErrorKind::Unsupported` — the same `ErrorKind`
        /// that Linux's `ENOSYS` maps to. The previous hard-coded
        /// `from_raw_os_error(38)` was wrong on the only platforms this
        /// stub compiles on: raw errno 38 means ENOSYS on Linux but names
        /// an unrelated error on Darwin/BSD, misleading any caller that
        /// inspects the error.
        pub fn new(
            _ring: &mut (),
            _group_id: u16,
            _n_bufs: u16,
            _buf_size: u32,
        ) -> std::io::Result<Self> {
            Err(std::io::Error::new(
                std::io::ErrorKind::Unsupported,
                "IORING_REGISTER_PBUF_RING requires Linux >= 5.19",
            ))
        }

        /// SAFETY: never called on non-Linux (new() always errors first),
        /// so the empty slice is unreachable in practice.
        pub unsafe fn borrow(&self, _buf_id: u16, _len: usize) -> &[u8] {
            &[]
        }

        /// No-op: there is no kernel ring to return buffers to.
        pub fn release(&self, _buf_id: u16) {}

        // Read-only metadata accessors, mirroring the Linux impl's API.
        pub fn group_id(&self) -> u16 { self.group_id }
        pub fn n_bufs(&self) -> u16 { self.n_bufs }
        pub fn buf_size(&self) -> u32 { self.buf_size }
    }
}
|
|
315
|
+
|
|
316
|
+
#[cfg(target_os = "linux")]
|
|
317
|
+
pub use linux_impl::BufferRing;
|
|
318
|
+
#[cfg(not(target_os = "linux"))]
|
|
319
|
+
pub use stub_impl::BufferRing;
|