hyperion-rb 2.16.3 → 2.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,604 @@
+ /* response_writer.c — Hyperion::Http::ResponseWriter
+  *
+  * Direct-syscall response writer for plain-TCP kernel fds. Bypasses
+  * Ruby IO machinery (encoding, fiber-yield checks, GVL release/
+  * acquire) on the buffered hot path. TLS / non-fd / page-cache /
+  * sendfile callers fall through to the Ruby ResponseWriter at the
+  * dispatcher in response_writer.rb. */
+
+ #include <ruby.h>
+ #include <ruby/io.h>
+ #include <sys/types.h>
+ #include <sys/uio.h>
+ #include <sys/socket.h>
+ #include <errno.h>
+ #include <stdint.h>
+ #include <unistd.h>
+ #include <string.h>
+ #include <dlfcn.h>
+
+ #include "response_writer.h"
+
+ /* macOS lacks MSG_NOSIGNAL; fall back to 0 (no flag). Safe in a Ruby
+  * process: MRI installs its own SIGPIPE handler that swallows the
+  * signal, so the failing write returns EPIPE instead of killing the
+  * process. Our C sendmsg/writev calls run under the GVL, so the same
+  * handler intercepts SIGPIPE for them. */
+ #ifndef MSG_NOSIGNAL
+ #define MSG_NOSIGNAL 0
+ #endif
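+
+ /* Worked consequence: the peer closes, we sendmsg() below, the kernel
+  * raises SIGPIPE, MRI's handler swallows it, sendmsg returns -1 with
+  * errno == EPIPE, and rb_sys_fail raises Errno::EPIPE, which the
+  * dispatcher is assumed to rescue as a normal client-gone teardown. */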
+
+ static VALUE rb_mHyperion;
+ static VALUE rb_mHttp;
+ static VALUE rb_mResponseWriter;
+
+ /* IDs cached at init time — avoids rb_intern on the hot path. */
+ static ID id_fileno;
+ static ID id_each;
+ static ID id_hyp_flush; /* :__hyperion_flush__ chunked-drain sentinel */
+
+ /* Plan #2 seam: function pointer for hyperion_io_uring's send-SQE
+  * submission. Resolved lazily on the first call to
+  * c_write_buffered_via_ring via dlsym(RTLD_DEFAULT, ...). NULL when the
+  * io_uring crate isn't loaded yet — the via-ring path short-circuits to
+  * a direct write in that case.
+  *
+  * Order-of-loading note: Init_hyperion_response_writer runs when
+  * hyperion_http.bundle is required (early boot, before io_uring.rb loads
+  * the io_uring cdylib). Doing the dlsym here would always return NULL.
+  * Instead we retry on the first call so the symbol is found AFTER
+  * lib/hyperion/io_uring.rb has called Fiddle.dlopen on the cdylib. */
+ static int (*hyp_submit_send_fn)(void *, int, const void *, unsigned int) = NULL;
+
+ /* Pre-baked frozen Ruby Strings for the 23 common reason phrases.
+  * Built once at init; looked up by status code in c_write_buffered.
+  * Eliminates the per-request rb_str_new_cstr allocation that would
+  * otherwise fire on every response. Statuses outside the table fall
+  * back to the shared pre-baked k_reason_unknown ("Unknown"), also
+  * allocation-free per call. */
+ #define HYP_REASON_TABLE_SIZE 23
+ static int k_reason_statuses[HYP_REASON_TABLE_SIZE] = {
+     200, 201, 204, 301, 302, 304, 400, 401, 403, 404, 405, 408,
+     409, 410, 413, 414, 422, 429, 500, 501, 502, 503, 504
+ };
+ static VALUE k_reason_strings[HYP_REASON_TABLE_SIZE];
+ static VALUE k_reason_unknown;
+
+ static VALUE c_response_writer_available_p(VALUE self) {
+     (void)self;
+     return Qtrue;
+ }
+
+ /* Sentinel returned to Ruby on EAGAIN — the dispatcher sees this and
+  * falls back to io.write (which yields under Async / blocks under a
+  * thread pool correctly). We don't reimplement scheduler-aware parking
+  * in C. */
+ #define HYP_C_WRITE_WOULDBLOCK -2
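+
+ /* Sketch of the Ruby-side contract this sentinel implies (hypothetical
+  * dispatcher shape; the real one lives in response_writer.rb):
+  *
+  *   n = ResponseWriter.c_write_buffered(io, status, headers, body,
+  *                                       keep_alive, date)
+  *   if n == ResponseWriter::WOULDBLOCK
+  *     slow_path_write(io, ...)   # parks the fiber / blocks the thread
+  *   end
+  */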
+
+ /* Maximum iov entries we build on the stack: 1 (head) + up to
+  * HYP_C_IOV_MAX-1 body chunks. Cap at 8 so a pathological 100-element
+  * Array body coalesces into one buffer rather than blowing the stack.
+  * Normal Rack apps emit Array[1] bodies; Array[2..7] is the uncommon
+  * multi-part case; Array[8+] coalesces. */
+ #define HYP_C_IOV_MAX 8
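+
+ /* Worked example of the resulting iov layout (see c_write_buffered):
+  *   body = ["<html>", "...", "</html>"]  (3 chunks)
+  *     → iov[0] = head, iov[1..3] = chunks: 4 iovs, one syscall.
+  *   body with 20 chunks
+  *     → iov[0] = head, iov[1] = coalesced copy: 2 iovs, one syscall,
+  *       plus one Ruby String allocation for the copy. */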
+
+ /* Look up the cached reason String for `status`. Returns a frozen
+  * Ruby String for the 23 common statuses (zero allocation), or
+  * k_reason_unknown ("Unknown") for anything else. */
+ static inline VALUE hyp_lookup_reason(int status) {
+     for (int i = 0; i < HYP_REASON_TABLE_SIZE; i++) {
+         if (k_reason_statuses[i] == status) return k_reason_strings[i];
+     }
+     return k_reason_unknown;
+ }
+
+ /* Issue one sendmsg/writev with `iov_count` iovecs. Returns total
+  * bytes written, or HYP_C_WRITE_WOULDBLOCK on EAGAIN/EWOULDBLOCK when
+  * nothing has been written yet. Raises Errno::EAGAIN if the kernel
+  * blocks after partial progress (the caller cannot safely retry the
+  * whole response without duplicating bytes on the wire), and raises
+  * SystemCallError on hard errors. Handles short writes (rare on a
+  * non-blocking socket with room in the kernel send buffer) by
+  * advancing the iov and looping. EINTR is retried up to 3 times. */
+ static ssize_t hyp_writev_all(int fd, struct iovec *iov, int iov_count) {
+     ssize_t total = 0;
+     int retries = 0;
+
+     for (;;) {
+ #ifdef HAVE_SENDMSG
+         struct msghdr msg;
+         memset(&msg, 0, sizeof(msg));
+         msg.msg_iov = iov;
+         msg.msg_iovlen = (int)iov_count;
+         ssize_t n = sendmsg(fd, &msg, MSG_NOSIGNAL);
+ #else
+         ssize_t n = writev(fd, iov, iov_count);
+ #endif
+         if (n >= 0) {
+             total += n;
+             /* Compute remaining bytes across all iov slots. */
+             ssize_t remaining = 0;
+             for (int i = 0; i < iov_count; i++)
+                 remaining += (ssize_t)iov[i].iov_len;
+             if (n == remaining) return total;
+
+             /* Short write — advance iov past the bytes already sent. */
+             ssize_t skipped = 0;
+             int i = 0;
+             while (i < iov_count &&
+                    skipped + (ssize_t)iov[i].iov_len <= n) {
+                 skipped += (ssize_t)iov[i].iov_len;
+                 i++;
+             }
+             if (i < iov_count) {
+                 iov[i].iov_base =
+                     (char *)iov[i].iov_base + (n - skipped);
+                 iov[i].iov_len -= (size_t)(n - skipped);
+             }
+             iov += i;
+             iov_count -= i;
+             continue;
+         }
+
+         if (errno == EINTR) {
+             if (++retries > 3)
+                 rb_sys_fail("sendmsg/writev: EINTR retries exhausted");
+             continue;
+         }
+         if (errno == EAGAIN || errno == EWOULDBLOCK) {
+             if (total > 0)
+                 rb_sys_fail("sendmsg/writev: EAGAIN after partial write");
+             return HYP_C_WRITE_WOULDBLOCK;
+         }
+         rb_sys_fail("sendmsg/writev failed");
+     }
+ }
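+
+ /* Worked example of the short-write advance above: iov lens {100, 200}
+  * and the kernel accepts n = 150. Slot 0 (100 bytes) is fully consumed
+  * (skipped = 100, i = 1); slot 1's base advances by 50 and its len
+  * drops to 150; the loop retries with the single remaining iov. */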
+
+ /* Hyperion::Http::ResponseWriter.c_write_buffered(io, status, headers,
+  *                                                 body, keep_alive,
+  *                                                 date_str) -> Integer
+  *
+  * Writes a complete HTTP/1.1 response (head + body) to the kernel fd
+  * underlying `io` in a single sendmsg/writev call. Validates header
+  * values for CR/LF injection and body chunks for type safety before
+  * issuing the syscall.
+  *
+  * Returns total bytes written on success.
+  * Returns HYP_C_WRITE_WOULDBLOCK (-2) on EAGAIN before any byte is
+  * written — the caller falls back to io.write (which parks the fiber /
+  * blocks the thread correctly).
+  * Raises rb_eArgError on CR/LF in header values.
+  * Raises rb_eTypeError on non-String body chunks.
+  * Raises Errno::EAGAIN on backpressure after a partial write.
+  * Raises SystemCallError on other hard write failures. */
+ static VALUE c_write_buffered(VALUE self, VALUE io, VALUE rb_status,
+                               VALUE rb_headers, VALUE rb_body,
+                               VALUE rb_keep_alive, VALUE rb_date) {
+     (void)self;
+
+     /* 1. Type checks up front — fail fast on bad shapes before any
+      * syscall. Header CR/LF validation and value coercion happen
+      * inside cbuild_response_head (build_head_each), so we don't
+      * duplicate them here. */
+     Check_Type(rb_headers, T_HASH);
+     Check_Type(rb_body, T_ARRAY);
+
+     /* 2. Resolve fd from the Ruby IO object. rb_funcall can GC; do it
+      * before we take any raw C pointers into Ruby objects. */
+     int fd = NUM2INT(rb_funcall(io, id_fileno, 0));
+
+     /* 3. Body type check and byte-size sum.
+      * RARRAY_AREF is safe while rb_body is live on the C stack. */
+     long body_size = 0;
+     long body_len = RARRAY_LEN(rb_body);
+     for (long i = 0; i < body_len; i++) {
+         VALUE chunk = RARRAY_AREF(rb_body, i);
+         Check_Type(chunk, T_STRING);
+         body_size += RSTRING_LEN(chunk);
+     }
+
+     /* 4. Build the response head.
+      * hyperion_build_response_head lives in parser.c and is exported
+      * via response_writer.h. The reason String comes from the pre-baked
+      * frozen-String table — zero allocation for the 23 common statuses;
+      * only unknown statuses fall back to k_reason_unknown.
+      * cbuild_response_head's build_head_each performs the CR/LF guard
+      * and rb_obj_as_string coercion on header values, matching the
+      * Ruby fallback's semantics exactly. */
+     int status = NUM2INT(rb_status);
+     VALUE rb_reason = hyp_lookup_reason(status);
+     VALUE head = hyperion_build_response_head(
+         rb_status, rb_reason, rb_headers,
+         LL2NUM(body_size), rb_keep_alive, rb_date
+     );
+
+     /* 5. Assemble iovec: slot 0 = response head; slots 1..N = body chunks
+      * (capped at HYP_C_IOV_MAX-1). Bodies longer than HYP_C_IOV_MAX-1
+      * chunks are coalesced into a single buffer allocated here. */
+     struct iovec iov[HYP_C_IOV_MAX];
+     iov[0].iov_base = RSTRING_PTR(head);
+     iov[0].iov_len = (size_t)RSTRING_LEN(head);
+     int iov_count = 1;
+
+     /* Hold a reference so GC can't reap the coalesced buffer before
+      * the syscall completes. Qnil means "not used". */
+     VALUE coalesced = Qnil;
+
+     if (body_len <= (long)(HYP_C_IOV_MAX - 1)) {
+         /* Fast path: each chunk gets its own iov slot. The Array `rb_body`
+          * is a GC root that pins all its elements for our call duration. */
+         for (long i = 0; i < body_len; i++) {
+             VALUE chunk = RARRAY_AREF(rb_body, i);
+             iov[iov_count].iov_base = RSTRING_PTR(chunk);
+             iov[iov_count].iov_len = (size_t)RSTRING_LEN(chunk);
+             iov_count++;
+         }
+     } else {
+         /* Slow path: coalesce into one buffer to keep iov_count bounded.
+          * This branch fires only for Array bodies with >= 8 chunks — rare
+          * in practice. We accept the one-time allocation. */
+         coalesced = rb_str_buf_new(body_size);
+         for (long i = 0; i < body_len; i++)
+             rb_str_buf_append(coalesced, RARRAY_AREF(rb_body, i));
+         iov[1].iov_base = RSTRING_PTR(coalesced);
+         iov[1].iov_len = (size_t)RSTRING_LEN(coalesced);
+         iov_count = 2;
+     }
+
+     ssize_t n = hyp_writev_all(fd, iov, iov_count);
+
+     /* GC-safety: keep `head` and `coalesced` (when used) alive across
+      * the syscall. -O2 can elide local Ruby Strings whose only use is
+      * the RSTRING_PTR at iov assembly; MRI's conservative GC stack
+      * scan would then miss them. RB_GC_GUARD is the project-standard
+      * idiom (parser.c uses it 9 times for the same pattern). */
+     RB_GC_GUARD(head);
+     RB_GC_GUARD(coalesced);
+
+     if (n == HYP_C_WRITE_WOULDBLOCK) return INT2NUM(HYP_C_WRITE_WOULDBLOCK);
+     return SSIZET2NUM(n);
+ }
+
+ /* -----------------------------------------------------------------------
+  * c_write_chunked — chunked Transfer-Encoding response writer
+  * ----------------------------------------------------------------------- */
+
+ /* Per-call chunked state passed through rb_block_call. */
+ struct hyp_chunked_state {
+     int fd;
+     unsigned char buf[4096]; /* coalesce buffer; 4 KiB matches
+                               * ResponseWriter::COALESCE_FLUSH_BYTES
+                               * (response_writer.rb:19). */
+     size_t buf_used;
+     size_t bytes_written;
+ };
+
+ static const char HYP_HEX[16] = {
+     '0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'
+ };
+
+ /* Format `n` as lowercase hex (no 0x prefix). Returns bytes written.
+  * Handwritten so we don't pay snprintf cost per chunk; mirrors the
+  * u64_to_dec helper in c_access_line.c. */
+ static size_t hyp_u64_to_hex(unsigned char *dst, uint64_t n) {
+     if (n == 0) { dst[0] = '0'; return 1; }
+     unsigned char tmp[16];
+     int i = 0;
+     while (n > 0) { tmp[i++] = (unsigned char)HYP_HEX[n & 0xf]; n >>= 4; }
+     for (int j = 0; j < i; j++) dst[j] = tmp[i - 1 - j];
+     return (size_t)i;
+ }
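+
+ /* Example outputs: 0 → "0" (1 byte), 255 → "ff" (2 bytes),
+  * 4096 → "1000" (4 bytes). A 64-bit value needs at most 16 hex
+  * digits, hence tmp[16]. */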
+
+ /* Drain the coalesce buffer to the wire as a single syscall.
+  * Updates bytes_written and resets buf_used. Raises Errno::EAGAIN on
+  * mid-body backpressure: once any chunked bytes are on the wire, a
+  * partial flush would corrupt the chunked encoding for the peer
+  * (the next coalesce-and-drain would inject framing in the wrong
+  * place). The dispatcher (Task 6) catches the exception and tears
+  * the connection down — this matches "WOULDBLOCK is degenerate
+  * mid-body" from the spec. The pre-body WOULDBLOCK case is handled
+  * separately by the head-emit path in c_write_chunked. */
+ static void hyp_chunked_drain(struct hyp_chunked_state *st) {
+     if (st->buf_used == 0) return;
+     struct iovec iov[1];
+     iov[0].iov_base = st->buf;
+     iov[0].iov_len = st->buf_used;
+     ssize_t n = hyp_writev_all(st->fd, iov, 1);
+     if (n == HYP_C_WRITE_WOULDBLOCK) {
+         errno = EAGAIN;
+         rb_sys_fail("chunked-encoding mid-body backpressure (WOULDBLOCK)");
+     }
+     st->bytes_written += st->buf_used;
+     st->buf_used = 0;
+ }
+
+ /* Append `framed_len` bytes to the coalesce buffer. If they overflow
+  * the buffer, drain first; if the bytes themselves exceed 4 KiB,
+  * drain and write directly, bypassing the coalesce. Mid-body
+  * WOULDBLOCK propagates as Errno::EAGAIN via hyp_chunked_drain
+  * and rb_sys_fail (see the hyp_chunked_drain comment). */
+ static void hyp_chunked_append(struct hyp_chunked_state *st,
+                                const unsigned char *framed,
+                                size_t framed_len) {
+     if (framed_len >= sizeof(st->buf)) {
+         /* Big frame: drain anything we've buffered so order is preserved,
+          * then write the framed bytes directly with one syscall. */
+         hyp_chunked_drain(st);
+         struct iovec iov[1] = {{ (void *)framed, framed_len }};
+         ssize_t n = hyp_writev_all(st->fd, iov, 1);
+         if (n == HYP_C_WRITE_WOULDBLOCK) {
+             errno = EAGAIN;
+             rb_sys_fail("chunked-encoding mid-body backpressure (WOULDBLOCK)");
+         }
+         st->bytes_written += framed_len;
+         return;
+     }
+     if (st->buf_used + framed_len > sizeof(st->buf)) hyp_chunked_drain(st);
+     memcpy(st->buf + st->buf_used, framed, framed_len);
+     st->buf_used += framed_len;
+ }
+
+ /* rb_block_call callback invoked once per `body.each` yield. */
+ static VALUE hyp_chunked_callback(RB_BLOCK_CALL_FUNC_ARGLIST(yielded, callback_arg)) {
+     struct hyp_chunked_state *st = (struct hyp_chunked_state *)callback_arg;
+     VALUE chunk = yielded;
+     if (NIL_P(chunk)) return Qnil;
+
+     /* Flush sentinel: literal symbol :__hyperion_flush__ from
+      * response_writer.rb (used by SSE servers to push events past the
+      * coalescing latency). id_hyp_flush cached at init. */
+     if (SYMBOL_P(chunk) && rb_sym2id(chunk) == id_hyp_flush) {
+         hyp_chunked_drain(st);
+         return Qnil;
+     }
+
+     Check_Type(chunk, T_STRING);
+     size_t payload_len = (size_t)RSTRING_LEN(chunk);
+     if (payload_len == 0) return Qnil;
+
+     /* Frame: <hex-size>\r\n<payload>\r\n. For small chunks the whole
+      * frame is built on the C stack. For large chunks only the framing
+      * bytes (size line and trailing CRLF) are stack-built; the payload
+      * stays in the Ruby String and we writev with three iovs. */
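+     /* Example: a 5-byte payload "hello" goes on the wire as
+      *   "5\r\nhello\r\n"
+      * (1 hex digit + CRLF + payload + CRLF, 10 bytes total). The final
+      * "0\r\n\r\n" terminator is emitted once, in c_write_chunked. */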
+     if (payload_len < (sizeof(st->buf) - 32)) {
+         /* Stack-frame the chunk so it lands in the coalesce buffer
+          * (or drains directly via hyp_chunked_append if oversized). */
+         unsigned char framed[4096 + 32];
+         size_t hex_n = hyp_u64_to_hex(framed, (uint64_t)payload_len);
+         framed[hex_n++] = '\r'; framed[hex_n++] = '\n';
+         memcpy(framed + hex_n, RSTRING_PTR(chunk), payload_len);
+         hex_n += payload_len;
+         framed[hex_n++] = '\r'; framed[hex_n++] = '\n';
+         hyp_chunked_append(st, framed, hex_n);
+     } else {
+         /* Large chunk: drain coalesce, write the size-line + payload +
+          * CRLF in one writev (3 iovs). */
+         hyp_chunked_drain(st);
+         unsigned char hex_buf[18];
+         size_t hex_n = hyp_u64_to_hex(hex_buf, (uint64_t)payload_len);
+         hex_buf[hex_n++] = '\r'; hex_buf[hex_n++] = '\n';
+         unsigned char crlf[2] = { '\r', '\n' };
+         struct iovec iov[3];
+         iov[0].iov_base = hex_buf;
+         iov[0].iov_len = hex_n;
+         iov[1].iov_base = (void *)RSTRING_PTR(chunk);
+         iov[1].iov_len = payload_len;
+         iov[2].iov_base = crlf;
+         iov[2].iov_len = 2;
+         ssize_t n = hyp_writev_all(st->fd, iov, 3);
+         if (n == HYP_C_WRITE_WOULDBLOCK) {
+             errno = EAGAIN;
+             rb_sys_fail("chunked-encoding mid-body backpressure (WOULDBLOCK)");
+         }
+         st->bytes_written += hex_n + payload_len + 2;
+         RB_GC_GUARD(chunk);
+     }
+     return Qnil;
+ }
+
+ /* Hyperion::Http::ResponseWriter.c_write_chunked(io, status, headers,
+  *                                                body, keep_alive,
+  *                                                date_str) -> Integer */
+ static VALUE c_write_chunked(VALUE self, VALUE io, VALUE rb_status,
+                              VALUE rb_headers, VALUE rb_body,
+                              VALUE rb_keep_alive, VALUE rb_date) {
+     (void)self;
+     Check_Type(rb_headers, T_HASH);
+
+     int fd = NUM2INT(rb_funcall(io, id_fileno, 0));
+     int status = NUM2INT(rb_status);
+     VALUE rb_reason = hyp_lookup_reason(status);
+
+     /* Build chunked head: emits transfer-encoding: chunked instead of
+      * content-length; drops caller-supplied content-length and TE. */
+     VALUE head = hyperion_build_response_head_chunked(
+         rb_status, rb_reason, rb_headers, rb_keep_alive, rb_date
+     );
+
+     struct hyp_chunked_state st;
+     memset(&st, 0, sizeof(st));
+     st.fd = fd;
+
+     /* Emit the head as a single syscall. */
+     struct iovec head_iov[1];
+     head_iov[0].iov_base = (void *)RSTRING_PTR(head);
+     head_iov[0].iov_len = (size_t)RSTRING_LEN(head);
+     ssize_t n = hyp_writev_all(fd, head_iov, 1);
+     if (n == HYP_C_WRITE_WOULDBLOCK) {
+         RB_GC_GUARD(head);
+         return INT2NUM(HYP_C_WRITE_WOULDBLOCK);
+     }
+     st.bytes_written += (size_t)RSTRING_LEN(head);
+
+     /* Iterate body via rb_block_call. Ruby exceptions propagate
+      * (the dispatcher's Connection#serve rescue handles teardown).
+      * id_each cached at init. */
+     rb_block_call(rb_body, id_each, 0, NULL,
+                   hyp_chunked_callback, (VALUE)&st);
+
+     /* Drain coalesce + emit terminator atomically when possible: if the
+      * coalesce buffer has room, memcpy the terminator and drain (a
+      * single syscall ends the response). Otherwise drain first, then
+      * write the terminator separately. */
+     static const unsigned char term[] = { '0','\r','\n','\r','\n' };
+     if (st.buf_used + sizeof(term) <= sizeof(st.buf)) {
+         memcpy(st.buf + st.buf_used, term, sizeof(term));
+         st.buf_used += sizeof(term);
+         hyp_chunked_drain(&st);
+     } else {
+         hyp_chunked_drain(&st);
+         struct iovec t_iov[1] = {{ (void *)term, sizeof(term) }};
+         ssize_t tn = hyp_writev_all(fd, t_iov, 1);
+         if (tn == HYP_C_WRITE_WOULDBLOCK) {
+             /* Dropping the terminator would silently truncate the
+              * response for the peer; treat it like any other mid-body
+              * WOULDBLOCK and tear the connection down. */
+             errno = EAGAIN;
+             rb_sys_fail("chunked-encoding terminator backpressure (WOULDBLOCK)");
+         }
+         st.bytes_written += sizeof(term);
+     }
+
+     RB_GC_GUARD(head);
+     return SIZET2NUM(st.bytes_written);
+ }
+
+ /* Hyperion::Http::ResponseWriter.c_write_buffered_via_ring(io, status,
+  *                                                          headers, body,
+  *                                                          keep_alive,
+  *                                                          date_str,
+  *                                                          ring_ptr)
+  *   -> Integer
+  *
+  * Plan #2 — io_uring-owned variant of c_write_buffered. Submits a send
+  * SQE via the Rust hyperion_io_uring crate instead of issuing write/writev
+  * directly. `ring_ptr` is the HotpathRing raw pointer cast to an Integer
+  * by the Ruby caller (Connection layer).
+  *
+  * Falls back to a direct write (c_write_buffered) when the io_uring crate
+  * isn't loaded (hyp_submit_send_fn == NULL after the lazy-resolve attempt).
+  *
+  * iov lifetime caveat: the kernel reads the iov data AFTER submit_send
+  * returns. The iov array is allocated via xmalloc and intentionally NOT
+  * freed here; the Ruby head + body Strings stay alive via GC roots.
+  *
+  * TODO(plan #2 task 2.5): replace the xmalloc leak with a per-conn iov
+  * arena that frees on send-CQE completion. Current behavior leaks one
+  * iov array per response under sustained load. */
+ static VALUE c_write_buffered_via_ring(VALUE self, VALUE io, VALUE rb_status,
+                                        VALUE rb_headers, VALUE rb_body,
+                                        VALUE rb_keep_alive, VALUE rb_date,
+                                        VALUE rb_ring_ptr) {
+     /* Lazy-resolve the io_uring submit_send symbol on first call. After the
+      * first successful resolve, hyp_submit_send_fn is non-NULL and this
+      * branch is skipped on every subsequent call (~50 ns dlsym cost paid
+      * once per process, not per request). */
+     if (!hyp_submit_send_fn) {
+         hyp_submit_send_fn =
+             (int (*)(void *, int, const void *, unsigned int))
+             dlsym(RTLD_DEFAULT, "hyperion_io_uring_hotpath_submit_send");
+     }
+     if (!hyp_submit_send_fn) {
+         /* io_uring crate not loaded — fall back to the direct write path. */
+         return c_write_buffered(self, io, rb_status, rb_headers, rb_body,
+                                 rb_keep_alive, rb_date);
+     }
+
+     /* Resolve fd before taking raw C pointers into Ruby objects (rb_funcall
+      * may GC). */
+     int fd = NUM2INT(rb_funcall(io, id_fileno, 0));
+
+     Check_Type(rb_headers, T_HASH);
+     Check_Type(rb_body, T_ARRAY);
+
+     /* Sum body bytes and type-check chunks. */
+     long body_size = 0;
+     long body_len = RARRAY_LEN(rb_body);
+     for (long i = 0; i < body_len; i++) {
+         VALUE chunk = RARRAY_AREF(rb_body, i);
+         Check_Type(chunk, T_STRING);
+         body_size += RSTRING_LEN(chunk);
+     }
+
+     int status = NUM2INT(rb_status);
+     VALUE rb_reason = hyp_lookup_reason(status);
+     VALUE head = hyperion_build_response_head(
+         rb_status, rb_reason, rb_headers,
+         LL2NUM(body_size), rb_keep_alive, rb_date
+     );
+
+     /* Allocate the iov array via xmalloc (Ruby's allocator: counted by
+      * GC heuristics, but never scanned or freed automatically). The
+      * kernel reads from the iov pointers AFTER submit_send returns; the
+      * iovs and their backing memory (RSTRING_PTR into Ruby Strings) MUST
+      * stay alive until the send CQE is processed by the accept fiber.
+      *
+      * TODO(plan #2 task 2.5): see the leak note in the function comment
+      * above. */
+     long total_iov = 1 + body_len;
+     struct iovec *iov = ALLOC_N(struct iovec, total_iov);
+     iov[0].iov_base = RSTRING_PTR(head);
+     iov[0].iov_len = (size_t)RSTRING_LEN(head);
+     for (long i = 0; i < body_len; i++) {
+         VALUE chunk = RARRAY_AREF(rb_body, i);
+         iov[i + 1].iov_base = RSTRING_PTR(chunk);
+         iov[i + 1].iov_len = (size_t)RSTRING_LEN(chunk);
+     }
+
+     void *ring_ptr = (void *)NUM2SIZET(rb_ring_ptr);
+     int rc = hyp_submit_send_fn(ring_ptr, fd, iov, (unsigned int)total_iov);
+     if (rc < 0) {
+         xfree(iov);
+         rb_sys_fail("hotpath submit_send");
+     }
+
+     /* Keep head alive across the submit_send call so the GC does not reap
+      * the Ruby String whose RSTRING_PTR is in iov[0]. rb_body (the Array)
+      * is a GC root that pins all body chunks for us. */
+     RB_GC_GUARD(head);
+
+     /* Return bytes-to-be-written (speculative; the actual byte count is
+      * confirmed by the send CQE in the accept fiber — Task 2.5 wires
+      * CQE feedback for metrics reconciliation). */
+     return SIZET2NUM((size_t)RSTRING_LEN(head) + (size_t)body_size);
+ }
+
+ void Init_hyperion_response_writer(void) {
+     rb_mHyperion = rb_const_get(rb_cObject, rb_intern("Hyperion"));
+     /* Hyperion::Http may already exist (created by Init_hyperion_sendfile
+      * earlier in Init_hyperion_http) or may not (init-order changes,
+      * or a Ruby file opened the module first). Use the same guard
+      * pattern as sendfile.c / page_cache.c so we never raise a
+      * TypeError if a future caller defines Http as a class. */
+     if (rb_const_defined(rb_mHyperion, rb_intern("Http"))) {
+         rb_mHttp = rb_const_get(rb_mHyperion, rb_intern("Http"));
+     } else {
+         rb_mHttp = rb_define_module_under(rb_mHyperion, "Http");
+     }
+     rb_mResponseWriter = rb_define_module_under(rb_mHttp, "ResponseWriter");
+
+     /* Cache rb_intern lookups at init time — never on the hot path. */
+     id_fileno = rb_intern("fileno");
+     id_each = rb_intern("each");
+     id_hyp_flush = rb_intern("__hyperion_flush__");
+
+     /* Pre-bake the 23 common reason phrases as frozen, never-GC'd Ruby
+      * Strings so c_write_buffered can hand them to cbuild_response_head
+      * without an allocation. rb_global_variable pins them as GC roots. */
+     static const char *k_reason_phrases[HYP_REASON_TABLE_SIZE] = {
+         "OK", "Created", "No Content", "Moved Permanently", "Found",
+         "Not Modified", "Bad Request", "Unauthorized", "Forbidden",
+         "Not Found", "Method Not Allowed", "Request Timeout", "Conflict",
+         "Gone", "Payload Too Large", "URI Too Long", "Unprocessable Entity",
+         "Too Many Requests", "Internal Server Error", "Not Implemented",
+         "Bad Gateway", "Service Unavailable", "Gateway Timeout"
+     };
+     for (int i = 0; i < HYP_REASON_TABLE_SIZE; i++) {
+         k_reason_strings[i] = rb_obj_freeze(rb_str_new_cstr(k_reason_phrases[i]));
+         rb_global_variable(&k_reason_strings[i]);
+     }
+     k_reason_unknown = rb_obj_freeze(rb_str_new_cstr("Unknown"));
+     rb_global_variable(&k_reason_unknown);
+
+     rb_define_singleton_method(rb_mResponseWriter, "available?",
+                                c_response_writer_available_p, 0);
+     rb_define_singleton_method(rb_mResponseWriter, "c_write_buffered",
+                                c_write_buffered, 6);
+     rb_define_singleton_method(rb_mResponseWriter, "c_write_chunked",
+                                c_write_chunked, 6);
+     /* Plan #2 seam: io_uring send-SQE submission variant (7 args: the 6
+      * from c_write_buffered plus ring_ptr). Falls back to c_write_buffered
+      * when the io_uring crate is not loaded. */
+     rb_define_singleton_method(rb_mResponseWriter, "c_write_buffered_via_ring",
+                                c_write_buffered_via_ring, 7);
+
+     /* WOULDBLOCK sentinel: the Ruby caller checks for this value and falls
+      * back to io.write when the kernel send buffer is full (EAGAIN). */
+     rb_define_const(rb_mResponseWriter, "WOULDBLOCK",
+                     INT2NUM(HYP_C_WRITE_WOULDBLOCK));
+ }
@@ -0,0 +1,28 @@
+ /* response_writer.h — internal header shared between parser.c and
+  * response_writer.c. NOT installed; only seen by ext sources.
+  *
+  * Exposes the head-builder symbols that response_writer.c needs so it
+  * can reuse `c_build_response_head`-equivalent logic without going back
+  * through Ruby method dispatch on the hot path. */
+
+ #ifndef HYPERION_RESPONSE_WRITER_H
+ #define HYPERION_RESPONSE_WRITER_H
+
+ #include <ruby.h>
+
+ /* Build an HTTP/1.1 response-head string into a fresh Ruby String.
+  * Same behavior as the Ruby-visible
+  * `Hyperion::CParser.build_response_head(...)` (parser.c). */
+ VALUE hyperion_build_response_head(VALUE status, VALUE reason, VALUE headers,
+                                    VALUE body_size, VALUE keep_alive,
+                                    VALUE date_str);
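+
+ /* Assumed head shape (illustrative only; the exact header set and
+  * ordering live in parser.c, not here):
+  *
+  *   HTTP/1.1 200 OK\r\n
+  *   <caller headers, CR/LF-validated>\r\n
+  *   content-length: <body_size>\r\n   (or transfer-encoding: chunked
+  *                                      from the _chunked variant below)
+  *   \r\n
+  */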
+
+ /* Build a chunked-encoding response-head string. Same byte shape as
+  * the Ruby-visible `build_head_chunked` in response_writer.rb, but
+  * native, allocating one Ruby String. Implemented as
+  * cbuild_response_head with a body_size = -1 sentinel. */
+ VALUE hyperion_build_response_head_chunked(VALUE status, VALUE reason,
+                                            VALUE headers, VALUE keep_alive,
+                                            VALUE date_str);
+
+ #endif /* HYPERION_RESPONSE_WRITER_H */
@@ -10,7 +10,7 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
  [[package]]
  name = "hyperion_io_uring"
- version = "2.3.0"
+ version = "2.4.0"
  dependencies = [
  "io-uring",
  "libc",
@@ -1,6 +1,6 @@
  [package]
  name = "hyperion_io_uring"
- version = "2.3.0"
+ version = "2.4.0"
  edition = "2021"
  publish = false
  description = "io_uring accept/read primitives for Hyperion (2.3-A, Linux 5.6+ only)"