hyperion-rb 2.16.3 → 2.16.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +75 -0
- data/ext/hyperion_http/extconf.rb +9 -0
- data/ext/hyperion_http/parser.c +93 -21
- data/ext/hyperion_http/response_writer.c +604 -0
- data/ext/hyperion_http/response_writer.h +28 -0
- data/ext/hyperion_io_uring/Cargo.lock +1 -1
- data/ext/hyperion_io_uring/Cargo.toml +1 -1
- data/ext/hyperion_io_uring/src/buffer_ring.rs +319 -0
- data/ext/hyperion_io_uring/src/hotpath.rs +645 -0
- data/ext/hyperion_io_uring/src/lib.rs +5 -1
- data/lib/hyperion/cli.rb +23 -0
- data/lib/hyperion/config.rb +9 -0
- data/lib/hyperion/connection.rb +209 -1
- data/lib/hyperion/http/response_writer.rb +46 -0
- data/lib/hyperion/io_uring.rb +270 -5
- data/lib/hyperion/response_writer.rb +91 -1
- data/lib/hyperion/server.rb +200 -4
- data/lib/hyperion/version.rb +1 -1
- data/lib/hyperion.rb +1 -0
- metadata +6 -1
|
@@ -0,0 +1,604 @@
|
|
|
1
|
+
/* response_writer.c — Hyperion::Http::ResponseWriter
|
|
2
|
+
*
|
|
3
|
+
* Direct-syscall response writer for plain-TCP kernel fds. Bypasses
|
|
4
|
+
* Ruby IO machinery (encoding, fiber-yield checks, GVL release/
|
|
5
|
+
* acquire) on the buffered hot path. TLS / non-fd / page-cache /
|
|
6
|
+
* sendfile callers fall through to the Ruby ResponseWriter at the
|
|
7
|
+
* dispatcher in response_writer.rb. */
|
|
8
|
+
|
|
9
|
+
#include <ruby.h>
|
|
10
|
+
#include <ruby/io.h>
|
|
11
|
+
#include <sys/types.h>
|
|
12
|
+
#include <sys/uio.h>
|
|
13
|
+
#include <sys/socket.h>
|
|
14
|
+
#include <errno.h>
|
|
15
|
+
#include <stdint.h>
|
|
16
|
+
#include <unistd.h>
|
|
17
|
+
#include <string.h>
|
|
18
|
+
#include <dlfcn.h>
|
|
19
|
+
|
|
20
|
+
#include "response_writer.h"
|
|
21
|
+
|
|
22
|
+
/* macOS lacks MSG_NOSIGNAL; fall back to 0 (no flag). Safe in a Ruby
|
|
23
|
+
* process: MRI installs a custom SIGPIPE handler that converts the
|
|
24
|
+
* signal into a soft event and the next IO call returns EPIPE — the
|
|
25
|
+
* process is not killed. Our C sendmsg/writev calls run under the
|
|
26
|
+
* GVL, so the same handler intercepts SIGPIPE for them. */
|
|
27
|
+
#ifndef MSG_NOSIGNAL
|
|
28
|
+
#define MSG_NOSIGNAL 0
|
|
29
|
+
#endif
|
|
30
|
+
|
|
31
|
+
/* File-scope module handles, resolved once in Init_hyperion_response_writer. */
static VALUE rb_mHyperion;
static VALUE rb_mHttp;
static VALUE rb_mResponseWriter;

/* IDs cached at init time — avoids rb_intern on the hot path. */
static ID id_fileno;
static ID id_each;
static ID id_hyp_flush; /* :__hyperion_flush__ chunked-drain sentinel */

/* Plan #2 seam: function-pointer for hyperion_io_uring's send-SQE
 * submission. Resolved lazily on the first call to c_write_buffered_via_ring
 * via dlsym(RTLD_DEFAULT, ...). NULL when the io_uring crate isn't loaded
 * yet — the via-ring path short-circuits to direct write in that case.
 *
 * Order-of-loading note: Init_hyperion_response_writer runs when
 * hyperion_http.bundle is required (early boot, before io_uring.rb loads
 * the io_uring cdylib). Doing the dlsym here would always return NULL.
 * Instead we re-try on the first call so the symbol is found AFTER
 * lib/hyperion/io_uring.rb has called Fiddle.dlopen on the cdylib. */
static int (*hyp_submit_send_fn)(void *, int, const void *, unsigned int) = NULL;

/* Pre-baked frozen Ruby Strings for the 23 common reason phrases.
 * Built once at init; looked up by status code in c_write_buffered.
 * Eliminates the per-request rb_str_new_cstr allocation that would
 * otherwise fire on every response. Statuses outside the table fall
 * back to the shared k_reason_unknown ("Unknown") String.
 * NOTE: k_reason_statuses[i] pairs positionally with the phrase table
 * in Init_hyperion_response_writer — keep the two in the same order. */
#define HYP_REASON_TABLE_SIZE 23
static int k_reason_statuses[HYP_REASON_TABLE_SIZE] = {
    200, 201, 204, 301, 302, 304, 400, 401, 403, 404, 405, 408,
    409, 410, 413, 414, 422, 429, 500, 501, 502, 503, 504
};
static VALUE k_reason_strings[HYP_REASON_TABLE_SIZE];
static VALUE k_reason_unknown;
|
|
64
|
+
|
|
65
|
+
/* Hyperion::Http::ResponseWriter.available? -> true
 *
 * Presence probe: this method existing at all means the native writer
 * compiled and loaded, so it unconditionally answers true. The Ruby
 * dispatcher uses it to decide whether the C fast path is available. */
static VALUE c_response_writer_available_p(VALUE self) {
    (void)self; /* module-level singleton method; receiver unused */
    return Qtrue;
}
|
|
69
|
+
|
|
70
|
+
/* Sentinel returned to Ruby on EAGAIN — the dispatcher sees this and
|
|
71
|
+
* falls back to io.write (which yields under Async / blocks under
|
|
72
|
+
* threadpool correctly). We don't reimplement scheduler-aware parking
|
|
73
|
+
* in C. */
|
|
74
|
+
#define HYP_C_WRITE_WOULDBLOCK -2
|
|
75
|
+
|
|
76
|
+
/* Maximum iov entries we build on the stack: 1 (head) + up to
|
|
77
|
+
* HYP_C_IOV_MAX-1 body chunks. Cap at 8 so a pathological 100-element
|
|
78
|
+
* Array body coalesces into one buffer rather than blowing the stack.
|
|
79
|
+
* Normal Rack apps emit Array[1] bodies; Array[2..7] is the uncommon
|
|
80
|
+
* multi-part case; Array[8+] coalesces. */
|
|
81
|
+
#define HYP_C_IOV_MAX 8
|
|
82
|
+
|
|
83
|
+
/* Look up the cached reason String for `status`. Returns a frozen
|
|
84
|
+
* Ruby String for the 23 common statuses (zero allocation), or
|
|
85
|
+
* k_reason_unknown ("Unknown") for anything else. */
|
|
86
|
+
static inline VALUE hyp_lookup_reason(int status) {
|
|
87
|
+
for (int i = 0; i < HYP_REASON_TABLE_SIZE; i++) {
|
|
88
|
+
if (k_reason_statuses[i] == status) return k_reason_strings[i];
|
|
89
|
+
}
|
|
90
|
+
return k_reason_unknown;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
/* Issue one sendmsg/writev with `iov_count` iovecs and keep writing
 * until all bytes are on the wire.
 *
 * Returns total bytes written on success, or HYP_C_WRITE_WOULDBLOCK
 * on EAGAIN/EWOULDBLOCK. Raises SystemCallError (rb_sys_fail) on hard
 * errors or after more than 3 EINTRs. Handles short writes (rare on a
 * non-blocking socket with room in the kernel send buffer) by advancing
 * the iov array past the sent bytes and looping.
 *
 * Mutates the caller's iov array in place while advancing — callers
 * must treat it as consumed after this returns.
 *
 * NOTE(review): when EAGAIN fires AFTER a partial write, `total` is
 * discarded and only the sentinel is returned — a caller that falls
 * back and re-sends from the start would duplicate the already-sent
 * bytes. Presumably callers only see WOULDBLOCK before any bytes land
 * (empty send buffer case); confirm against the dispatcher. */
static ssize_t hyp_writev_all(int fd, struct iovec *iov, int iov_count) {
    ssize_t total = 0;
    int retries = 0;

    for (;;) {
#ifdef HAVE_SENDMSG
        /* sendmsg lets us pass MSG_NOSIGNAL so a dead peer yields
         * EPIPE instead of SIGPIPE (see MSG_NOSIGNAL note above). */
        struct msghdr msg;
        memset(&msg, 0, sizeof(msg));
        msg.msg_iov = iov;
        msg.msg_iovlen = (int)iov_count;
        ssize_t n = sendmsg(fd, &msg, MSG_NOSIGNAL);
#else
        ssize_t n = writev(fd, iov, iov_count);
#endif
        if (n >= 0) {
            total += n;
            /* Compute remaining bytes across all iov slots. */
            ssize_t remaining = 0;
            for (int i = 0; i < iov_count; i++)
                remaining += (ssize_t)iov[i].iov_len;
            if (n == remaining) return total;

            /* Short write — advance iov past the bytes already sent:
             * skip fully-sent slots, then trim the partially-sent one. */
            ssize_t skipped = 0;
            int i = 0;
            while (i < iov_count &&
                   skipped + (ssize_t)iov[i].iov_len <= n) {
                skipped += (ssize_t)iov[i].iov_len;
                i++;
            }
            if (i < iov_count) {
                iov[i].iov_base =
                    (char *)iov[i].iov_base + (n - skipped);
                iov[i].iov_len -= (size_t)(n - skipped);
            }
            iov += i;
            iov_count -= i;
            continue;
        }

        if (errno == EINTR) {
            /* Bounded retry so a signal storm can't spin us forever. */
            if (++retries > 3)
                rb_sys_fail("sendmsg/writev: EINTR retries exhausted");
            continue;
        }
        if (errno == EAGAIN || errno == EWOULDBLOCK) {
            return HYP_C_WRITE_WOULDBLOCK;
        }
        rb_sys_fail("sendmsg/writev failed");
    }
}
|
|
149
|
+
|
|
150
|
+
/* Hyperion::Http::ResponseWriter.c_write_buffered(io, status, headers,
 *                                                 body, keep_alive,
 *                                                 date_str) -> Integer
 *
 * Writes a complete HTTP/1.1 response (head + body) to the kernel fd
 * underlying `io` in a single sendmsg/writev call. Header CR/LF
 * validation and value coercion happen inside the head builder
 * (see step 4), so only shape checks are done here.
 *
 * Returns total bytes written on success.
 * Returns HYP_C_WRITE_WOULDBLOCK (-2) on EAGAIN — caller falls back
 * to io.write (which parks the fiber / blocks the thread correctly).
 * Raises rb_eTypeError on non-Hash headers / non-Array body /
 * non-String body chunks.
 * Raises SystemCallError on hard write failures. */
static VALUE c_write_buffered(VALUE self, VALUE io, VALUE rb_status,
                              VALUE rb_headers, VALUE rb_body,
                              VALUE rb_keep_alive, VALUE rb_date) {
    (void)self;

    /* 1. Type checks up front — fail fast on bad shapes before any
     * syscall. */
    Check_Type(rb_headers, T_HASH);
    Check_Type(rb_body, T_ARRAY);

    /* 2. Resolve fd from the Ruby IO object. rb_funcall can GC; do it
     * before we take any raw C pointers into Ruby objects. */
    int fd = NUM2INT(rb_funcall(io, id_fileno, 0));

    /* 3. Body type check and byte-size sum.
     * RARRAY_AREF is safe while rb_body is live on the C stack. */
    long body_size = 0;
    long body_len = RARRAY_LEN(rb_body);
    for (long i = 0; i < body_len; i++) {
        VALUE chunk = RARRAY_AREF(rb_body, i);
        Check_Type(chunk, T_STRING);
        body_size += RSTRING_LEN(chunk);
    }

    /* 4. Build the response head. hyperion_build_response_head lives
     * in parser.c (exported via response_writer.h). The reason String
     * comes from the pre-baked frozen table — zero allocation for the
     * 23 common statuses. The builder performs the CR/LF guard and
     * header-value coercion, matching the Ruby fallback's semantics. */
    int status = NUM2INT(rb_status);
    VALUE rb_reason = hyp_lookup_reason(status);
    VALUE head = hyperion_build_response_head(
        rb_status, rb_reason, rb_headers,
        LL2NUM(body_size), rb_keep_alive, rb_date
    );

    /* 5. Assemble iovec: slot 0 = response head; slots 1..N = body
     * chunks (capped at HYP_C_IOV_MAX-1). Longer bodies are coalesced
     * into a single buffer allocated here. */
    struct iovec iov[HYP_C_IOV_MAX];
    iov[0].iov_base = RSTRING_PTR(head);
    iov[0].iov_len = (size_t)RSTRING_LEN(head);
    int iov_count = 1;

    /* Hold a reference so GC can't reap the coalesced buffer before
     * the syscall completes. Qnil means "not used". */
    VALUE coalesced = Qnil;

    if (body_len <= (long)(HYP_C_IOV_MAX - 1)) {
        /* Fast path: each chunk gets its own iov slot. The Array `rb_body`
         * is a GC root that pins all its elements for our call duration. */
        for (long i = 0; i < body_len; i++) {
            VALUE chunk = RARRAY_AREF(rb_body, i);
            iov[iov_count].iov_base = RSTRING_PTR(chunk);
            iov[iov_count].iov_len = (size_t)RSTRING_LEN(chunk);
            iov_count++;
        }
    } else {
        /* Slow path: coalesce into one buffer to keep iov_count bounded.
         * Fires only for Array bodies with >= HYP_C_IOV_MAX chunks —
         * rare in practice; we accept the one-time allocation. */
        coalesced = rb_str_buf_new(body_size);
        for (long i = 0; i < body_len; i++)
            rb_str_buf_append(coalesced, RARRAY_AREF(rb_body, i));
        iov[1].iov_base = RSTRING_PTR(coalesced);
        iov[1].iov_len = (size_t)RSTRING_LEN(coalesced);
        iov_count = 2;
    }

    ssize_t n = hyp_writev_all(fd, iov, iov_count);

    /* GC-safety: keep `head` and `coalesced` (when used) alive across
     * the syscall. -O2 can elide local Ruby Strings whose only use is
     * the RSTRING_PTR at iov assembly; MRI's conservative GC stack
     * scan would then miss them. RB_GC_GUARD is the project-standard
     * idiom (parser.c uses it 9 times for the same pattern). */
    RB_GC_GUARD(head);
    RB_GC_GUARD(coalesced);

    if (n == HYP_C_WRITE_WOULDBLOCK) return INT2NUM(HYP_C_WRITE_WOULDBLOCK);
    return SSIZET2NUM(n);
}
|
|
252
|
+
|
|
253
|
+
/* -----------------------------------------------------------------------
 * c_write_chunked — chunked Transfer-Encoding response writer
 * ----------------------------------------------------------------------- */

/* Per-call chunked state passed through rb_block_call (cast to VALUE
 * as the callback_arg). Lives on c_write_chunked's C stack. */
struct hyp_chunked_state {
    int fd;                  /* kernel fd the response is written to */
    unsigned char buf[4096]; /* coalesce buffer; 4 KiB matches
                              * ResponseWriter::COALESCE_FLUSH_BYTES
                              * (response_writer.rb:19). */
    size_t buf_used;         /* bytes currently held in buf */
    size_t bytes_written;    /* bytes already flushed to the wire */
};
|
|
266
|
+
|
|
267
|
+
/* Lowercase-hex digit lookup table used by hyp_u64_to_hex. */
static const char HYP_HEX[16] = "0123456789abcdef";

/* Format `n` as lowercase hex (no 0x prefix) into dst; returns the
 * number of bytes written (1..16). Handwritten so we don't pay
 * snprintf cost per chunk; mirrors the u64_to_dec helper in
 * c_access_line.c. Writes digits most-significant first by counting
 * the nibble width up front, then filling dst backwards. */
static size_t hyp_u64_to_hex(unsigned char *dst, uint64_t n) {
    /* Width = number of significant nibbles (minimum 1, so 0 -> "0"). */
    size_t width = 1;
    for (uint64_t v = n; v >> 4; v >>= 4) {
        width++;
    }

    size_t pos = width;
    do {
        dst[--pos] = (unsigned char)HYP_HEX[n & 0xf];
        n >>= 4;
    } while (n != 0);

    return width;
}
|
|
282
|
+
|
|
283
|
+
/* Drain the coalesce buffer to the wire as a single syscall.
|
|
284
|
+
* Updates bytes_written and resets buf_used. Raises Errno::EAGAIN on
|
|
285
|
+
* mid-body backpressure: once any chunked bytes are on the wire, a
|
|
286
|
+
* partial flush would corrupt the chunked encoding for the peer
|
|
287
|
+
* (the next coalesce-and-drain would inject framing in the wrong
|
|
288
|
+
* place). The dispatcher (Task 6) catches the exception and tears
|
|
289
|
+
* the connection down — this matches "WOULDBLOCK is degenerate
|
|
290
|
+
* mid-body" from the spec. The pre-body WOULDBLOCK case is handled
|
|
291
|
+
* separately by the head-emit path in c_write_chunked. */
|
|
292
|
+
static void hyp_chunked_drain(struct hyp_chunked_state *st) {
|
|
293
|
+
if (st->buf_used == 0) return;
|
|
294
|
+
struct iovec iov[1];
|
|
295
|
+
iov[0].iov_base = st->buf;
|
|
296
|
+
iov[0].iov_len = st->buf_used;
|
|
297
|
+
ssize_t n = hyp_writev_all(st->fd, iov, 1);
|
|
298
|
+
if (n == HYP_C_WRITE_WOULDBLOCK) {
|
|
299
|
+
errno = EAGAIN;
|
|
300
|
+
rb_sys_fail("chunked-encoding mid-body backpressure (WOULDBLOCK)");
|
|
301
|
+
}
|
|
302
|
+
st->bytes_written += st->buf_used;
|
|
303
|
+
st->buf_used = 0;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
/* Append `framed_len` pre-framed bytes to the coalesce buffer.
 * Three cases:
 *   - frame >= buffer capacity: drain what's buffered (preserving
 *     order), then write the frame directly with one syscall;
 *   - frame fits but would overflow: drain first, then buffer;
 *   - frame fits: just memcpy into the buffer.
 * Mid-body WOULDBLOCK propagates as Errno::EAGAIN via
 * hyp_chunked_drain / rb_sys_fail (see hyp_chunked_drain comment). */
static void hyp_chunked_append(struct hyp_chunked_state *st,
                               const unsigned char *framed,
                               size_t framed_len) {
    if (framed_len >= sizeof(st->buf)) {
        /* Big frame: drain anything we've buffered so order is preserved,
         * then write the framed bytes directly with one syscall. */
        hyp_chunked_drain(st);
        struct iovec iov[1] = {{ (void *)framed, framed_len }};
        ssize_t n = hyp_writev_all(st->fd, iov, 1);
        if (n == HYP_C_WRITE_WOULDBLOCK) {
            errno = EAGAIN;
            rb_sys_fail("chunked-encoding mid-body backpressure (WOULDBLOCK)");
        }
        st->bytes_written += framed_len;
        return;
    }
    /* Make room if the frame wouldn't fit behind what's buffered. */
    if (st->buf_used + framed_len > sizeof(st->buf)) hyp_chunked_drain(st);
    memcpy(st->buf + st->buf_used, framed, framed_len);
    st->buf_used += framed_len;
}
|
|
331
|
+
|
|
332
|
+
/* rb_block_call callback invoked once per `body.each` yield.
 * `callback_arg` is the hyp_chunked_state on c_write_chunked's stack.
 * nil and empty-string yields are ignored; the :__hyperion_flush__
 * Symbol forces an immediate drain; anything else must be a String
 * (TypeError otherwise). Always returns Qnil. */
static VALUE hyp_chunked_callback(RB_BLOCK_CALL_FUNC_ARGLIST(yielded, callback_arg)) {
    struct hyp_chunked_state *st = (struct hyp_chunked_state *)callback_arg;
    VALUE chunk = yielded;
    if (NIL_P(chunk)) return Qnil;

    /* Flush sentinel: literal symbol :__hyperion_flush__ from
     * response_writer.rb (used by SSE servers to push events past the
     * coalescing latency). id_hyp_flush cached at init. */
    if (SYMBOL_P(chunk) && rb_sym2id(chunk) == id_hyp_flush) {
        hyp_chunked_drain(st);
        return Qnil;
    }

    Check_Type(chunk, T_STRING);
    size_t payload_len = (size_t)RSTRING_LEN(chunk);
    if (payload_len == 0) return Qnil;

    /* Frame: <hex-size>\r\n<payload>\r\n. Small chunks are framed into
     * a stack buffer and go through the coalescer; large chunks skip
     * the copy and writev the framing + payload directly (3 iovs).
     * The 32-byte slack covers the hex size line (<=16) + two CRLFs. */
    if (payload_len < (sizeof(st->buf) - 32)) {
        /* Stack-frame the chunk so it lands in the coalesce buffer
         * (or drains directly via hyp_chunked_append if oversized). */
        unsigned char framed[4096 + 32];
        size_t hex_n = hyp_u64_to_hex(framed, (uint64_t)payload_len);
        framed[hex_n++] = '\r'; framed[hex_n++] = '\n';
        memcpy(framed + hex_n, RSTRING_PTR(chunk), payload_len);
        hex_n += payload_len;
        framed[hex_n++] = '\r'; framed[hex_n++] = '\n';
        hyp_chunked_append(st, framed, hex_n);
    } else {
        /* Large chunk: drain coalesce first (ordering), then write the
         * size-line + payload + CRLF in one writev (3 iovs) — the
         * payload bytes stay in the Ruby String, no copy. */
        hyp_chunked_drain(st);
        unsigned char hex_buf[18];
        size_t hex_n = hyp_u64_to_hex(hex_buf, (uint64_t)payload_len);
        hex_buf[hex_n++] = '\r'; hex_buf[hex_n++] = '\n';
        unsigned char crlf[2] = { '\r', '\n' };
        struct iovec iov[3];
        iov[0].iov_base = hex_buf;
        iov[0].iov_len = hex_n;
        iov[1].iov_base = (void *)RSTRING_PTR(chunk);
        iov[1].iov_len = payload_len;
        iov[2].iov_base = crlf;
        iov[2].iov_len = 2;
        ssize_t n = hyp_writev_all(st->fd, iov, 3);
        if (n == HYP_C_WRITE_WOULDBLOCK) {
            errno = EAGAIN;
            rb_sys_fail("chunked-encoding mid-body backpressure (WOULDBLOCK)");
        }
        st->bytes_written += hex_n + payload_len + 2;
        /* Pin the String whose RSTRING_PTR sat in iov[1] across the
         * syscall (conservative-GC guard, same idiom as elsewhere). */
        RB_GC_GUARD(chunk);
    }
    return Qnil;
}
|
|
389
|
+
|
|
390
|
+
/* Hyperion::Http::ResponseWriter.c_write_chunked(io, status, headers,
|
|
391
|
+
* body, keep_alive,
|
|
392
|
+
* date_str) -> Integer */
|
|
393
|
+
static VALUE c_write_chunked(VALUE self, VALUE io, VALUE rb_status,
|
|
394
|
+
VALUE rb_headers, VALUE rb_body,
|
|
395
|
+
VALUE rb_keep_alive, VALUE rb_date) {
|
|
396
|
+
(void)self;
|
|
397
|
+
Check_Type(rb_headers, T_HASH);
|
|
398
|
+
|
|
399
|
+
int fd = NUM2INT(rb_funcall(io, id_fileno, 0));
|
|
400
|
+
int status = NUM2INT(rb_status);
|
|
401
|
+
VALUE rb_reason = hyp_lookup_reason(status);
|
|
402
|
+
|
|
403
|
+
/* Build chunked head: emits transfer-encoding: chunked instead of
|
|
404
|
+
* content-length; drops caller-supplied content-length and TE. */
|
|
405
|
+
VALUE head = hyperion_build_response_head_chunked(
|
|
406
|
+
rb_status, rb_reason, rb_headers, rb_keep_alive, rb_date
|
|
407
|
+
);
|
|
408
|
+
|
|
409
|
+
struct hyp_chunked_state st;
|
|
410
|
+
memset(&st, 0, sizeof(st));
|
|
411
|
+
st.fd = fd;
|
|
412
|
+
|
|
413
|
+
/* Emit the head as a single syscall. */
|
|
414
|
+
struct iovec head_iov[1];
|
|
415
|
+
head_iov[0].iov_base = (void *)RSTRING_PTR(head);
|
|
416
|
+
head_iov[0].iov_len = (size_t)RSTRING_LEN(head);
|
|
417
|
+
ssize_t n = hyp_writev_all(fd, head_iov, 1);
|
|
418
|
+
if (n == HYP_C_WRITE_WOULDBLOCK) {
|
|
419
|
+
RB_GC_GUARD(head);
|
|
420
|
+
return INT2NUM(HYP_C_WRITE_WOULDBLOCK);
|
|
421
|
+
}
|
|
422
|
+
st.bytes_written += (size_t)RSTRING_LEN(head);
|
|
423
|
+
|
|
424
|
+
/* Iterate body via rb_block_call. Ruby exceptions propagate
|
|
425
|
+
* (the dispatcher's Connection#serve rescue handles teardown).
|
|
426
|
+
* id_each cached at init. */
|
|
427
|
+
rb_block_call(rb_body, id_each, 0, NULL,
|
|
428
|
+
hyp_chunked_callback, (VALUE)&st);
|
|
429
|
+
|
|
430
|
+
/* Drain coalesce + emit terminator atomically when possible:
|
|
431
|
+
* coalesce buffer has room → memcpy the terminator and drain
|
|
432
|
+
* (single syscall ends the response). Otherwise drain first
|
|
433
|
+
* then write the terminator separately. */
|
|
434
|
+
static const unsigned char term[] = { '0','\r','\n','\r','\n' };
|
|
435
|
+
if (st.buf_used + sizeof(term) <= sizeof(st.buf)) {
|
|
436
|
+
memcpy(st.buf + st.buf_used, term, sizeof(term));
|
|
437
|
+
st.buf_used += sizeof(term);
|
|
438
|
+
hyp_chunked_drain(&st);
|
|
439
|
+
} else {
|
|
440
|
+
hyp_chunked_drain(&st);
|
|
441
|
+
struct iovec t_iov[1] = {{ (void *)term, sizeof(term) }};
|
|
442
|
+
ssize_t tn = hyp_writev_all(fd, t_iov, 1);
|
|
443
|
+
if (tn >= 0) st.bytes_written += sizeof(term);
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
RB_GC_GUARD(head);
|
|
447
|
+
return SIZET2NUM(st.bytes_written);
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
/* Hyperion::Http::ResponseWriter.c_write_buffered_via_ring(io, status,
 *                                                          headers, body,
 *                                                          keep_alive,
 *                                                          date_str,
 *                                                          ring_ptr)
 *                                                          -> Integer
 *
 * Plan #2 — io_uring-owned variant of c_write_buffered. Submits a send
 * SQE via the Rust hyperion_io_uring crate instead of issuing write/writev
 * directly. `ring_ptr` is the HotpathRing raw pointer cast to an Integer
 * by the Ruby caller (Connection layer).
 *
 * Falls back to direct write (c_write_buffered) when the io_uring crate
 * isn't loaded (hyp_submit_send_fn == NULL after lazy-resolve attempt).
 *
 * iov lifetime caveat: the kernel reads iov data AFTER submit_send returns.
 * The iov array is allocated via xmalloc and intentionally NOT freed here.
 * NOTE(review): RB_GC_GUARD only pins `head` until this function returns,
 * yet the source comments say the backing memory must survive until the
 * send CQE is processed — presumably the Connection layer holds references
 * that long; verify before relying on this path.
 *
 * TODO(plan #2 task 2.5): replace xmalloc-leak with a per-conn iov arena
 * that frees on send-CQE completion. Current behavior leaks one iov array
 * per response under sustained load. */
static VALUE c_write_buffered_via_ring(VALUE self, VALUE io, VALUE rb_status,
                                       VALUE rb_headers, VALUE rb_body,
                                       VALUE rb_keep_alive, VALUE rb_date,
                                       VALUE rb_ring_ptr) {
    /* Lazy-resolve the io_uring submit_send symbol on first call. After the
     * first successful resolve, hyp_submit_send_fn is non-NULL and this
     * branch is skipped on every subsequent call (~50 ns dlsym cost paid
     * once per process, not per request). */
    if (!hyp_submit_send_fn) {
        hyp_submit_send_fn =
            (int (*)(void *, int, const void *, unsigned int))
            dlsym(RTLD_DEFAULT, "hyperion_io_uring_hotpath_submit_send");
    }
    if (!hyp_submit_send_fn) {
        /* io_uring crate not loaded — fall back to direct write path. */
        return c_write_buffered(self, io, rb_status, rb_headers, rb_body,
                                rb_keep_alive, rb_date);
    }

    /* Resolve fd before taking raw C pointers into Ruby objects (rb_funcall
     * may GC). */
    int fd = NUM2INT(rb_funcall(io, id_fileno, 0));

    Check_Type(rb_headers, T_HASH);
    Check_Type(rb_body, T_ARRAY);

    /* Sum body bytes and type-check chunks. */
    long body_size = 0;
    long body_len = RARRAY_LEN(rb_body);
    for (long i = 0; i < body_len; i++) {
        VALUE chunk = RARRAY_AREF(rb_body, i);
        Check_Type(chunk, T_STRING);
        body_size += RSTRING_LEN(chunk);
    }

    /* Build the head via the shared parser.c builder, reusing the
     * pre-baked frozen reason Strings (same as c_write_buffered). */
    int status = NUM2INT(rb_status);
    VALUE rb_reason = hyp_lookup_reason(status);
    VALUE head = hyperion_build_response_head(
        rb_status, rb_reason, rb_headers,
        LL2NUM(body_size), rb_keep_alive, rb_date
    );

    /* Allocate iov array via xmalloc (Ruby-tracked). The kernel reads from
     * the iov pointers AFTER submit_send returns; the iovs + their backing
     * memory (RSTRING_PTR into Ruby Strings) MUST stay alive until the send
     * CQE is processed by the accept fiber.
     *
     * TODO(plan #2 task 2.5): replace xmalloc-leak with a per-conn iov arena
     * that frees on send-CQE completion. Current behavior leaks one iov array
     * per response under sustained load. */
    long total_iov = 1 + body_len;
    struct iovec *iov = ALLOC_N(struct iovec, total_iov);
    iov[0].iov_base = RSTRING_PTR(head);
    iov[0].iov_len = (size_t)RSTRING_LEN(head);
    for (long i = 0; i < body_len; i++) {
        VALUE chunk = RARRAY_AREF(rb_body, i);
        iov[i + 1].iov_base = RSTRING_PTR(chunk);
        iov[i + 1].iov_len = (size_t)RSTRING_LEN(chunk);
    }

    void *ring_ptr = (void *)NUM2SIZET(rb_ring_ptr);
    int rc = hyp_submit_send_fn(ring_ptr, fd, iov, (unsigned int)total_iov);
    if (rc < 0) {
        /* Submission failed — nothing queued, safe to free the iov here. */
        xfree(iov);
        rb_sys_fail("hotpath submit_send");
    }

    /* Keep head alive across the submit_send call so the GC does not reap
     * the Ruby String whose RSTRING_PTR is in iov[0]. rb_body (the Array)
     * is a GC root that pins all body chunks for us. */
    RB_GC_GUARD(head);

    /* Return bytes-to-be-written (speculative; the actual byte count is
     * confirmed by the send CQE in the accept fiber — Task 2.5 wires
     * CQE feedback for metrics reconciliation). */
    return SIZET2NUM((size_t)RSTRING_LEN(head) + (size_t)body_size);
}
|
|
550
|
+
|
|
551
|
+
/* Extension entry point: defines Hyperion::Http::ResponseWriter with
 * its three singleton writer methods, caches interned IDs, and
 * pre-bakes the reason-phrase String table. Called once at require
 * time from the hyperion_http bundle. */
void Init_hyperion_response_writer(void) {
    rb_mHyperion = rb_const_get(rb_cObject, rb_intern("Hyperion"));
    /* Hyperion::Http may already exist (created by Init_hyperion_sendfile
     * earlier in Init_hyperion_http) or may not (init-order changes,
     * or a Ruby file opened the module first). Use the same guard
     * pattern as sendfile.c / page_cache.c so we never raise a
     * TypeError if a future caller defines Http as a class. */
    if (rb_const_defined(rb_mHyperion, rb_intern("Http"))) {
        rb_mHttp = rb_const_get(rb_mHyperion, rb_intern("Http"));
    } else {
        rb_mHttp = rb_define_module_under(rb_mHyperion, "Http");
    }
    rb_mResponseWriter = rb_define_module_under(rb_mHttp, "ResponseWriter");

    /* Cache rb_intern lookups at init time — never on the hot path. */
    id_fileno = rb_intern("fileno");
    id_each = rb_intern("each");
    id_hyp_flush = rb_intern("__hyperion_flush__");

    /* Pre-bake the 23 common reason phrases as frozen, never-GC'd Ruby
     * Strings so c_write_buffered can hand them to the head builder
     * without an allocation. Positional pairing with k_reason_statuses —
     * keep both tables in the same order. rb_global_variable pins them
     * as GC roots. */
    static const char *k_reason_phrases[HYP_REASON_TABLE_SIZE] = {
        "OK", "Created", "No Content", "Moved Permanently", "Found",
        "Not Modified", "Bad Request", "Unauthorized", "Forbidden",
        "Not Found", "Method Not Allowed", "Request Timeout", "Conflict",
        "Gone", "Payload Too Large", "URI Too Long", "Unprocessable Entity",
        "Too Many Requests", "Internal Server Error", "Not Implemented",
        "Bad Gateway", "Service Unavailable", "Gateway Timeout"
    };
    for (int i = 0; i < HYP_REASON_TABLE_SIZE; i++) {
        k_reason_strings[i] = rb_obj_freeze(rb_str_new_cstr(k_reason_phrases[i]));
        rb_global_variable(&k_reason_strings[i]);
    }
    k_reason_unknown = rb_obj_freeze(rb_str_new_cstr("Unknown"));
    rb_global_variable(&k_reason_unknown);

    rb_define_singleton_method(rb_mResponseWriter, "available?",
                               c_response_writer_available_p, 0);
    rb_define_singleton_method(rb_mResponseWriter, "c_write_buffered",
                               c_write_buffered, 6);
    rb_define_singleton_method(rb_mResponseWriter, "c_write_chunked",
                               c_write_chunked, 6);
    /* Plan #2 seam: io_uring send-SQE submission variant (7 args: the 6
     * from c_write_buffered plus ring_ptr). Falls back to c_write_buffered
     * when the io_uring crate is not loaded. */
    rb_define_singleton_method(rb_mResponseWriter, "c_write_buffered_via_ring",
                               c_write_buffered_via_ring, 7);

    /* WOULDBLOCK sentinel: Ruby caller checks for this value and falls
     * back to io.write when the kernel send buffer is full (EAGAIN). */
    rb_define_const(rb_mResponseWriter, "WOULDBLOCK",
                    INT2NUM(HYP_C_WRITE_WOULDBLOCK));
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/* response_writer.h — internal header shared between parser.c and
 * response_writer.c. NOT installed; only seen by ext sources.
 *
 * Exposes the head-builder symbols that response_writer.c needs to
 * reuse `c_build_response_head`-equivalent logic without going back
 * through Ruby method dispatch on the hot path. */

#ifndef HYPERION_RESPONSE_WRITER_H
#define HYPERION_RESPONSE_WRITER_H

#include <ruby.h>

/* Build an HTTP/1.1 response-head string into a fresh Ruby String.
 * Same behavior as the Ruby-visible
 * `Hyperion::CParser.build_response_head(...)` (parser.c).
 * Performs header CR/LF validation and value coercion; raises on
 * injection attempts (see callers in response_writer.c). */
VALUE hyperion_build_response_head(VALUE status, VALUE reason, VALUE headers,
                                   VALUE body_size, VALUE keep_alive,
                                   VALUE date_str);

/* Build a chunked-encoding response-head string. Same byte shape as
 * the Ruby-visible `build_head_chunked` in response_writer.rb but
 * native, allocating one Ruby String. Implemented as
 * cbuild_response_head with body_size = -1 sentinel. */
VALUE hyperion_build_response_head_chunked(VALUE status, VALUE reason,
                                           VALUE headers, VALUE keep_alive,
                                           VALUE date_str);

#endif /* HYPERION_RESPONSE_WRITER_H */
|