hyperion-rb 1.6.2 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4768 -0
- data/README.md +222 -13
- data/ext/hyperion_h2_codec/Cargo.lock +7 -0
- data/ext/hyperion_h2_codec/Cargo.toml +33 -0
- data/ext/hyperion_h2_codec/extconf.rb +73 -0
- data/ext/hyperion_h2_codec/src/frames.rs +140 -0
- data/ext/hyperion_h2_codec/src/hpack/huffman.rs +161 -0
- data/ext/hyperion_h2_codec/src/hpack.rs +457 -0
- data/ext/hyperion_h2_codec/src/lib.rs +296 -0
- data/ext/hyperion_http/extconf.rb +28 -0
- data/ext/hyperion_http/h2_codec_glue.c +408 -0
- data/ext/hyperion_http/page_cache.c +1125 -0
- data/ext/hyperion_http/parser.c +473 -38
- data/ext/hyperion_http/sendfile.c +982 -0
- data/ext/hyperion_http/websocket.c +493 -0
- data/ext/hyperion_io_uring/Cargo.lock +33 -0
- data/ext/hyperion_io_uring/Cargo.toml +34 -0
- data/ext/hyperion_io_uring/extconf.rb +74 -0
- data/ext/hyperion_io_uring/src/lib.rs +316 -0
- data/lib/hyperion/adapter/rack.rb +370 -42
- data/lib/hyperion/admin_listener.rb +207 -0
- data/lib/hyperion/admin_middleware.rb +36 -7
- data/lib/hyperion/cli.rb +310 -11
- data/lib/hyperion/config.rb +440 -14
- data/lib/hyperion/connection.rb +679 -22
- data/lib/hyperion/deprecations.rb +81 -0
- data/lib/hyperion/dispatch_mode.rb +165 -0
- data/lib/hyperion/fiber_local.rb +75 -13
- data/lib/hyperion/h2_admission.rb +77 -0
- data/lib/hyperion/h2_codec.rb +499 -0
- data/lib/hyperion/http/page_cache.rb +122 -0
- data/lib/hyperion/http/sendfile.rb +696 -0
- data/lib/hyperion/http2/native_hpack_adapter.rb +70 -0
- data/lib/hyperion/http2_handler.rb +618 -19
- data/lib/hyperion/io_uring.rb +317 -0
- data/lib/hyperion/lint_wrapper_pool.rb +126 -0
- data/lib/hyperion/master.rb +96 -9
- data/lib/hyperion/metrics/path_templater.rb +68 -0
- data/lib/hyperion/metrics.rb +256 -0
- data/lib/hyperion/prometheus_exporter.rb +150 -0
- data/lib/hyperion/request.rb +13 -0
- data/lib/hyperion/response_writer.rb +477 -16
- data/lib/hyperion/runtime.rb +195 -0
- data/lib/hyperion/server/route_table.rb +179 -0
- data/lib/hyperion/server.rb +519 -55
- data/lib/hyperion/static_preload.rb +133 -0
- data/lib/hyperion/thread_pool.rb +61 -7
- data/lib/hyperion/tls.rb +343 -1
- data/lib/hyperion/version.rb +1 -1
- data/lib/hyperion/websocket/close_codes.rb +71 -0
- data/lib/hyperion/websocket/connection.rb +876 -0
- data/lib/hyperion/websocket/frame.rb +356 -0
- data/lib/hyperion/websocket/handshake.rb +525 -0
- data/lib/hyperion/worker.rb +111 -9
- data/lib/hyperion.rb +137 -3
- metadata +50 -1
|
@@ -0,0 +1,982 @@
|
|
|
1
|
+
/* ----------------------------------------------------------------------
|
|
2
|
+
* Hyperion::Http::Sendfile — zero-copy static-file fast path.
|
|
3
|
+
*
|
|
4
|
+
* Public surface (defined as singleton methods on Hyperion::Http::Sendfile):
|
|
5
|
+
*
|
|
6
|
+
* Sendfile.supported? -> true | false
|
|
7
|
+
* true on Linux (splice / sendfile64) and Darwin / BSD (sendfile).
|
|
8
|
+
* false everywhere else; Ruby caller must fall back to IO.copy_stream.
|
|
9
|
+
*
|
|
10
|
+
* Sendfile.copy(out_io, in_io, offset, len) -> [bytes_written, status]
|
|
11
|
+
* out_io — writable IO (TCPSocket or anything to_io / fileno-able). Must
|
|
12
|
+
* NOT be a TLS-wrapped socket (kernel has no plaintext to send).
|
|
13
|
+
* in_io — readable IO pointing at a regular file (must support fileno).
|
|
14
|
+
* offset — non-negative Integer; byte offset into the source file.
|
|
15
|
+
* len — non-negative Integer; number of bytes to copy.
|
|
16
|
+
*
|
|
17
|
+
* Returns a 2-element Array:
|
|
18
|
+
* bytes_written :: Integer bytes the kernel acknowledged this call
|
|
19
|
+
* status :: Symbol one of:
|
|
20
|
+
* :done — bytes_written == len; transfer complete.
|
|
21
|
+
* :partial — short write; caller MUST loop with offset+bytes.
|
|
22
|
+
* :eagain — socket buffer full; caller yields to fiber
|
|
23
|
+
* scheduler / IO.select then retries from the same
|
|
24
|
+
* offset+bytes_written cursor.
|
|
25
|
+
* :unsupported — host kernel returned ENOSYS / EINVAL on a path that
|
|
26
|
+
* SHOULD work; caller falls back to IO.copy_stream.
|
|
27
|
+
*
|
|
28
|
+
* On any other error (EPIPE, ECONNRESET, ENOMEM, …) the helper raises
|
|
29
|
+
* the matching Errno::* — same shape Ruby socket writes raise.
|
|
30
|
+
*
|
|
31
|
+
* Sendfile.copy_small(out_io, in_io, offset, len) -> Integer
|
|
32
|
+
* 2.0.1 Phase 8a small-file fast path. Bounded by SMALL_FILE_THRESHOLD
|
|
33
|
+
* (64 KiB). Reads the whole slice into a heap buffer, blocks the OS
|
|
34
|
+
* thread on read+write under the GVL released, retries EAGAIN with
|
|
35
|
+
* short select() polls instead of fiber-yielding. Returns total bytes
|
|
36
|
+
* written. Raises Errno::* on hard errors. The fiber-yield round-trip
|
|
37
|
+
* for an 8 KB file (~40 µs per yield × N retries) was the catastrophic
|
|
38
|
+
* row at -t 5 in the 2.0.0 BENCH; the small-file path avoids it
|
|
39
|
+
* entirely by completing the transfer in the same syscall slice.
|
|
40
|
+
*
|
|
41
|
+
* Sendfile.splice_supported? -> true | false
|
|
42
|
+
* 2.0.1 Phase 8b — true iff this build carries the Linux splice(2)
|
|
43
|
+
* pipe-tee path AND the host kernel implemented it. Used by the
|
|
44
|
+
* userspace caller (and specs) to assert the splice branch fires.
|
|
45
|
+
*
|
|
46
|
+
* Sendfile.copy_splice(out_io, in_io, offset, len) -> [bytes_written, status]
|
|
47
|
+
* 2.0.1 Phase 8b primitive; 2.2.0 lifecycle — opens a fresh
|
|
48
|
+
* pipe2(O_CLOEXEC | O_NONBLOCK) pair on every call and closes
|
|
49
|
+
* both fds on every exit path (success, EAGAIN, error, EOF).
|
|
50
|
+
* Two extra syscalls per call vs the old TLS-cached layout, but
|
|
51
|
+
* correctness is restored: a partial transfer interrupted by
|
|
52
|
+
* EPIPE cannot leak residual bytes onto the next request's
|
|
53
|
+
* socket. Kept as a self-contained one-shot primitive for
|
|
54
|
+
* small payloads or out-of-band callers that don't want to
|
|
55
|
+
* manage the pipe lifecycle.
|
|
56
|
+
*
|
|
57
|
+
* Sendfile.copy_splice_into_pipe(out_io, in_io, offset, len, pipe_r, pipe_w)
|
|
58
|
+
* -> [bytes_written, status]
|
|
59
|
+
* 2.2.x fix-A primitive — splice ladder for ONE chunk against a
|
|
60
|
+
* CALLER-PROVIDED pipe pair. Does NOT open or close the pipe;
|
|
61
|
+
* the Ruby caller (`native_copy_loop` in lib/hyperion/http/sendfile.rb)
|
|
62
|
+
* opens one pipe2(O_CLOEXEC | O_NONBLOCK) per RESPONSE, hands the
|
|
63
|
+
* fds in for every chunk of the response, and closes them in an
|
|
64
|
+
* ensure block when the loop unwinds. For a 1 MiB asset at 64 KiB
|
|
65
|
+
* chunks that's 16 splice-rounds + 1 pipe2 + 2 closes = 19 syscalls
|
|
66
|
+
* versus the old per-chunk `copy_splice` shape's 16 splice-rounds +
|
|
67
|
+
* 16 pipe2 + 32 closes = 64 syscalls; a 3.4× syscall-count reduction
|
|
68
|
+
* per 1 MiB request, which restores the splice-vs-sendfile win the
|
|
69
|
+
* bench sweep on 2026-04-30 lost (see CHANGELOG 2.2.x fix-A).
|
|
70
|
+
*
|
|
71
|
+
* Phase 1 strategy
|
|
72
|
+
* ----------------
|
|
73
|
+
* Linux: prefer sendfile(2) (single syscall, file -> socket). If sendfile
|
|
74
|
+
* is unavailable in this build (very old kernels), splice(2)
|
|
75
|
+
* through a pipe-tee acts as the fallback (file -> pipe -> socket).
|
|
76
|
+
* Both paths are true zero-copy: page cache bytes never enter
|
|
77
|
+
* userspace.
|
|
78
|
+
* BSD/Darwin: sendfile(2) — different signature (offset is in/out via
|
|
79
|
+
* off_t*), same zero-copy guarantee.
|
|
80
|
+
* Other: Sendfile.supported? returns false; copy() raises NotImplementedError
|
|
81
|
+
* so Ruby's caller drops to IO.copy_stream.
|
|
82
|
+
*
|
|
83
|
+
* Phase 8 (2.0.1) — close the last two static-file rps gaps
|
|
84
|
+
* --------------------------------------------------------
|
|
85
|
+
* 8a. Small files (<= 64 KiB) bypass the EAGAIN-yield-retry storm. At
|
|
86
|
+
* -t 5 with 5 fibers per worker, an 8 KB file paying ~40 ms in
|
|
87
|
+
* fiber-yield ping-pong dropped to 121 r/s (Puma at 1,246). The
|
|
88
|
+
* small-file path reads the slice in one syscall and writes it in
|
|
89
|
+
* one or two — under the GVL released, polling EAGAIN with short
|
|
90
|
+
* select() rather than fiber-yielding. Per-call cost on the 8 KB
|
|
91
|
+
* row drops from milliseconds to microseconds.
|
|
92
|
+
*
|
|
93
|
+
* 8b. Big files on Linux (> 64 KiB) optionally splice through a
|
|
94
|
+
* pipe pair (file_fd -> pipe_w -> sock_fd) with
|
|
95
|
+
* SPLICE_F_MOVE | SPLICE_F_MORE for an extra ~5-15% over plain
|
|
96
|
+
* sendfile on the 1 MiB asset. 2.0.1 cached one pipe per OS
|
|
97
|
+
* thread; 2.2.0 opens a fresh pipe per call and closes it on
|
|
98
|
+
* every exit path (success, EAGAIN, error, EOF). The two
|
|
99
|
+
* extra syscalls per call (pipe2 + 2× close) are amortized
|
|
100
|
+
* against the kernel-side zero-copy splice transfer; correctness
|
|
101
|
+
* is unconditional: a pipe never carries bytes for more than
|
|
102
|
+
* one transfer, so EPIPE mid-transfer cannot leak residual
|
|
103
|
+
* bytes onto the next request's socket.
|
|
104
|
+
*
|
|
105
|
+
* GVL discipline
|
|
106
|
+
* --------------
|
|
107
|
+
* The kernel call itself runs under rb_thread_call_without_gvl so that other
|
|
108
|
+
* fibers / threads can run while we wait on socket buffer space. EAGAIN /
|
|
109
|
+
* EWOULDBLOCK do NOT spin in C — we return :eagain and let the Ruby caller
|
|
110
|
+
* yield to the fiber scheduler (or IO.select when no scheduler is active).
|
|
111
|
+
*
|
|
112
|
+
* Single-writer invariant
|
|
113
|
+
* -----------------------
|
|
114
|
+
* Phase 1 is HTTP/1.1 only. The connection is owned by a single fiber/thread
|
|
115
|
+
* for the duration of the response, so there's no concurrent-writer problem
|
|
116
|
+
* to worry about here. h2 sendfile would require coordination with the
|
|
117
|
+
* per-connection writer fiber; out of scope for 1.7.0 (RFC §3 future work).
|
|
118
|
+
* ---------------------------------------------------------------------- */
|
|
119
|
+
|
|
120
|
+
#include <ruby.h>
|
|
121
|
+
#include <ruby/thread.h>
|
|
122
|
+
#include <ruby/io.h>
|
|
123
|
+
|
|
124
|
+
#include <errno.h>
|
|
125
|
+
#include <string.h>
|
|
126
|
+
#include <unistd.h>
|
|
127
|
+
#include <fcntl.h>
|
|
128
|
+
#include <sys/types.h>
|
|
129
|
+
#include <sys/stat.h>
|
|
130
|
+
|
|
131
|
+
#if defined(__linux__)
|
|
132
|
+
# include <sys/sendfile.h>
|
|
133
|
+
# include <sys/uio.h>
|
|
134
|
+
# include <fcntl.h>
|
|
135
|
+
# define HYP_SF_LINUX 1
|
|
136
|
+
# ifdef F_SETPIPE_SZ
|
|
137
|
+
# define HYP_HAVE_F_SETPIPE_SZ 1
|
|
138
|
+
# endif
|
|
139
|
+
#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__)
|
|
140
|
+
# include <sys/socket.h>
|
|
141
|
+
# include <sys/uio.h>
|
|
142
|
+
# define HYP_SF_BSD 1
|
|
143
|
+
#endif
|
|
144
|
+
|
|
145
|
+
#include <sys/select.h>
|
|
146
|
+
|
|
147
|
+
/* Phase 8a small-file threshold. Files at or below this size take the
|
|
148
|
+
* synchronous read+write path. 64 KiB matches the kernel TCP send-buffer
|
|
149
|
+
* sweet spot on Linux (also `USERSPACE_CHUNK` in the Ruby façade), and
|
|
150
|
+
* covers the vast majority of static assets (favicons, sprites, JSON
|
|
151
|
+
* manifests, CSS bundles below 64 KB). */
|
|
152
|
+
#define HYP_SMALL_FILE_THRESHOLD (64 * 1024)
|
|
153
|
+
|
|
154
|
+
/* Phase 8a single-MSS threshold. A file under one TCP segment payload
|
|
155
|
+
* fits in a single packet under typical 1500-byte MTU; we issue exactly
|
|
156
|
+
* one read() + one write() with no loop. */
|
|
157
|
+
#define HYP_SINGLE_MSS_THRESHOLD 1500
|
|
158
|
+
|
|
159
|
+
/* Phase 8a EAGAIN poll budget for the small-file path. We poll up to
|
|
160
|
+
* ~50 ms total (5 × 10 ms select) before giving up and surfacing EAGAIN
|
|
161
|
+
* to Ruby; on the small-file path this almost never triggers because
|
|
162
|
+
* the slice fits in the socket buffer immediately. */
|
|
163
|
+
#define HYP_SMALL_EAGAIN_RETRIES 5
|
|
164
|
+
#define HYP_SMALL_EAGAIN_USEC_PER_RETRY 10000
|
|
165
|
+
|
|
166
|
+
static VALUE rb_mHyperion;
|
|
167
|
+
static VALUE rb_mHyperionHttp;
|
|
168
|
+
static VALUE rb_mHyperionHttpSendfile;
|
|
169
|
+
|
|
170
|
+
static ID id_fileno;
|
|
171
|
+
static ID id_to_io;
|
|
172
|
+
|
|
173
|
+
static VALUE sym_done;
|
|
174
|
+
static VALUE sym_partial;
|
|
175
|
+
static VALUE sym_eagain;
|
|
176
|
+
static VALUE sym_unsupported;
|
|
177
|
+
|
|
178
|
+
/* Extract a kernel fd from a Ruby IO-ish object.
|
|
179
|
+
*
|
|
180
|
+
* We accept:
|
|
181
|
+
* - an Integer (the caller already pulled fileno)
|
|
182
|
+
* - a real ::IO subclass (use rb_io_descriptor)
|
|
183
|
+
* - anything responding to #to_io (call it, then take its fd)
|
|
184
|
+
* - anything responding to #fileno (call it as last resort)
|
|
185
|
+
*
|
|
186
|
+
* Raises TypeError on anything else.
|
|
187
|
+
*/
|
|
188
|
+
/* Pull a kernel file descriptor out of a Ruby IO-ish object.
 *
 * Accepted shapes, tried in order:
 *   - Integer            -> used as the fd directly
 *   - ::IO instance      -> rb_io_descriptor
 *   - #to_io             -> converted, then the IO / Integer handling above
 *   - #fileno            -> last resort
 *
 * Anything else raises TypeError. `role` names the offending argument
 * ("out_io" / "in_io") in the error message. */
static int extract_fd(VALUE obj, const char *role) {
    if (RB_TYPE_P(obj, T_FIXNUM) || RB_TYPE_P(obj, T_BIGNUM)) {
        return NUM2INT(obj);
    }

    if (RB_TYPE_P(obj, T_FILE)) {
        return rb_io_descriptor(obj);
    }

    if (rb_respond_to(obj, id_to_io)) {
        VALUE converted = rb_funcall(obj, id_to_io, 0);
        if (RB_TYPE_P(converted, T_FILE)) {
            return rb_io_descriptor(converted);
        }
        if (RB_TYPE_P(converted, T_FIXNUM) || RB_TYPE_P(converted, T_BIGNUM)) {
            return NUM2INT(converted);
        }
    }

    if (rb_respond_to(obj, id_fileno)) {
        VALUE fd_val = rb_funcall(obj, id_fileno, 0);
        if (RB_TYPE_P(fd_val, T_FIXNUM) || RB_TYPE_P(fd_val, T_BIGNUM)) {
            return NUM2INT(fd_val);
        }
    }

    rb_raise(rb_eTypeError,
             "Hyperion::Http::Sendfile.copy: %s argument must be an IO, "
             "an Integer fd, or respond to #to_io / #fileno",
             role);
    return -1; /* not reached */
}
|
|
220
|
+
|
|
221
|
+
#if defined(HYP_SF_LINUX) || defined(HYP_SF_BSD)
|
|
222
|
+
|
|
223
|
+
/* Argument bundle carried into (and results carried out of) the
 * GVL-released kernel sendfile call. */
typedef struct {
    int out_fd;
    int in_fd;
    off_t offset; /* in: requested offset; on Linux passed by reference
                   * to sendfile so the kernel updates it. */
    size_t len;
    ssize_t rc;   /* out: kernel return value */
    int err;      /* out: errno captured from the kernel call */
} sendfile_args_t;

# ifdef HYP_SF_LINUX
/* Linux sendfile(2): rc is the byte count moved this round; the kernel
 * advances a->offset through the pointer we pass. Runs without the GVL. */
static void *sendfile_blocking_call(void *raw) {
    sendfile_args_t *a = (sendfile_args_t *)raw;
    a->rc = sendfile(a->out_fd, a->in_fd, &a->offset, a->len);
    a->err = (a->rc < 0) ? errno : 0;
    return NULL;
}
# endif /* HYP_SF_LINUX */

# ifdef HYP_SF_BSD
/* BSD-family sendfile(2): argument order is (file, socket) — the reverse
 * of Linux — and the transferred byte count comes back through an
 * out-parameter rather than the return value. */
static void *sendfile_blocking_call(void *raw) {
    sendfile_args_t *a = (sendfile_args_t *)raw;
# if defined(__APPLE__)
    /* Darwin: sendfile(int fd, int s, off_t offset, off_t *len, struct sf_hdtr*, int flags)
     * On entry *len is bytes to send; on return *len is bytes actually sent.
     */
    off_t moved = (off_t)a->len;
    int status = sendfile(a->in_fd, a->out_fd, a->offset, &moved, NULL, 0);
    a->rc = (ssize_t)moved; /* Darwin reports partial bytes via *len even on error */
    a->err = (status < 0) ? errno : 0;
# else
    /* FreeBSD/Net/Dragon: sendfile(int fd, int s, off_t offset, size_t nbytes,
     * struct sf_hdtr*, off_t *sbytes, int flags)
     */
    off_t moved = 0;
    int status = sendfile(a->in_fd, a->out_fd, a->offset, a->len, NULL, &moved, 0);
    a->rc = (ssize_t)moved;
    a->err = (status < 0) ? errno : 0;
# endif
    return NULL;
}
# endif /* HYP_SF_BSD */
|
|
266
|
+
|
|
267
|
+
#endif /* HYP_SF_LINUX || HYP_SF_BSD */
|
|
268
|
+
|
|
269
|
+
/* ============================================================
|
|
270
|
+
* Phase 8a — small-file synchronous read+write fast path.
|
|
271
|
+
* ============================================================ */
|
|
272
|
+
|
|
273
|
+
typedef struct {
|
|
274
|
+
int in_fd;
|
|
275
|
+
int out_fd;
|
|
276
|
+
off_t offset;
|
|
277
|
+
size_t len;
|
|
278
|
+
char *buf; /* heap buffer, sized to len */
|
|
279
|
+
ssize_t total; /* out: bytes successfully written */
|
|
280
|
+
int err; /* out: errno on failure (0 on success) */
|
|
281
|
+
} small_copy_args_t;
|
|
282
|
+
|
|
283
|
+
/* Synchronous read+write loop. Runs under rb_thread_call_without_gvl —
|
|
284
|
+
* it never yields to the fiber scheduler. EAGAIN is handled inline via
|
|
285
|
+
* short select() polls (up to ~50 ms total). For files that fit in the
|
|
286
|
+
* socket send buffer (the 8 KB and 1 KB rows), no EAGAIN poll fires;
|
|
287
|
+
* the whole transfer completes in one or two syscalls. */
|
|
288
|
+
static void *small_copy_blocking(void *raw) {
|
|
289
|
+
small_copy_args_t *a = (small_copy_args_t *)raw;
|
|
290
|
+
a->total = 0;
|
|
291
|
+
a->err = 0;
|
|
292
|
+
|
|
293
|
+
/* Read the slice into our heap buffer. pread() lets us read from
|
|
294
|
+
* an absolute offset without having to seek the file fd, which
|
|
295
|
+
* matters because the same File handle may be used by other code
|
|
296
|
+
* paths (and seek+read isn't atomic w.r.t. concurrent fibers). */
|
|
297
|
+
size_t read_total = 0;
|
|
298
|
+
while (read_total < a->len) {
|
|
299
|
+
ssize_t r = pread(a->in_fd, a->buf + read_total,
|
|
300
|
+
a->len - read_total, a->offset + (off_t)read_total);
|
|
301
|
+
if (r > 0) {
|
|
302
|
+
read_total += (size_t)r;
|
|
303
|
+
continue;
|
|
304
|
+
}
|
|
305
|
+
if (r == 0) {
|
|
306
|
+
/* Short file (caller asked for more bytes than the file
|
|
307
|
+
* holds). Truncate len to what we got and proceed. */
|
|
308
|
+
a->len = read_total;
|
|
309
|
+
break;
|
|
310
|
+
}
|
|
311
|
+
if (errno == EINTR) {
|
|
312
|
+
continue;
|
|
313
|
+
}
|
|
314
|
+
a->err = errno;
|
|
315
|
+
return NULL;
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
/* Write the buffer to the socket. Loop on short writes. EAGAIN is
|
|
319
|
+
* handled with a bounded select() poll instead of a fiber yield —
|
|
320
|
+
* for an 8 KB file the kernel send buffer almost always has space
|
|
321
|
+
* and this loop runs once. */
|
|
322
|
+
size_t write_total = 0;
|
|
323
|
+
int eagain_retries = HYP_SMALL_EAGAIN_RETRIES;
|
|
324
|
+
while (write_total < a->len) {
|
|
325
|
+
ssize_t w = write(a->out_fd, a->buf + write_total,
|
|
326
|
+
a->len - write_total);
|
|
327
|
+
if (w > 0) {
|
|
328
|
+
write_total += (size_t)w;
|
|
329
|
+
continue;
|
|
330
|
+
}
|
|
331
|
+
if (w < 0 && errno == EINTR) {
|
|
332
|
+
continue;
|
|
333
|
+
}
|
|
334
|
+
if (w < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
|
|
335
|
+
if (eagain_retries-- <= 0) {
|
|
336
|
+
a->err = EAGAIN;
|
|
337
|
+
break;
|
|
338
|
+
}
|
|
339
|
+
fd_set wfds;
|
|
340
|
+
FD_ZERO(&wfds);
|
|
341
|
+
FD_SET(a->out_fd, &wfds);
|
|
342
|
+
struct timeval tv;
|
|
343
|
+
tv.tv_sec = 0;
|
|
344
|
+
tv.tv_usec = HYP_SMALL_EAGAIN_USEC_PER_RETRY;
|
|
345
|
+
(void)select(a->out_fd + 1, NULL, &wfds, NULL, &tv);
|
|
346
|
+
continue;
|
|
347
|
+
}
|
|
348
|
+
if (w < 0) {
|
|
349
|
+
a->err = errno;
|
|
350
|
+
break;
|
|
351
|
+
}
|
|
352
|
+
/* w == 0: should not happen on a regular socket; treat as
|
|
353
|
+
* short-write retry once, then fail. */
|
|
354
|
+
a->err = EIO;
|
|
355
|
+
break;
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
a->total = (ssize_t)write_total;
|
|
359
|
+
return NULL;
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
/* Sendfile.copy_small(out_io, in_io, offset, len) -> Integer */
|
|
363
|
+
static VALUE rb_sendfile_copy_small(VALUE self, VALUE out_io, VALUE in_io,
|
|
364
|
+
VALUE rb_offset, VALUE rb_len) {
|
|
365
|
+
(void)self;
|
|
366
|
+
|
|
367
|
+
long offset_l = NUM2LONG(rb_offset);
|
|
368
|
+
long len_l = NUM2LONG(rb_len);
|
|
369
|
+
if (offset_l < 0) {
|
|
370
|
+
rb_raise(rb_eArgError, "offset must be >= 0 (got %ld)", offset_l);
|
|
371
|
+
}
|
|
372
|
+
if (len_l < 0) {
|
|
373
|
+
rb_raise(rb_eArgError, "len must be >= 0 (got %ld)", len_l);
|
|
374
|
+
}
|
|
375
|
+
if (len_l == 0) {
|
|
376
|
+
return INT2FIX(0);
|
|
377
|
+
}
|
|
378
|
+
if (len_l > HYP_SMALL_FILE_THRESHOLD) {
|
|
379
|
+
rb_raise(rb_eArgError,
|
|
380
|
+
"Hyperion::Http::Sendfile.copy_small: len %ld exceeds "
|
|
381
|
+
"SMALL_FILE_THRESHOLD %d; use copy() for streaming",
|
|
382
|
+
len_l, HYP_SMALL_FILE_THRESHOLD);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
small_copy_args_t args;
|
|
386
|
+
args.out_fd = extract_fd(out_io, "out_io");
|
|
387
|
+
args.in_fd = extract_fd(in_io, "in_io");
|
|
388
|
+
args.offset = (off_t)offset_l;
|
|
389
|
+
args.len = (size_t)len_l;
|
|
390
|
+
|
|
391
|
+
/* Heap-allocate a buffer of exactly the requested size. Bounded by
|
|
392
|
+
* 64 KiB, so this is a one-shot small alloc. We could pull from a
|
|
393
|
+
* per-thread arena to avoid malloc, but the bench shape (one alloc
|
|
394
|
+
* per request, freed before the next) is well within glibc's
|
|
395
|
+
* thread-local cache hot path. */
|
|
396
|
+
args.buf = (char *)malloc(args.len);
|
|
397
|
+
if (args.buf == NULL) {
|
|
398
|
+
rb_raise(rb_eNoMemError, "Hyperion::Http::Sendfile.copy_small: "
|
|
399
|
+
"failed to allocate %lu bytes",
|
|
400
|
+
(unsigned long)args.len);
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
rb_thread_call_without_gvl(small_copy_blocking, &args, RUBY_UBF_IO, NULL);
|
|
404
|
+
|
|
405
|
+
free(args.buf);
|
|
406
|
+
|
|
407
|
+
if (args.err != 0 && args.total == 0) {
|
|
408
|
+
errno = args.err;
|
|
409
|
+
rb_sys_fail("Hyperion::Http::Sendfile.copy_small");
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
/* Partial transfer (e.g. EAGAIN budget exhausted). Surface what we
|
|
413
|
+
* got; the caller can re-issue from cursor + total. The 8 KB row
|
|
414
|
+
* doesn't hit this in practice but we're defensive about it. */
|
|
415
|
+
return LONG2NUM((long)args.total);
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
/* ============================================================
|
|
419
|
+
* Phase 8b / 2.2.0 — Linux splice(2) through a fresh per-request pipe.
|
|
420
|
+
* ============================================================
|
|
421
|
+
*
|
|
422
|
+
* 2.0.1 originally cached one pipe pair per OS thread in pthread TLS.
|
|
423
|
+
* That layout leaked residual bytes between requests on EPIPE: if
|
|
424
|
+
* splice(file -> pipe) succeeded but splice(pipe -> sock) failed
|
|
425
|
+
* mid-transfer (peer closed), the unread bytes stayed in the pipe
|
|
426
|
+
* and were sent on the NEXT connection's socket. The 2.0.1 release
|
|
427
|
+
* disabled the splice path entirely from copy_to_socket and routed
|
|
428
|
+
* production traffic back through plain sendfile.
|
|
429
|
+
*
|
|
430
|
+
* 2.2.0 fix — fresh pipe pair per call. pipe2(O_CLOEXEC) at entry,
|
|
431
|
+
* close both fds on every exit path (success, EAGAIN, error, EOF).
|
|
432
|
+
* Two extra syscalls per call, but the splice copies remain
|
|
433
|
+
* kernel-side zero-copy (file -> pipe -> socket, page cache bytes
|
|
434
|
+
* never enter userspace) and the correctness window is gone: a pipe
|
|
435
|
+
* pair only ever carries bytes for one transfer. No persistent
|
|
436
|
+
* state, no fd leak across thousands of requests, no cross-connection
|
|
437
|
+
* byte leak. */
|
|
438
|
+
|
|
439
|
+
#ifdef HYP_SF_LINUX
|
|
440
|
+
|
|
441
|
+
/* Argument bundle for one splice round (file -> pipe -> socket). */
typedef struct {
    int in_fd;
    int out_fd;
    int pipe_r;
    int pipe_w;
    off_t offset;
    size_t len;
    ssize_t rc; /* out: bytes actually delivered to the socket */
    int err;    /* out: errno (0 on clean completion) */
} splice_args_t;

# ifndef SPLICE_F_MOVE
#  define SPLICE_F_MOVE 1
# endif
# ifndef SPLICE_F_MORE
#  define SPLICE_F_MORE 4
# endif
# ifndef SPLICE_F_NONBLOCK
#  define SPLICE_F_NONBLOCK 2
# endif

/* Open a fresh pipe pair for a single splice transfer. On success
 * returns 0 with the [read, write] fds in out_fds; on failure returns
 * -errno (the caller maps it to :unsupported / :eagain for Ruby).
 * Every successful open must be paired with hyp_close_pipe_pair on
 * every exit path. */
static int hyp_open_pipe_pair(int out_fds[2]) {
    out_fds[0] = out_fds[1] = -1;

    int status;
# ifdef O_CLOEXEC
    status = pipe2(out_fds, O_CLOEXEC | O_NONBLOCK);
    if (status != 0 && errno == ENOSYS) {
        /* Kernel predates pipe2: fall back to pipe() and set the flags
         * by hand (non-atomic w.r.t. fork, but such a kernel offers no
         * atomic alternative). */
        status = pipe(out_fds);
        if (status == 0) {
            fcntl(out_fds[0], F_SETFD, FD_CLOEXEC);
            fcntl(out_fds[1], F_SETFD, FD_CLOEXEC);
            int flags_r = fcntl(out_fds[0], F_GETFL);
            int flags_w = fcntl(out_fds[1], F_GETFL);
            if (flags_r >= 0) fcntl(out_fds[0], F_SETFL, flags_r | O_NONBLOCK);
            if (flags_w >= 0) fcntl(out_fds[1], F_SETFL, flags_w | O_NONBLOCK);
        }
    }
# else
    status = pipe(out_fds);
# endif
    if (status != 0) {
        return -errno;
    }
# ifdef HYP_HAVE_F_SETPIPE_SZ
    /* Best-effort: grow this pipe to 1 MiB so a large asset moves in a
     * small number of round-trips. Failure (pipe-max-size cap) is
     * ignored — we simply iterate more often on a smaller pipe. */
    (void)fcntl(out_fds[1], F_SETPIPE_SZ, 1024 * 1024);
# endif
    return 0;
}

/* Close both ends of a pipe pair, tolerating already-closed slots,
 * and poison the fds so a double close is harmless. */
static void hyp_close_pipe_pair(int fds[2]) {
    for (int i = 0; i < 2; i++) {
        if (fds[i] >= 0) {
            close(fds[i]);
            fds[i] = -1;
        }
    }
}

/* One splice round: file -> pipe, then pipe -> socket, with the GVL
 * released. a->rc reports bytes delivered to the SOCKET, not bytes
 * read from the file; any surplus left in the pipe is discarded when
 * the caller closes the pair, and the Ruby caller's absolute cursor
 * (old_cursor + rc) re-reads the file from the right place on retry,
 * so nothing is duplicated or skipped on the wire. */
static void *splice_blocking_call(void *raw) {
    splice_args_t *a = (splice_args_t *)raw;
    a->rc = 0;
    a->err = 0;

    /* Step 1: file -> pipe (page cache into pipe buffer; zero-copy). */
    ssize_t filled = splice(a->in_fd, &a->offset, a->pipe_w, NULL,
                            a->len, SPLICE_F_MOVE | SPLICE_F_MORE);
    if (filled < 0) {
        a->err = errno;
        return NULL;
    }
    if (filled == 0) {
        /* Source EOF before any bytes moved. */
        return NULL;
    }

    /* Step 2: drain the pipe into the socket, looping on short writes
     * until the pipe is empty, the socket signals EAGAIN, or a hard
     * error fires. */
    ssize_t drained = 0;
    while (drained < filled) {
        ssize_t n = splice(a->pipe_r, NULL, a->out_fd, NULL,
                           (size_t)(filled - drained),
                           SPLICE_F_MOVE | SPLICE_F_MORE);
        if (n > 0) {
            drained += n;
            continue;
        }
        if (n < 0 && errno == EINTR) continue;
        if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
            /* Socket buffer full: report the progress we made. The
             * caller closes the pipe (dropping the filled - drained
             * leftover) and its offset arithmetic compensates. */
            a->err = EAGAIN;
            break;
        }
        if (n < 0) {
            a->err = errno;
            break;
        }
        /* n == 0: peer side is gone. */
        a->err = EPIPE;
        break;
    }

    a->rc = drained;
    return NULL;
}
|
|
569
|
+
|
|
570
|
+
#endif /* HYP_SF_LINUX */
|
|
571
|
+
|
|
572
|
+
/* Sendfile.copy(out_io, in_io, offset, len) */
|
|
573
|
+
static VALUE rb_sendfile_copy(VALUE self, VALUE out_io, VALUE in_io,
|
|
574
|
+
VALUE rb_offset, VALUE rb_len) {
|
|
575
|
+
(void)self;
|
|
576
|
+
|
|
577
|
+
#if defined(HYP_SF_LINUX) || defined(HYP_SF_BSD)
|
|
578
|
+
long offset_l = NUM2LONG(rb_offset);
|
|
579
|
+
long len_l = NUM2LONG(rb_len);
|
|
580
|
+
if (offset_l < 0) {
|
|
581
|
+
rb_raise(rb_eArgError, "offset must be >= 0 (got %ld)", offset_l);
|
|
582
|
+
}
|
|
583
|
+
if (len_l < 0) {
|
|
584
|
+
rb_raise(rb_eArgError, "len must be >= 0 (got %ld)", len_l);
|
|
585
|
+
}
|
|
586
|
+
if (len_l == 0) {
|
|
587
|
+
return rb_ary_new3(2, INT2FIX(0), sym_done);
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
sendfile_args_t args;
|
|
591
|
+
args.out_fd = extract_fd(out_io, "out_io");
|
|
592
|
+
args.in_fd = extract_fd(in_io, "in_io");
|
|
593
|
+
args.offset = (off_t)offset_l;
|
|
594
|
+
args.len = (size_t)len_l;
|
|
595
|
+
args.rc = -1;
|
|
596
|
+
args.err = 0;
|
|
597
|
+
|
|
598
|
+
rb_thread_call_without_gvl(sendfile_blocking_call, &args, RUBY_UBF_IO, NULL);
|
|
599
|
+
|
|
600
|
+
if (args.rc < 0) {
|
|
601
|
+
if (args.err == EAGAIN || args.err == EWOULDBLOCK || args.err == EINTR) {
|
|
602
|
+
/* Kernel didn't accept any bytes; caller yields and retries. */
|
|
603
|
+
return rb_ary_new3(2, INT2FIX(0), sym_eagain);
|
|
604
|
+
}
|
|
605
|
+
if (args.err == ENOSYS || args.err == EINVAL || args.err == ENOTSUP
|
|
606
|
+
# ifdef EOPNOTSUPP
|
|
607
|
+
|| args.err == EOPNOTSUPP
|
|
608
|
+
# endif
|
|
609
|
+
) {
|
|
610
|
+
/* Kernel says "this combination of fds doesn't support sendfile"
|
|
611
|
+
* (e.g. socket on a tunfs that doesn't expose page cache, or
|
|
612
|
+
* Darwin trying to sendfile to a non-stream socket). Caller
|
|
613
|
+
* falls back to IO.copy_stream. */
|
|
614
|
+
return rb_ary_new3(2, INT2FIX(0), sym_unsupported);
|
|
615
|
+
}
|
|
616
|
+
# ifdef HYP_SF_BSD
|
|
617
|
+
/* On Darwin/BSD a partial transfer can also report errno; if any
|
|
618
|
+
* bytes flew, surface them with :partial so the caller can advance
|
|
619
|
+
* its cursor before re-erroring on the next iteration. */
|
|
620
|
+
if (args.rc > 0) {
|
|
621
|
+
return rb_ary_new3(2, LONG2NUM((long)args.rc),
|
|
622
|
+
sym_partial);
|
|
623
|
+
}
|
|
624
|
+
# endif
|
|
625
|
+
errno = args.err;
|
|
626
|
+
rb_sys_fail("sendfile");
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
if (args.rc == 0) {
|
|
630
|
+
/* Kernel accepted nothing AND didn't error. Treat as :eagain so
|
|
631
|
+
* the caller yields rather than spinning. (Some kernels behave
|
|
632
|
+
* this way under tight non-blocking pressure.) */
|
|
633
|
+
return rb_ary_new3(2, INT2FIX(0), sym_eagain);
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
if ((size_t)args.rc < args.len) {
|
|
637
|
+
return rb_ary_new3(2, LONG2NUM((long)args.rc), sym_partial);
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
return rb_ary_new3(2, LONG2NUM((long)args.rc), sym_done);
|
|
641
|
+
|
|
642
|
+
#else /* !Linux && !BSD */
|
|
643
|
+
(void)out_io; (void)in_io; (void)rb_offset; (void)rb_len;
|
|
644
|
+
rb_raise(rb_eNotImpError,
|
|
645
|
+
"Hyperion::Http::Sendfile.copy: native zero-copy unsupported on "
|
|
646
|
+
"this platform; fall back to IO.copy_stream");
|
|
647
|
+
return Qnil; /* unreachable */
|
|
648
|
+
#endif
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
/* Sendfile.copy_splice(out_io, in_io, offset, len) -> [bytes_written, status]
|
|
652
|
+
* Linux-only. 2.2.0 layout: opens a fresh pipe pair via
|
|
653
|
+
* pipe2(O_CLOEXEC | O_NONBLOCK) on every call and closes it on every
|
|
654
|
+
* exit path. No persistent state, no cross-request byte leak.
|
|
655
|
+
* Returns :unsupported on non-Linux hosts so the Ruby caller can fall
|
|
656
|
+
* back to copy(). */
|
|
657
|
+
static VALUE rb_sendfile_copy_splice(VALUE self, VALUE out_io, VALUE in_io,
|
|
658
|
+
VALUE rb_offset, VALUE rb_len) {
|
|
659
|
+
(void)self;
|
|
660
|
+
|
|
661
|
+
#ifdef HYP_SF_LINUX
|
|
662
|
+
long offset_l = NUM2LONG(rb_offset);
|
|
663
|
+
long len_l = NUM2LONG(rb_len);
|
|
664
|
+
if (offset_l < 0) {
|
|
665
|
+
rb_raise(rb_eArgError, "offset must be >= 0 (got %ld)", offset_l);
|
|
666
|
+
}
|
|
667
|
+
if (len_l < 0) {
|
|
668
|
+
rb_raise(rb_eArgError, "len must be >= 0 (got %ld)", len_l);
|
|
669
|
+
}
|
|
670
|
+
if (len_l == 0) {
|
|
671
|
+
return rb_ary_new3(2, INT2FIX(0), sym_done);
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
/* Fresh pipe pair for THIS call only. Opened here, closed on
|
|
675
|
+
* every exit path below. pipe2 is one syscall; the close pair
|
|
676
|
+
* is two more. The 3-syscall overhead is amortized against the
|
|
677
|
+
* splice copies (which stay zero-copy across file -> pipe ->
|
|
678
|
+
* socket) for files >= 64 KiB; the Ruby caller gates on size. */
|
|
679
|
+
int pipe_fds[2];
|
|
680
|
+
int prc = hyp_open_pipe_pair(pipe_fds);
|
|
681
|
+
if (prc != 0) {
|
|
682
|
+
/* pipe2 / pipe failed. ENOSYS / EMFILE / ENFILE — all map
|
|
683
|
+
* to "splice path can't run right now"; let the caller fall
|
|
684
|
+
* back to plain sendfile. */
|
|
685
|
+
return rb_ary_new3(2, INT2FIX(0), sym_unsupported);
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
splice_args_t args;
|
|
689
|
+
args.in_fd = extract_fd(in_io, "in_io");
|
|
690
|
+
args.out_fd = extract_fd(out_io, "out_io");
|
|
691
|
+
args.pipe_r = pipe_fds[0];
|
|
692
|
+
args.pipe_w = pipe_fds[1];
|
|
693
|
+
args.offset = (off_t)offset_l;
|
|
694
|
+
args.len = (size_t)len_l;
|
|
695
|
+
args.rc = 0;
|
|
696
|
+
args.err = 0;
|
|
697
|
+
|
|
698
|
+
rb_thread_call_without_gvl(splice_blocking_call, &args, RUBY_UBF_IO, NULL);
|
|
699
|
+
|
|
700
|
+
/* Close the pipe pair before we either return a value or
|
|
701
|
+
* raise. This is the whole point of the 2.2.0 fix: the pipe
|
|
702
|
+
* never outlives this call, so residual bytes from a partial
|
|
703
|
+
* transfer cannot leak onto the next request's socket. */
|
|
704
|
+
hyp_close_pipe_pair(pipe_fds);
|
|
705
|
+
|
|
706
|
+
if (args.rc > 0) {
|
|
707
|
+
if (args.err == EAGAIN || args.err == EWOULDBLOCK) {
|
|
708
|
+
return rb_ary_new3(2, LONG2NUM((long)args.rc), sym_partial);
|
|
709
|
+
}
|
|
710
|
+
if (args.err != 0) {
|
|
711
|
+
errno = args.err;
|
|
712
|
+
rb_sys_fail("splice");
|
|
713
|
+
}
|
|
714
|
+
if ((size_t)args.rc < args.len) {
|
|
715
|
+
return rb_ary_new3(2, LONG2NUM((long)args.rc), sym_partial);
|
|
716
|
+
}
|
|
717
|
+
return rb_ary_new3(2, LONG2NUM((long)args.rc), sym_done);
|
|
718
|
+
}
|
|
719
|
+
|
|
720
|
+
/* args.rc == 0. */
|
|
721
|
+
if (args.err == EAGAIN || args.err == EWOULDBLOCK || args.err == EINTR) {
|
|
722
|
+
return rb_ary_new3(2, INT2FIX(0), sym_eagain);
|
|
723
|
+
}
|
|
724
|
+
if (args.err == ENOSYS || args.err == EINVAL) {
|
|
725
|
+
return rb_ary_new3(2, INT2FIX(0), sym_unsupported);
|
|
726
|
+
}
|
|
727
|
+
if (args.err != 0) {
|
|
728
|
+
errno = args.err;
|
|
729
|
+
rb_sys_fail("splice");
|
|
730
|
+
}
|
|
731
|
+
return rb_ary_new3(2, INT2FIX(0), sym_done);
|
|
732
|
+
#else
|
|
733
|
+
(void)out_io; (void)in_io; (void)rb_offset; (void)rb_len;
|
|
734
|
+
return rb_ary_new3(2, INT2FIX(0), sym_unsupported);
|
|
735
|
+
#endif
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
/* Sendfile.copy_splice_into_pipe(out_io, in_io, offset, len, pipe_r, pipe_w)
|
|
739
|
+
* -> [bytes_written, status]
|
|
740
|
+
*
|
|
741
|
+
* 2.2.x fix-A — pipe-hoisted splice primitive.
|
|
742
|
+
*
|
|
743
|
+
* Splices file_fd → pipe_w → sock_fd for ONE chunk of a response. The
|
|
744
|
+
* pipe pair is supplied by the caller and is reused across every chunk
|
|
745
|
+
* of a single response; this function does NOT open or close the pipe.
|
|
746
|
+
* The Ruby façade (`native_copy_loop`) is responsible for the
|
|
747
|
+
* pipe lifecycle (`open_splice_pipe!` at entry, `close` in an ensure
|
|
748
|
+
* block at exit). Same return shape as `copy_splice` — :done /
|
|
749
|
+
* :partial / :eagain / :unsupported.
|
|
750
|
+
*
|
|
751
|
+
* Linux-only. Returns [0, :unsupported] on non-Linux hosts so the
|
|
752
|
+
* Ruby caller can fall back to plain sendfile. pipe_r / pipe_w may
|
|
753
|
+
* be Integer fds or IO objects (`IO.pipe` returns the latter); we
|
|
754
|
+
* extract via the same helper used for in_io/out_io. */
|
|
755
|
+
static VALUE rb_sendfile_copy_splice_into_pipe(VALUE self, VALUE out_io, VALUE in_io,
|
|
756
|
+
VALUE rb_offset, VALUE rb_len,
|
|
757
|
+
VALUE rb_pipe_r, VALUE rb_pipe_w) {
|
|
758
|
+
(void)self;
|
|
759
|
+
|
|
760
|
+
#ifdef HYP_SF_LINUX
|
|
761
|
+
long offset_l = NUM2LONG(rb_offset);
|
|
762
|
+
long len_l = NUM2LONG(rb_len);
|
|
763
|
+
if (offset_l < 0) {
|
|
764
|
+
rb_raise(rb_eArgError, "offset must be >= 0 (got %ld)", offset_l);
|
|
765
|
+
}
|
|
766
|
+
if (len_l < 0) {
|
|
767
|
+
rb_raise(rb_eArgError, "len must be >= 0 (got %ld)", len_l);
|
|
768
|
+
}
|
|
769
|
+
if (len_l == 0) {
|
|
770
|
+
return rb_ary_new3(2, INT2FIX(0), sym_done);
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
splice_args_t args;
|
|
774
|
+
args.in_fd = extract_fd(in_io, "in_io");
|
|
775
|
+
args.out_fd = extract_fd(out_io, "out_io");
|
|
776
|
+
args.pipe_r = extract_fd(rb_pipe_r, "pipe_r");
|
|
777
|
+
args.pipe_w = extract_fd(rb_pipe_w, "pipe_w");
|
|
778
|
+
args.offset = (off_t)offset_l;
|
|
779
|
+
args.len = (size_t)len_l;
|
|
780
|
+
args.rc = 0;
|
|
781
|
+
args.err = 0;
|
|
782
|
+
|
|
783
|
+
rb_thread_call_without_gvl(splice_blocking_call, &args, RUBY_UBF_IO, NULL);
|
|
784
|
+
|
|
785
|
+
if (args.rc > 0) {
|
|
786
|
+
if (args.err == EAGAIN || args.err == EWOULDBLOCK) {
|
|
787
|
+
return rb_ary_new3(2, LONG2NUM((long)args.rc), sym_partial);
|
|
788
|
+
}
|
|
789
|
+
if (args.err != 0) {
|
|
790
|
+
errno = args.err;
|
|
791
|
+
rb_sys_fail("splice");
|
|
792
|
+
}
|
|
793
|
+
if ((size_t)args.rc < args.len) {
|
|
794
|
+
return rb_ary_new3(2, LONG2NUM((long)args.rc), sym_partial);
|
|
795
|
+
}
|
|
796
|
+
return rb_ary_new3(2, LONG2NUM((long)args.rc), sym_done);
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
/* args.rc == 0. */
|
|
800
|
+
if (args.err == EAGAIN || args.err == EWOULDBLOCK || args.err == EINTR) {
|
|
801
|
+
return rb_ary_new3(2, INT2FIX(0), sym_eagain);
|
|
802
|
+
}
|
|
803
|
+
if (args.err == ENOSYS || args.err == EINVAL) {
|
|
804
|
+
return rb_ary_new3(2, INT2FIX(0), sym_unsupported);
|
|
805
|
+
}
|
|
806
|
+
if (args.err != 0) {
|
|
807
|
+
errno = args.err;
|
|
808
|
+
rb_sys_fail("splice");
|
|
809
|
+
}
|
|
810
|
+
return rb_ary_new3(2, INT2FIX(0), sym_done);
|
|
811
|
+
#else
|
|
812
|
+
(void)out_io; (void)in_io; (void)rb_offset; (void)rb_len;
|
|
813
|
+
(void)rb_pipe_r; (void)rb_pipe_w;
|
|
814
|
+
return rb_ary_new3(2, INT2FIX(0), sym_unsupported);
|
|
815
|
+
#endif
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
/* ============================================================
|
|
819
|
+
* 2.7-F — posix_fadvise(SEQUENTIAL) hoisted once per response.
|
|
820
|
+
* ============================================================
|
|
821
|
+
*
|
|
822
|
+
* 2.6-B added posix_fadvise(fd, 0, len, POSIX_FADV_SEQUENTIAL) PER
|
|
823
|
+
* CHUNK in the body of rb_sendfile_copy / rb_sendfile_copy_splice /
|
|
824
|
+
* rb_sendfile_copy_splice_into_pipe. After 2.6-A's chunk-size bump
|
|
825
|
+
* to 256 KiB, that meant 4 fadvise64 syscalls per 1 MiB response —
|
|
826
|
+
* 4 wasted syscalls on warm-cache, where the page cache already
|
|
827
|
+
* holds the data and the hint is a no-op. Maintainer's bench
|
|
828
|
+
* rerun on openclaw-vm measured -6.6% warm-cache (1,289 → 1,204
|
|
829
|
+
* r/s); 2.6-B was reverted (commit 4cd8009).
|
|
830
|
+
*
|
|
831
|
+
* 2.7-F retries the kernel hint with the right architecture: a
|
|
832
|
+
* standalone primitive that the Ruby loop entry calls ONCE per
|
|
833
|
+
* response, BEFORE the chunk loop starts. One syscall per response
|
|
834
|
+
* is well within warm-cache noise (≤1%); the cold-cache pre-read
|
|
835
|
+
* benefit (kernel reads pages into page cache before sendfile chunks
|
|
836
|
+
* hit them) is preserved. The threshold gate ("don't fadvise tiny
|
|
837
|
+
* files") lives in the Ruby caller — the C primitive is unconditional
|
|
838
|
+
* once invoked.
|
|
839
|
+
*
|
|
840
|
+
* Linux only. Non-Linux builds compile this as a no-op that returns
|
|
841
|
+
* :noop; the Ruby caller checks `respond_to?(:fadvise_sequential)`
|
|
842
|
+
* before calling, so non-Linux callers don't even hit the no-op path.
|
|
843
|
+
*/
|
|
844
|
+
/* Status symbols returned by fadvise_sequential: :ok (hint accepted
 * by the kernel), :noop (nothing to advise, or non-Linux build),
 * :error (posix_fadvise returned non-zero). Interned and GC-rooted
 * in Init_hyperion_sendfile. */
static VALUE sym_ok;
static VALUE sym_noop;
static VALUE sym_error;
|
|
847
|
+
|
|
848
|
+
/* Sendfile.fadvise_sequential(file_io, len) -> :ok | :noop | :error
 *
 * Issues posix_fadvise(fd, 0, len, POSIX_FADV_SEQUENTIAL) once; the
 * Ruby loop entry calls this a single time per response before the
 * chunk loop (see the 2.7-F note above). Purely advisory — the caller
 * ignores the result either way. */
static VALUE rb_sendfile_fadvise_sequential(VALUE self, VALUE file_io, VALUE rb_len) {
  (void)self;

#ifdef HYP_SF_LINUX
  const long span = NUM2LONG(rb_len);

  /* Defensive gate: the Ruby caller normally filters tiny files by
   * threshold, but a zero/negative length is a no-op regardless. */
  if (span <= 0) {
    return sym_noop;
  }

  const int fd = extract_fd(file_io, "file_io");

  /* On Linux, posix_fadvise reports failure by returning a positive
   * errno value directly; it does NOT touch the global errno. Any
   * non-zero return maps to :error since the hint is informational. */
  const int advise_rc = posix_fadvise(fd, 0, (off_t)span, POSIX_FADV_SEQUENTIAL);
  return (advise_rc == 0) ? sym_ok : sym_error;
#else
  (void)file_io; (void)rb_len;
  return sym_noop;
#endif
}
|
|
875
|
+
|
|
876
|
+
/* Sendfile.supported? — module-introspection helper. Lets the Ruby caller
|
|
877
|
+
* pick its branch without needing a rescue NotImplementedError around the
|
|
878
|
+
* first call (which would burn an exception object on every static
|
|
879
|
+
* response on unsupported hosts). */
|
|
880
|
+
static VALUE rb_sendfile_supported_p(VALUE self) {
|
|
881
|
+
(void)self;
|
|
882
|
+
#if defined(HYP_SF_LINUX) || defined(HYP_SF_BSD)
|
|
883
|
+
return Qtrue;
|
|
884
|
+
#else
|
|
885
|
+
return Qfalse;
|
|
886
|
+
#endif
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
/* Sendfile.splice_supported? — true on Linux builds where the splice
|
|
890
|
+
* branch was compiled in. The runtime kernel may still reject splice
|
|
891
|
+
* (very old kernels return ENOSYS), in which case copy_splice surfaces
|
|
892
|
+
* :unsupported and the Ruby caller falls back to copy(). */
|
|
893
|
+
static VALUE rb_sendfile_splice_supported_p(VALUE self) {
|
|
894
|
+
(void)self;
|
|
895
|
+
#ifdef HYP_SF_LINUX
|
|
896
|
+
return Qtrue;
|
|
897
|
+
#else
|
|
898
|
+
return Qfalse;
|
|
899
|
+
#endif
|
|
900
|
+
}
|
|
901
|
+
|
|
902
|
+
/* Sendfile.small_file_threshold — exposes the C constant to Ruby. */
|
|
903
|
+
static VALUE rb_sendfile_small_threshold(VALUE self) {
|
|
904
|
+
(void)self;
|
|
905
|
+
return INT2NUM(HYP_SMALL_FILE_THRESHOLD);
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
/* Sendfile.platform_tag — returns a small Symbol describing which kernel
|
|
909
|
+
* path got compiled in. Used by specs and the bench reporter. */
|
|
910
|
+
static VALUE rb_sendfile_platform_tag(VALUE self) {
|
|
911
|
+
(void)self;
|
|
912
|
+
#if defined(HYP_SF_LINUX)
|
|
913
|
+
return ID2SYM(rb_intern("linux"));
|
|
914
|
+
#elif defined(HYP_SF_BSD)
|
|
915
|
+
# if defined(__APPLE__)
|
|
916
|
+
return ID2SYM(rb_intern("darwin"));
|
|
917
|
+
# else
|
|
918
|
+
return ID2SYM(rb_intern("bsd"));
|
|
919
|
+
# endif
|
|
920
|
+
#else
|
|
921
|
+
return ID2SYM(rb_intern("unsupported"));
|
|
922
|
+
#endif
|
|
923
|
+
}
|
|
924
|
+
|
|
925
|
+
void Init_hyperion_sendfile(void) {
|
|
926
|
+
rb_mHyperion = rb_const_get(rb_cObject, rb_intern("Hyperion"));
|
|
927
|
+
|
|
928
|
+
/* Hyperion::Http — created lazily; ResponseWriter doesn't need it
|
|
929
|
+
* to exist before the C ext loads, so we tolerate either order. */
|
|
930
|
+
if (rb_const_defined(rb_mHyperion, rb_intern("Http"))) {
|
|
931
|
+
rb_mHyperionHttp = rb_const_get(rb_mHyperion, rb_intern("Http"));
|
|
932
|
+
} else {
|
|
933
|
+
rb_mHyperionHttp = rb_define_module_under(rb_mHyperion, "Http");
|
|
934
|
+
}
|
|
935
|
+
|
|
936
|
+
rb_mHyperionHttpSendfile = rb_define_module_under(rb_mHyperionHttp, "Sendfile");
|
|
937
|
+
|
|
938
|
+
rb_define_singleton_method(rb_mHyperionHttpSendfile, "copy",
|
|
939
|
+
rb_sendfile_copy, 4);
|
|
940
|
+
rb_define_singleton_method(rb_mHyperionHttpSendfile, "copy_small",
|
|
941
|
+
rb_sendfile_copy_small, 4);
|
|
942
|
+
rb_define_singleton_method(rb_mHyperionHttpSendfile, "copy_splice",
|
|
943
|
+
rb_sendfile_copy_splice, 4);
|
|
944
|
+
rb_define_singleton_method(rb_mHyperionHttpSendfile, "copy_splice_into_pipe",
|
|
945
|
+
rb_sendfile_copy_splice_into_pipe, 6);
|
|
946
|
+
rb_define_singleton_method(rb_mHyperionHttpSendfile, "supported?",
|
|
947
|
+
rb_sendfile_supported_p, 0);
|
|
948
|
+
rb_define_singleton_method(rb_mHyperionHttpSendfile, "splice_supported?",
|
|
949
|
+
rb_sendfile_splice_supported_p, 0);
|
|
950
|
+
rb_define_singleton_method(rb_mHyperionHttpSendfile, "small_file_threshold",
|
|
951
|
+
rb_sendfile_small_threshold, 0);
|
|
952
|
+
rb_define_singleton_method(rb_mHyperionHttpSendfile, "platform_tag",
|
|
953
|
+
rb_sendfile_platform_tag, 0);
|
|
954
|
+
rb_define_singleton_method(rb_mHyperionHttpSendfile, "fadvise_sequential",
|
|
955
|
+
rb_sendfile_fadvise_sequential, 2);
|
|
956
|
+
|
|
957
|
+
id_fileno = rb_intern("fileno");
|
|
958
|
+
id_to_io = rb_intern("to_io");
|
|
959
|
+
|
|
960
|
+
sym_done = ID2SYM(rb_intern("done"));
|
|
961
|
+
sym_partial = ID2SYM(rb_intern("partial"));
|
|
962
|
+
sym_eagain = ID2SYM(rb_intern("eagain"));
|
|
963
|
+
sym_unsupported = ID2SYM(rb_intern("unsupported"));
|
|
964
|
+
sym_ok = ID2SYM(rb_intern("ok"));
|
|
965
|
+
sym_noop = ID2SYM(rb_intern("noop"));
|
|
966
|
+
sym_error = ID2SYM(rb_intern("error"));
|
|
967
|
+
|
|
968
|
+
/* Keep symbols and module references rooted so the GC doesn't
|
|
969
|
+
* collect them between calls. */
|
|
970
|
+
rb_gc_register_mark_object(sym_done);
|
|
971
|
+
rb_gc_register_mark_object(sym_partial);
|
|
972
|
+
rb_gc_register_mark_object(sym_eagain);
|
|
973
|
+
rb_gc_register_mark_object(sym_unsupported);
|
|
974
|
+
rb_gc_register_mark_object(sym_ok);
|
|
975
|
+
rb_gc_register_mark_object(sym_noop);
|
|
976
|
+
rb_gc_register_mark_object(sym_error);
|
|
977
|
+
|
|
978
|
+
/* 2.2.0 — the splice path no longer carries persistent state.
|
|
979
|
+
* Each copy_splice() call opens its own pipe2(O_CLOEXEC) pair
|
|
980
|
+
* and closes both fds before returning. No TLS key, no
|
|
981
|
+
* destructor, no cross-request residual-bytes window. */
|
|
982
|
+
}
|