hyperion-rb 1.6.2 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4563 -0
  3. data/README.md +189 -13
  4. data/ext/hyperion_h2_codec/Cargo.lock +7 -0
  5. data/ext/hyperion_h2_codec/Cargo.toml +33 -0
  6. data/ext/hyperion_h2_codec/extconf.rb +73 -0
  7. data/ext/hyperion_h2_codec/src/frames.rs +140 -0
  8. data/ext/hyperion_h2_codec/src/hpack/huffman.rs +161 -0
  9. data/ext/hyperion_h2_codec/src/hpack.rs +457 -0
  10. data/ext/hyperion_h2_codec/src/lib.rs +296 -0
  11. data/ext/hyperion_http/extconf.rb +28 -0
  12. data/ext/hyperion_http/h2_codec_glue.c +408 -0
  13. data/ext/hyperion_http/page_cache.c +1125 -0
  14. data/ext/hyperion_http/parser.c +473 -38
  15. data/ext/hyperion_http/sendfile.c +982 -0
  16. data/ext/hyperion_http/websocket.c +493 -0
  17. data/ext/hyperion_io_uring/Cargo.lock +33 -0
  18. data/ext/hyperion_io_uring/Cargo.toml +34 -0
  19. data/ext/hyperion_io_uring/extconf.rb +74 -0
  20. data/ext/hyperion_io_uring/src/lib.rs +316 -0
  21. data/lib/hyperion/adapter/rack.rb +370 -42
  22. data/lib/hyperion/admin_listener.rb +207 -0
  23. data/lib/hyperion/admin_middleware.rb +36 -7
  24. data/lib/hyperion/cli.rb +310 -11
  25. data/lib/hyperion/config.rb +440 -14
  26. data/lib/hyperion/connection.rb +679 -22
  27. data/lib/hyperion/deprecations.rb +81 -0
  28. data/lib/hyperion/dispatch_mode.rb +165 -0
  29. data/lib/hyperion/fiber_local.rb +75 -13
  30. data/lib/hyperion/h2_admission.rb +77 -0
  31. data/lib/hyperion/h2_codec.rb +452 -0
  32. data/lib/hyperion/http/page_cache.rb +122 -0
  33. data/lib/hyperion/http/sendfile.rb +696 -0
  34. data/lib/hyperion/http2/native_hpack_adapter.rb +70 -0
  35. data/lib/hyperion/http2_handler.rb +368 -9
  36. data/lib/hyperion/io_uring.rb +317 -0
  37. data/lib/hyperion/lint_wrapper_pool.rb +126 -0
  38. data/lib/hyperion/master.rb +96 -9
  39. data/lib/hyperion/metrics/path_templater.rb +68 -0
  40. data/lib/hyperion/metrics.rb +256 -0
  41. data/lib/hyperion/prometheus_exporter.rb +150 -0
  42. data/lib/hyperion/request.rb +13 -0
  43. data/lib/hyperion/response_writer.rb +477 -16
  44. data/lib/hyperion/runtime.rb +195 -0
  45. data/lib/hyperion/server/route_table.rb +179 -0
  46. data/lib/hyperion/server.rb +519 -55
  47. data/lib/hyperion/static_preload.rb +133 -0
  48. data/lib/hyperion/thread_pool.rb +61 -7
  49. data/lib/hyperion/tls.rb +343 -1
  50. data/lib/hyperion/version.rb +1 -1
  51. data/lib/hyperion/websocket/close_codes.rb +71 -0
  52. data/lib/hyperion/websocket/connection.rb +876 -0
  53. data/lib/hyperion/websocket/frame.rb +356 -0
  54. data/lib/hyperion/websocket/handshake.rb +525 -0
  55. data/lib/hyperion/worker.rb +111 -9
  56. data/lib/hyperion.rb +137 -3
  57. metadata +50 -1
@@ -0,0 +1,696 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fcntl'
4
+
5
+ module Hyperion
6
+ module Http
7
+ # Sendfile — Ruby-side façade over the C-extension Hyperion::Http::Sendfile
8
+ # native helper. Handles the portable concerns the C ext deliberately leaves
9
+ # to userspace:
10
+ #
11
+ # * Looping on :partial returns from the kernel (short writes).
12
+ # * Yielding to the fiber scheduler / IO.select on :eagain.
13
+ # * Falling back to IO.copy_stream when:
14
+ # - native zero-copy isn't compiled (non-Linux, non-BSD/Darwin host),
15
+ # - the kernel returned :unsupported (this fd pair can't sendfile),
16
+ # - the destination IO is a TLS-wrapped socket (kernel can't encrypt).
17
+ #
18
+ # The C ext defines `Hyperion::Http::Sendfile` as a module too — when the
19
+ # extension loads first it pre-creates the constant and we re-open it
20
+ # here to add the higher-level helpers. The native singleton methods
21
+ # (`copy`, `copy_small`, `copy_splice`, `supported?`, `splice_supported?`,
22
+ # `small_file_threshold`, `platform_tag`) survive the re-open untouched.
23
+ #
24
+ # 2.0.1 Phase 8 — close static-file rps gaps
25
+ # -------------------------------------------
26
+ # The 2.0.0 BENCH report had two rows where Hyperion still lost Puma on
27
+ # rps:
28
+ #
29
+ # * 8 KB static at -t 5 (-w 1) — 121 r/s vs Puma 1,246 r/s (10× loss)
30
+ # * 1 MiB static at -t 5 (-w 1) — 1,809 r/s vs Puma 2,139 r/s (-15%)
31
+ #
32
+ # Diagnosis (see ext/hyperion_http/sendfile.c header):
33
+ #
34
+ # 8 KB row: every request paid ~40 ms in EAGAIN-yield-retry cycles
35
+ # because sendfile against an 8 KB file routinely hits EAGAIN once
36
+ # before the kernel TCP send buffer accepts it; with -t 5 only 5
37
+ # fibers can be in-flight, and 4 sleeping in EAGAIN-yield-retry
38
+ # starves the wrk loop.
39
+ #
40
+ # 1 MiB row: sendfile(2) re-derives some bookkeeping per call that
41
+ # splice(2) through a pipe-tee avoids.
42
+ #
43
+ # Fixes:
44
+ #
45
+ # 8a. Small-file fast path. If file_size <= 64 KiB we use the new
46
+ # `copy_small` C primitive: heap-buffered read + write under the
47
+ # GVL released, EAGAIN polled with a short select() instead of
48
+ # fiber-yielding. The transfer completes in microseconds rather
49
+ # than dancing with the fiber scheduler.
50
+ #
51
+ # 8b. Linux splice path (2.0.1 / disabled / re-enabled in 2.2.0). For
52
+ # files > 64 KiB on Linux we try `copy_splice` first (file_fd ->
53
+ # fresh pipe -> sock_fd with SPLICE_F_MOVE | SPLICE_F_MORE).
54
+ # Falls back to plain `copy` (sendfile) if the runtime kernel
55
+ # returns :unsupported, if `splice_supported?` is false (non-
56
+ # Linux builds), or if any SystemCallError surfaces from the
57
+ # primitive.
58
+ #
59
+ # 2.2.0 — splice path re-enabled with fresh per-request pipe pair
60
+ # ---------------------------------------------------------------
61
+ # 2.0.1 disabled the splice route from copy_to_socket because the
62
+ # cached per-thread pipe pair leaked residual bytes between requests:
63
+ # if `splice(file -> pipe)` succeeded but `splice(pipe -> socket)`
64
+ # failed mid-transfer (peer closed), the unread bytes stayed in the
65
+ # pipe and went out on the NEXT connection's socket. 2.2.0 fixes
66
+ # this at the lifecycle layer rather than abandoning the path —
67
+ # `copy_splice` now opens a fresh `pipe2(O_CLOEXEC | O_NONBLOCK)`
68
+ # pair on every call and closes both fds on every exit path. Two
69
+ # extra syscalls per call vs the cached layout, but correctness is
70
+ # unconditional: a pipe never carries bytes for more than one
71
+ # transfer.
72
+ #
73
+ # 2.2.x fix-A — pipe-hoist out of the chunk loop
74
+ # ----------------------------------------------
75
+ # The 2026-04-30 bench sweep showed 2.2.0's per-call pipe2 cost a
76
+ # -23% rps regression on the static 1 MiB row (1,697 → 1,312 r/s)
77
+ # because `native_copy_loop` invokes the splice primitive ONCE PER
78
+ # CHUNK in a `while remaining.positive?` loop. For a 1 MiB asset
79
+ # at 64 KiB chunks that's 16 calls × 3 syscalls of pipe overhead =
80
+ # 48 wasted syscalls per request. Fix-A pushes the pipe lifecycle
81
+ # up one level: `native_copy_loop` now opens a single
82
+ # pipe2(O_CLOEXEC | O_NONBLOCK) pair per RESPONSE, hands it to the
83
+ # new `copy_splice_into_pipe` primitive for every chunk, and
84
+ # closes both fds in an ensure block when the response loop
85
+ # unwinds (success, EAGAIN-retry-loop exit, raised exception).
86
+ # Same correctness window as 2.2.0 — a pipe pair never outlives
87
+ # one response, so EPIPE mid-transfer cannot leak residual bytes
88
+ # onto the next request's socket — at 1/16th the syscall cost.
89
+ module Sendfile
90
+ # Maximum bytes per IO.copy_stream call on the userspace fallback, and
91
+ # per-call cap on the native sendfile / splice loops. 2.6-A bumped this
92
+ # from 64 KiB to 256 KiB.
93
+ #
94
+ # 64 KiB was the original "kernel TCP send buffer's typical sweet spot"
95
+ # value — small enough to bound a single syscall's GVL hold-time, large
96
+ # enough to amortize the syscall cost. 2.6-A measurements on
97
+ # openclaw-vm (Linux 6.x, 1 MiB warm-cache static asset) showed the
98
+ # kernel happily accepts 256 KiB per sendfile(2) / splice(2) call —
99
+ # the kernel TCP send buffer auto-tunes upward under sustained load,
100
+ # and modern NICs+TSO segment 256 KiB-1 MiB chunks at line rate. At
101
+ # 256 KiB we issue 4× fewer syscalls per 1 MiB response (4 calls vs
102
+ # 16) while keeping the GVL hold-time well under 1 ms even on a slow
103
+ # client.
104
+ #
105
+ # Reference: nginx (`sendfile_max_chunk` default 0 = unlimited, but
106
+ # most distros ship with `2m` overrides), Apache (`SendBufferSize`
107
+ # 128k–256k), Caddy (256 KiB hard-coded). Hyperion sits in the
108
+ # middle of that field.
109
+ USERSPACE_CHUNK = 256 * 1024
110
+
111
+ # 2.0.1 Phase 8a small-file threshold. Files <= this size take the
112
+ # synchronous read+write path with no fiber-yield. Mirrors the C
113
+ # constant `HYP_SMALL_FILE_THRESHOLD` — kept in sync via the
114
+ # `small_file_threshold` introspection method on hosts where the
115
+ # native ext is loaded.
116
+ SMALL_FILE_THRESHOLD = 64 * 1024
117
+
118
+ # 2.2.0 — splice fires for files strictly larger than this many
119
+ # bytes. Below the threshold the small-file synchronous path
120
+ # (`copy_small`) wins outright; between the small-file ceiling
121
+ # and this constant plain sendfile(2) is fast enough that the
122
+ # extra pipe2 + 2× close round-trip isn't worth it. Set equal
123
+ # to SMALL_FILE_THRESHOLD so anything above the small-file path
124
+ # gets the splice attempt.
125
+ SPLICE_THRESHOLD = SMALL_FILE_THRESHOLD
126
+
127
+ # 2.7-F — `posix_fadvise(fd, 0, len, POSIX_FADV_SEQUENTIAL)` fires
128
+ # ONCE per response when the streaming loop is engaged AND the
129
+ # response body is at least this large. Files smaller than the
130
+ # threshold hit the kernel in a single sendfile / splice round;
131
+ # the readahead hint is dead weight for them. At and above
132
+ # FADVISE_THRESHOLD the kernel will issue multiple chunks (the
133
+ # 2.6-A USERSPACE_CHUNK is 256 KiB), and pre-warming the page
134
+ # cache before the chunk loop starts avoids the second/third
135
+ # chunk waiting on disk I/O on cold-cache requests.
136
+ #
137
+ # 2.6-B regressed warm-cache by -6.6% because the same hint
138
+ # was called PER CHUNK in the C primitive (4× per 1 MiB
139
+ # response). 2.7-F hoists the call to the Ruby loop entry —
140
+ # once per response, regardless of how many chunks the kernel
141
+ # uses — making the warm-cache impact at most 1 extra syscall
142
+ # per response (≤1%). See CHANGELOG entry 2.7-F + ext/...
143
+ # /sendfile.c (rb_sendfile_fadvise_sequential).
144
+ FADVISE_THRESHOLD = 256 * 1024
145
+
146
+ class << self
147
+ # 2.2.0 — runtime probe for the splice path. `splice_supported?`
148
+ # in the C ext only reports compile-time availability (true on
149
+ # Linux builds, false elsewhere). At runtime an old kernel can
150
+ # still reject splice(2) with ENOSYS / EINVAL the first time we
151
+ # call it; once observed, we cache the answer for the lifetime
152
+ # of the process so subsequent requests don't pay the failed-
153
+ # syscall round-trip. Default value tracks the C ext flag so
154
+ # specs that assert `splice_supported? == true` on Linux still
155
+ # pass without an explicit probe; `mark_splice_unsupported!` is
156
+ # called by `native_copy_loop` when copy_splice surfaces
157
+ # :unsupported, transitioning the cached flag to false for the
158
+ # rest of the process.
159
def splice_runtime_supported?
  # Answer is computed once and cached in @splice_runtime_supported.
  # `defined?` (not `||=`) is used so a cached `false` — including the
  # one written by `mark_splice_unsupported!` — is honoured and never
  # recomputed.
  unless defined?(@splice_runtime_supported)
    # Splice stays opt-in: the operator must export
    # HYPERION_HTTP_SPLICE=1 AND the singleton must expose a
    # `splice_supported?` probe that answers truthy.
    opted_in = ENV['HYPERION_HTTP_SPLICE'] == '1'
    @splice_runtime_supported =
      opted_in && respond_to?(:splice_supported?) && splice_supported?
  end

  @splice_runtime_supported
end
187
+
188
+ # Called by native_copy_loop when copy_splice reports
189
+ # :unsupported at runtime (very old kernel without splice(2),
190
+ # sandboxed environment that blocks pipe2, etc.). Flips the
191
+ # cached flag to false so we stop attempting splice on this
192
+ # process for the rest of its lifetime — falling all the way
193
+ # through to plain sendfile(2).
194
def mark_splice_unsupported!
  # Overwrites the memo read by `splice_runtime_supported?`, so every
  # later call on this object answers false without re-probing.
  # Returns false (the assigned value).
  @splice_runtime_supported = false
end
197
+
198
+ # Returns true when the Ruby-side helper can take the fast path for
199
+ # `out_io`. Two conditions:
200
+ #
201
+ # 1. The C ext was compiled with native zero-copy (Linux / BSD /
202
+ # Darwin). On other hosts `Sendfile.supported?` returns false
203
+ # (defined in C); we still have a userspace fallback that's
204
+ # faster than the per-chunk fiber hop, so we report :userspace
205
+ # from #fast_path_kind in that case.
206
+ #
207
+ # 2. `out_io` is NOT a TLS socket. SSL sockets would need kernel-
208
+ # TLS support to sendfile, which is rarely enabled.
209
def fast_path_kind(out_io)
  # Classifies the destination into one of three transfer strategies:
  #   :tls_userspace — `out_io` is an OpenSSL socket,
  #   :native        — `out_io` has a usable fd AND the singleton exposes
  #                    a truthy `supported?` probe,
  #   :userspace     — everything else.
  # The TLS check runs first, so a TLS socket never reports :native.
  if tls_socket?(out_io)
    :tls_userspace
  elsif real_fd?(out_io) && respond_to?(:supported?) && supported?
    :native
  else
    :userspace
  end
end
220
+
221
+ # High-level helper: copy `len` bytes from `file_io` (regular file)
222
+ # starting at `offset` into `out_io` (TCP socket or other writable
223
+ # IO). Loops on partial writes; yields on EAGAIN.
224
+ #
225
+ # Returns the total number of bytes written. Raises Errno::* on real
226
+ # socket errors (EPIPE, ECONNRESET, …) — same shape as a raw
227
+ # `socket.write` call. The caller's existing rescue handlers (slow-
228
+ # client cleanup, metrics, body#close) keep working unchanged.
229
def copy_to_socket(out_io, file_io, offset, len)
  # Copies `len` bytes of `file_io`, starting at `offset`, into `out_io`
  # and returns the byte count reported by the chosen strategy. A zero
  # `len` returns 0 without touching either IO.
  return 0 if len.zero?

  kind = fast_path_kind(out_io)

  # Small-file shortcut: must be decided BEFORE the generic :native
  # dispatch so small bodies go through `copy_small` instead of the
  # streaming loop. Needs the native kind, the primitive to exist, and
  # a real fd on the source side too.
  small_native = kind == :native &&
                 len <= SMALL_FILE_THRESHOLD &&
                 respond_to?(:copy_small) &&
                 real_fd?(file_io)
  return copy_small(out_io, file_io, offset, len) if small_native

  if kind == :native
    native_copy_loop(out_io, file_io, offset, len)
  elsif %i[userspace tls_userspace].include?(kind)
    userspace_copy_loop(out_io, file_io, offset, len)
  end
end
253
+
254
+ # 2.6-C — Puma-style serial-per-thread sendfile loop. Same
255
+ # zero-copy mechanics as `copy_to_socket` but with EAGAIN
256
+ # handled by `IO.select(nil, [out], nil, 5.0)` instead of
257
+ # `wait_writable` (fiber yield). Under the GVL the OS thread
258
+ # parks on the select; no per-chunk fiber-scheduler hop.
259
+ #
260
+ # Engaged from `ResponseWriter#write_sendfile` when the
261
+ # per-response `dispatch_mode` is `:inline_blocking` — auto-
262
+ # detected for `body.respond_to?(:to_path)` static-file routes
263
+ # in `Adapter::Rack#call`, or set explicitly by the app via
264
+ # `env['hyperion.dispatch_mode'] = :inline_blocking`.
265
+ #
266
+ # Userspace + TLS-userspace branches reuse `userspace_copy_loop`
267
+ # — `IO.copy_stream` is already blocking on the calling thread,
268
+ # no fiber-yield refactor needed there. Small-file (<= 64 KiB)
269
+ # native path also stays through `copy_small`: that primitive
270
+ # already handles EAGAIN with a short select() under the GVL,
271
+ # so the small-file fast path is "blocking" in the relevant
272
+ # sense regardless of `:inline_blocking` opt-in.
273
def copy_to_socket_blocking(out_io, file_io, offset, len)
  # Blocking-dispatch twin of `copy_to_socket`. A zero `len` returns 0
  # immediately; otherwise the work is delegated to
  # `copy_to_socket_blocking_inner`.
  return 0 if len.zero?

  # Already on a blocking fiber: call straight through.
  return copy_to_socket_blocking_inner(out_io, file_io, offset, len) if ::Fiber.current.blocking?

  # Otherwise run the transfer inside `Fiber.blocking` so callers that
  # did not wrap us themselves still get the blocking behaviour.
  ::Fiber.blocking { copy_to_socket_blocking_inner(out_io, file_io, offset, len) }
end
290
+
291
+ private
292
+
293
def copy_to_socket_blocking_inner(out_io, file_io, offset, len)
  # Same dispatch as `copy_to_socket`, except the :native streaming case
  # goes through `native_copy_loop_blocking`.
  kind = fast_path_kind(out_io)

  # Small native transfers use `copy_small`, exactly as on the
  # non-blocking entry point.
  small_native = kind == :native &&
                 len <= SMALL_FILE_THRESHOLD &&
                 respond_to?(:copy_small) &&
                 real_fd?(file_io)
  return copy_small(out_io, file_io, offset, len) if small_native

  if kind == :native
    native_copy_loop_blocking(out_io, file_io, offset, len)
  elsif %i[userspace tls_userspace].include?(kind)
    userspace_copy_loop(out_io, file_io, offset, len)
  end
end
308
+
309
def tls_socket?(io)
  # Without OpenSSL loaded nothing can be a TLS socket; answer nil
  # (falsy) rather than referencing a constant that does not exist.
  return unless defined?(::OpenSSL::SSL::SSLSocket)

  io.is_a?(::OpenSSL::SSL::SSLSocket)
end
312
+
313
+ # Does `io` expose a real kernel fd we can hand to sendfile(2)?
314
+ # `IO#fileno` raises NotImplementedError on StringIO / Tempfile-
315
+ # before-flush / custom IO-shaped objects, and TCPSocket wraps a
316
+ # T_FILE so `RB_TYPE_P(obj, T_FILE)` returns true. We probe by
317
+ # calling `fileno` inside a forgiving rescue — anything that
318
+ # answers a non-negative Integer is good enough; everything else
319
+ # routes through the userspace fallback.
320
def real_fd?(io)
  # Answers whether `io` is backed by a usable fd. Probes, in order:
  #   1. an open ::IO instance,
  #   2. an object whose `to_io` yields an open ::IO,
  #   3. an object whose `fileno` answers a non-negative Integer.
  # Anything else — or any probe that raises — answers false so the
  # caller takes the userspace fallback.
  return true if io.is_a?(::IO) && !io.closed?

  if io.respond_to?(:to_io)
    inner = io.to_io
    return inner.is_a?(::IO) && !inner.closed?
  end

  if io.respond_to?(:fileno)
    fd = io.fileno
    return fd.is_a?(Integer) && fd >= 0
  end

  false
rescue StandardError, NotImplementedError
  # NotImplementedError descends from ScriptError, not StandardError, so
  # it has to be named explicitly: an IO-shaped object that raises it
  # from `fileno` / `to_io` must be treated as "no fd", not crash the
  # response.
  false
end
337
+
338
+ # Native streaming loop for files > SMALL_FILE_THRESHOLD.
339
+ #
340
+ # 2.2.x fix-A — on Linux, files above SPLICE_THRESHOLD route
341
+ # through `copy_splice_into_pipe`. That primitive splices
342
+ # file -> pipe -> socket for ONE chunk against a pipe pair
343
+ # owned by THIS METHOD: one `IO.pipe` (binmode, non-blocking)
344
+ # at the top, both fds closed in the ensure block at the
345
+ # bottom. For a 1 MiB asset at 64 KiB chunks that drops the
346
+ # pipe overhead from 16 × pipe2 + 32 × close (one set per
347
+ # chunk in the 2.2.0 layout) to 1 × pipe2 + 2 × close per
348
+ # response — a 3.4× syscall-count reduction. The
349
+ # correctness window (no cross-request byte leak) stays
350
+ # closed: a pipe pair still never outlives a single
351
+ # response.
352
+ #
353
+ # On non-Linux hosts (`splice_supported?` == false) we go
354
+ # straight to the plain sendfile(2) path via `copy`. On
355
+ # Linux hosts where the runtime kernel rejects splice (very
356
+ # old kernels return ENOSYS / EINVAL) we mark the path
357
+ # unsupported for the rest of the process and fall through
358
+ # to plain sendfile.
359
def native_copy_loop(out_io, file_io, offset, len)
  # Readahead hint is issued here, once, before either streaming loop
  # starts — the helper decides on its own whether it applies.
  maybe_fadvise_sequential(file_io, len)

  # Splice is taken only when all three hold: the cached runtime flag is
  # on, the body is strictly larger than SPLICE_THRESHOLD, and the
  # per-chunk splice primitive exists on this singleton.
  splice_eligible = splice_runtime_supported? &&
                    len > SPLICE_THRESHOLD &&
                    respond_to?(:copy_splice_into_pipe)
  return plain_sendfile_loop(out_io, file_io, offset, len) unless splice_eligible

  splice_copy_loop(out_io, file_io, offset, len)
end
380
+
381
+ # 2.7-F — best-effort POSIX_FADV_SEQUENTIAL hint. Called
382
+ # once per response from `native_copy_loop`. Skipped on
383
+ # non-Linux hosts (the C ext doesn't define
384
+ # `fadvise_sequential` there), on small files (single-chunk
385
+ # responses don't benefit from readahead pre-warming), and
386
+ # on file-like objects without a real kernel fd (StringIO,
387
+ # mock IOs). Errors from the C primitive are intentionally
388
+ # ignored — the hint is informational, never load-bearing
389
+ # for correctness. If `copy_to_socket_blocking` ever wants
390
+ # the same hint, lift this call into a helper called from
391
+ # the blocking dispatcher too (deferred to 2.7.x — the
392
+ # blocking path's spec surface is wider and the warm/cold
393
+ # bench numbers should drive that decision).
394
def maybe_fadvise_sequential(file_io, len)
  # Best-effort hint: only forwarded when the `fadvise_sequential`
  # primitive exists, the body is at least FADVISE_THRESHOLD bytes, and
  # `file_io` has a real fd. Returns nil when skipped.
  worthwhile = respond_to?(:fadvise_sequential) &&
               len >= FADVISE_THRESHOLD &&
               real_fd?(file_io)
  fadvise_sequential(file_io, len) if worthwhile
rescue StandardError
  # The hint is advisory only; a failure here must never abort the
  # response that is about to be streamed.
  nil
end
406
+
407
+ # 2.2.x fix-A — splice path with one pipe pair per response.
408
+ # Opens the pipe at entry, hands the same fds to
409
+ # `copy_splice_into_pipe` for every chunk of the response,
410
+ # and closes both fds in the ensure block on every exit
411
+ # path (return, raise, throw). If the runtime kernel
412
+ # rejects splice (:unsupported on the first chunk), we tear
413
+ # the pipe down immediately and recurse through
414
+ # `plain_sendfile_loop` for the remainder of the response.
415
def splice_copy_loop(out_io, file_io, offset, len)
  # Streams `len` bytes through one pipe pair that lives exactly as long
  # as this call. Returns the total bytes moved, including any tail
  # handed to `plain_sendfile_loop`.
  sent = 0
  pipe_r, pipe_w = open_splice_pipe!

  begin
    while sent < len
      position = offset + sent
      pending = len - sent
      # Never ask for more than USERSPACE_CHUNK in a single round.
      chunk = [pending, USERSPACE_CHUNK].min

      # A primitive that raises NotImplementedError is handled exactly
      # like one that reports :unsupported.
      bytes, status =
        begin
          copy_splice_into_pipe(out_io, file_io, position, chunk, pipe_r, pipe_w)
        rescue NotImplementedError
          [nil, :unsupported]
        end

      case status
      when :done, :partial
        # Both statuses carry a byte count; the loop condition decides
        # whether another round is needed.
        sent += bytes
      when :eagain
        # Nothing moved this round; wait for the socket and retry from
        # the same position.
        wait_writable(out_io)
      when :unsupported
        # Remember the negative answer, then finish this response via
        # plain sendfile from the current position. The ensure below
        # still closes the pipe pair once the fallback returns.
        mark_splice_unsupported!
        return sent + plain_sendfile_loop(out_io, file_io, position, pending)
      else
        raise "Hyperion::Http::Sendfile: unexpected status #{status.inspect}"
      end
    end

    sent
  ensure
    # Runs on every exit path (normal return, fallback return, raise).
    close_splice_pipe(pipe_r, pipe_w)
  end
end
480
+
481
+ # Plain sendfile(2) loop — used on non-Linux hosts, on
482
+ # hosts where splice is unavailable at runtime, and as the
483
+ # tail of a splice run that hit :unsupported mid-response.
484
+ #
485
+ # 2.6-A — each kernel call is capped at USERSPACE_CHUNK
486
+ # (256 KiB) so a 1 MiB response moves in 4 sendfile rounds
487
+ # vs 16 at the legacy 64 KiB ceiling. The kernel happily
488
+ # accepts the larger count arg on Linux 4.x+ and Darwin /
489
+ # *BSD; partial returns still fall through the :partial
490
+ # branch unchanged.
491
def plain_sendfile_loop(out_io, file_io, offset, len)
  # Drives the `copy` primitive until `len` bytes starting at `offset`
  # have been moved. Returns the total, including any tail handed to
  # `userspace_copy_loop`.
  sent = 0

  while sent < len
    position = offset + sent
    pending = len - sent
    # Never ask for more than USERSPACE_CHUNK in a single call.
    chunk = [pending, USERSPACE_CHUNK].min
    bytes, status = copy(out_io, file_io, position, chunk)

    case status
    when :done, :partial
      # Either way some bytes moved; the loop condition decides whether
      # another round is needed.
      sent += bytes
    when :eagain
      # Socket not ready; wait, then retry from the same position.
      wait_writable(out_io)
    when :unsupported
      # This fd pair cannot use the native primitive. Position the file
      # at the current cursor and let the userspace loop finish the job.
      file_io.seek(position) if file_io.respond_to?(:seek)
      return sent + userspace_copy_loop(out_io, file_io, position, pending)
    else
      raise "Hyperion::Http::Sendfile: unexpected status #{status.inspect}"
    end
  end

  sent
end
530
+
531
+ # Open a pipe pair sized for the splice response loop.
532
+ # Returns [pipe_r, pipe_w] as Ruby IO objects so the ensure
533
+ # block can `.close` them via the standard IO protocol — no
534
+ # stale-fd risk if the C ext closed the underlying fd
535
+ # during a runtime-:unsupported teardown. Both ends are
536
+ # set non-blocking (matches the C ext's pipe2 fallback for
537
+ # `copy_splice`) so a wedged splice can't block a worker
538
+ # thread.
539
def open_splice_pipe!
  # Creates a pipe and asks `set_nonblock!` to flag each end, then
  # returns the two-element [reader, writer] array (Array#each returns
  # its receiver).
  IO.pipe.each { |pipe_end| set_nonblock!(pipe_end) }
end
545
+
546
def set_nonblock!(io)
  # Read the current status flags and write them back with O_NONBLOCK
  # OR-ed in.
  io.fcntl(Fcntl::F_SETFL, io.fcntl(Fcntl::F_GETFL) | Fcntl::O_NONBLOCK)
rescue StandardError
  # Best-effort: any failure (no `fcntl` on the object, missing Fcntl
  # constants, a failing syscall) is swallowed and nil is returned.
  nil
end
555
+
556
def close_splice_pipe(pipe_r, pipe_w)
  # Closes both pipe ends, tolerating nil and already-closed ends.
  # Each end is closed under its OWN rescue: if closing the read end
  # raises, the write end must still be closed, otherwise its fd leaks.
  # Never raises StandardError (this runs from an ensure block and must
  # not mask the original exception or return value); always returns
  # nil.
  [pipe_r, pipe_w].each do |pipe_end|
    begin
      pipe_end.close unless pipe_end.nil? || pipe_end.closed?
    rescue StandardError
      nil
    end
  end

  nil
end
564
+
565
+ # Userspace fallback. Bypasses the per-chunk fiber-hop in
566
+ # WriterContext-style writers by issuing a single IO.copy_stream call
567
+ # with USERSPACE_CHUNK at a time. IO.copy_stream itself handles the
568
+ # internal read+write loop and (on Linux plain TCP) will pick
569
+ # sendfile(2) under the hood; we keep it as a defensive fallback for
570
+ # TLS sockets and non-sendfile-capable hosts.
571
def userspace_copy_loop(out_io, file_io, offset, len)
  # Userspace transfer: position the source at `offset` (when it can
  # seek), then move up to `len` bytes via IO.copy_stream in slices of
  # at most USERSPACE_CHUNK. Returns the bytes actually copied, which is
  # less than `len` when the source runs dry.
  file_io.seek(offset) if file_io.respond_to?(:seek)

  copied = 0
  while copied < len
    slice = [len - copied, USERSPACE_CHUNK].min
    moved = IO.copy_stream(file_io, out_io, slice)
    # A nil / zero result means no progress — stop instead of spinning.
    break if moved.nil? || moved.zero?

    copied += moved
  end

  copied
end
585
+
586
+ # Yield the fiber until `out_io` is writable. Under Async, the
587
+ # scheduler's `io_wait` is invoked transparently by IO#wait_writable.
588
+ # Outside Async we fall back to IO.select. We tolerate IOs that
589
+ # don't expose `wait_writable` (e.g. plain Integer fd, StringIO in
590
+ # tests) by spinning a single CPU yield — those paths are rare in
591
+ # production and the bookkeeping isn't worth a custom waiter.
592
def wait_writable(out_io)
  # Preferred: the IO knows how to wait for writability itself.
  return out_io.wait_writable if out_io.respond_to?(:wait_writable)

  # Next best: unwrap to a real IO and select on it for up to 1 second.
  return IO.select(nil, [out_io.to_io], nil, 1.0) if out_io.respond_to?(:to_io)

  # Nothing to wait on — just give other threads a turn.
  Thread.pass
end
601
+
602
+ # 2.6-C — Plain-sendfile loop variant with `IO.select` instead
603
+ # of fiber yield. Same body as `plain_sendfile_loop` plus the
604
+ # 2.6-A USERSPACE_CHUNK cap, but EAGAIN parks the OS thread on
605
+ # a 5 s select() rather than calling out to the fiber scheduler.
606
+ # Under the GVL this is the Puma-style "serial-per-thread"
607
+ # response shape — sendfile, syscall returns EAGAIN, thread
608
+ # blocks on select, kernel wakes us when the socket drains, we
609
+ # retry from the same cursor. No per-chunk fiber-scheduler hop.
610
+ #
611
+ # We deliberately don't reuse the splice path here: splice's
612
+ # per-response pipe lifecycle pairs cleanly with fiber dispatch
613
+ # (where wait_writable is cheap), and the splice win is
614
+ # marginal vs sendfile on warm-cache static. For
615
+ # `:inline_blocking` we keep the loop deliberately straight-line
616
+ # — sendfile only, no userspace pipe ladder.
617
def native_copy_loop_blocking(out_io, file_io, offset, len)
  # Same `copy`-driven loop as `plain_sendfile_loop`, except a not-ready
  # socket is waited on via `select_writable_blocking`. Returns the
  # total bytes moved, including any tail handed to
  # `userspace_copy_loop`.
  sent = 0

  while sent < len
    position = offset + sent
    pending = len - sent
    # Never ask for more than USERSPACE_CHUNK in a single call.
    chunk = [pending, USERSPACE_CHUNK].min
    bytes, status = copy(out_io, file_io, position, chunk)

    case status
    when :done, :partial
      sent += bytes
    when :eagain
      # Nothing moved; wait for the socket, then retry from the same
      # position.
      select_writable_blocking(out_io)
    when :unsupported
      # This fd pair cannot use the native primitive: seek the file to
      # the current cursor and finish through the userspace loop.
      file_io.seek(position) if file_io.respond_to?(:seek)
      return sent + userspace_copy_loop(out_io, file_io, position, pending)
    else
      raise "Hyperion::Http::Sendfile: unexpected status #{status.inspect}"
    end
  end

  sent
end
650
+
651
+ # 2.6-C — block the OS thread on a writable-readiness select
652
+ # rather than yield to the fiber scheduler. 5 s timeout is a
653
+ # belt-and-suspenders bound: Connection's per-request deadline
654
+ # (default 60 s) fires first on a stuck peer, but we still want
655
+ # IO.select to wake periodically so a misbehaving peer can't
656
+ # park a worker thread forever. Tolerate IOs that don't expose
657
+ # `to_io` (StringIO in specs, mock objects) by short-circuiting
658
+ # via `Thread.pass` — same fallback shape as `wait_writable`.
659
+ #
660
+ # 2.6-D — ensure the select bypasses the fiber scheduler even
661
+ # if the caller didn't already wrap us in `Fiber.blocking`.
662
+ # The 2.6-C path called `IO.select` from a fiber whose
663
+ # scheduler hook (Async::Reactor#kernel_select) intercepts
664
+ # the call and yields cooperatively — the OS-thread block
665
+ # never happened, the fiber kept getting rescheduled, and
666
+ # the per-chunk yield-resume tax that `:inline_blocking` was
667
+ # designed to eliminate stayed in place. Wrapping the
668
+ # select in `Fiber.blocking { ... }` flips
669
+ # `Fiber.current.blocking?` to true for the duration; the
670
+ # scheduler is no longer consulted, and the OS thread
671
+ # parks on the kernel readiness check.
672
def select_writable_blocking(out_io)
  # Resolve something IO.select can watch: the object itself when it is
  # an ::IO, otherwise whatever `to_io` hands back (nil if that raises).
  target = out_io if out_io.is_a?(::IO)
  if target.nil? && out_io.respond_to?(:to_io)
    target =
      begin
        out_io.to_io
      rescue StandardError
        nil
      end
  end

  # Nothing selectable — just give other threads a turn.
  return Thread.pass unless target

  # Wait up to 5 seconds for writability. When the current fiber is not
  # already blocking, the select runs inside `Fiber.blocking`.
  return ::IO.select(nil, [target], nil, 5.0) if ::Fiber.current.blocking?

  ::Fiber.blocking { ::IO.select(nil, [target], nil, 5.0) }
end
693
+ end
694
+ end
695
+ end
696
+ end