hyperion-rb 1.6.2 → 2.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4563 -0
- data/README.md +189 -13
- data/ext/hyperion_h2_codec/Cargo.lock +7 -0
- data/ext/hyperion_h2_codec/Cargo.toml +33 -0
- data/ext/hyperion_h2_codec/extconf.rb +73 -0
- data/ext/hyperion_h2_codec/src/frames.rs +140 -0
- data/ext/hyperion_h2_codec/src/hpack/huffman.rs +161 -0
- data/ext/hyperion_h2_codec/src/hpack.rs +457 -0
- data/ext/hyperion_h2_codec/src/lib.rs +296 -0
- data/ext/hyperion_http/extconf.rb +28 -0
- data/ext/hyperion_http/h2_codec_glue.c +408 -0
- data/ext/hyperion_http/page_cache.c +1125 -0
- data/ext/hyperion_http/parser.c +473 -38
- data/ext/hyperion_http/sendfile.c +982 -0
- data/ext/hyperion_http/websocket.c +493 -0
- data/ext/hyperion_io_uring/Cargo.lock +33 -0
- data/ext/hyperion_io_uring/Cargo.toml +34 -0
- data/ext/hyperion_io_uring/extconf.rb +74 -0
- data/ext/hyperion_io_uring/src/lib.rs +316 -0
- data/lib/hyperion/adapter/rack.rb +370 -42
- data/lib/hyperion/admin_listener.rb +207 -0
- data/lib/hyperion/admin_middleware.rb +36 -7
- data/lib/hyperion/cli.rb +310 -11
- data/lib/hyperion/config.rb +440 -14
- data/lib/hyperion/connection.rb +679 -22
- data/lib/hyperion/deprecations.rb +81 -0
- data/lib/hyperion/dispatch_mode.rb +165 -0
- data/lib/hyperion/fiber_local.rb +75 -13
- data/lib/hyperion/h2_admission.rb +77 -0
- data/lib/hyperion/h2_codec.rb +452 -0
- data/lib/hyperion/http/page_cache.rb +122 -0
- data/lib/hyperion/http/sendfile.rb +696 -0
- data/lib/hyperion/http2/native_hpack_adapter.rb +70 -0
- data/lib/hyperion/http2_handler.rb +368 -9
- data/lib/hyperion/io_uring.rb +317 -0
- data/lib/hyperion/lint_wrapper_pool.rb +126 -0
- data/lib/hyperion/master.rb +96 -9
- data/lib/hyperion/metrics/path_templater.rb +68 -0
- data/lib/hyperion/metrics.rb +256 -0
- data/lib/hyperion/prometheus_exporter.rb +150 -0
- data/lib/hyperion/request.rb +13 -0
- data/lib/hyperion/response_writer.rb +477 -16
- data/lib/hyperion/runtime.rb +195 -0
- data/lib/hyperion/server/route_table.rb +179 -0
- data/lib/hyperion/server.rb +519 -55
- data/lib/hyperion/static_preload.rb +133 -0
- data/lib/hyperion/thread_pool.rb +61 -7
- data/lib/hyperion/tls.rb +343 -1
- data/lib/hyperion/version.rb +1 -1
- data/lib/hyperion/websocket/close_codes.rb +71 -0
- data/lib/hyperion/websocket/connection.rb +876 -0
- data/lib/hyperion/websocket/frame.rb +356 -0
- data/lib/hyperion/websocket/handshake.rb +525 -0
- data/lib/hyperion/worker.rb +111 -9
- data/lib/hyperion.rb +137 -3
- metadata +50 -1
|
@@ -0,0 +1,696 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'fcntl'
|
|
4
|
+
|
|
5
|
+
module Hyperion
|
|
6
|
+
module Http
|
|
7
|
+
# Sendfile — Ruby-side façade over the C-extension Hyperion::Http::Sendfile
|
|
8
|
+
# native helper. Handles the portable concerns the C ext deliberately leaves
|
|
9
|
+
# to userspace:
|
|
10
|
+
#
|
|
11
|
+
# * Looping on :partial returns from the kernel (short writes).
|
|
12
|
+
# * Yielding to the fiber scheduler / IO.select on :eagain.
|
|
13
|
+
# * Falling back to IO.copy_stream when:
|
|
14
|
+
# - native zero-copy isn't compiled (non-Linux, non-BSD/Darwin host),
|
|
15
|
+
# - the kernel returned :unsupported (this fd pair can't sendfile),
|
|
16
|
+
# - the destination IO is a TLS-wrapped socket (kernel can't encrypt).
|
|
17
|
+
#
|
|
18
|
+
# The C ext defines `Hyperion::Http::Sendfile` as a module too — when the
|
|
19
|
+
# extension loads first it pre-creates the constant and we re-open it
|
|
20
|
+
# here to add the higher-level helpers. The native singleton methods
|
|
21
|
+
# (`copy`, `copy_small`, `copy_splice`, `supported?`, `splice_supported?`,
|
|
22
|
+
# `small_file_threshold`, `platform_tag`) survive the re-open untouched.
|
|
23
|
+
#
|
|
24
|
+
# 2.0.1 Phase 8 — close static-file rps gaps
|
|
25
|
+
# -------------------------------------------
|
|
26
|
+
# The 2.0.0 BENCH report had two rows where Hyperion still lost Puma on
|
|
27
|
+
# rps:
|
|
28
|
+
#
|
|
29
|
+
# * 8 KB static at -t 5 (-w 1) — 121 r/s vs Puma 1,246 r/s (10× loss)
|
|
30
|
+
# * 1 MiB static at -t 5 (-w 1) — 1,809 r/s vs Puma 2,139 r/s (-15%)
|
|
31
|
+
#
|
|
32
|
+
# Diagnosis (see ext/hyperion_http/sendfile.c header):
|
|
33
|
+
#
|
|
34
|
+
# 8 KB row: every request paid ~40 ms in EAGAIN-yield-retry cycles
|
|
35
|
+
# because sendfile against an 8 KB file routinely hits EAGAIN once
|
|
36
|
+
# before the kernel TCP send buffer accepts it; with -t 5 only 5
|
|
37
|
+
# fibers can be in-flight, and 4 sleeping in EAGAIN-yield-retry
|
|
38
|
+
# starves the wrk loop.
|
|
39
|
+
#
|
|
40
|
+
# 1 MiB row: sendfile(2) re-derives some bookkeeping per call that
|
|
41
|
+
# splice(2) through a pipe-tee avoids.
|
|
42
|
+
#
|
|
43
|
+
# Fixes:
|
|
44
|
+
#
|
|
45
|
+
# 8a. Small-file fast path. If file_size <= 64 KiB we use the new
|
|
46
|
+
# `copy_small` C primitive: heap-buffered read + write under the
|
|
47
|
+
# GVL released, EAGAIN polled with a short select() instead of
|
|
48
|
+
# fiber-yielding. The transfer completes in microseconds rather
|
|
49
|
+
# than dancing with the fiber scheduler.
|
|
50
|
+
#
|
|
51
|
+
# 8b. Linux splice path (2.0.1 / disabled / re-enabled in 2.2.0). For
|
|
52
|
+
# files > 64 KiB on Linux we try `copy_splice` first (file_fd ->
|
|
53
|
+
# fresh pipe -> sock_fd with SPLICE_F_MOVE | SPLICE_F_MORE).
|
|
54
|
+
# Falls back to plain `copy` (sendfile) if the runtime kernel
|
|
55
|
+
# returns :unsupported, if `splice_supported?` is false (non-
|
|
56
|
+
# Linux builds), or if any SystemCallError surfaces from the
|
|
57
|
+
# primitive.
|
|
58
|
+
#
|
|
59
|
+
# 2.2.0 — splice path re-enabled with fresh per-request pipe pair
|
|
60
|
+
# ---------------------------------------------------------------
|
|
61
|
+
# 2.0.1 disabled the splice route from copy_to_socket because the
|
|
62
|
+
# cached per-thread pipe pair leaked residual bytes between requests:
|
|
63
|
+
# if `splice(file -> pipe)` succeeded but `splice(pipe -> socket)`
|
|
64
|
+
# failed mid-transfer (peer closed), the unread bytes stayed in the
|
|
65
|
+
# pipe and went out on the NEXT connection's socket. 2.2.0 fixes
|
|
66
|
+
# this at the lifecycle layer rather than abandoning the path —
|
|
67
|
+
# `copy_splice` now opens a fresh `pipe2(O_CLOEXEC | O_NONBLOCK)`
|
|
68
|
+
# pair on every call and closes both fds on every exit path. Two
|
|
69
|
+
# extra syscalls per call vs the cached layout, but correctness is
|
|
70
|
+
# unconditional: a pipe never carries bytes for more than one
|
|
71
|
+
# transfer.
|
|
72
|
+
#
|
|
73
|
+
# 2.2.x fix-A — pipe-hoist out of the chunk loop
|
|
74
|
+
# ----------------------------------------------
|
|
75
|
+
# The 2026-04-30 bench sweep showed 2.2.0's per-call pipe2 cost a
|
|
76
|
+
# -23% rps regression on the static 1 MiB row (1,697 → 1,312 r/s)
|
|
77
|
+
# because `native_copy_loop` invokes the splice primitive ONCE PER
|
|
78
|
+
# CHUNK in a `while remaining.positive?` loop. For a 1 MiB asset
|
|
79
|
+
# at 64 KiB chunks that's 16 calls × 3 syscalls of pipe overhead =
|
|
80
|
+
# 48 wasted syscalls per request. Fix-A pushes the pipe lifecycle
|
|
81
|
+
# up one level: `native_copy_loop` now opens a single
|
|
82
|
+
# pipe2(O_CLOEXEC | O_NONBLOCK) pair per RESPONSE, hands it to the
|
|
83
|
+
# new `copy_splice_into_pipe` primitive for every chunk, and
|
|
84
|
+
# closes both fds in an ensure block when the response loop
|
|
85
|
+
# unwinds (success, EAGAIN-retry-loop exit, raised exception).
|
|
86
|
+
# Same correctness window as 2.2.0 — a pipe pair never outlives
|
|
87
|
+
# one response, so EPIPE mid-transfer cannot leak residual bytes
|
|
88
|
+
# onto the next request's socket — at 1/16th the syscall cost.
|
|
89
|
+
module Sendfile
|
|
90
|
+
# Per-call byte cap shared by every transfer strategy: the userspace
# IO.copy_stream fallback and the native sendfile / splice loops.
# 2.6-A raised it from 64 KiB to 256 KiB — measurements on Linux 6.x
# showed the kernel happily accepts 256 KiB per sendfile(2)/splice(2)
# round (the TCP send buffer auto-tunes upward under sustained load),
# so a 1 MiB response moves in 4 syscalls instead of 16 while each
# call's GVL hold-time stays well under 1 ms even on a slow client.
# Comparable servers sit in the same band (Apache SendBufferSize
# 128k-256k, Caddy 256 KiB hard-coded, nginx distro overrides ~2m).
USERSPACE_CHUNK = 256 * 1024

# Phase 8a ceiling: payloads at or below this size take the
# synchronous read+write fast path (`copy_small`) with no fiber
# yield. Mirrors the C-side HYP_SMALL_FILE_THRESHOLD constant; on
# hosts with the native ext loaded, `small_file_threshold` exposes
# the C value for cross-checking.
SMALL_FILE_THRESHOLD = 64 * 1024

# 2.2.0 — splice engages only for payloads strictly larger than this.
# Anything at or below SMALL_FILE_THRESHOLD already went through the
# `copy_small` synchronous path, and between the two sizes plain
# sendfile(2) wouldn't amortize the extra pipe2 + 2x close round-trip.
# Pinning this to SMALL_FILE_THRESHOLD means every response that skips
# the small-file path becomes a splice candidate.
SPLICE_THRESHOLD = SMALL_FILE_THRESHOLD

# 2.7-F — minimum body size for the once-per-response
# posix_fadvise(fd, 0, len, POSIX_FADV_SEQUENTIAL) readahead hint.
# Smaller files complete in a single kernel round and gain nothing
# from page-cache pre-warming; at or above this size the chunk loop
# (USERSPACE_CHUNK = 256 KiB per round) issues multiple kernel calls
# and the hint keeps later chunks off cold disk I/O. 2.6-B issued the
# same hint PER CHUNK inside the C primitive and regressed warm-cache
# by -6.6%; the call is now hoisted to the Ruby loop entry so the
# warm-cache cost is at most one extra syscall per response.
FADVISE_THRESHOLD = 256 * 1024
|
|
145
|
+
|
|
146
|
+
class << self
|
|
147
|
+
# 2.2.0 — process-lifetime cache for "may we attempt the splice path?".
# The C ext's `splice_supported?` only reports COMPILE-time availability
# (true on Linux builds, false elsewhere); at runtime an old kernel can
# still reject splice(2) with ENOSYS / EINVAL on first use, at which
# point `mark_splice_unsupported!` flips this cache to false so no
# later request pays the failed-syscall round-trip. We deliberately do
# not run a live pipe2+splice probe here — the first real
# `copy_splice_into_pipe` call is the probe, and it's cheap.
#
# 2.2.x fix-A note: the pipe pair is now opened once per response
# (see `splice_copy_loop`), so the 2.2.0 HYPERION_HTTP_SPLICE env-var
# gate is obsolete in principle; it stays until the openclaw-vm bench
# re-confirms splice >= plain-sendfile baseline (the fix-A landing
# session had no SSH access — see CHANGELOG). Operators can A/B test
# by setting HYPERION_HTTP_SPLICE=1.
def splice_runtime_supported?
  unless defined?(@splice_runtime_supported)
    @splice_runtime_supported =
      ENV['HYPERION_HTTP_SPLICE'] == '1' &&
      respond_to?(:splice_supported?) &&
      splice_supported?
  end

  @splice_runtime_supported
end
|
|
187
|
+
|
|
188
|
+
# Invoked from the splice loop when `copy_splice_into_pipe` reports
# :unsupported at runtime (pre-splice kernel, sandbox that blocks
# pipe2, ...). Latches the cached answer to false so every later
# response on this process skips straight to plain sendfile(2)
# instead of re-attempting the doomed syscall.
def mark_splice_unsupported!
  @splice_runtime_supported = false
end
|
|
197
|
+
|
|
198
|
+
# Classify how bytes should travel to `out_io`:
#
#   :tls_userspace — `out_io` is an OpenSSL socket; the kernel cannot
#                    encrypt, so sendfile(2) is off the table.
#   :userspace     — no real kernel fd on the destination (StringIO,
#                    mock IOs in specs, ...), or the C ext was built
#                    without native zero-copy. IO.copy_stream handles
#                    these and is still faster than a per-chunk fiber
#                    hop.
#   :native        — real fd + native zero-copy compiled in
#                    (`supported?` is defined by the C ext and true).
def fast_path_kind(out_io)
  if tls_socket?(out_io)
    :tls_userspace
  elsif !real_fd?(out_io)
    # sendfile(2) needs kernel fds on BOTH ends; userspace-only IO
    # shapes fall back to the IO.copy_stream loop.
    :userspace
  elsif respond_to?(:supported?) && supported?
    :native
  else
    :userspace
  end
end
|
|
220
|
+
|
|
221
|
+
# High-level entry point: move `len` bytes of `file_io` (regular
# file), starting at absolute `offset`, into `out_io` (TCP socket or
# other writable IO). Loops on partial kernel writes and yields to
# the scheduler on EAGAIN.
#
# Returns the total bytes written. Real socket errors (EPIPE,
# ECONNRESET, ...) surface as Errno::* — the same shape as a raw
# `socket.write`, so callers' existing rescue handlers (slow-client
# cleanup, metrics, body#close) keep working unchanged.
def copy_to_socket(out_io, file_io, offset, len)
  return 0 if len.zero?

  kind = fast_path_kind(out_io)

  # Phase 8a: small payloads skip the fiber-yield dance entirely via
  # the synchronous `copy_small` primitive. Requires the native
  # branch (write(2) needs a real socket fd) AND a real fd on the
  # source side (pread(2) against an Integer fd). Checked BEFORE the
  # streaming dispatch — that ordering is the whole point of 8a.
  small_path = kind == :native &&
               len <= SMALL_FILE_THRESHOLD &&
               respond_to?(:copy_small) &&
               real_fd?(file_io)
  return copy_small(out_io, file_io, offset, len) if small_path

  if kind == :native
    native_copy_loop(out_io, file_io, offset, len)
  else
    # :userspace and :tls_userspace both route through IO.copy_stream.
    userspace_copy_loop(out_io, file_io, offset, len)
  end
end
|
|
253
|
+
|
|
254
|
+
# 2.6-C — Puma-style serial-per-thread variant of `copy_to_socket`.
# Same zero-copy mechanics, but EAGAIN parks the OS thread on
# IO.select instead of fiber-yielding; no per-chunk scheduler hop.
# Engaged by `ResponseWriter#write_sendfile` when the per-response
# `dispatch_mode` is `:inline_blocking` (auto-detected for
# `body.respond_to?(:to_path)` static routes, or set via
# `env['hyperion.dispatch_mode']`).
#
# 2.6-D — defensively wraps itself in `Fiber.blocking` so direct
# callers (specs, future code paths) get the no-yield guarantee even
# without wrapping us themselves; when the calling fiber is already
# blocking (the production fast path) the wrap is skipped outright.
def copy_to_socket_blocking(out_io, file_io, offset, len)
  return 0 if len.zero?

  return copy_to_socket_blocking_inner(out_io, file_io, offset, len) if ::Fiber.current.blocking?

  ::Fiber.blocking do
    copy_to_socket_blocking_inner(out_io, file_io, offset, len)
  end
end
|
|
290
|
+
|
|
291
|
+
private
|
|
292
|
+
|
|
293
|
+
# Blocking-mode dispatch body; always entered with the current fiber's
# blocking flag set (see `copy_to_socket_blocking`). Mirrors
# `copy_to_socket`'s ladder: small-file synchronous primitive first
# (`copy_small` already handles EAGAIN with a short select under the
# GVL), then the blocking native streaming loop, then the
# IO.copy_stream fallback (already thread-blocking by nature).
def copy_to_socket_blocking_inner(out_io, file_io, offset, len)
  kind = fast_path_kind(out_io)

  take_small = kind == :native &&
               len <= SMALL_FILE_THRESHOLD &&
               respond_to?(:copy_small) &&
               real_fd?(file_io)
  return copy_small(out_io, file_io, offset, len) if take_small

  if kind == :native
    native_copy_loop_blocking(out_io, file_io, offset, len)
  else
    userspace_copy_loop(out_io, file_io, offset, len)
  end
end
|
|
308
|
+
|
|
309
|
+
# Is `io` a TLS-wrapped socket? TLS destinations can never take the
# kernel zero-copy path (the kernel can't encrypt without kTLS), so
# the dispatcher routes them to userspace. The `defined?` guard keeps
# this safe on processes that never loaded OpenSSL.
def tls_socket?(io)
  defined?(::OpenSSL::SSL::SSLSocket) && io.is_a?(::OpenSSL::SSL::SSLSocket)
end
|
|
312
|
+
|
|
313
|
+
# Does `io` carry a real kernel fd we can hand to sendfile(2)?
# StringIO, unflushed Tempfile wrappers and custom IO-shaped spec
# doubles either lack `fileno` or answer nil from it; anything that
# can't prove a non-negative Integer fd routes through the userspace
# fallback. Probing is wrapped in a forgiving rescue because
# `fileno` raises NotImplementedError on several IO-likes.
def real_fd?(io)
  return true if io.is_a?(::IO) && !io.closed?

  if io.respond_to?(:to_io)
    wrapped = io.to_io
    wrapped.is_a?(::IO) && !wrapped.closed?
  elsif io.respond_to?(:fileno)
    descriptor = io.fileno
    descriptor.is_a?(Integer) && descriptor >= 0
  else
    false
  end
rescue StandardError
  false
end
|
|
337
|
+
|
|
338
|
+
# Native streaming dispatcher for payloads above SMALL_FILE_THRESHOLD.
#
# 2.2.x fix-A — on Linux, payloads above SPLICE_THRESHOLD go through
# `splice_copy_loop`, which owns ONE pipe pair for the whole response
# (vs one per chunk in the 2.2.0 layout: 1x pipe2 + 2x close instead
# of 16x/32x for a 1 MiB asset, a 3.4x syscall reduction) while
# keeping the no-cross-request-byte-leak guarantee. Non-Linux builds
# (`splice_supported?` false) and Linux kernels that rejected splice
# at runtime fall through to plain sendfile(2).
def native_copy_loop(out_io, file_io, offset, len)
  # 2.7-F — single fadvise readahead hint per response, issued here
  # so exactly one of the two branches below benefits. Deliberately
  # NOT per-chunk: 2.6-B did that and regressed warm-cache -6.6%
  # (commit 4cd8009).
  maybe_fadvise_sequential(file_io, len)

  splice_ok = splice_runtime_supported? &&
              len > SPLICE_THRESHOLD &&
              respond_to?(:copy_splice_into_pipe)

  if splice_ok
    splice_copy_loop(out_io, file_io, offset, len)
  else
    plain_sendfile_loop(out_io, file_io, offset, len)
  end
end
|
|
380
|
+
|
|
381
|
+
# 2.7-F — best-effort POSIX_FADV_SEQUENTIAL hint, called once per
# response from `native_copy_loop`. Skipped when the C ext doesn't
# define `fadvise_sequential` (non-Linux), when the body is too small
# to span multiple chunks (single-round responses gain nothing from
# readahead), and when the source has no real kernel fd (StringIO,
# mocks). The hint is purely informational — any error from the
# primitive is swallowed rather than failing a static-file response.
# (Extending the hint to `copy_to_socket_blocking` is deferred to
# 2.7.x pending bench numbers; lift this into a shared helper then.)
def maybe_fadvise_sequential(file_io, len)
  return unless respond_to?(:fadvise_sequential)
  return if len < FADVISE_THRESHOLD
  return unless real_fd?(file_io)

  begin
    fadvise_sequential(file_io, len)
  rescue StandardError
    # Defensive: never let fd-extraction / coercion errors from an
    # advisory syscall take down the response.
    nil
  end
end
|
|
406
|
+
|
|
407
|
+
# 2.2.x fix-A — splice streaming loop with ONE pipe pair per response.
# The pair is opened on entry, handed to `copy_splice_into_pipe` for
# every chunk, and closed in the ensure on every exit path (return,
# raise, throw) — so a mid-transfer EPIPE can never leave residual
# bytes in a pipe that a later request would flush onto its socket.
# If the runtime kernel rejects splice on the first chunk
# (:unsupported or NotImplementedError), the negative answer is cached
# for the process and the rest of the response finishes through plain
# sendfile(2) from the current cursor.
def splice_copy_loop(out_io, file_io, offset, len)
  left = len
  pos = offset
  written = 0
  pipe_r, pipe_w = open_splice_pipe!

  begin
    while left.positive?
      # 2.6-A — cap each splice round at USERSPACE_CHUNK (256 KiB):
      # 4 rounds per 1 MiB asset vs 16 at the legacy 64 KiB ceiling.
      ask = [left, USERSPACE_CHUNK].min

      begin
        bytes, status = copy_splice_into_pipe(out_io, file_io, pos, ask, pipe_r, pipe_w)
      rescue NotImplementedError
        mark_splice_unsupported!
        return written + plain_sendfile_loop(out_io, file_io, pos, left)
      end

      case status
      when :done, :partial
        # :done means the kernel took the FULL chunk we asked for —
        # not the full response — so both outcomes advance the
        # cursor identically and the while-condition decides when
        # the response is drained.
        written += bytes
        pos += bytes
        left -= bytes
      when :eagain
        # The primitive only reports :eagain when ZERO bytes moved
        # (bytes>0 + EAGAIN maps to :partial in the C ext), so no
        # cursor motion here — just park until writable.
        wait_writable(out_io)
      when :unsupported
        # Runtime kernel rejected splice; plain sendfile may still
        # work. Cache the negative answer and finish from here.
        mark_splice_unsupported!
        return written + plain_sendfile_loop(out_io, file_io, pos, left)
      else
        raise "Hyperion::Http::Sendfile: unexpected status #{status.inspect}"
      end
    end

    written
  ensure
    # Per-response pipe lifecycle: both fds die with the response,
    # whatever the exit path.
    close_splice_pipe(pipe_r, pipe_w)
  end
end
|
|
480
|
+
|
|
481
|
+
# Plain sendfile(2) streaming loop. Serves non-Linux hosts, hosts
# whose runtime kernel lacks splice, and the tail of a splice run that
# hit :unsupported mid-response. Each kernel call is capped at
# USERSPACE_CHUNK (2.6-A: 256 KiB — 4 rounds per 1 MiB response vs 16
# at the legacy 64 KiB ceiling; Linux 4.x+ and Darwin/*BSD accept the
# larger count, with short writes still surfacing as :partial).
def plain_sendfile_loop(out_io, file_io, offset, len)
  left = len
  pos = offset
  written = 0

  while left.positive?
    ask = [left, USERSPACE_CHUNK].min
    bytes, status = copy(out_io, file_io, pos, ask)

    case status
    when :done, :partial
      # :done = the kernel wrote the full CHUNK (not the full
      # response); advance and let the while-condition terminate.
      written += bytes
      pos += bytes
      left -= bytes
    when :eagain
      wait_writable(out_io)
    when :unsupported
      # This fd pair can't sendfile at all — drop to userspace. We
      # always passed absolute offsets to the kernel, so the file's
      # own read position is untouched; line it up before handing
      # off.
      file_io.seek(pos) if file_io.respond_to?(:seek)
      return written + userspace_copy_loop(out_io, file_io, pos, left)
    else
      raise "Hyperion::Http::Sendfile: unexpected status #{status.inspect}"
    end
  end

  written
end
|
|
530
|
+
|
|
531
|
+
# Build the per-response splice pipe pair. Returned as [read_end,
# write_end] Ruby IO objects so the caller's ensure can `.close` them
# through the ordinary IO protocol — no stale-fd hazard if the C ext
# tore the raw fd down during an :unsupported teardown. Both ends are
# switched non-blocking (matching the C ext's pipe2 fallback for
# `copy_splice`) so a wedged splice can't park a worker thread.
def open_splice_pipe!
  ends = IO.pipe
  ends.each { |pipe_end| set_nonblock!(pipe_end) }
  ends
end
|
|
545
|
+
|
|
546
|
+
# Flip O_NONBLOCK on via fcntl, preserving the fd's other flags.
# Strictly best-effort: the splice ladder tolerates blocking pipe
# ends, so failures (including Ruby builds without a working Fcntl)
# fall through silently — the flag is defense-in-depth, not a
# correctness requirement.
def set_nonblock!(io)
  io.fcntl(Fcntl::F_SETFL, io.fcntl(Fcntl::F_GETFL) | Fcntl::O_NONBLOCK)
rescue StandardError
  # Swallow — see above.
end
|
|
555
|
+
|
|
556
|
+
# Close both splice-pipe ends, tolerating nil / already-closed ends.
#
# Typically called from an ensure block, so a close failure must never
# bubble up over the original exception (or the success return).
#
# BUG FIX: the previous version used one method-level rescue covering
# both closes — if `pipe_r.close` raised, `pipe_w.close` was skipped
# entirely, leaking the write-end fd. Each end is now closed under its
# own rescue so one failure cannot prevent the other close.
def close_splice_pipe(pipe_r, pipe_w)
  [pipe_r, pipe_w].each do |io|
    begin
      io.close unless io.nil? || io.closed?
    rescue StandardError
      # Swallow per-end close failures; see method comment.
    end
  end
  nil
end
|
|
564
|
+
|
|
565
|
+
# Userspace fallback copy.
#
# Avoids the per-chunk fiber hop of WriterContext-style writers by
# driving IO.copy_stream with up to USERSPACE_CHUNK bytes per call;
# copy_stream owns the inner read+write loop and, on Linux plain TCP,
# will use sendfile(2) itself. Kept as the defensive path for TLS
# sockets and hosts without sendfile support.
#
# Seeks `file_io` to `offset` when possible, copies at most `len`
# bytes, and returns the number of bytes actually moved (short on
# EOF or a zero-byte copy).
def userspace_copy_loop(out_io, file_io, offset, len)
  file_io.seek(offset) if file_io.respond_to?(:seek)

  copied = 0
  left = len
  while left.positive?
    step = [left, USERSPACE_CHUNK].min
    moved = IO.copy_stream(file_io, out_io, step)
    break if moved.nil? || moved.zero?

    copied += moved
    left -= moved
  end
  copied
end
|
|
585
|
+
|
|
586
|
+
# Park until `out_io` looks writable.
#
# Under Async, IO#wait_writable transparently routes through the fiber
# scheduler's io_wait, so this yields the fiber. Outside Async it
# degrades to a 1 s IO.select on the underlying IO. Objects exposing
# neither (plain Integer fds, StringIO in specs) just get a single
# Thread.pass — those paths are rare in production and not worth a
# custom waiter.
def wait_writable(out_io)
  return out_io.wait_writable if out_io.respond_to?(:wait_writable)

  if out_io.respond_to?(:to_io)
    IO.select(nil, [out_io.to_io], nil, 1.0)
  else
    Thread.pass
  end
end
|
|
601
|
+
|
|
602
|
+
# 2.6-C — sendfile loop that blocks the OS thread on IO.select rather
# than yielding the fiber. Same shape as `plain_sendfile_loop` with the
# 2.6-A USERSPACE_CHUNK cap, but EAGAIN parks the thread on a bounded
# select (via select_writable_blocking) instead of the fiber scheduler.
# Under the GVL this is the Puma-style serial-per-thread response:
# sendfile, EAGAIN, block on select, kernel wakes us, retry from the
# same cursor — no per-chunk fiber-scheduler hop.
#
# The splice path is deliberately not reused here: its per-response
# pipe lifecycle pairs with fiber dispatch (where wait_writable is
# cheap), and splice's win over sendfile on warm-cache static is
# marginal. For `:inline_blocking` the loop stays straight-line —
# sendfile only, no userspace pipe ladder.
#
# Returns the total byte count pushed to `out_io`.
def native_copy_loop_blocking(out_io, file_io, offset, len)
  left = len
  pos = offset
  sent = 0

  while left.positive?
    want = [left, USERSPACE_CHUNK].min
    bytes, status = copy(out_io, file_io, pos, want)

    case status
    when :done, :partial
      # Both carry a byte count; advance the absolute cursor.
      sent += bytes
      pos += bytes
      left -= bytes
    when :eagain
      # Socket buffer full — block this OS thread until drained.
      select_writable_blocking(out_io)
    when :unsupported
      # Kernel refused sendfile for this fd pair; finish in userspace.
      # The file's own read offset is untouched (we passed absolute
      # offsets to the kernel), so rewind explicitly first. The
      # userspace path already blocks the calling thread
      # (IO.copy_stream loops on write).
      file_io.seek(pos) if file_io.respond_to?(:seek)
      return sent + userspace_copy_loop(out_io, file_io, pos, left)
    else
      raise "Hyperion::Http::Sendfile: unexpected status #{status.inspect}"
    end
  end

  sent
end
|
|
650
|
+
|
|
651
|
+
# 2.6-C — park the OS thread on a writable-readiness select instead of
# yielding to the fiber scheduler. The 5 s timeout is belt-and-
# suspenders: Connection's per-request deadline (default 60 s) fires
# first on a stuck peer, but we still want select to wake periodically
# so a misbehaving peer can't pin a worker thread forever. IOs without
# `to_io` (StringIO in specs, mocks) short-circuit via Thread.pass —
# the same fallback shape as `wait_writable`.
#
# 2.6-D — force the select past the fiber scheduler even when the
# caller didn't wrap us in Fiber.blocking. Previously, IO.select from
# a scheduled fiber was intercepted by the scheduler hook
# (Async::Reactor#kernel_select) and yielded cooperatively — the OS
# thread never actually blocked, and the per-chunk yield/resume tax
# that `:inline_blocking` exists to remove stayed in place. Wrapping
# in `Fiber.blocking { ... }` flips `Fiber.current.blocking?` for the
# duration, the scheduler is bypassed, and the thread parks on the
# kernel readiness check.
def select_writable_blocking(out_io)
  target =
    case out_io
    when ::IO
      out_io
    else
      if out_io.respond_to?(:to_io)
        begin
          out_io.to_io
        rescue StandardError
          nil
        end
      end
    end

  return Thread.pass unless target

  if ::Fiber.current.blocking?
    # Already outside any scheduler; a plain select blocks the thread.
    ::IO.select(nil, [target], nil, 5.0)
  else
    ::Fiber.blocking { ::IO.select(nil, [target], nil, 5.0) }
  end
end
|
|
693
|
+
end
|
|
694
|
+
end
|
|
695
|
+
end
|
|
696
|
+
end
|