hyperion-rb 1.6.2 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4768 -0
- data/README.md +222 -13
- data/ext/hyperion_h2_codec/Cargo.lock +7 -0
- data/ext/hyperion_h2_codec/Cargo.toml +33 -0
- data/ext/hyperion_h2_codec/extconf.rb +73 -0
- data/ext/hyperion_h2_codec/src/frames.rs +140 -0
- data/ext/hyperion_h2_codec/src/hpack/huffman.rs +161 -0
- data/ext/hyperion_h2_codec/src/hpack.rs +457 -0
- data/ext/hyperion_h2_codec/src/lib.rs +296 -0
- data/ext/hyperion_http/extconf.rb +28 -0
- data/ext/hyperion_http/h2_codec_glue.c +408 -0
- data/ext/hyperion_http/page_cache.c +1125 -0
- data/ext/hyperion_http/parser.c +473 -38
- data/ext/hyperion_http/sendfile.c +982 -0
- data/ext/hyperion_http/websocket.c +493 -0
- data/ext/hyperion_io_uring/Cargo.lock +33 -0
- data/ext/hyperion_io_uring/Cargo.toml +34 -0
- data/ext/hyperion_io_uring/extconf.rb +74 -0
- data/ext/hyperion_io_uring/src/lib.rs +316 -0
- data/lib/hyperion/adapter/rack.rb +370 -42
- data/lib/hyperion/admin_listener.rb +207 -0
- data/lib/hyperion/admin_middleware.rb +36 -7
- data/lib/hyperion/cli.rb +310 -11
- data/lib/hyperion/config.rb +440 -14
- data/lib/hyperion/connection.rb +679 -22
- data/lib/hyperion/deprecations.rb +81 -0
- data/lib/hyperion/dispatch_mode.rb +165 -0
- data/lib/hyperion/fiber_local.rb +75 -13
- data/lib/hyperion/h2_admission.rb +77 -0
- data/lib/hyperion/h2_codec.rb +499 -0
- data/lib/hyperion/http/page_cache.rb +122 -0
- data/lib/hyperion/http/sendfile.rb +696 -0
- data/lib/hyperion/http2/native_hpack_adapter.rb +70 -0
- data/lib/hyperion/http2_handler.rb +618 -19
- data/lib/hyperion/io_uring.rb +317 -0
- data/lib/hyperion/lint_wrapper_pool.rb +126 -0
- data/lib/hyperion/master.rb +96 -9
- data/lib/hyperion/metrics/path_templater.rb +68 -0
- data/lib/hyperion/metrics.rb +256 -0
- data/lib/hyperion/prometheus_exporter.rb +150 -0
- data/lib/hyperion/request.rb +13 -0
- data/lib/hyperion/response_writer.rb +477 -16
- data/lib/hyperion/runtime.rb +195 -0
- data/lib/hyperion/server/route_table.rb +179 -0
- data/lib/hyperion/server.rb +519 -55
- data/lib/hyperion/static_preload.rb +133 -0
- data/lib/hyperion/thread_pool.rb +61 -7
- data/lib/hyperion/tls.rb +343 -1
- data/lib/hyperion/version.rb +1 -1
- data/lib/hyperion/websocket/close_codes.rb +71 -0
- data/lib/hyperion/websocket/connection.rb +876 -0
- data/lib/hyperion/websocket/frame.rb +356 -0
- data/lib/hyperion/websocket/handshake.rb +525 -0
- data/lib/hyperion/worker.rb +111 -9
- data/lib/hyperion.rb +137 -3
- metadata +50 -1
|
@@ -4,9 +4,22 @@ require 'time'
|
|
|
4
4
|
|
|
5
5
|
module Hyperion
|
|
6
6
|
# Serializes a Rack [status, headers, body] tuple to an HTTP/1.1 wire stream.
|
|
7
|
-
# Phase 5
|
|
8
|
-
# sibling Http2ResponseWriter. Public surface (#write) stays
|
|
7
|
+
# Phase 5 adds a chunked-streaming path with per-connection write coalescing;
|
|
8
|
+
# Phase 7 adds a sibling Http2ResponseWriter. Public surface (#write) stays
|
|
9
|
+
# stable.
|
|
9
10
|
class ResponseWriter
|
|
11
|
+
# Phase 5 — chunked-write coalescing tunables. Chunks smaller than the
|
|
12
|
+
# threshold accumulate in a per-response buffer; the buffer flushes on
|
|
13
|
+
# any of (a) >= COALESCE_FLUSH_BYTES filled, (b) the writer-fiber tick
|
|
14
|
+
# of COALESCE_TICK_SECONDS elapsed since the last buffer drain, or
|
|
15
|
+
# (c) end-of-body / explicit body.flush. Picked to keep added latency
|
|
16
|
+
# under 1 ms while still cutting syscall count 3-5× on SSE / streaming
|
|
17
|
+
# JSON / log-tail workloads where per-event payloads are ~50 B.
|
|
18
|
+
COALESCE_SMALL_CHUNK_BYTES = 512
|
|
19
|
+
COALESCE_FLUSH_BYTES = 4096
|
|
20
|
+
COALESCE_TICK_SECONDS = 0.001
|
|
21
|
+
CHUNKED_TERMINATOR = "0\r\n\r\n"
|
|
22
|
+
|
|
10
23
|
REASONS = {
|
|
11
24
|
200 => 'OK',
|
|
12
25
|
201 => 'Created',
|
|
@@ -35,15 +48,54 @@ module Hyperion
|
|
|
35
48
|
|
|
36
49
|
CRLF_HEADER_VALUE = /[\r\n]/
|
|
37
50
|
|
|
38
|
-
|
|
51
|
+
# 2.10-C — class-level memoised probe for the C-side page cache.
|
|
52
|
+
# The C ext registers `Hyperion::Http::PageCache.write_to` at
|
|
53
|
+
# `Init_hyperion_page_cache` time (parser.c calls it after
|
|
54
|
+
# `Init_hyperion_sendfile`). We probe once per ResponseWriter
|
|
55
|
+
# class load and cache the bool — keeps the per-request branch a
|
|
56
|
+
# single ivar read. Operators can flip this off at runtime with
|
|
57
|
+
# `Hyperion::ResponseWriter.page_cache_available = false` for A/B
|
|
58
|
+
# rollback (handy during the 2.10 bake).
|
|
59
|
+
class << self
|
|
60
|
+
attr_accessor :page_cache_available
|
|
61
|
+
|
|
62
|
+
def page_cache_available?
|
|
63
|
+
@page_cache_available
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
self.page_cache_available =
|
|
67
|
+
defined?(::Hyperion::Http::PageCache) &&
|
|
68
|
+
::Hyperion::Http::PageCache.respond_to?(:write_to)
|
|
69
|
+
|
|
70
|
+
# 2.6-C — `dispatch_mode:` is the per-response opt-in dispatch shape
|
|
71
|
+
# (typically `:inline_blocking` for static-file routes auto-detected
|
|
72
|
+
# by `Adapter::Rack#call`, or `nil` for the default fiber-yielding
|
|
73
|
+
# path). Only the sendfile branch consumes it today; the chunked
|
|
74
|
+
# and buffered branches ignore it (no fiber-yield in their hot
|
|
75
|
+
# loop to begin with). Forward-compatible — future per-response
|
|
76
|
+
# dispatch shapes plug in here without changing the call-site
|
|
77
|
+
# arity for non-sendfile branches.
|
|
78
|
+
def write(io, status, headers, body, keep_alive: false, dispatch_mode: nil)
|
|
39
79
|
# Zero-copy fast path: bodies that point at an on-disk file (Rack::Files,
|
|
40
80
|
# asset servers, signed-download responders) get streamed via
|
|
41
81
|
# IO.copy_stream which delegates to sendfile(2) on Linux for plain TCP
|
|
42
82
|
# sockets — bytes go from the file's page cache straight to the socket
|
|
43
83
|
# buffer with no userspace allocation. For TLS sockets we still avoid the
|
|
44
84
|
# multi-MB String build, but encryption forces a userspace round-trip so
|
|
45
|
-
# we count that path separately.
|
|
46
|
-
|
|
85
|
+
# we count that path separately. Phase 5 leaves this branch untouched —
|
|
86
|
+
# sendfile bypasses the chunked coalescer entirely (the file IS the body
|
|
87
|
+
# buffer, no userspace chunks to coalesce).
|
|
88
|
+
if body.respond_to?(:to_path)
|
|
89
|
+
return write_sendfile(io, status, headers, body, keep_alive: keep_alive,
|
|
90
|
+
dispatch_mode: dispatch_mode)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
# Phase 5 — opt-in chunked streaming path. The app sets
|
|
94
|
+
# `Transfer-Encoding: chunked` to signal "this body is a stream; do not
|
|
95
|
+
# buffer". We then iterate `body.each` and emit each chunk in chunked
|
|
96
|
+
# framing (size-line + payload + CRLF), coalescing chunks <512 B in a
|
|
97
|
+
# per-response buffer to cut syscall count on SSE / streaming JSON.
|
|
98
|
+
return write_chunked(io, status, headers, body, keep_alive: keep_alive) if chunked_transfer?(headers)
|
|
47
99
|
|
|
48
100
|
write_buffered(io, status, headers, body, keep_alive: keep_alive)
|
|
49
101
|
end
|
|
@@ -54,8 +106,24 @@ module Hyperion
|
|
|
54
106
|
# Phase 1 buffers the full body so Content-Length is exact.
|
|
55
107
|
# Phase 2 introduces chunked transfer-encoding for streaming bodies;
|
|
56
108
|
# Phase 5 batches via IO::Buffer to avoid this intermediate String.
|
|
57
|
-
|
|
58
|
-
|
|
109
|
+
#
|
|
110
|
+
# Phase 11 — single-element-Array fast path. The overwhelmingly
|
|
111
|
+
# common Rack body shape is `[body_string]` (Rails ActionController,
|
|
112
|
+
# Sinatra, Grape, hand-rolled lambdas). For that shape we skip the
|
|
113
|
+
# `+''` accumulator entirely and treat body[0] as the buffered
|
|
114
|
+
# bytes directly. Multi-chunk bodies and Enumerator-style bodies
|
|
115
|
+
# still take the original loop. Saves one String allocation per
|
|
116
|
+
# response on the hot path; saves the per-chunk `<<` overhead too.
|
|
117
|
+
buffered = nil
|
|
118
|
+
if body.is_a?(Array) && body.length == 1
|
|
119
|
+
chunk = body[0]
|
|
120
|
+
buffered = chunk if chunk.is_a?(String)
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
if buffered.nil?
|
|
124
|
+
buffered = +''
|
|
125
|
+
body.each { |chunk| buffered << chunk }
|
|
126
|
+
end
|
|
59
127
|
|
|
60
128
|
reason = REASONS[status] || 'Unknown'
|
|
61
129
|
date_str = cached_date
|
|
@@ -82,8 +150,74 @@ module Hyperion
|
|
|
82
150
|
body.close if body.respond_to?(:close)
|
|
83
151
|
end
|
|
84
152
|
|
|
85
|
-
|
|
153
|
+
# 2.0.1 Phase 8 — coalesce head + body into ONE write for small
|
|
154
|
+
# static files. With Nagle on (kernel default) and TCP_NODELAY off,
|
|
155
|
+
# `io.write(head)` followed by a separate `write(body)` for an 8 KB
|
|
156
|
+
# asset stalled ~40 ms per response on the client's delayed-ACK
|
|
157
|
+
# waiting for the next packet to fill an MSS — capping the static
|
|
158
|
+
# 8 KB row at 121 r/s vs Puma 1,246. By concatenating head + body
|
|
159
|
+
# into a single read+write under the threshold (= Sendfile small-
|
|
160
|
+
# file fast path), the response goes out as one TCP segment train
|
|
161
|
+
# and the client ACKs immediately. No setsockopt churn required.
|
|
162
|
+
SENDFILE_COALESCE_THRESHOLD = 64 * 1024
|
|
163
|
+
|
|
164
|
+
def write_sendfile(io, status, headers, body, keep_alive:, dispatch_mode: nil)
|
|
165
|
+
# 2.6-D — when `:inline_blocking` is engaged, wrap the entire
|
|
166
|
+
# write path in `Fiber.blocking { ... }` so the calling fiber's
|
|
167
|
+
# `Fiber.current.blocking?` flag flips to true for the duration
|
|
168
|
+
# of the response. Without this wrap, `IO.select` and `io.write`
|
|
169
|
+
# inside the helpers below silently route through the Async
|
|
170
|
+
# fiber scheduler under `--async-io` — that was the 2.6-C
|
|
171
|
+
# engagement gap (resolver set `:inline_blocking`, writer
|
|
172
|
+
# plumbed it, but every blocking IO call still yielded the
|
|
173
|
+
# fiber). With the wrap, the OS thread parks on the kernel
|
|
174
|
+
# write under the GVL — the whole point of the dispatch mode.
|
|
175
|
+
#
|
|
176
|
+
# `Fiber.blocking` is a no-op when no scheduler is current
|
|
177
|
+
# (default threadpool / inline_h1_no_pool / no-async paths) so
|
|
178
|
+
# the perf cost is one method-dispatch when this branch is
|
|
179
|
+
# never the hot path.
|
|
180
|
+
if dispatch_mode == :inline_blocking
|
|
181
|
+
return Fiber.blocking do
|
|
182
|
+
write_sendfile_inner(io, status, headers, body, keep_alive: keep_alive,
|
|
183
|
+
dispatch_mode: dispatch_mode)
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
write_sendfile_inner(io, status, headers, body, keep_alive: keep_alive,
|
|
188
|
+
dispatch_mode: dispatch_mode)
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
def write_sendfile_inner(io, status, headers, body, keep_alive:, dispatch_mode: nil)
|
|
86
192
|
path = body.to_path
|
|
193
|
+
|
|
194
|
+
# 2.10-C — pre-built static-response cache fast path. When the
|
|
195
|
+
# page cache holds an entry for this path AND the response is a
|
|
196
|
+
# plain 200 with no app-supplied headers we can't bake into the
|
|
197
|
+
# cache (Set-Cookie, ETag, custom Cache-Control), bypass the
|
|
198
|
+
# entire file-open / head-build / write loop and issue ONE write
|
|
199
|
+
# syscall with the pre-built buffer. Operators get this
|
|
200
|
+
# automatically on Rack::Files routes; bigger files (>
|
|
201
|
+
# AUTO_THRESHOLD = 64 KiB) keep the existing sendfile path
|
|
202
|
+
# because Hyperion already dominates big-static at 9× Agoo
|
|
203
|
+
# (per the 2.10-B baseline).
|
|
204
|
+
#
|
|
205
|
+
# Wire-output note: the cached buffer carries status +
|
|
206
|
+
# Content-Type + Content-Length only (no Date, no Connection)
|
|
207
|
+
# — same shape Agoo emits. This is a deliberate wire-output
|
|
208
|
+
# change FOR CACHED RESPONSES ONLY. Non-cached paths fall
|
|
209
|
+
# through to `build_head` below and still emit the full header
|
|
210
|
+
# set. Apps needing Date/Connection on every response can opt
|
|
211
|
+
# out by setting `env['hyperion.streaming'] = true`, which
|
|
212
|
+
# skips the auto-detect that landed dispatch_mode here in the
|
|
213
|
+
# first place.
|
|
214
|
+
cached_bytes = page_cache_write(io, path, headers)
|
|
215
|
+
if cached_bytes
|
|
216
|
+
Hyperion.metrics.increment(:bytes_written, cached_bytes)
|
|
217
|
+
body.close if body.respond_to?(:close)
|
|
218
|
+
return
|
|
219
|
+
end
|
|
220
|
+
|
|
87
221
|
file = File.open(path, 'rb')
|
|
88
222
|
file_size = file.size
|
|
89
223
|
|
|
@@ -96,16 +230,53 @@ module Hyperion
|
|
|
96
230
|
date_str = cached_date
|
|
97
231
|
head = build_head(status, reason, headers, content_length, keep_alive, date_str)
|
|
98
232
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
#
|
|
102
|
-
#
|
|
103
|
-
#
|
|
104
|
-
#
|
|
105
|
-
|
|
233
|
+
head_bytes = head.bytesize
|
|
234
|
+
|
|
235
|
+
# Phase 8 small-file coalescing. For files <= 64 KiB, read the
|
|
236
|
+
# body bytes inline and emit head + body as one write. This
|
|
237
|
+
# bypasses the Nagle delayed-ACK stall completely (one TCP
|
|
238
|
+
# segment train carries everything; client ACKs the whole
|
|
239
|
+
# response, no second write parked waiting for an ACK on the
|
|
240
|
+
# first). Bonus: skips the syscall round-trip into copy_small.
|
|
241
|
+
copied =
|
|
242
|
+
if file_size.positive? && file_size <= SENDFILE_COALESCE_THRESHOLD
|
|
243
|
+
body_bytes = file.read(file_size)
|
|
244
|
+
head << body_bytes if body_bytes
|
|
245
|
+
io.write(head)
|
|
246
|
+
file_size
|
|
247
|
+
else
|
|
248
|
+
# Streaming path for larger files. 1.7.0 Phase 1 —
|
|
249
|
+
# Hyperion::Http::Sendfile picks the best kernel route:
|
|
250
|
+
# * Linux + plain TCPSocket → native sendfile(2) (true
|
|
251
|
+
# zero-copy, page cache → socket buffer, no userspace
|
|
252
|
+
# intermediate).
|
|
253
|
+
# * Darwin / *BSD + plain TCPSocket → BSD sendfile(2).
|
|
254
|
+
# * TLS-wrapped sockets → 64 KiB IO.copy_stream loop
|
|
255
|
+
# (kernel can't encrypt for us; we still bypass the
|
|
256
|
+
# per-chunk fiber-hop).
|
|
257
|
+
# * Hosts where the C ext didn't compile → IO.copy_stream
|
|
258
|
+
# fallback.
|
|
259
|
+
#
|
|
260
|
+
# 2.6-C — when `dispatch_mode == :inline_blocking` the loop
|
|
261
|
+
# uses `IO.select` + GVL-blocking sendfile instead of
|
|
262
|
+
# fiber-yielding `wait_writable`. Auto-detected by
|
|
263
|
+
# `Adapter::Rack#call` for `to_path` bodies that don't carry
|
|
264
|
+
# a streaming marker; opt-in via
|
|
265
|
+
# `env['hyperion.dispatch_mode'] = :inline_blocking` for
|
|
266
|
+
# routes the auto-detect doesn't catch. Default `nil` /
|
|
267
|
+
# any other symbol stays on the fiber-yielding path so
|
|
268
|
+
# existing callers (TLS h1 / async-io / threadpool dispatch)
|
|
269
|
+
# are unaffected.
|
|
270
|
+
io.write(head)
|
|
271
|
+
if dispatch_mode == :inline_blocking
|
|
272
|
+
::Hyperion::Http::Sendfile.copy_to_socket_blocking(io, file, 0, file_size)
|
|
273
|
+
else
|
|
274
|
+
::Hyperion::Http::Sendfile.copy_to_socket(io, file, 0, file_size)
|
|
275
|
+
end
|
|
276
|
+
end
|
|
106
277
|
|
|
107
278
|
record_zero_copy_metric(io)
|
|
108
|
-
Hyperion.metrics.increment(:bytes_written,
|
|
279
|
+
Hyperion.metrics.increment(:bytes_written, head_bytes + copied)
|
|
109
280
|
ensure
|
|
110
281
|
file&.close
|
|
111
282
|
body.close if body.respond_to?(:close)
|
|
@@ -118,6 +289,267 @@ module Hyperion
|
|
|
118
289
|
nil
|
|
119
290
|
end
|
|
120
291
|
|
|
292
|
+
# 2.10-C — page-cache engage helper. Returns the bytes written
|
|
293
|
+
# on a cache hit (or after an opportunistic populate-then-write),
|
|
294
|
+
# or `nil` to signal "fall through to the existing sendfile
|
|
295
|
+
# path". Three short-circuits keep the hot path branchless on
|
|
296
|
+
# the common cases:
|
|
297
|
+
#
|
|
298
|
+
# 1. Status must be 200. Caches don't store negotiated 304s
|
|
299
|
+
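# / 206 Range / 416 Range-not-satisfiable / 404s.
# NOTE(review): this helper takes no `status` argument, so the 200-only rule is not enforced here — confirm the caller gates on it.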
|
|
300
|
+
# 2. The response carries no header that has to be re-emitted
|
|
301
|
+
# per request (Set-Cookie / Cache-Control: max-age=N /
|
|
302
|
+
# ETag / Content-Encoding / Content-Disposition / Vary). We pre-bake Content-Type +
|
|
303
|
+
# Content-Length into the cache buffer; any header listed above
|
|
304
|
+
# forces the full path.
|
|
305
|
+
# 3. The C primitive must be loaded. Falsy on JRuby /
|
|
306
|
+
# TruffleRuby / hosts where the C ext didn't compile.
|
|
307
|
+
#
|
|
308
|
+
# On a cache miss, opportunistically populate the cache
|
|
309
|
+
# *if* the file is below `AUTO_THRESHOLD` (64 KiB); larger files
|
|
310
|
+
# keep the existing sendfile path because Hyperion already
|
|
311
|
+
# dominates big-static at 9× Agoo.
|
|
312
|
+
def page_cache_write(io, path, headers)
|
|
313
|
+
return nil unless self.class.page_cache_available?
|
|
314
|
+
# The C primitive writes via the OS-level fd, so StringIO /
|
|
315
|
+
# OpenSSL::SSL::SSLSocket / any IO-like that doesn't expose a
|
|
316
|
+
# real kernel fd has to fall through to the existing path.
|
|
317
|
+
# Probe up front to avoid the C primitive raising on an
|
|
318
|
+
# extracted-fd attempt.
|
|
319
|
+
return nil unless real_fd_io?(io)
|
|
320
|
+
|
|
321
|
+
# Fast skip for any response carrying a header that's per-
|
|
322
|
+
# request or security-sensitive enough that the cache can't
|
|
323
|
+
# safely bake it. We deliberately keep this list tight so the
|
|
324
|
+
# common Rack::Files case (which always emits `last-modified`
|
|
325
|
+
# + `content-type`) lands on the cache path; the cache
|
|
326
|
+
# buffer's `Content-Type` header is derived from the file
|
|
327
|
+
# extension via the same mime map agoo uses and `last-modified`
|
|
328
|
+
# is dropped (the client either revalidates via `If-None-Match`
|
|
329
|
+
# — not supported by the bare cache — or trusts
|
|
330
|
+
# `Cache-Control: max-age` when set, which IS in the skip list
|
|
331
|
+
# below). Operators wanting wire-byte parity opt out per route
|
|
332
|
+
# via `env['hyperion.streaming'] = true`.
|
|
333
|
+
headers.each do |k, _v|
|
|
334
|
+
case k.to_s.downcase
|
|
335
|
+
when 'set-cookie', 'cache-control', 'etag',
|
|
336
|
+
'content-encoding', 'content-disposition', 'vary'
|
|
337
|
+
return nil
|
|
338
|
+
end
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
result = ::Hyperion::Http::PageCache.write_to(io, path)
|
|
342
|
+
return result if result.is_a?(Integer)
|
|
343
|
+
|
|
344
|
+
# Cache miss. Populate-then-write *iff* the file is small
|
|
345
|
+
# enough that the page cache wins (big files keep the existing
|
|
346
|
+
# sendfile path).
|
|
347
|
+
begin
|
|
348
|
+
size = File.size?(path)
|
|
349
|
+
rescue StandardError
|
|
350
|
+
size = nil
|
|
351
|
+
end
|
|
352
|
+
return nil if size.nil?
|
|
353
|
+
return nil if size > ::Hyperion::Http::PageCache::AUTO_THRESHOLD
|
|
354
|
+
|
|
355
|
+
cache_result = ::Hyperion::Http::PageCache.cache_file(path)
|
|
356
|
+
return nil if cache_result == :missing
|
|
357
|
+
|
|
358
|
+
result = ::Hyperion::Http::PageCache.write_to(io, path)
|
|
359
|
+
result.is_a?(Integer) ? result : nil
|
|
360
|
+
end
|
|
361
|
+
|
|
362
|
+
# 2.10-C — the page cache writes through a real kernel fd, so the
|
|
363
|
+
# IO-side argument has to be a TCPSocket / UNIXSocket / fileno-able
|
|
364
|
+
# File. StringIO / OpenSSL::SSL::SSLSocket / pipe-wrapped Ractors
|
|
365
|
+
# don't expose a usable fd; those callers fall through to the
|
|
366
|
+
# existing sendfile path. Plain Integer fds are accepted directly.
|
|
367
|
+
def real_fd_io?(io)
|
|
368
|
+
return true if io.is_a?(Integer)
|
|
369
|
+
return false unless io.respond_to?(:fileno)
|
|
370
|
+
return false if defined?(StringIO) && io.is_a?(StringIO)
|
|
371
|
+
return false if defined?(::OpenSSL::SSL::SSLSocket) && io.is_a?(::OpenSSL::SSL::SSLSocket)
|
|
372
|
+
|
|
373
|
+
true
|
|
374
|
+
end
|
|
375
|
+
|
|
376
|
+
# True when the app explicitly opted into chunked transfer-encoding.
|
|
377
|
+
# We only stream when asked — for the common "buffer the whole thing
|
|
378
|
+
# and emit one Content-Length response" case, the existing single-write
|
|
379
|
+
# path is still optimal (one syscall, no chunked-framing overhead).
|
|
380
|
+
def chunked_transfer?(headers)
|
|
381
|
+
headers.each do |k, v|
|
|
382
|
+
next unless k.to_s.casecmp('transfer-encoding').zero?
|
|
383
|
+
|
|
384
|
+
return v.to_s.downcase.include?('chunked')
|
|
385
|
+
end
|
|
386
|
+
false
|
|
387
|
+
end
|
|
388
|
+
|
|
389
|
+
# Phase 5 — streaming chunked writer with per-response coalescing.
|
|
390
|
+
#
|
|
391
|
+
# Wire format per RFC 7230 §4.1:
|
|
392
|
+
# <hex-size>\r\n<payload>\r\n for each chunk
|
|
393
|
+
# 0\r\n\r\n terminator
|
|
394
|
+
#
|
|
395
|
+
# Coalescing rules:
|
|
396
|
+
# * Chunks < COALESCE_SMALL_CHUNK_BYTES (512) accumulate in a per-
|
|
397
|
+
# response buffer rather than triggering an immediate syscall.
|
|
398
|
+
# * The buffer drains as soon as it reaches COALESCE_FLUSH_BYTES (4096)
|
|
399
|
+
# or a 1 ms writer-fiber tick elapses (best-effort; only meaningful
|
|
400
|
+
# under Async).
|
|
401
|
+
# * Chunks >= COALESCE_SMALL_CHUNK_BYTES drain the buffer first (to
|
|
402
|
+
# preserve order on the wire) then emit the large chunk directly.
|
|
403
|
+
# * If the body yields the :__hyperion_flush__ sentinel from #each, the
|
|
404
|
+
# buffer drains immediately — SSE servers use this to push events
|
|
405
|
+
# past per-event coalescing latency.
|
|
406
|
+
# * body.close (or end-of-each) drains the buffer and appends the
|
|
407
|
+
# 0\r\n\r\n terminator in a single syscall (atomic w.r.t. the wire).
|
|
408
|
+
def write_chunked(io, status, headers, body, keep_alive:)
|
|
409
|
+
reason = REASONS[status] || 'Unknown'
|
|
410
|
+
date_str = cached_date
|
|
411
|
+
head = build_head_chunked(status, reason, headers, keep_alive, date_str)
|
|
412
|
+
|
|
413
|
+
io.write(head)
|
|
414
|
+
bytes_out = head.bytesize
|
|
415
|
+
|
|
416
|
+
coalescer = ChunkedCoalescer.new(io)
|
|
417
|
+
body.each do |chunk|
|
|
418
|
+
next if chunk.nil?
|
|
419
|
+
|
|
420
|
+
if chunk.equal?(:__hyperion_flush__) || chunk == :__hyperion_flush__
|
|
421
|
+
coalescer.force_flush!
|
|
422
|
+
next
|
|
423
|
+
end
|
|
424
|
+
|
|
425
|
+
bytes = chunk.to_s
|
|
426
|
+
next if bytes.empty?
|
|
427
|
+
|
|
428
|
+
coalescer.write_chunk(bytes)
|
|
429
|
+
end
|
|
430
|
+
|
|
431
|
+
coalescer.flush_and_terminate!
|
|
432
|
+
bytes_out += coalescer.bytes_written
|
|
433
|
+
Hyperion.metrics.increment(:bytes_written, bytes_out)
|
|
434
|
+
Hyperion.metrics.increment(:chunked_responses)
|
|
435
|
+
Hyperion.metrics.increment(:chunked_coalesced_writes, coalescer.coalesced_write_count)
|
|
436
|
+
Hyperion.metrics.increment(:chunked_total_writes, coalescer.total_write_count)
|
|
437
|
+
ensure
|
|
438
|
+
body.close if body.respond_to?(:close)
|
|
439
|
+
end
|
|
440
|
+
|
|
441
|
+
# Per-response coalescing buffer. Holds <512 B chunks until either
|
|
442
|
+
# the 4 KiB threshold is hit, the 1 ms writer-fiber tick elapses, or
|
|
443
|
+
# an explicit flush / end-of-body fires. One instance per response;
|
|
444
|
+
# not shared across the connection (state lifecycle = response
|
|
445
|
+
# lifecycle, matches the Stepable-style "per-call object" pattern).
|
|
446
|
+
class ChunkedCoalescer
|
|
447
|
+
attr_reader :bytes_written, :coalesced_write_count, :total_write_count
|
|
448
|
+
|
|
449
|
+
def initialize(io)
|
|
450
|
+
@io = io
|
|
451
|
+
@buffer = String.new(capacity: ResponseWriter::COALESCE_FLUSH_BYTES,
|
|
452
|
+
encoding: Encoding::ASCII_8BIT)
|
|
453
|
+
@bytes_written = 0
|
|
454
|
+
@total_write_count = 0
|
|
455
|
+
@coalesced_write_count = 0
|
|
456
|
+
@last_drain_at = monotonic_now
|
|
457
|
+
end
|
|
458
|
+
|
|
459
|
+
# Append a chunk into the wire stream. Small chunks coalesce into the
|
|
460
|
+
# buffer; large chunks drain the buffer first then write directly.
|
|
461
|
+
# Returns the number of body-bytes consumed (used by metrics).
|
|
462
|
+
def write_chunk(payload)
|
|
463
|
+
framed = frame_chunk(payload)
|
|
464
|
+
if payload.bytesize < ResponseWriter::COALESCE_SMALL_CHUNK_BYTES
|
|
465
|
+
append_to_buffer(framed)
|
|
466
|
+
maybe_tick_flush
|
|
467
|
+
else
|
|
468
|
+
# Big chunk: drain anything we've accumulated first so that
|
|
469
|
+
# bytes hit the wire in body-yield order, then write the big
|
|
470
|
+
# chunk in its own syscall (no point coalescing — it's already
|
|
471
|
+
# past the threshold).
|
|
472
|
+
drain_buffer!
|
|
473
|
+
do_write(framed)
|
|
474
|
+
end
|
|
475
|
+
payload.bytesize
|
|
476
|
+
end
|
|
477
|
+
|
|
478
|
+
# External flush (body responded to flush, or yielded the flush
|
|
479
|
+
# sentinel). Drains the buffer; safe to call when the buffer is empty.
|
|
480
|
+
def force_flush!
|
|
481
|
+
drain_buffer!
|
|
482
|
+
end
|
|
483
|
+
|
|
484
|
+
# End-of-body. Drain any buffered bytes AND emit the chunked terminator
|
|
485
|
+
# in a single syscall — this preserves the "terminator follows the last
|
|
486
|
+
# chunk atomically" invariant on the wire (otherwise a peer could see
|
|
487
|
+
# a half-flushed response if the writer fiber were preempted between
|
|
488
|
+
# our flush + terminator writes).
|
|
489
|
+
def flush_and_terminate!
|
|
490
|
+
if @buffer.empty?
|
|
491
|
+
do_write(ResponseWriter::CHUNKED_TERMINATOR)
|
|
492
|
+
else
|
|
493
|
+
@buffer << ResponseWriter::CHUNKED_TERMINATOR
|
|
494
|
+
drain_buffer!
|
|
495
|
+
end
|
|
496
|
+
end
|
|
497
|
+
|
|
498
|
+
private
|
|
499
|
+
|
|
500
|
+
# Hex-size + CRLF + payload + CRLF (RFC 7230 §4.1). The size field is
|
|
501
|
+
# lowercased hex without a 0x prefix; bytesize is correct on
|
|
502
|
+
# ASCII-8BIT-encoded inputs (which is what comes off the socket / Rack).
|
|
503
|
+
def frame_chunk(payload)
|
|
504
|
+
size_line = payload.bytesize.to_s(16)
|
|
505
|
+
framed = String.new(capacity: size_line.bytesize + payload.bytesize + 4,
|
|
506
|
+
encoding: Encoding::ASCII_8BIT)
|
|
507
|
+
framed << size_line << "\r\n" << payload.b << "\r\n"
|
|
508
|
+
framed
|
|
509
|
+
end
|
|
510
|
+
|
|
511
|
+
def append_to_buffer(framed)
|
|
512
|
+
@buffer << framed
|
|
513
|
+
return unless @buffer.bytesize >= ResponseWriter::COALESCE_FLUSH_BYTES
|
|
514
|
+
|
|
515
|
+
drain_buffer!
|
|
516
|
+
end
|
|
517
|
+
|
|
518
|
+
# Best-effort 1 ms tick. We don't spawn a real timer fiber per
|
|
519
|
+
# response — that would cost more than the syscall savings on a
|
|
520
|
+
# short-lived coalescer. Instead we check the monotonic clock on each
|
|
521
|
+
# chunk arrival; if the buffer has been sitting for >= 1 ms we
|
|
522
|
+
# drain it. Under Async, the per-fiber kernel_sleep round-trip
|
|
523
|
+
# between body.each chunks gives us a natural tick on the slow
|
|
524
|
+
# cadence path. End-of-body always flushes regardless.
|
|
525
|
+
def maybe_tick_flush
|
|
526
|
+
return if @buffer.empty?
|
|
527
|
+
return if (monotonic_now - @last_drain_at) < ResponseWriter::COALESCE_TICK_SECONDS
|
|
528
|
+
|
|
529
|
+
drain_buffer!
|
|
530
|
+
end
|
|
531
|
+
|
|
532
|
+
def drain_buffer!
|
|
533
|
+
return if @buffer.empty?
|
|
534
|
+
|
|
535
|
+
do_write(@buffer)
|
|
536
|
+
@coalesced_write_count += 1
|
|
537
|
+
@buffer = String.new(capacity: ResponseWriter::COALESCE_FLUSH_BYTES,
|
|
538
|
+
encoding: Encoding::ASCII_8BIT)
|
|
539
|
+
@last_drain_at = monotonic_now
|
|
540
|
+
end
|
|
541
|
+
|
|
542
|
+
def do_write(bytes)
|
|
543
|
+
@io.write(bytes)
|
|
544
|
+
@bytes_written += bytes.bytesize
|
|
545
|
+
@total_write_count += 1
|
|
546
|
+
end
|
|
547
|
+
|
|
548
|
+
def monotonic_now
|
|
549
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
550
|
+
end
|
|
551
|
+
end
|
|
552
|
+
|
|
121
553
|
# Plain TCPSocket → real sendfile(2). TLS-wrapped sockets cannot use
|
|
122
554
|
# sendfile (kernel can't encrypt) but still avoid the per-response String
|
|
123
555
|
# allocation, so we track them under a separate counter.
|
|
@@ -180,5 +612,34 @@ module Hyperion
|
|
|
180
612
|
buf << "\r\n"
|
|
181
613
|
buf
|
|
182
614
|
end
|
|
615
|
+
|
|
616
|
+
# Phase 5 — chunked-transfer-encoding head. Mirrors build_head_ruby but
|
|
617
|
+
# emits `transfer-encoding: chunked` instead of `content-length` (the
|
|
618
|
+
# two are mutually exclusive per RFC 7230 §3.3.3). Always Ruby (no C
|
|
619
|
+
# builder yet — this is a low-volume opt-in path; the C builder
|
|
620
|
+
# currently always emits content-length).
|
|
621
|
+
def build_head_chunked(status, reason, headers, keep_alive, date_str)
|
|
622
|
+
normalized = {}
|
|
623
|
+
headers.each do |k, v|
|
|
624
|
+
key = k.to_s.downcase
|
|
625
|
+
next if key == 'content-length' # Mutually exclusive with chunked.
|
|
626
|
+
next if key == 'transfer-encoding' # We re-emit ourselves below.
|
|
627
|
+
|
|
628
|
+
normalized[key] = v
|
|
629
|
+
end
|
|
630
|
+
normalized['transfer-encoding'] = 'chunked'
|
|
631
|
+
normalized['connection'] = keep_alive ? 'keep-alive' : 'close'
|
|
632
|
+
normalized['date'] ||= date_str
|
|
633
|
+
|
|
634
|
+
buf = +"HTTP/1.1 #{status} #{reason}\r\n"
|
|
635
|
+
normalized.each do |k, v|
|
|
636
|
+
value = v.to_s
|
|
637
|
+
raise ArgumentError, "header #{k.inspect} contains CR/LF" if value.match?(CRLF_HEADER_VALUE)
|
|
638
|
+
|
|
639
|
+
buf << k << ': ' << value << "\r\n"
|
|
640
|
+
end
|
|
641
|
+
buf << "\r\n"
|
|
642
|
+
buf
|
|
643
|
+
end
|
|
183
644
|
end
|
|
184
645
|
end
|