hyperion-rb 1.6.2 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4768 -0
  3. data/README.md +222 -13
  4. data/ext/hyperion_h2_codec/Cargo.lock +7 -0
  5. data/ext/hyperion_h2_codec/Cargo.toml +33 -0
  6. data/ext/hyperion_h2_codec/extconf.rb +73 -0
  7. data/ext/hyperion_h2_codec/src/frames.rs +140 -0
  8. data/ext/hyperion_h2_codec/src/hpack/huffman.rs +161 -0
  9. data/ext/hyperion_h2_codec/src/hpack.rs +457 -0
  10. data/ext/hyperion_h2_codec/src/lib.rs +296 -0
  11. data/ext/hyperion_http/extconf.rb +28 -0
  12. data/ext/hyperion_http/h2_codec_glue.c +408 -0
  13. data/ext/hyperion_http/page_cache.c +1125 -0
  14. data/ext/hyperion_http/parser.c +473 -38
  15. data/ext/hyperion_http/sendfile.c +982 -0
  16. data/ext/hyperion_http/websocket.c +493 -0
  17. data/ext/hyperion_io_uring/Cargo.lock +33 -0
  18. data/ext/hyperion_io_uring/Cargo.toml +34 -0
  19. data/ext/hyperion_io_uring/extconf.rb +74 -0
  20. data/ext/hyperion_io_uring/src/lib.rs +316 -0
  21. data/lib/hyperion/adapter/rack.rb +370 -42
  22. data/lib/hyperion/admin_listener.rb +207 -0
  23. data/lib/hyperion/admin_middleware.rb +36 -7
  24. data/lib/hyperion/cli.rb +310 -11
  25. data/lib/hyperion/config.rb +440 -14
  26. data/lib/hyperion/connection.rb +679 -22
  27. data/lib/hyperion/deprecations.rb +81 -0
  28. data/lib/hyperion/dispatch_mode.rb +165 -0
  29. data/lib/hyperion/fiber_local.rb +75 -13
  30. data/lib/hyperion/h2_admission.rb +77 -0
  31. data/lib/hyperion/h2_codec.rb +499 -0
  32. data/lib/hyperion/http/page_cache.rb +122 -0
  33. data/lib/hyperion/http/sendfile.rb +696 -0
  34. data/lib/hyperion/http2/native_hpack_adapter.rb +70 -0
  35. data/lib/hyperion/http2_handler.rb +618 -19
  36. data/lib/hyperion/io_uring.rb +317 -0
  37. data/lib/hyperion/lint_wrapper_pool.rb +126 -0
  38. data/lib/hyperion/master.rb +96 -9
  39. data/lib/hyperion/metrics/path_templater.rb +68 -0
  40. data/lib/hyperion/metrics.rb +256 -0
  41. data/lib/hyperion/prometheus_exporter.rb +150 -0
  42. data/lib/hyperion/request.rb +13 -0
  43. data/lib/hyperion/response_writer.rb +477 -16
  44. data/lib/hyperion/runtime.rb +195 -0
  45. data/lib/hyperion/server/route_table.rb +179 -0
  46. data/lib/hyperion/server.rb +519 -55
  47. data/lib/hyperion/static_preload.rb +133 -0
  48. data/lib/hyperion/thread_pool.rb +61 -7
  49. data/lib/hyperion/tls.rb +343 -1
  50. data/lib/hyperion/version.rb +1 -1
  51. data/lib/hyperion/websocket/close_codes.rb +71 -0
  52. data/lib/hyperion/websocket/connection.rb +876 -0
  53. data/lib/hyperion/websocket/frame.rb +356 -0
  54. data/lib/hyperion/websocket/handshake.rb +525 -0
  55. data/lib/hyperion/worker.rb +111 -9
  56. data/lib/hyperion.rb +137 -3
  57. metadata +50 -1
@@ -4,9 +4,22 @@ require 'time'
4
4
 
5
5
  module Hyperion
6
6
  # Serializes a Rack [status, headers, body] tuple to an HTTP/1.1 wire stream.
7
- # Phase 5 replaces this with an io_buffer-batched writer; Phase 7 adds a
8
- # sibling Http2ResponseWriter. Public surface (#write) stays stable.
7
+ # Phase 5 adds a chunked-streaming path with per-connection write coalescing;
8
+ # Phase 7 adds a sibling Http2ResponseWriter. Public surface (#write) stays
9
+ # stable.
9
10
  class ResponseWriter
11
+ # Phase 5 — chunked-write coalescing tunables. Chunks smaller than the
12
+ # threshold accumulate in a per-response buffer; the buffer flushes on
13
+ # any of (a) >= COALESCE_FLUSH_BYTES filled, (b) the writer-fiber tick
14
+ # of COALESCE_TICK_SECONDS elapsed since the last buffer drain, or
15
+ # (c) end-of-body / explicit body.flush. Picked to keep added latency
16
+ # under 1 ms while still cutting syscall count 3-5× on SSE / streaming
17
+ # JSON / log-tail workloads where per-event payloads are ~50 B.
18
+ COALESCE_SMALL_CHUNK_BYTES = 512
19
+ COALESCE_FLUSH_BYTES = 4096
20
+ COALESCE_TICK_SECONDS = 0.001
21
+ CHUNKED_TERMINATOR = "0\r\n\r\n"
22
+
10
23
  REASONS = {
11
24
  200 => 'OK',
12
25
  201 => 'Created',
@@ -35,15 +48,54 @@ module Hyperion
35
48
 
36
49
  CRLF_HEADER_VALUE = /[\r\n]/
37
50
 
38
- def write(io, status, headers, body, keep_alive: false)
51
+ # 2.10-C class-level memoised probe for the C-side page cache.
52
+ # The C ext registers `Hyperion::Http::PageCache.write_to` at
53
+ # `Init_hyperion_page_cache` time (parser.c calls it after
54
+ # `Init_hyperion_sendfile`). We probe once per ResponseWriter
55
+ # class load and cache the bool — keeps the per-request branch a
56
+ # single ivar read. Operators can flip this off at runtime with
57
+ # `Hyperion::ResponseWriter.page_cache_available = false` for A/B
58
+ # rollback (handy during the 2.10 bake).
59
+ class << self
60
+ attr_accessor :page_cache_available
61
+
62
+ def page_cache_available?
63
+ @page_cache_available
64
+ end
65
+ end
66
+ self.page_cache_available =
67
+ defined?(::Hyperion::Http::PageCache) &&
68
+ ::Hyperion::Http::PageCache.respond_to?(:write_to)
69
+
70
+ # 2.6-C — `dispatch_mode:` is the per-response opt-in dispatch shape
71
+ # (typically `:inline_blocking` for static-file routes auto-detected
72
+ # by `Adapter::Rack#call`, or `nil` for the default fiber-yielding
73
+ # path). Only the sendfile branch consumes it today; the chunked
74
+ # and buffered branches ignore it (no fiber-yield in their hot
75
+ # loop to begin with). Forward-compatible — future per-response
76
+ # dispatch shapes plug in here without changing the call-site
77
+ # arity for non-sendfile branches.
78
+ def write(io, status, headers, body, keep_alive: false, dispatch_mode: nil)
39
79
  # Zero-copy fast path: bodies that point at an on-disk file (Rack::Files,
40
80
  # asset servers, signed-download responders) get streamed via
41
81
  # IO.copy_stream which delegates to sendfile(2) on Linux for plain TCP
42
82
  # sockets — bytes go from the file's page cache straight to the socket
43
83
  # buffer with no userspace allocation. For TLS sockets we still avoid the
44
84
  # multi-MB String build, but encryption forces a userspace round-trip so
45
- # we count that path separately.
46
- return write_sendfile(io, status, headers, body, keep_alive: keep_alive) if body.respond_to?(:to_path)
85
+ # we count that path separately. Phase 5 leaves this branch untouched —
86
+ # sendfile bypasses the chunked coalescer entirely (the file IS the body
87
+ # buffer, no userspace chunks to coalesce).
88
+ if body.respond_to?(:to_path)
89
+ return write_sendfile(io, status, headers, body, keep_alive: keep_alive,
90
+ dispatch_mode: dispatch_mode)
91
+ end
92
+
93
+ # Phase 5 — opt-in chunked streaming path. The app sets
94
+ # `Transfer-Encoding: chunked` to signal "this body is a stream; do not
95
+ # buffer". We then iterate `body.each` and emit each chunk in chunked
96
+ # framing (size-line + payload + CRLF), coalescing chunks <512 B in a
97
+ # per-response buffer to cut syscall count on SSE / streaming JSON.
98
+ return write_chunked(io, status, headers, body, keep_alive: keep_alive) if chunked_transfer?(headers)
47
99
 
48
100
  write_buffered(io, status, headers, body, keep_alive: keep_alive)
49
101
  end
@@ -54,8 +106,24 @@ module Hyperion
54
106
  # Phase 1 buffers the full body so Content-Length is exact.
55
107
  # Phase 2 introduces chunked transfer-encoding for streaming bodies;
56
108
  # Phase 5 batches via IO::Buffer to avoid this intermediate String.
57
- buffered = +''
58
- body.each { |chunk| buffered << chunk }
109
+ #
110
+ # Phase 11 single-element-Array fast path. The overwhelmingly
111
+ # common Rack body shape is `[body_string]` (Rails ActionController,
112
+ # Sinatra, Grape, hand-rolled lambdas). For that shape we skip the
113
+ # `+''` accumulator entirely and treat body[0] as the buffered
114
+ # bytes directly. Multi-chunk bodies and Enumerator-style bodies
115
+ # still take the original loop. Saves one String allocation per
116
+ # response on the hot path; saves the per-chunk `<<` overhead too.
117
+ buffered = nil
118
+ if body.is_a?(Array) && body.length == 1
119
+ chunk = body[0]
120
+ buffered = chunk if chunk.is_a?(String)
121
+ end
122
+
123
+ if buffered.nil?
124
+ buffered = +''
125
+ body.each { |chunk| buffered << chunk }
126
+ end
59
127
 
60
128
  reason = REASONS[status] || 'Unknown'
61
129
  date_str = cached_date
@@ -82,8 +150,74 @@ module Hyperion
82
150
  body.close if body.respond_to?(:close)
83
151
  end
84
152
 
85
- def write_sendfile(io, status, headers, body, keep_alive:)
153
+ # 2.0.1 Phase 8 — coalesce head + body into ONE write for small
154
+ # static files. With Nagle on (kernel default) and TCP_NODELAY off,
155
+ # `io.write(head)` followed by a separate `write(body)` for an 8 KB
156
+ # asset stalled ~40 ms per response on the client's delayed-ACK
157
+ # waiting for the next packet to fill an MSS — capping the static
158
+ # 8 KB row at 121 r/s vs Puma 1,246. By concatenating head + body
159
+ # into a single read+write under the threshold (= Sendfile small-
160
+ # file fast path), the response goes out as one TCP segment train
161
+ # and the client ACKs immediately. No setsockopt churn required.
162
+ SENDFILE_COALESCE_THRESHOLD = 64 * 1024
163
+
164
+ def write_sendfile(io, status, headers, body, keep_alive:, dispatch_mode: nil)
165
+ # 2.6-D — when `:inline_blocking` is engaged, wrap the entire
166
+ # write path in `Fiber.blocking { ... }` so the calling fiber's
167
+ # `Fiber.current.blocking?` flag flips to true for the duration
168
+ # of the response. Without this wrap, `IO.select` and `io.write`
169
+ # inside the helpers below silently route through the Async
170
+ # fiber scheduler under `--async-io` — that was the 2.6-C
171
+ # engagement gap (resolver set `:inline_blocking`, writer
172
+ # plumbed it, but every blocking IO call still yielded the
173
+ # fiber). With the wrap, the OS thread parks on the kernel
174
+ # write under the GVL — the whole point of the dispatch mode.
175
+ #
176
+ # `Fiber.blocking` is a no-op when no scheduler is current
177
+ # (default threadpool / inline_h1_no_pool / no-async paths) so
178
+ # the perf cost is one method-dispatch when this branch is
179
+ # never the hot path.
180
+ if dispatch_mode == :inline_blocking
181
+ return Fiber.blocking do
182
+ write_sendfile_inner(io, status, headers, body, keep_alive: keep_alive,
183
+ dispatch_mode: dispatch_mode)
184
+ end
185
+ end
186
+
187
+ write_sendfile_inner(io, status, headers, body, keep_alive: keep_alive,
188
+ dispatch_mode: dispatch_mode)
189
+ end
190
+
191
+ def write_sendfile_inner(io, status, headers, body, keep_alive:, dispatch_mode: nil)
86
192
  path = body.to_path
193
+
194
+ # 2.10-C — pre-built static-response cache fast path. When the
195
+ # page cache holds an entry for this path AND the response is a
196
+ # plain 200 with no app-supplied headers we can't bake into the
197
+ # cache (Set-Cookie, ETag, custom Cache-Control), bypass the
198
+ # entire file-open / head-build / write loop and issue ONE write
199
+ # syscall with the pre-built buffer. Operators get this
200
+ # automatically on Rack::Files routes; bigger files (>
201
+ # AUTO_THRESHOLD = 64 KiB) keep the existing sendfile path
202
+ # because Hyperion already dominates big-static at 9× Agoo
203
+ # (per the 2.10-B baseline).
204
+ #
205
+ # Wire-output note: the cached buffer carries status +
206
+ # Content-Type + Content-Length only (no Date, no Connection)
207
+ # — same shape Agoo emits. This is a deliberate wire-output
208
+ # change FOR CACHED RESPONSES ONLY. Non-cached paths fall
209
+ # through to `build_head` below and still emit the full header
210
+ # set. Apps needing Date/Connection on every response can opt
211
+ # out by setting `env['hyperion.streaming'] = true`, which
212
+ # skips the auto-detect that landed dispatch_mode here in the
213
+ # first place.
214
+ cached_bytes = page_cache_write(io, path, headers)
215
+ if cached_bytes
216
+ Hyperion.metrics.increment(:bytes_written, cached_bytes)
217
+ body.close if body.respond_to?(:close)
218
+ return
219
+ end
220
+
87
221
  file = File.open(path, 'rb')
88
222
  file_size = file.size
89
223
 
@@ -96,16 +230,53 @@ module Hyperion
96
230
  date_str = cached_date
97
231
  head = build_head(status, reason, headers, content_length, keep_alive, date_str)
98
232
 
99
- io.write(head)
100
- # IO.copy_stream copies up to file_size bytes from the file to the socket.
101
- # On Linux + plain TCPSocket this triggers sendfile(2) kernel-level
102
- # zero-copy. On TLS sockets and non-Linux platforms it falls back to
103
- # internal read+write loops, but we still avoid building a String the
104
- # size of the file in Ruby.
105
- copied = IO.copy_stream(file, io, file_size)
233
+ head_bytes = head.bytesize
234
+
235
+ # Phase 8 small-file coalescing. For files <= 64 KiB, read the
236
+ # body bytes inline and emit head + body as one write. This
237
+ # bypasses the Nagle delayed-ACK stall completely (one TCP
238
+ # segment train carries everything; client ACKs the whole
239
+ # response, no second write parked waiting for an ACK on the
240
+ # first). Bonus: skips the syscall round-trip into copy_small.
241
+ copied =
242
+ if file_size.positive? && file_size <= SENDFILE_COALESCE_THRESHOLD
243
+ body_bytes = file.read(file_size)
244
+ head << body_bytes if body_bytes
245
+ io.write(head)
246
+ file_size
247
+ else
248
+ # Streaming path for larger files. 1.7.0 Phase 1 —
249
+ # Hyperion::Http::Sendfile picks the best kernel route:
250
+ # * Linux + plain TCPSocket → native sendfile(2) (true
251
+ # zero-copy, page cache → socket buffer, no userspace
252
+ # intermediate).
253
+ # * Darwin / *BSD + plain TCPSocket → BSD sendfile(2).
254
+ # * TLS-wrapped sockets → 64 KiB IO.copy_stream loop
255
+ # (kernel can't encrypt for us; we still bypass the
256
+ # per-chunk fiber-hop).
257
+ # * Hosts where the C ext didn't compile → IO.copy_stream
258
+ # fallback.
259
+ #
260
+ # 2.6-C — when `dispatch_mode == :inline_blocking` the loop
261
+ # uses `IO.select` + GVL-blocking sendfile instead of
262
+ # fiber-yielding `wait_writable`. Auto-detected by
263
+ # `Adapter::Rack#call` for `to_path` bodies that don't carry
264
+ # a streaming marker; opt-in via
265
+ # `env['hyperion.dispatch_mode'] = :inline_blocking` for
266
+ # routes the auto-detect doesn't catch. Default `nil` /
267
+ # any other symbol stays on the fiber-yielding path so
268
+ # existing callers (TLS h1 / async-io / threadpool dispatch)
269
+ # are unaffected.
270
+ io.write(head)
271
+ if dispatch_mode == :inline_blocking
272
+ ::Hyperion::Http::Sendfile.copy_to_socket_blocking(io, file, 0, file_size)
273
+ else
274
+ ::Hyperion::Http::Sendfile.copy_to_socket(io, file, 0, file_size)
275
+ end
276
+ end
106
277
 
107
278
  record_zero_copy_metric(io)
108
- Hyperion.metrics.increment(:bytes_written, head.bytesize + copied)
279
+ Hyperion.metrics.increment(:bytes_written, head_bytes + copied)
109
280
  ensure
110
281
  file&.close
111
282
  body.close if body.respond_to?(:close)
@@ -118,6 +289,267 @@ module Hyperion
118
289
  nil
119
290
  end
120
291
 
292
+ # 2.10-C — page-cache engage helper. Returns the bytes written
293
+ # on a cache hit (or after an opportunistic populate-then-write),
294
+ # or `nil` to signal "fall through to the existing sendfile
295
+ # path". Three short-circuits keep the hot path branchless on
296
+ # the common cases:
297
+ #
298
+ # 1. Status must be 200. Caches don't store negotiated 304s
299
+ # / 206 Range / 416 Range-not-satisfiable / 404s.
300
+ # 2. The response carries no header that has to be re-emitted
301
+ # per request (Set-Cookie / Cache-Control: max-age=N /
302
+ # ETag / Last-Modified). We pre-bake Content-Type +
303
+ # Content-Length into the cache buffer; anything else
304
+ # forces the full path.
305
+ # 3. The C primitive must be loaded. Falsy on JRuby /
306
+ # TruffleRuby / hosts where the C ext didn't compile.
307
+ #
308
+ # On a path-not-cached hit, opportunistically populate the cache
309
+ # *if* the file is below `AUTO_THRESHOLD` (64 KiB); larger files
310
+ # keep the existing sendfile path because Hyperion already
311
+ # dominates big-static at 9× Agoo.
312
+ def page_cache_write(io, path, headers)
313
+ return nil unless self.class.page_cache_available?
314
+ # The C primitive writes via the OS-level fd, so StringIO /
315
+ # OpenSSL::SSL::SSLSocket / any IO-like that doesn't expose a
316
+ # real kernel fd has to fall through to the existing path.
317
+ # Probe up front to avoid the C primitive raising on an
318
+ # extracted-fd attempt.
319
+ return nil unless real_fd_io?(io)
320
+
321
+ # Fast skip for any response carrying a header that's per-
322
+ # request or security-sensitive enough that the cache can't
323
+ # safely bake it. We deliberately keep this list tight so the
324
+ # common Rack::Files case (which always emits `last-modified`
325
+ # + `content-type`) lands on the cache path; the cache
326
+ # buffer's `Content-Type` header is derived from the file
327
+ # extension via the same mime map agoo uses and `last-modified`
328
+ # is dropped (the client either revalidates via `If-None-Match`
329
+ # — not supported by the bare cache — or trusts
330
+ # `Cache-Control: max-age` when set, which IS in the skip list
331
+ # below). Operators wanting wire-byte parity opt out per route
332
+ # via `env['hyperion.streaming'] = true`.
333
+ headers.each do |k, _v|
334
+ case k.to_s.downcase
335
+ when 'set-cookie', 'cache-control', 'etag',
336
+ 'content-encoding', 'content-disposition', 'vary'
337
+ return nil
338
+ end
339
+ end
340
+
341
+ result = ::Hyperion::Http::PageCache.write_to(io, path)
342
+ return result if result.is_a?(Integer)
343
+
344
+ # Cache miss. Populate-then-write *iff* the file is small
345
+ # enough that the page cache wins (big files keep the existing
346
+ # sendfile path).
347
+ begin
348
+ size = File.size?(path)
349
+ rescue StandardError
350
+ size = nil
351
+ end
352
+ return nil if size.nil?
353
+ return nil if size > ::Hyperion::Http::PageCache::AUTO_THRESHOLD
354
+
355
+ cache_result = ::Hyperion::Http::PageCache.cache_file(path)
356
+ return nil if cache_result == :missing
357
+
358
+ result = ::Hyperion::Http::PageCache.write_to(io, path)
359
+ result.is_a?(Integer) ? result : nil
360
+ end
361
+
362
+ # 2.10-C — the page cache writes through a real kernel fd, so the
363
+ # IO-side argument has to be a TCPSocket / UNIXSocket / fileno-able
364
+ # File. StringIO / OpenSSL::SSL::SSLSocket / pipe-wrapped Ractors
365
+ # don't expose a usable fd; those callers fall through to the
366
+ # existing sendfile path. Plain Integer fds are accepted directly.
367
+ def real_fd_io?(io)
368
+ return true if io.is_a?(Integer)
369
+ return false unless io.respond_to?(:fileno)
370
+ return false if defined?(StringIO) && io.is_a?(StringIO)
371
+ return false if defined?(::OpenSSL::SSL::SSLSocket) && io.is_a?(::OpenSSL::SSL::SSLSocket)
372
+
373
+ true
374
+ end
375
+
376
+ # True when the app explicitly opted into chunked transfer-encoding.
377
+ # We only stream when asked — for the common "buffer the whole thing
378
+ # and emit one Content-Length response" case, the existing single-write
379
+ # path is still optimal (one syscall, no chunked-framing overhead).
380
+ def chunked_transfer?(headers)
381
+ headers.each do |k, v|
382
+ next unless k.to_s.casecmp('transfer-encoding').zero?
383
+
384
+ return v.to_s.downcase.include?('chunked')
385
+ end
386
+ false
387
+ end
388
+
389
+ # Phase 5 — streaming chunked writer with per-response coalescing.
390
+ #
391
+ # Wire format per RFC 7230 §4.1:
392
+ # <hex-size>\r\n<payload>\r\n for each chunk
393
+ # 0\r\n\r\n terminator
394
+ #
395
+ # Coalescing rules:
396
+ # * Chunks < COALESCE_SMALL_CHUNK_BYTES (512) accumulate in a per-
397
+ # response buffer rather than triggering an immediate syscall.
398
+ # * The buffer drains as soon as it reaches COALESCE_FLUSH_BYTES (4096)
399
+ # or a 1 ms writer-fiber tick elapses (best-effort; only meaningful
400
+ # under Async).
401
+ # * Chunks >= COALESCE_SMALL_CHUNK_BYTES drain the buffer first (to
402
+ # preserve order on the wire) then emit the large chunk directly.
403
+ # * If the body responds to #flush or yields :__hyperion_flush__, the
404
+ # buffer drains immediately — SSE servers use this to push events
405
+ # past per-event coalescing latency.
406
+ # * body.close (or end-of-each) drains the buffer and appends the
407
+ # 0\r\n\r\n terminator in a single syscall (atomic w.r.t. the wire).
408
+ def write_chunked(io, status, headers, body, keep_alive:)
409
+ reason = REASONS[status] || 'Unknown'
410
+ date_str = cached_date
411
+ head = build_head_chunked(status, reason, headers, keep_alive, date_str)
412
+
413
+ io.write(head)
414
+ bytes_out = head.bytesize
415
+
416
+ coalescer = ChunkedCoalescer.new(io)
417
+ body.each do |chunk|
418
+ next if chunk.nil?
419
+
420
+ if chunk.equal?(:__hyperion_flush__) || chunk == :__hyperion_flush__
421
+ coalescer.force_flush!
422
+ next
423
+ end
424
+
425
+ bytes = chunk.to_s
426
+ next if bytes.empty?
427
+
428
+ coalescer.write_chunk(bytes)
429
+ end
430
+
431
+ coalescer.flush_and_terminate!
432
+ bytes_out += coalescer.bytes_written
433
+ Hyperion.metrics.increment(:bytes_written, bytes_out)
434
+ Hyperion.metrics.increment(:chunked_responses)
435
+ Hyperion.metrics.increment(:chunked_coalesced_writes, coalescer.coalesced_write_count)
436
+ Hyperion.metrics.increment(:chunked_total_writes, coalescer.total_write_count)
437
+ ensure
438
+ body.close if body.respond_to?(:close)
439
+ end
440
+
441
+ # Per-response coalescing buffer. Holds <512 B chunks until either
442
+ # the 4 KiB threshold is hit, the 1 ms writer-fiber tick elapses, or
443
+ # an explicit flush / end-of-body fires. One instance per response;
444
+ # not shared across the connection (state lifecycle = response
445
+ # lifecycle, matches the Stepable-style "per-call object" pattern).
446
+ class ChunkedCoalescer
447
+ attr_reader :bytes_written, :coalesced_write_count, :total_write_count
448
+
449
+ def initialize(io)
450
+ @io = io
451
+ @buffer = String.new(capacity: ResponseWriter::COALESCE_FLUSH_BYTES,
452
+ encoding: Encoding::ASCII_8BIT)
453
+ @bytes_written = 0
454
+ @total_write_count = 0
455
+ @coalesced_write_count = 0
456
+ @last_drain_at = monotonic_now
457
+ end
458
+
459
+ # Append a chunk into the wire stream. Small chunks coalesce into the
460
+ # buffer; large chunks drain the buffer first then write directly.
461
+ # Returns the number of body-bytes consumed (used by metrics).
462
+ def write_chunk(payload)
463
+ framed = frame_chunk(payload)
464
+ if payload.bytesize < ResponseWriter::COALESCE_SMALL_CHUNK_BYTES
465
+ append_to_buffer(framed)
466
+ maybe_tick_flush
467
+ else
468
+ # Big chunk: drain anything we've accumulated first so that
469
+ # bytes hit the wire in body-yield order, then write the big
470
+ # chunk in its own syscall (no point coalescing — it's already
471
+ # past the threshold).
472
+ drain_buffer!
473
+ do_write(framed)
474
+ end
475
+ payload.bytesize
476
+ end
477
+
478
+ # External flush (body responded to flush, or yielded the flush
479
+ # sentinel). Drains the buffer; safe to call when the buffer is empty.
480
+ def force_flush!
481
+ drain_buffer!
482
+ end
483
+
484
+ # End-of-body. Drain any buffered bytes AND emit the chunked terminator
485
+ # in a single syscall — this preserves the "terminator follows the last
486
+ # chunk atomically" invariant on the wire (otherwise a peer could see
487
+ # a half-flushed response if the writer fiber were preempted between
488
+ # our flush + terminator writes).
489
+ def flush_and_terminate!
490
+ if @buffer.empty?
491
+ do_write(ResponseWriter::CHUNKED_TERMINATOR)
492
+ else
493
+ @buffer << ResponseWriter::CHUNKED_TERMINATOR
494
+ drain_buffer!
495
+ end
496
+ end
497
+
498
+ private
499
+
500
+ # Hex-size + CRLF + payload + CRLF (RFC 7230 §4.1). The size field is
501
+ # lowercased hex without a 0x prefix; bytesize is correct on
502
+ # ASCII-8BIT-encoded inputs (which is what comes off the socket / Rack).
503
+ def frame_chunk(payload)
504
+ size_line = payload.bytesize.to_s(16)
505
+ framed = String.new(capacity: size_line.bytesize + payload.bytesize + 4,
506
+ encoding: Encoding::ASCII_8BIT)
507
+ framed << size_line << "\r\n" << payload.b << "\r\n"
508
+ framed
509
+ end
510
+
511
+ def append_to_buffer(framed)
512
+ @buffer << framed
513
+ return unless @buffer.bytesize >= ResponseWriter::COALESCE_FLUSH_BYTES
514
+
515
+ drain_buffer!
516
+ end
517
+
518
+ # Best-effort 1 ms tick. We don't spawn a real timer fiber per
519
+ # response — that would cost more than the syscall savings on a
520
+ # short-lived coalescer. Instead we check the wallclock on each
521
+ # chunk arrival; if the buffer has been sitting for >= 1 ms we
522
+ # drain it. Under Async, the per-fiber kernel_sleep round-trip
523
+ # between body.each chunks gives us a natural tick on the slow
524
+ # cadence path. End-of-body always flushes regardless.
525
+ def maybe_tick_flush
526
+ return if @buffer.empty?
527
+ return if (monotonic_now - @last_drain_at) < ResponseWriter::COALESCE_TICK_SECONDS
528
+
529
+ drain_buffer!
530
+ end
531
+
532
+ def drain_buffer!
533
+ return if @buffer.empty?
534
+
535
+ do_write(@buffer)
536
+ @coalesced_write_count += 1
537
+ @buffer = String.new(capacity: ResponseWriter::COALESCE_FLUSH_BYTES,
538
+ encoding: Encoding::ASCII_8BIT)
539
+ @last_drain_at = monotonic_now
540
+ end
541
+
542
+ def do_write(bytes)
543
+ @io.write(bytes)
544
+ @bytes_written += bytes.bytesize
545
+ @total_write_count += 1
546
+ end
547
+
548
+ def monotonic_now
549
+ Process.clock_gettime(Process::CLOCK_MONOTONIC)
550
+ end
551
+ end
552
+
121
553
  # Plain TCPSocket → real sendfile(2). TLS-wrapped sockets cannot use
122
554
  # sendfile (kernel can't encrypt) but still avoid the per-response String
123
555
  # allocation, so we track them under a separate counter.
@@ -180,5 +612,34 @@ module Hyperion
180
612
  buf << "\r\n"
181
613
  buf
182
614
  end
615
+
616
+ # Phase 5 — chunked-transfer-encoding head. Mirrors build_head_ruby but
617
+ # emits `transfer-encoding: chunked` instead of `content-length` (the
618
+ # two are mutually exclusive per RFC 7230 §3.3.3). Always Ruby (no C
619
+ # builder yet — this is a low-volume opt-in path; the C builder
620
+ # currently always emits content-length).
621
+ def build_head_chunked(status, reason, headers, keep_alive, date_str)
622
+ normalized = {}
623
+ headers.each do |k, v|
624
+ key = k.to_s.downcase
625
+ next if key == 'content-length' # Mutually exclusive with chunked.
626
+ next if key == 'transfer-encoding' # We re-emit ourselves below.
627
+
628
+ normalized[key] = v
629
+ end
630
+ normalized['transfer-encoding'] = 'chunked'
631
+ normalized['connection'] = keep_alive ? 'keep-alive' : 'close'
632
+ normalized['date'] ||= date_str
633
+
634
+ buf = +"HTTP/1.1 #{status} #{reason}\r\n"
635
+ normalized.each do |k, v|
636
+ value = v.to_s
637
+ raise ArgumentError, "header #{k.inspect} contains CR/LF" if value.match?(CRLF_HEADER_VALUE)
638
+
639
+ buf << k << ': ' << value << "\r\n"
640
+ end
641
+ buf << "\r\n"
642
+ buf
643
+ end
183
644
  end
184
645
  end