hyperion-rb 1.6.2 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4768 -0
  3. data/README.md +222 -13
  4. data/ext/hyperion_h2_codec/Cargo.lock +7 -0
  5. data/ext/hyperion_h2_codec/Cargo.toml +33 -0
  6. data/ext/hyperion_h2_codec/extconf.rb +73 -0
  7. data/ext/hyperion_h2_codec/src/frames.rs +140 -0
  8. data/ext/hyperion_h2_codec/src/hpack/huffman.rs +161 -0
  9. data/ext/hyperion_h2_codec/src/hpack.rs +457 -0
  10. data/ext/hyperion_h2_codec/src/lib.rs +296 -0
  11. data/ext/hyperion_http/extconf.rb +28 -0
  12. data/ext/hyperion_http/h2_codec_glue.c +408 -0
  13. data/ext/hyperion_http/page_cache.c +1125 -0
  14. data/ext/hyperion_http/parser.c +473 -38
  15. data/ext/hyperion_http/sendfile.c +982 -0
  16. data/ext/hyperion_http/websocket.c +493 -0
  17. data/ext/hyperion_io_uring/Cargo.lock +33 -0
  18. data/ext/hyperion_io_uring/Cargo.toml +34 -0
  19. data/ext/hyperion_io_uring/extconf.rb +74 -0
  20. data/ext/hyperion_io_uring/src/lib.rs +316 -0
  21. data/lib/hyperion/adapter/rack.rb +370 -42
  22. data/lib/hyperion/admin_listener.rb +207 -0
  23. data/lib/hyperion/admin_middleware.rb +36 -7
  24. data/lib/hyperion/cli.rb +310 -11
  25. data/lib/hyperion/config.rb +440 -14
  26. data/lib/hyperion/connection.rb +679 -22
  27. data/lib/hyperion/deprecations.rb +81 -0
  28. data/lib/hyperion/dispatch_mode.rb +165 -0
  29. data/lib/hyperion/fiber_local.rb +75 -13
  30. data/lib/hyperion/h2_admission.rb +77 -0
  31. data/lib/hyperion/h2_codec.rb +499 -0
  32. data/lib/hyperion/http/page_cache.rb +122 -0
  33. data/lib/hyperion/http/sendfile.rb +696 -0
  34. data/lib/hyperion/http2/native_hpack_adapter.rb +70 -0
  35. data/lib/hyperion/http2_handler.rb +618 -19
  36. data/lib/hyperion/io_uring.rb +317 -0
  37. data/lib/hyperion/lint_wrapper_pool.rb +126 -0
  38. data/lib/hyperion/master.rb +96 -9
  39. data/lib/hyperion/metrics/path_templater.rb +68 -0
  40. data/lib/hyperion/metrics.rb +256 -0
  41. data/lib/hyperion/prometheus_exporter.rb +150 -0
  42. data/lib/hyperion/request.rb +13 -0
  43. data/lib/hyperion/response_writer.rb +477 -16
  44. data/lib/hyperion/runtime.rb +195 -0
  45. data/lib/hyperion/server/route_table.rb +179 -0
  46. data/lib/hyperion/server.rb +519 -55
  47. data/lib/hyperion/static_preload.rb +133 -0
  48. data/lib/hyperion/thread_pool.rb +61 -7
  49. data/lib/hyperion/tls.rb +343 -1
  50. data/lib/hyperion/version.rb +1 -1
  51. data/lib/hyperion/websocket/close_codes.rb +71 -0
  52. data/lib/hyperion/websocket/connection.rb +876 -0
  53. data/lib/hyperion/websocket/frame.rb +356 -0
  54. data/lib/hyperion/websocket/handshake.rb +525 -0
  55. data/lib/hyperion/worker.rb +111 -9
  56. data/lib/hyperion.rb +137 -3
  57. metadata +50 -1
@@ -18,7 +18,41 @@ module Hyperion
18
18
  HEADER_TERM = "\r\n\r\n"
19
19
  TIMEOUT_SENTINEL = :__hyperion_read_timeout__
20
20
  DEADLINE_SENTINEL = :__hyperion_request_deadline__
21
+ OVERSIZED_BODY_SENTINEL = :__hyperion_oversized_body__
21
22
  IDLE_KEEPALIVE_TIMEOUT_SECONDS = 5
23
+ # Phase 2b (1.7.1) — per-Connection pre-sized scratch buffer for the
24
+ # read accumulator. Most HTTP/1.1 request lines + headers fit in a few
25
+ # hundred bytes; 8 KiB covers ~99% of legitimate traffic without ever
26
+ # re-allocating. We reuse the same String across keep-alive requests on
27
+ # the same connection (clear between requests preserves capacity).
28
+ # Requests larger than 8 KiB still parse correctly — `String#<<` grows
29
+ # the underlying buffer transparently — they just pay the realloc the
30
+ # first time, same as the pre-1.7.1 behaviour.
31
+ INBUF_INITIAL_CAPACITY = 8 * 1024
32
+
33
+ # Pre-built canned 413 — body is small + plain text, connection forced
34
+ # closed. Reused across every oversized-CL rejection so the DOS-defense
35
+ # path stays allocation-free and never has to dip into ResponseWriter
36
+ # (which would require a full Rack-style headers hash for an error
37
+ # we can answer with frozen bytes).
38
+ REJECT_413_PAYLOAD_TOO_LARGE = (+"HTTP/1.1 413 Payload Too Large\r\n" \
39
+ "content-type: text/plain\r\n" \
40
+ "content-length: 18\r\n" \
41
+ "connection: close\r\n" \
42
+ "\r\n" \
43
+ "payload too large\n").freeze
44
+
45
+ # 2.3-B per-conn fairness 503. Connection stays alive (no
46
+ # `connection: close` here, no `Connection: close` to nginx) so
47
+ # the upstream peer can retry the request in 1s — nginx-friendly.
48
+ # Body is small + plain text + frozen so the reject path stays
49
+ # allocation-free on the hot path.
50
+ REJECT_503_PER_CONN_OVERLOAD = (+"HTTP/1.1 503 Service Unavailable\r\n" \
51
+ "content-type: text/plain\r\n" \
52
+ "content-length: 31\r\n" \
53
+ "retry-after: 1\r\n" \
54
+ "\r\n" \
55
+ "per-connection overload, retry\n").freeze
22
56
 
23
57
  # Default parser is the C-extension `CParser` when the extension built;
24
58
  # otherwise we fall back to the pure-Ruby `Parser`. Evaluated each call
@@ -27,27 +61,187 @@ module Hyperion
27
61
  defined?(::Hyperion::CParser) ? ::Hyperion::CParser.new : ::Hyperion::Parser.new
28
62
  end
29
63
 
64
+ # 2.4-C — histogram bucket edges for the per-route request duration
65
+ # histogram. Powers-of-5 spread covers 1ms to 10s, the realistic range
66
+ # for any HTTP-served workload. Frozen so the same Array is reused
67
+ # across every Connection (cheaper hist registration, no per-conn
68
+ # allocation).
69
+ REQUEST_DURATION_BUCKETS = [0.001, 0.005, 0.025, 0.1, 0.5, 2.5, 10.0].freeze
70
+
71
+ REQUEST_DURATION_HISTOGRAM = :hyperion_request_duration_seconds
72
+
73
+ # Pre-bucketed status-class strings. Lookup `STATUS_CLASS[status / 100]`
74
+ # avoids `"#{n}xx"` interpolation per request.
75
+ STATUS_CLASS = %w[0xx 1xx 2xx 3xx 4xx 5xx 6xx 7xx 8xx 9xx].each(&:freeze).freeze
76
+
30
77
  def initialize(parser: self.class.default_parser, writer: ResponseWriter.new, thread_pool: nil,
31
- log_requests: nil)
32
- @parser = parser
33
- @writer = writer
34
- @thread_pool = thread_pool
35
- # Cache module-level singletons once per Connection instance so the hot
36
- # path doesn't re-dispatch through Hyperion.metrics / Hyperion.logger
37
- # (each was a method call + ivar nil-check on every request).
38
- @metrics = Hyperion.metrics
39
- @logger = Hyperion.logger
78
+ log_requests: nil, max_body_bytes: MAX_BODY_BYTES, runtime: nil,
79
+ max_in_flight_per_conn: nil, path_templater: nil, route_table: nil)
80
+ @parser = parser
81
+ @writer = writer
82
+ @thread_pool = thread_pool
83
+ @max_body_bytes = max_body_bytes
84
+ # 2.3-B: per-conn fairness cap. nil disables the check entirely
85
+ # (the hot path stays branchless). Positive integer sets the
86
+ # in-flight ceiling. The counter + dedup-warn flag live as ivars
87
+ # so a single Connection's lifetime sees one warn at most, not
88
+ # one per rejected request.
89
+ @max_in_flight_per_conn = max_in_flight_per_conn
90
+ @in_flight = 0
91
+ @in_flight_mutex = Mutex.new if max_in_flight_per_conn
92
+ @overload_warned = false
93
+ # 1.7.0: explicit Runtime injection. When the caller passes
94
+ # `runtime:`, that runtime is the sole source of metrics + logger
95
+ # for this connection — no implicit fallback to module-level
96
+ # singletons. When omitted, fall back to `Runtime.default` so
97
+ # legacy callers keep working untouched.
98
+ #
99
+ # We still cache the metrics/logger refs in ivars (vs reading
100
+ # `runtime.metrics` per request) so the hot path doesn't pay a
101
+ # method-dispatch per increment. Long-lived keep-alive connections
102
+ # therefore see a Runtime swap only at construction — that's a
103
+ # 1.7.0 limitation; 2.0 drops the singleton entirely and the
104
+ # ivar cache becomes the only path.
105
+ if runtime
106
+ @runtime = runtime
107
+ @metrics = runtime.metrics
108
+ @logger = runtime.logger
109
+ else
110
+ # No explicit runtime → keep the 1.6.x shape: ivars cache the
111
+ # module-level accessors. This preserves stub seams used by
112
+ # existing specs (`allow(Hyperion).to receive(:metrics)`) and
113
+ # the `Hyperion.instance_variable_set(:@metrics, ...)` swap.
114
+ @runtime = Hyperion::Runtime.default
115
+ @metrics = Hyperion.metrics
116
+ @logger = Hyperion.logger
117
+ end
40
118
  # Per-request access logging is ON by default (matches Puma+Rails
41
119
  # operator expectation). The hot path is optimised end-to-end: one
42
120
  # Process.clock_gettime per request, per-thread cached timestamp,
43
121
  # hand-rolled line builder, lock-free emit. Operator disables via
44
122
  # `--no-log-requests` or `HYPERION_LOG_REQUESTS=0`.
45
- @log_requests = log_requests.nil? ? Hyperion.log_requests? : log_requests
123
+ @log_requests = if log_requests.nil?
124
+ # Per-Connection override absent → consult the
125
+ # Runtime's logging config (1.7.0+) which falls
126
+ # through to `Hyperion.log_requests?` (env +
127
+ # default ON).
128
+ Hyperion.log_requests?
129
+ else
130
+ log_requests
131
+ end
132
+ # 2.4-C: cache the path-templater ref at construction. Reading it
133
+ # via Hyperion::Metrics.default_path_templater per request would
134
+ # add a method dispatch + a memo branch on every observation — we
135
+ # keep the existing pattern of caching boot-time refs as ivars so
136
+ # the per-request observe stays a single Hash lookup.
137
+ @path_templater = path_templater || Hyperion::Metrics.default_path_templater
138
+ # 2.10-D — direct-dispatch route table. The hot-path lookup
139
+ # is `@route_table&.lookup(method, path)` so the nil-default
140
+ # case (no operator-registered direct routes — the
141
+ # overwhelming majority of 2.x deployments) collapses to a
142
+ # single `nil`-test before falling through to the Rack
143
+ # adapter. When `route_table:` is passed we honour the
144
+ # explicit value (test seam / multi-tenant). When omitted
145
+ # AND the Hyperion::Server class is loaded, we resolve to
146
+ # the process-wide singleton; ad-hoc Connection callers in
147
+ # specs that don't load Server keep the nil fallback.
148
+ @route_table = if route_table
149
+ route_table
150
+ elsif defined?(Hyperion::Server) && Hyperion::Server.respond_to?(:route_table)
151
+ Hyperion::Server.route_table
152
+ end
153
+ register_request_duration_histogram!
154
+ end
155
+
156
+ # 2.4-C: register the per-route histogram family on this Connection's
157
+ # metrics sink. Idempotent — `Metrics#register_histogram` no-ops on
158
+ # re-registration with the same shape. Called once per Connection so
159
+ # the histogram exists before the first observe.
160
+ def register_request_duration_histogram!
161
+ @metrics.register_histogram(
162
+ REQUEST_DURATION_HISTOGRAM,
163
+ buckets: REQUEST_DURATION_BUCKETS,
164
+ label_keys: %w[method path status]
165
+ )
166
+ rescue StandardError
167
+ # Histogram registration is observability — never block a Connection
168
+ # from booting because the metrics sink misbehaved.
169
+ nil
170
+ end
171
+
172
+ # 2.1.0 (WS-1): the connection itself caches the live socket so that
173
+ # `hijack!` (called from inside the app, possibly on a thread-pool
174
+ # worker thread) can reach back and yield it. `@hijacked` is the flag
175
+ # that gates writer + cleanup behaviour after the app returns. Reset
176
+ # at the top of each request iteration: a keep-alive client that does
177
+ # NOT hijack on request N must still get the normal response path,
178
+ # and a hijack on request N+1 should not be observed during request N.
179
+ attr_reader :socket
180
+
181
+ # 2.6-C — per-response dispatch-mode override. Reset to `nil` at
182
+ # the top of each request iteration; the Rack adapter sets this to
183
+ # `:inline_blocking` when it auto-detects a static-file body
184
+ # (`body.respond_to?(:to_path)`) or when the app explicitly opts in
185
+ # via `env['hyperion.dispatch_mode'] = :inline_blocking`. The
186
+ # response-write path reads it back here in `serve` and forwards
187
+ # the symbol to `ResponseWriter#write` so the writer can pick the
188
+ # blocking-sendfile variant.
189
+ #
190
+ # The override is per-RESPONSE, NOT per-connection: the connection's
191
+ # connection-wide dispatch mode (resolved at boot from `tls`,
192
+ # `async_io`, `thread_count`, ALPN) stays whatever the operator
193
+ # configured. Only the response-write loop downgrades.
194
+ attr_accessor :response_dispatch_mode
195
+
196
+ def hijacked?
197
+ @hijacked == true
198
+ end
199
+
200
+ # Called by the Rack app (via `env['rack.hijack'].call`). Flips the
201
+ # `@hijacked` flag — Connection#serve checks this after `call_app`
202
+ # returns and skips the writer + the ensure-block close. Returns the
203
+ # raw socket IO so the app can speak any post-HTTP protocol on it.
204
+ #
205
+ # Idempotent: a subsequent call returns the same socket without
206
+ # re-flipping (the flag is monotonic). Defensive — apps occasionally
207
+ # do `io = env['rack.hijack'].call; io2 = env['rack.hijack'].call`
208
+ # when chaining middleware.
209
+ def hijack!
210
+ @hijacked = true
211
+ Hyperion.metrics.increment(:rack_hijacks) if defined?(Hyperion) && Hyperion.respond_to?(:metrics)
212
+ @socket
213
+ end
214
+
215
+ # Bytes the connection had buffered past the parsed request boundary
216
+ # at the moment we entered the dispatch step (pipelined keep-alive
217
+ # carry, or — for an Upgrade — early bytes the client sent right
218
+ # after the headers, before they could see our 101 response).
219
+ # Returns a binary-encoded String (possibly empty). Captured fresh
220
+ # per request inside `serve` *before* `call_app` so reads from the
221
+ # socket past this point still go to the OS buffer; the carry is
222
+ # the application's responsibility to drain.
223
+ def hijack_buffered
224
+ @hijack_buffered ||= +''
46
225
  end
47
226
 
48
227
  def serve(socket, app, max_request_read_seconds: 60)
49
228
  request_count = 0
50
- carry = +'' # bytes already pulled off the socket but past the prev request boundary
229
+ @socket = socket
230
+ @hijacked = false
231
+ # 2.6-D — sticky flag set after each `:inline_blocking` response
232
+ # so the next request iteration on the same keep-alive
233
+ # connection can bypass the per-conn fairness admission check
234
+ # (and the bookkeeping it carries). See the
235
+ # `skip_per_conn_fairness` branch in the request loop below.
236
+ @last_response_was_static_inline_blocking = false
237
+ # Phase 2b (1.7.1): pre-size the read accumulator once per connection
238
+ # and reuse it across keep-alive requests. `String#clear` between
239
+ # requests preserves the underlying capacity, so subsequent appends
240
+ # don't pay the realloc tax. Pre-1.7.1 allocated a fresh `+''` per
241
+ # request; per-connection reuse is a strict win because the previous
242
+ # request's carry-over bytes (pipelined input) are copied into this
243
+ # same buffer at the bottom of the loop instead of into a new String.
244
+ @inbuf ||= String.new(capacity: INBUF_INITIAL_CAPACITY, encoding: Encoding::ASCII_8BIT)
51
245
  peer_addr = peer_address(socket)
52
246
  @metrics.increment(:connections_accepted)
53
247
  @metrics.increment(:connections_active)
@@ -56,9 +250,18 @@ module Hyperion
56
250
  # long-lived keep-alive sessions with many small requests don't
57
251
  # falsely trip after the cumulative budget elapses.
58
252
  request_started_clock = Process.clock_gettime(Process::CLOCK_MONOTONIC) if max_request_read_seconds
59
- buffer = read_request(socket, carry, deadline_started_at: request_started_clock,
60
- max_request_read_seconds: max_request_read_seconds,
61
- peer_addr: peer_addr)
253
+ # 2.6-C clear the per-response dispatch-mode override at the
254
+ # top of every request iteration. The Rack adapter sets it
255
+ # *during* `app.call` (auto-detect on `to_path` body or
256
+ # explicit `env['hyperion.dispatch_mode']` override) and the
257
+ # writer reads it back; a keep-alive client whose request N
258
+ # was static must NOT have request N+1 inherit the
259
+ # `:inline_blocking` flag if request N+1's body is a streaming
260
+ # response.
261
+ @response_dispatch_mode = nil
262
+ buffer = read_request(socket, @inbuf, deadline_started_at: request_started_clock,
263
+ max_request_read_seconds: max_request_read_seconds,
264
+ peer_addr: peer_addr)
62
265
  return unless buffer
63
266
 
64
267
  if buffer == TIMEOUT_SENTINEL
@@ -76,24 +279,142 @@ module Hyperion
76
279
  # written the 408 (best-effort) inside read_request; close out here.
77
280
  return if buffer == DEADLINE_SENTINEL
78
281
 
282
+ # DOS-defense: client declared a Content-Length larger than
283
+ # max_body_bytes. We've already written the canned 413 + close inside
284
+ # read_request, BEFORE reading any body bytes. Drop the connection.
285
+ return if buffer == OVERSIZED_BODY_SENTINEL
286
+
79
287
  request, body_end = @parser.parse(buffer)
80
- carry = +(buffer.byteslice(body_end, buffer.bytesize - body_end) || '')
288
+ # Carry over any pipelined trailing bytes for the next iteration. We
289
+ # rewrite @inbuf in place — `replace` keeps the underlying capacity
290
+ # allocation, so the next request starts with a warm 8 KiB buffer.
291
+ #
292
+ # 2.1.0 (WS-1): snapshot the carry BEFORE we collapse it back into
293
+ # the read buffer. If the app full-hijacks this request, those
294
+ # bytes are the application's responsibility (sent right after the
295
+ # Upgrade headers, etc.) — exposed via `env['hyperion.hijack_buffered']`.
296
+ # On the non-hijack hot path the snapshot is empty (no allocation
297
+ # past the constant `EMPTY_BIN`) for keep-alive without pipelining.
298
+ @hijack_buffered = if buffer.bytesize > body_end
299
+ buffer.byteslice(body_end,
300
+ buffer.bytesize - body_end).b
301
+ else
302
+ EMPTY_BIN
303
+ end
304
+ carry_into_inbuf!(buffer, body_end)
81
305
  request = enrich_with_peer(request, peer_addr) if peer_addr && request.peer_address.nil?
82
306
 
83
307
  @metrics.increment(:bytes_read, body_end)
84
308
  @metrics.increment(:requests_total)
85
309
  @metrics.increment(:requests_in_flight)
86
- request_started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC) if @log_requests
310
+ # 2.4-C: capture start time for the per-route duration histogram.
311
+ # Same Process.clock_gettime that the access-log path was already
312
+ # paying — at default-ON log_requests the second call here is
313
+ # avoided (we reuse `request_started_at`).
314
+ request_started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
315
+ # 2.10-D — direct-dispatch fast path. Bypasses the Rack
316
+ # adapter entirely (no env-hash build, no middleware chain,
317
+ # no body-iteration overhead) on routes the operator
318
+ # registered via `Hyperion::Server.handle(:GET, '/path',
319
+ # handler)` or `.handle_static(...)`. Lifecycle hooks
320
+ # still fire so trace instrumentation works regardless of
321
+ # dispatch shape.
322
+ #
323
+ # Lookup is O(1) (two Hash#[] hits) and the nil-default
324
+ # case (no direct routes registered — the overwhelming
325
+ # majority of deployments) collapses to one nil-test plus
326
+ # one Hash#[] miss before falling through to the regular
327
+ # path; cost on the regular path is < 1 us.
328
+ if @route_table && (direct_handler = @route_table.lookup(request.method, request.path))
329
+ dispatch_direct!(socket, request, direct_handler, request_started_at, peer_addr)
330
+ request_count += 1
331
+ break unless should_keep_alive_after_direct?(request)
332
+
333
+ set_idle_timeout(socket)
334
+ next
335
+ end
336
+ # 2.3-B per-conn fairness gate. Returns true when the slot was
337
+ # reserved (caller must release in ensure), false when the cap
338
+ # was hit and a 503 was emitted. nil cap → admit always (hot
339
+ # path stays branchless).
340
+ #
341
+ # 2.6-D — skip the fairness check entirely on connections whose
342
+ # previous response was `:inline_blocking` (auto-detected
343
+ # static-file traffic). Static streams are dominated by the
344
+ # write phase, not concurrent app.call invocations, so the
345
+ # per-conn fairness cap is dead weight here — its purpose is
346
+ # to throttle dynamic-route concurrency on a single keep-alive
347
+ # connection. Static-asset connections (CDN origins, signed-
348
+ # download responders) typically run a long sequence of
349
+ # `to_path` responses; once the first one auto-detects, the
350
+ # remaining requests skip the admit / release / metric trio.
351
+ # The flag flips back to false the moment a non-static
352
+ # response lands on the same connection.
353
+ skip_per_conn_fairness = @last_response_was_static_inline_blocking
354
+ if @max_in_flight_per_conn && !skip_per_conn_fairness && !per_conn_admit!(socket, peer_addr)
355
+ @metrics.decrement(:requests_in_flight)
356
+ request_count += 1
357
+ # Don't close — keep the conn alive so the upstream peer can
358
+ # retry after the in-flight request drains. Skip writer +
359
+ # logging (we wrote a canned response above) and proceed to
360
+ # the next iteration's read.
361
+ set_idle_timeout(socket)
362
+ next
363
+ end
87
364
  begin
88
365
  status, headers, body = call_app(app, request)
89
366
  ensure
90
367
  @metrics.decrement(:requests_in_flight)
368
+ per_conn_release! if @max_in_flight_per_conn && !skip_per_conn_fairness
369
+ end
370
+
371
+ # 2.1.0 (WS-1): if the app called `env['rack.hijack'].call` during
372
+ # `call_app`, the connection has handed the socket over. We MUST
373
+ # NOT write a response (the app is now driving the wire) and we
374
+ # MUST NOT close the socket (the app owns it). The status/headers/body
375
+ # tuple from the app is ignored on this path — Rack 3 spec calls this
376
+ # out explicitly. Drop out of the per-request loop; the ensure block
377
+ # will skip socket close because of @hijacked.
378
+ if @hijacked
379
+ @logger.debug do
380
+ { message: 'rack hijack', method: request.method, path: request.path, peer_addr: peer_addr }
381
+ end
382
+ # Drop body if the app still returned one — apps occasionally
383
+ # return [-1, {}, []] but some return real arrays out of habit.
384
+ # We don't iterate or close the body; iterating would let it
385
+ # write to the (now app-owned) socket via env['rack.input'] etc.
386
+ # body.close is the one safe call (frees temp files), best-effort.
387
+ body.close if body.respond_to?(:close)
388
+ return
91
389
  end
92
390
 
93
391
  keep_alive = should_keep_alive?(request, status, headers)
94
- @writer.write(socket, status, headers, body, keep_alive: keep_alive)
392
+ # 2.6-C pass the per-response dispatch-mode override to the
393
+ # writer. Default `nil` means "use the writer's default
394
+ # (fiber-yielding sendfile / userspace copy)". Only
395
+ # `:inline_blocking` currently flips the writer onto a
396
+ # different code path (the Puma-style serial-per-thread
397
+ # blocking-sendfile loop). Forward-compatible — future per-
398
+ # response dispatch modes plug in here without changing the
399
+ # call-site shape.
400
+ @writer.write(socket, status, headers, body, keep_alive: keep_alive,
401
+ dispatch_mode: @response_dispatch_mode)
402
+ # 2.6-D — record whether this response engaged
403
+ # `:inline_blocking` so the next request iteration can skip
404
+ # the per-conn fairness admission check (see the
405
+ # `skip_per_conn_fairness` branch above). Sticky on
406
+ # consecutive static responses; resets on the first non-
407
+ # static response back on the same conn.
408
+ @last_response_was_static_inline_blocking =
409
+ @response_dispatch_mode == :inline_blocking
95
410
  @metrics.increment_status(status)
96
411
  log_request(request, status, request_started_at) if @log_requests
412
+ # 2.4-C: per-route duration histogram observation. Templating the
413
+ # path (e.g. `/users/123` → `/users/:id`) keeps cardinality
414
+ # bounded; the templater itself is LRU-cached so the cost on a
415
+ # repeated path is one Hash#[] + one Hash re-insert. We swallow
416
+ # any exception — observability must never block a response.
417
+ observe_request_duration(request, status, request_started_at)
97
418
  request_count += 1
98
419
 
99
420
  return unless keep_alive
@@ -121,24 +442,303 @@ module Hyperion
121
442
  # the connection go idle. Otherwise a low-traffic worker would hold
122
443
  # logs in its per-thread buffer indefinitely.
123
444
  @logger.flush_access_buffer if @log_requests && @logger.respond_to?(:flush_access_buffer)
445
+ # 2.1.0 (WS-1): when the app full-hijacked the socket, ownership has
446
+ # transferred. Hyperion MUST NOT close — the app may still be reading
447
+ # from / writing to the wire (e.g. an open WebSocket) long after this
448
+ # fiber exits. Skip the close branch entirely; the app is the sole
449
+ # closer from this point on.
450
+ unless @hijacked
451
+ # 2.4-C: drop the per-worker kTLS gauge for this socket if it
452
+ # was tracked at handshake time. No-op for plain TCP and for
453
+ # TLS-without-kTLS sockets.
454
+ Hyperion::TLS.untrack_ktls_handshake!(socket) if defined?(Hyperion::TLS)
455
+ begin
456
+ socket.close unless socket.closed?
457
+ rescue StandardError
458
+ # Already failing; swallow close errors so we don't mask the real cause.
459
+ end
460
+ end
461
+ end
462
+
463
+ private
464
+
465
+ # 2.10-D — direct-dispatch handler invocation. Bypasses the
466
+ # Rack adapter (`Adapter::Rack.call` builds the env hash, walks
467
+ # the middleware chain, runs WS handshake validation — none of
468
+ # which a direct route needs). Fires the runtime's lifecycle
469
+ # hooks so NewRelic / AppSignal / OpenTelemetry instrumentation
470
+ # is mode-agnostic; `env` is `nil` on direct routes (no env was
471
+ # built) — observers documented to expect a nil env on this
472
+ # branch.
473
+ #
474
+ # Two write shapes:
475
+ #
476
+ # * `RouteTable::StaticEntry` — pre-built response buffer
477
+ # from `handle_static`. The hot path: ONE socket.write of
478
+ # the full HTTP/1.1 response (status + Content-Type +
479
+ # Content-Length + body), zero header build, zero body
480
+ # iteration.
481
+ # * Plain `[status, headers, body]` Rack tuple — the
482
+ # standard ResponseWriter writes it via the existing
483
+ # code path. Slower than StaticEntry but still skips the
484
+ # entire Rack env construction.
485
+ def dispatch_direct!(socket, request, handler, request_started_at, peer_addr)
486
+ @metrics.increment(:bytes_read, 0) # no-op — bytes already counted upstream
487
+ @metrics.increment(:requests_in_flight)
488
+ @metrics.increment(:direct_route_hits)
489
+
490
+ # 2.10-F — C-ext fast path for prebuilt static responses. When
491
+ # the matched route is a `StaticEntry`, the prebuilt response
492
+ # bytes are already registered with `Hyperion::Http::PageCache`
493
+ # under the route path; `PageCache.serve_request` does the
494
+ # whole thing — hash lookup, snapshot under the C lock, GVL-
495
+ # released write — without invoking the handler closure or
496
+ # building a `[status, headers, body]` tuple. Lifecycle hooks
497
+ # still fire (with `env=nil`, matching the 2.10-D contract) so
498
+ # APM observers see direct-route requests regardless of whether
499
+ # the wire write happens in Ruby or C.
500
+ if handler.is_a?(::Hyperion::Server::RouteTable::StaticEntry)
501
+ return dispatch_direct_static!(socket, request, handler, request_started_at)
502
+ end
503
+
504
+ response = nil
505
+ error = nil
124
506
  begin
125
- socket.close unless socket.closed?
507
+ @runtime.fire_request_start(request, nil) if @runtime.has_request_hooks?
508
+ response = handler.call(request)
509
+ rescue StandardError => e
510
+ error = e
511
+ @metrics.increment(:app_errors)
512
+ @logger.error do
513
+ {
514
+ message: 'direct route raised',
515
+ method: request.method,
516
+ path: request.path,
517
+ error: e.message,
518
+ error_class: e.class.name
519
+ }
520
+ end
521
+ response = [500, { 'content-type' => 'text/plain' }, ['Internal Server Error']]
522
+ ensure
523
+ @metrics.decrement(:requests_in_flight)
524
+ end
525
+
526
+ status = write_direct_response(socket, response)
527
+
528
+ if @runtime.has_request_hooks?
529
+ @runtime.fire_request_end(request, nil, error.nil? ? response : nil, error)
530
+ end
531
+
532
+ @metrics.increment_status(status)
533
+ log_request(request, status, request_started_at) if @log_requests
534
+ observe_request_duration(request, status, request_started_at)
535
+ status
536
+ rescue StandardError => e
537
+ # Lifecycle-hook failure is logged inside fire_request_*; this
538
+ # rescue catches socket write errors so the request loop sees
539
+ # the problem and can decide whether to keep the connection
540
+ # alive (we just close on any exception here — it's the safe
541
+ # default).
542
+ @logger.error do
543
+ { message: 'direct dispatch write failed',
544
+ peer_addr: peer_addr,
545
+ error: e.message,
546
+ error_class: e.class.name }
547
+ end
548
+ 500
549
+ end
550
+
551
+ # 2.10-F — StaticEntry-only dispatch path. Calls `PageCache.serve_request`
552
+ # which performs the full lookup + snapshot + write entirely in C
553
+ # (with the GVL released across the write syscall). On `:miss`
554
+ # (e.g. the C cache was cleared between registration and request,
555
+ # or the request method is something we didn't pre-register —
556
+ # POST against a GET route would have already missed the route
557
+ # table, so this branch is paranoia) we fall back to the Ruby
558
+ # `socket.write` path — same bytes, slightly more overhead.
559
+ #
560
+ # Lifecycle hooks (`Runtime#on_request_start` / `#on_request_end`)
561
+ # MUST still fire here so APM observers see direct-route hits.
562
+ # `env` is `nil` on direct routes, matching the 2.10-D contract.
563
+ def dispatch_direct_static!(socket, request, entry, request_started_at)
564
+ error = nil
565
+ begin
566
+ @runtime.fire_request_start(request, nil) if @runtime.has_request_hooks?
567
+ bytes_written = serve_static_entry(socket, request, entry)
568
+ # We always emit a 200 from a StaticEntry (that's what
569
+ # `Server.handle_static` builds). Track the bytes for
570
+ # operators tracking egress, mirroring what ResponseWriter
571
+ # does on the regular path.
572
+ @metrics.increment(:bytes_written, bytes_written)
573
+ rescue StandardError => e
574
+ error = e
575
+ @metrics.increment(:app_errors)
576
+ @logger.error do
577
+ { message: 'static direct route write failed',
578
+ method: request.method,
579
+ path: request.path,
580
+ error: e.message,
581
+ error_class: e.class.name }
582
+ end
583
+ ensure
584
+ @metrics.decrement(:requests_in_flight)
585
+ end
586
+
587
+ if @runtime.has_request_hooks?
588
+ @runtime.fire_request_end(request, nil, error.nil? ? entry : nil, error)
589
+ end
590
+
591
+ status = error ? 500 : 200
592
+ @metrics.increment_status(status)
593
+ log_request(request, status, request_started_at) if @log_requests
594
+ observe_request_duration(request, status, request_started_at)
595
+ status
596
+ end
597
+
598
# 2.10-F — write a static entry's response, preferring the native
# page-cache extension when it is loaded; otherwise fall back to the
# 2.10-D pure-Ruby `socket.write` path. For HEAD requests the fallback
# truncates the prebuilt buffer to its header section. Returns the
# number of bytes written.
def serve_static_entry(socket, request, entry)
  if defined?(::Hyperion::Http::PageCache) &&
     ::Hyperion::Http::PageCache.respond_to?(:serve_request)
    outcome = ::Hyperion::Http::PageCache.serve_request(socket, request.method, entry.path)
    # Anything other than an [:ok, bytes] tuple falls through to Ruby.
    return outcome.last if outcome.is_a?(Array) && outcome.first == :ok
  end

  # Fallback: one-shot Ruby write of the prebuilt buffer. HEAD gets the
  # headers-only prefix when the buffer actually carries a body.
  full = entry.buffer
  payload =
    if request.method == 'HEAD' && entry.headers_bytesize < full.bytesize
      full.byteslice(0, entry.headers_bytesize)
    else
      full
    end
  socket.write(payload)
end
614
+
615
# 2.10-D — write a direct-route response and return the status code
# that went out (so `dispatch_direct!` can bump the status counter
# without re-parsing the response). Two accepted shapes:
#
#   * a RouteTable::StaticEntry — agoo-style one-shot write of its
#     prebuilt bytes, always reported as 200. (2.10-F note: entries
#     returned ad hoc by a handler land here and are NOT in the C
#     cache, so this stays a plain Ruby write.)
#   * a Rack-style [status, headers, body] tuple — handed to the
#     writer so handlers can compute per-request responses without
#     paying for env construction.
def write_direct_response(socket, response)
  case response
  when ::Hyperion::Server::RouteTable::StaticEntry
    socket.write(response.response_bytes)
    200
  else
    status, headers, body = response
    @writer.write(socket, status, headers, body, keep_alive: true)
    status
  end
end
635
+
636
# 2.10-D — keep-alive decision for direct-dispatch responses. Direct
# routes skip the full `Connection: close` response-header inspection
# that Rack tuples receive (a StaticEntry's headers are baked in; we
# trust the operator), so only the request-side `Connection` header is
# consulted: HTTP/1.1 defaults to keep-alive unless the client said
# `close`; HTTP/1.0 defaults to close unless it said `keep-alive`;
# anything else closes.
def should_keep_alive_after_direct?(request)
  requested = request.header('connection')&.downcase
  version = request.http_version
  if version == 'HTTP/1.1'
    requested != 'close'
  elsif version == 'HTTP/1.0'
    requested == 'keep-alive'
  else
    false
  end
end
650
+
651
# 2.3-B per-conn fairness admit. Mutex-guarded compare-and-bump so
# async-io fibers / pipelined requests on the same OS thread don't
# race the counter. Returns true when the slot was reserved, false
# when the cap was hit (caller writes 503 + Retry-After). The 503
# path bumps a metric, emits a deduplicated warn, and writes a
# canned response — all best-effort; a peer that's gone away is
# silently swallowed.
#
# socket    - client connection, used only for the canned 503 write.
# peer_addr - remote address string included in the deduplicated warn.
def per_conn_admit!(socket, peer_addr)
  cap = @max_in_flight_per_conn
  admitted = @in_flight_mutex.synchronize do
    if @in_flight >= cap
      false
    else
      @in_flight += 1
      true
    end
  end
  return true if admitted

  @metrics.increment(:per_conn_overload_rejects)
  # 2.4-C: also feed the labeled counter so operators can break
  # rejections down per worker (one row per worker_id at scrape
  # time) without losing the legacy unlabeled counter for back-
  # compat dashboards.
  @metrics.increment_labeled_counter(:hyperion_per_conn_rejections_total,
                                     [Process.pid.to_s])
  @metrics.increment_status(503)
  # @overload_warned latches (set once here, never cleared in this
  # method) so a flooding client produces a single warn line.
  unless @overload_warned
    @logger.warn do
      { message: 'per-connection in-flight cap hit, returning 503 + Retry-After',
        remote_addr: peer_addr, cap: cap, in_flight: cap }
    end
    @overload_warned = true
  end
  begin
    socket.write(REJECT_503_PER_CONN_OVERLOAD)
  rescue StandardError
    # Peer may have already gone — nothing to do.
  end
  false
end
130
692
 
131
- private
693
# Release one per-connection in-flight slot reserved by
# `per_conn_admit!`. Guarded by the same mutex as the admit path; the
# counter is floored at zero so a stray double-release can't go
# negative.
def per_conn_release!
  @in_flight_mutex.synchronize do
    @in_flight -= 1 if @in_flight > 0
  end
end
696
+
697
# Phase 2b: collapse the read buffer in place so only the carry-over
# survives (any bytes past the parsed request boundary, used for
# keep-alive pipelining). Works byte-wise so the buffer's underlying
# capacity allocation stays put — `String#replace` with `byteslice`
# would allocate a fresh substring AND memcpy it back. Splicing with a
# frozen empty binary string keeps everything in the original buffer.
EMPTY_BIN = String.new('', encoding: Encoding::ASCII_8BIT).freeze
def carry_into_inbuf!(buffer, body_end)
  # Everything consumed: empty the buffer without reallocating.
  return buffer.clear if body_end >= buffer.bytesize

  # Splice away the [0, body_end) prefix. Ruby's String#[]=(start, len, "")
  # shifts the remaining bytes in place — no new String allocation,
  # capacity preserved.
  buffer[0, body_end] = EMPTY_BIN
end
132
715
 
133
716
# Route Rack dispatch through the thread pool when one was injected,
# otherwise run inline on the current fiber. Inline keeps the test path
# simple (no extra threads spun up for unit specs) and provides a
# debugging escape hatch via `Server#thread_count: 0`.
#
# 2.1.0 (WS-1) passes `self` as the hijack target so the env hash gets
# a working `rack.hijack?` + `rack.hijack` proc. Both modes (inline and
# thread-pool) plumb the connection through — the app can hijack on
# either path; the connection's `@hijacked` ivar is the source of
# truth read back in `serve` after `call_app` returns, regardless of
# which thread evaluated the proc. Older pools (or stubs) without the
# WS-1 `call_with_connection` helper fall back to the no-hijack
# `call`, keeping third-party pool plug-ins working at the cost of
# disabling hijack there.
def call_app(app, request)
  pool = @thread_pool
  # 2.5-C — inline path threads the per-conn Runtime through so
  # request lifecycle hooks fire against the correct (per-server, in
  # multi-tenant deployments) observer registry. `@runtime` is always
  # set by the initializer, so this is a non-nil pass-through.
  return Adapter::Rack.call(app, request, connection: self, runtime: @runtime) unless pool

  if pool.respond_to?(:call_with_connection)
    pool.call_with_connection(app, request, self)
  else
    pool.call(app, request)
  end
end
144
744
 
@@ -239,6 +839,17 @@ module Hyperion
239
839
  end
240
840
  else
241
841
  content_length = headers_part[/^content-length:\s*(\d+)/i, 1].to_i
842
+ # DOS-defense: cap declared Content-Length at max_body_bytes BEFORE
843
+ # we touch the socket again. An attacker advertising
844
+ # `Content-Length: 99999999999` should not get us to allocate a
845
+ # multi-GB read buffer or sit in the read loop draining their
846
+ # body. The pure-int comparison itself is bounded — Ruby's `to_i`
847
+ # on the regex capture stops at the first non-digit, so even an
848
+ # adversarial header value can't blow up here. Negative or
849
+ # malformed values fall through to the parser (which raises
850
+ # ParseError → 400) so existing behaviour is preserved.
851
+ return abort_for_oversized_body(socket, content_length, peer_addr) if content_length > @max_body_bytes
852
+
242
853
  while buffer.bytesize < header_end + content_length
243
854
  if deadline_exceeded?(deadline_started_at, max_request_read_seconds)
244
855
  return abort_for_deadline(socket, deadline_started_at, peer_addr)
@@ -281,6 +892,30 @@ module Hyperion
281
892
  DEADLINE_SENTINEL
282
893
  end
283
894
 
895
# DOS-defense fallback: the declared Content-Length exceeds the
# configured max_body_bytes. Emit a canned 413 and bail BEFORE reading
# any body bytes off the socket — that's the whole point of the cap.
# The write is best-effort: a peer that already went away must not
# turn a rejection into an exception. Returns OVERSIZED_BODY_SENTINEL
# so the read loop knows to abandon the request.
def abort_for_oversized_body(socket, declared_length, peer_addr)
  @metrics.increment(:oversized_body_rejects)
  @logger.warn do
    { message: 'rejected oversized Content-Length',
      remote_addr: peer_addr,
      declared_length: declared_length,
      max_body_bytes: @max_body_bytes }
  end
  begin
    socket.write(REJECT_413_PAYLOAD_TOO_LARGE)
  rescue StandardError
    # Peer may have already gone — nothing to do.
  end
  @metrics.increment_status(413)
  OVERSIZED_BODY_SENTINEL
end
918
+
284
919
# True when the raw header block advertises chunked transfer framing
# (a Transfer-Encoding line whose value list contains the `chunked`
# token), matched case-insensitively at line starts.
def chunked?(headers_part)
  chunked_te = /^transfer-encoding:[ \t]*[^\r\n]*chunked\b/i
  headers_part.match?(chunked_te)
end
@@ -406,5 +1041,27 @@ module Hyperion
406
1041
  request.http_version
407
1042
  )
408
1043
  end
1044
+
1045
+ # 2.4-C — observe one sample on the per-route request-duration
1046
+ # histogram. Best-effort: a misbehaving templater or sink degrades
1047
+ # silently to no observation. The label tuple Array is fresh per
1048
+ # call (3 small Strings) — that's the only allocation cost the
1049
+ # observation imposes on the response path. Histogram observation
1050
+ # itself reuses the per-(name, labels_tuple) accumulator after the
1051
+ # first samples for a given templated path, so steady-state per-
1052
+ # route observations are zero-allocation past the tuple Array.
1053
+ def observe_request_duration(request, status, started_at)
1054
+ duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
1055
+ method = request.method
1056
+ template = @path_templater.template(request.path)
1057
+ class_ = STATUS_CLASS[status / 100] || STATUS_CLASS[0]
1058
+ @metrics.observe_histogram(
1059
+ REQUEST_DURATION_HISTOGRAM,
1060
+ duration,
1061
+ [method, template, class_]
1062
+ )
1063
+ rescue StandardError
1064
+ nil
1065
+ end
409
1066
  end
410
1067
  end