hyperion-rb 1.6.2 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4768 -0
  3. data/README.md +222 -13
  4. data/ext/hyperion_h2_codec/Cargo.lock +7 -0
  5. data/ext/hyperion_h2_codec/Cargo.toml +33 -0
  6. data/ext/hyperion_h2_codec/extconf.rb +73 -0
  7. data/ext/hyperion_h2_codec/src/frames.rs +140 -0
  8. data/ext/hyperion_h2_codec/src/hpack/huffman.rs +161 -0
  9. data/ext/hyperion_h2_codec/src/hpack.rs +457 -0
  10. data/ext/hyperion_h2_codec/src/lib.rs +296 -0
  11. data/ext/hyperion_http/extconf.rb +28 -0
  12. data/ext/hyperion_http/h2_codec_glue.c +408 -0
  13. data/ext/hyperion_http/page_cache.c +1125 -0
  14. data/ext/hyperion_http/parser.c +473 -38
  15. data/ext/hyperion_http/sendfile.c +982 -0
  16. data/ext/hyperion_http/websocket.c +493 -0
  17. data/ext/hyperion_io_uring/Cargo.lock +33 -0
  18. data/ext/hyperion_io_uring/Cargo.toml +34 -0
  19. data/ext/hyperion_io_uring/extconf.rb +74 -0
  20. data/ext/hyperion_io_uring/src/lib.rs +316 -0
  21. data/lib/hyperion/adapter/rack.rb +370 -42
  22. data/lib/hyperion/admin_listener.rb +207 -0
  23. data/lib/hyperion/admin_middleware.rb +36 -7
  24. data/lib/hyperion/cli.rb +310 -11
  25. data/lib/hyperion/config.rb +440 -14
  26. data/lib/hyperion/connection.rb +679 -22
  27. data/lib/hyperion/deprecations.rb +81 -0
  28. data/lib/hyperion/dispatch_mode.rb +165 -0
  29. data/lib/hyperion/fiber_local.rb +75 -13
  30. data/lib/hyperion/h2_admission.rb +77 -0
  31. data/lib/hyperion/h2_codec.rb +499 -0
  32. data/lib/hyperion/http/page_cache.rb +122 -0
  33. data/lib/hyperion/http/sendfile.rb +696 -0
  34. data/lib/hyperion/http2/native_hpack_adapter.rb +70 -0
  35. data/lib/hyperion/http2_handler.rb +618 -19
  36. data/lib/hyperion/io_uring.rb +317 -0
  37. data/lib/hyperion/lint_wrapper_pool.rb +126 -0
  38. data/lib/hyperion/master.rb +96 -9
  39. data/lib/hyperion/metrics/path_templater.rb +68 -0
  40. data/lib/hyperion/metrics.rb +256 -0
  41. data/lib/hyperion/prometheus_exporter.rb +150 -0
  42. data/lib/hyperion/request.rb +13 -0
  43. data/lib/hyperion/response_writer.rb +477 -16
  44. data/lib/hyperion/runtime.rb +195 -0
  45. data/lib/hyperion/server/route_table.rb +179 -0
  46. data/lib/hyperion/server.rb +519 -55
  47. data/lib/hyperion/static_preload.rb +133 -0
  48. data/lib/hyperion/thread_pool.rb +61 -7
  49. data/lib/hyperion/tls.rb +343 -1
  50. data/lib/hyperion/version.rb +1 -1
  51. data/lib/hyperion/websocket/close_codes.rb +71 -0
  52. data/lib/hyperion/websocket/connection.rb +876 -0
  53. data/lib/hyperion/websocket/frame.rb +356 -0
  54. data/lib/hyperion/websocket/handshake.rb +525 -0
  55. data/lib/hyperion/worker.rb +111 -9
  56. data/lib/hyperion.rb +137 -3
  57. metadata +50 -1
@@ -2,10 +2,13 @@
2
2
 
3
3
  require 'async'
4
4
  require 'async/notification'
5
+ require 'async/queue'
5
6
  require 'protocol/http2/server'
6
7
  require 'protocol/http2/framer'
7
8
  require 'protocol/http2/stream'
8
9
 
10
+ require_relative 'http2/native_hpack_adapter'
11
+
9
12
  module Hyperion
10
13
  # Real HTTP/2 dispatch driven by `protocol-http2`.
11
14
  #
@@ -131,7 +134,12 @@ module Hyperion
131
134
  #
132
135
  # Single instance per connection, lives for the lifetime of `serve`.
133
136
  class WriterContext
134
- attr_reader :encode_mutex
137
+ attr_reader :encode_mutex, :dispatch_queue
138
+ # 2.10-G — connection-lifecycle timing slots used by the optional h2
139
+ # latency-instrumentation path (gated by `HYPERION_H2_TIMING=1`).
140
+ # Each slot is a single CLOCK_MONOTONIC timestamp captured at most
141
+ # once per connection. nil = unset, set on first observation.
142
+ attr_accessor :t0_serve_entry, :t1_preface_done, :t2_first_encode, :t2_first_wire
135
143
 
136
144
  def initialize(max_pending_bytes: MAX_PER_CONN_PENDING_BYTES)
137
145
  @queue = ::Thread::Queue.new
@@ -142,6 +150,46 @@ module Hyperion
142
150
  @pending_bytes_lock = ::Mutex.new
143
151
  @max_pending_bytes = max_pending_bytes
144
152
  @writer_done = false
153
+ # 2.11-A — pre-spawned dispatch worker pool. The connection-loop
154
+ # fiber pushes ready streams onto `@dispatch_queue`; workers
155
+ # parked on `dequeue` grab them and call `dispatch_stream`. The
156
+ # queue is created here (cheap — wraps a Thread::Queue) so the
157
+ # WriterContext is fully self-contained and unit-testable without
158
+ # an Async reactor.
159
+ @dispatch_queue = ::Async::Queue.new
160
+ @dispatch_worker_count = 0
161
+ @dispatch_worker_lock = ::Mutex.new
162
+ # 2.10-G timing slots, all initially nil so capture is a single
163
+ # `||=` write under the encode mutex / writer fiber.
164
+ @t0_serve_entry = nil
165
+ @t1_preface_done = nil
166
+ @t2_first_encode = nil
167
+ @t2_first_wire = nil
168
+ end
169
+
170
+ # 2.11-A — bench/diagnostics introspection. Reads the live count
171
+ # of dispatch worker fibers parked on (or actively pulling from)
172
+ # `@dispatch_queue`. Reflects pre-spawned workers AND any ad-hoc
173
+ # workers spawned when the pool was saturated. Exposed as a method
174
+ # rather than `attr_reader` so the lock guards the counter.
175
+ def dispatch_worker_count
176
+ @dispatch_worker_lock.synchronize { @dispatch_worker_count }
177
+ end
178
+
179
+ # Called by a dispatch worker fiber when it enters its run loop.
180
+ # Pairs with `unregister_dispatch_worker` in an ensure block.
181
+ def register_dispatch_worker
182
+ @dispatch_worker_lock.synchronize { @dispatch_worker_count += 1 }
183
+ end
184
+
185
+ # Called by a dispatch worker fiber when it exits (queue closed,
186
+ # or unrecoverable error). Floors at 0 to defend against a stray
187
+ # double-unregister — instrumentation must never go negative.
188
+ def unregister_dispatch_worker
189
+ @dispatch_worker_lock.synchronize do
190
+ @dispatch_worker_count -= 1
191
+ @dispatch_worker_count = 0 if @dispatch_worker_count.negative?
192
+ end
145
193
  end
146
194
 
147
195
  # Called by SendQueueIO#write on the calling (encoder) fiber. Enforces
@@ -412,12 +460,270 @@ module Hyperion
412
460
  # MAXIMUM_ALLOWED_WINDOW_SIZE).
413
461
  H2_MAX_WINDOW_SIZE = 0x7FFFFFFF
414
462
 
415
- def initialize(app:, thread_pool: nil, h2_settings: nil)
416
- @app = app
417
- @thread_pool = thread_pool
418
- @h2_settings = h2_settings
419
- @metrics = Hyperion.metrics
420
- @logger = Hyperion.logger
463
+ # 1.7.0 added kwargs:
464
+ # * `runtime:` `Hyperion::Runtime` for metrics/logger
465
+ # isolation (default `Runtime.default`).
466
+ # * `h2_admission:` — Optional `Hyperion::H2Admission` for the
467
+ # per-process stream cap (RFC A7). nil keeps
468
+ # the 1.6.x unbounded behaviour.
469
+ #
470
+ # 2.0.0 (Phase 6b) probed `Hyperion::H2Codec.available?` at
471
+ # construction so the handler knew whether the native HPACK path
472
+ # was operational, but the connection state machine still drove
473
+ # encode/decode through `protocol-http2`'s pure-Ruby Compressor /
474
+ # Decompressor.
475
+ #
476
+ # 2.2.0 (Phase 10 / RFC §3 Phase 6c) ships the wiring infrastructure:
477
+ # {Hyperion::Http2::NativeHpackAdapter} + {#install_native_hpack}
478
+ # replace the per-connection HPACK encode/decode boundary with
479
+ # the Rust crate when AND ONLY WHEN both:
480
+ # 1. `Hyperion::H2Codec.available?` is true (cdylib loaded), AND
481
+ # 2. `ENV['HYPERION_H2_NATIVE_HPACK']` is one of `1`/`true`/`yes`/`on`.
482
+ #
483
+ # The default is OFF because local h2load benchmarking on macOS
484
+ # showed the Fiddle FFI per-call marshalling overhead dominates
485
+ # for typical 3–8-header HEADERS frames — the standalone microbench's
486
+ # 3.26× encode win does not translate to wire wins until the FFI
487
+ # marshalling layer is rewritten to amortize allocation. Keeping the
488
+ # default OFF preserves 2.0.0/2.1.0 behavior; flipping the env var
489
+ # gives operators the swap they want to A/B test in their own env.
490
+ # The framer + stream state machine + flow control + HEADERS /
491
+ # CONTINUATION framing all stay in `protocol-http2`; only the
492
+ # HPACK byte-pump is replaced when the swap is enabled. Frame ser/de
493
+ # in Rust (Phase 6d) is a separate, larger lift.
494
+ def initialize(app:, thread_pool: nil, h2_settings: nil, runtime: nil, h2_admission: nil)
495
+ @app = app
496
+ @thread_pool = thread_pool
497
+ @h2_settings = h2_settings
498
+ if runtime
499
+ @runtime = runtime
500
+ @metrics = runtime.metrics
501
+ @logger = runtime.logger
502
+ else
503
+ # 1.6.x compat path — see Connection#initialize for rationale.
504
+ @runtime = Hyperion::Runtime.default
505
+ @metrics = Hyperion.metrics
506
+ @logger = Hyperion.logger
507
+ end
508
+ @h2_admission = h2_admission
509
+ @h2_codec_available = Hyperion::H2Codec.available?
510
+ # 2.5-B [breaking-default-change]: native HPACK now defaults to ON
511
+ # when the Rust crate is available. The 2026-04-30 Rails-shape
512
+ # bench (`bench/h2_rails_shape.ru`, 25 response headers) measured
513
+ # native v3 at 1,418 r/s vs Ruby fallback 1,201 r/s — **+18.0%**
514
+ # on a header-heavy workload, comfortably above the +15% flip
515
+ # threshold. 2.4-A's hello-shape bench saw parity because HPACK
516
+ # is <1% of per-stream CPU on a 2-header response.
517
+ #
518
+ # 2.11-B — `HYPERION_H2_NATIVE_HPACK` extended with a native-mode
519
+ # axis (`auto` / `cglue` / `v2` / `off`). See `resolve_h2_native_hpack_state`.
520
+ # Operators who want the prior 2.4.x default (Ruby fallback, env
521
+ # var unset) can set `HYPERION_H2_NATIVE_HPACK=off` (or
522
+ # `0`/`false`/`no`/`off`/`ruby`). `HYPERION_H2_NATIVE_HPACK=1`
523
+ # / unset preserves the 2.5-B `auto` behavior. `=cglue`/`=v2`
524
+ # forces the corresponding native sub-path.
525
+ #
526
+ # When OFF (env-overridden): `protocol-http2`'s pure-Ruby HPACK
527
+ # Compressor / Decompressor handles everything as in 2.0.0–2.4.x.
528
+ @h2_native_mode = resolve_h2_native_hpack_state
529
+ @h2_native_hpack_enabled = @h2_codec_available && @h2_native_mode != :off
530
+ apply_h2_cglue_gate(@h2_native_mode)
531
+ @h2_codec_native = @h2_native_hpack_enabled # back-compat ivar — preserved for codec_native? readers
532
+ # 2.10-G — opt-in connection-setup timing instrumentation. When set,
533
+ # `serve` captures four monotonic timestamps per connection:
534
+ #
535
+ # t0 — entry to `serve` (post-TLS, post-ALPN — the socket is already
536
+ # the negotiated h2 SSLSocket by the time the handler sees it)
537
+ # t1 — `read_connection_preface` returned (server-side SETTINGS
538
+ # encoded + handed to the framer; client preface fully read)
539
+ # t2_encode — first stream's HEADERS frame finished encoding (bytes
540
+ # sit in the writer queue)
541
+ # t2_wire — writer fiber finished its first `socket.write` (bytes
542
+ # on the wire)
543
+ #
544
+ # When the connection's first response completes, the handler emits
545
+ # a single `'h2 first-stream timing'` info line with t0→t1, t1→t2_encode,
546
+ # t2_encode→t2_wire deltas in milliseconds. Off by default (zero hot-path
547
+ # cost when disabled — a single ivar read per stream branch). Used by
548
+ # 2.10-G to root-cause Hyperion's flat ~40 ms first-stream max-latency.
549
+ @h2_timing_enabled = env_flag_enabled?('HYPERION_H2_TIMING')
550
+ # 2.11-A — resolve the dispatch worker pool size once at handler
551
+ # construction so every `serve` call uses the same value (instead
552
+ # of re-parsing ENV per connection on the hot path). Cached as an
553
+ # ivar; bench/diagnostics can read it via the spec seam.
554
+ @dispatch_pool_size = resolve_dispatch_pool_size
555
+ record_codec_boot_state
556
+ end
557
+
558
+ # 2.11-A — pre-spawned dispatch worker pool sizing.
559
+ #
560
+ # Default `4` workers per connection — enough to absorb the typical
561
+ # HTTP/2 burst (2-8 concurrent streams) without paying any per-stream
562
+ # `task.async {}` cost on the hot path. Operators on long-lived
563
+ # high-fan-out connections (e.g. an aggregator backend that fans
564
+ # 30+ parallel streams) can bump this with `HYPERION_H2_DISPATCH_POOL`.
565
+ # Streams that arrive when the pool is saturated still get an ad-hoc
566
+ # fiber (see `serve` below) so concurrency is never artificially
567
+ # capped — the operator-facing limit is `h2.max_concurrent_streams`.
568
+ #
569
+ # Ceiling at 16 guards against a pathological config that would
570
+ # spawn hundreds of idle fibers per accepted connection. Anything
571
+ # malformed / non-positive falls back to the default rather than
572
+ # crashing the connection — this is a tuning knob, not a spec
573
+ # parameter.
574
+ DISPATCH_POOL_DEFAULT = 4
575
+ DISPATCH_POOL_MAX = 16
576
+
577
+ def resolve_dispatch_pool_size
578
+ raw = ENV['HYPERION_H2_DISPATCH_POOL']
579
+ return DISPATCH_POOL_DEFAULT if raw.nil? || raw.strip.empty?
580
+
581
+ n = Integer(raw.strip, 10)
582
+ return DISPATCH_POOL_DEFAULT unless n.positive?
583
+
584
+ [n, DISPATCH_POOL_MAX].min
585
+ rescue ArgumentError, TypeError
586
+ DISPATCH_POOL_DEFAULT
587
+ end
588
+
589
+ # Read an env-var flag with the usual truthiness rules (any of
590
+ # 1/true/yes/on, case-insensitive). Anything else → false.
591
+ def env_flag_enabled?(name)
592
+ v = ENV[name]
593
+ return false if v.nil? || v.empty?
594
+
595
+ %w[1 true yes on].include?(v.downcase)
596
+ end
597
+
598
+ # 2.11-B — resolve the operator-requested native-mode state from
599
+ # `HYPERION_H2_NATIVE_HPACK`.
600
+ #
601
+ # Returns one of:
602
+ # * `:auto` — native enabled, prefer cglue if available
603
+ # (unset / `1` / `true` / `yes` / `on` / `auto`)
604
+ # * `:cglue` — native enabled, force cglue (warn-fallback to v2
605
+ # if cglue is unavailable; native_mode log marker
606
+ # surfaces the divergence to the operator)
607
+ # * `:v2` — native enabled, force Fiddle (skip cglue even if
608
+ # available; this is the bench-isolation knob the
609
+ # 2.11-B Rails-shape harness needs)
610
+ # * `:off` — ruby fallback (`0` / `false` / `no` / `off` / `ruby`)
611
+ #
612
+ # Unknown values fall through to `:auto` rather than crashing the
613
+ # connection — same forgiving-default policy as the pre-2.11-B
614
+ # `resolve_h2_native_hpack_default`.
615
+ def resolve_h2_native_hpack_state
616
+ v = ENV['HYPERION_H2_NATIVE_HPACK']
617
+ return :auto if v.nil? || v.empty?
618
+
619
+ lc = v.downcase
620
+ return :off if %w[0 false no off ruby].include?(lc)
621
+ return :cglue if %w[cglue v3].include?(lc)
622
+ return :v2 if %w[v2 fiddle].include?(lc)
623
+
624
+ :auto
625
+ end
626
+
627
+ # 2.11-B — flip the global `H2Codec.cglue_disabled` gate based on
628
+ # the resolved native-mode state. The gate is per-process state
629
+ # (the codec module is a singleton) so reset it on every handler
630
+ # construction; otherwise a test that booted with `=v2` would leak
631
+ # the disable into a subsequent default-mode handler.
632
+ def apply_h2_cglue_gate(state)
633
+ Hyperion::H2Codec.cglue_disabled = (state == :v2)
634
+ end
635
+
636
+ # 2.0.0 Phase 6b: emit a single-shot boot log line per process
637
+ # describing the codec selection. Operators reading the boot log
638
+ # see whether the native HPACK path is in play. Idempotent across
639
+ # multiple Http2Handler constructions in the same process.
640
+ def record_codec_boot_state
641
+ return if Hyperion::Http2Handler.instance_variable_get(:@codec_state_logged)
642
+
643
+ Hyperion::Http2Handler.instance_variable_set(:@codec_state_logged, true)
644
+ # 2.11-B — `cglue_active` gates on the operator-controllable
645
+ # `cglue_active?` predicate (was `cglue_available?` pre-2.11-B).
646
+ # When the operator sets `=v2` we want the boot log to read
647
+ # `cglue_active: false` even though the C glue did install
648
+ # successfully — the bench harness inspects this field to
649
+ # differentiate the variants.
650
+ cglue_active = @h2_native_hpack_enabled && Hyperion::H2Codec.cglue_active?
651
+ cglue_requested_unavailable = @h2_native_mode == :cglue &&
652
+ @h2_native_hpack_enabled &&
653
+ !Hyperion::H2Codec.cglue_available?
654
+ mode = describe_codec_mode(cglue_active: cglue_active,
655
+ cglue_requested_unavailable: cglue_requested_unavailable)
656
+ native_mode_log = if !@h2_native_hpack_enabled
657
+ @h2_native_mode == :off ? 'off' : 'native-disabled'
658
+ elsif cglue_requested_unavailable
659
+ 'cglue-requested-unavailable'
660
+ else
661
+ @h2_native_mode.to_s
662
+ end
663
+ @logger.info do
664
+ {
665
+ message: 'h2 codec selected',
666
+ mode: mode,
667
+ native_available: @h2_codec_available,
668
+ native_enabled: @h2_native_hpack_enabled,
669
+ native_mode: native_mode_log,
670
+ cglue_active: cglue_active,
671
+ hpack_path: if @h2_native_hpack_enabled
672
+ cglue_active ? 'native-v3' : 'native-v2'
673
+ else
674
+ 'pure-ruby'
675
+ end
676
+ }
677
+ end
678
+ @metrics.increment(:h2_codec_native_selected) if @h2_native_hpack_enabled
679
+ @metrics.increment(:h2_codec_fallback_selected) unless @h2_native_hpack_enabled
680
+ end
681
+
682
+ # 2.11-B — boot-log mode descriptor (extracted for clarity since
683
+ # the matrix of native_mode × cglue_available × cglue_active grew
684
+ # past the point where an inline conditional was readable).
685
+ def describe_codec_mode(cglue_active:, cglue_requested_unavailable:)
686
+ if !@h2_native_hpack_enabled
687
+ if @h2_codec_available
688
+ 'fallback (protocol-http2 / pure Ruby HPACK) — native available but opted out via HYPERION_H2_NATIVE_HPACK=off'
689
+ else
690
+ 'fallback (protocol-http2 / pure Ruby HPACK) — native unavailable'
691
+ end
692
+ elsif cglue_active && @h2_native_mode == :cglue
693
+ 'native (Rust v3 / CGlue, forced) — HPACK on hot path, no Fiddle per call'
694
+ elsif cglue_active
695
+ # 2.11-B confirmed cglue as the firm default — the bench-measured
696
+ # delta vs the v2 (Fiddle) path is +33-43% on Rails-shape h2
697
+ # responses, which is the actual win the 2.5-B "+18% native vs
698
+ # ruby" headline was capturing (v2 alone is +1-5%, basically
699
+ # noise vs the ruby fallback at this header count).
700
+ 'native (Rust v3 / CGlue, default since 2.11-B) — HPACK on hot path, no Fiddle per call'
701
+ elsif @h2_native_mode == :v2
702
+ 'native (Rust v2 / Fiddle, forced) — HPACK on hot path, Fiddle marshalling per call'
703
+ elsif cglue_requested_unavailable
704
+ 'native (Rust v2 / Fiddle) — CGlue requested via HYPERION_H2_NATIVE_HPACK=cglue but unavailable, fell back'
705
+ else
706
+ 'native (Rust v2 / Fiddle) — HPACK on hot path, Fiddle marshalling per call'
707
+ end
708
+ end
709
+
710
+ # Read-only accessor used by tests + diagnostics. true = the
711
+ # `Hyperion::H2Codec` Rust extension loaded successfully AND
712
+ # `HYPERION_H2_NATIVE_HPACK=1` is set, so `build_server` will
713
+ # wire the native adapter onto every new connection's
714
+ # `encode_headers` / `decode_headers` boundary. The 2.2.0 default
715
+ # is false (opt-in) — see `#initialize` for the rationale and the
716
+ # bench numbers in CHANGELOG/docs that pinned the default off.
717
+ def codec_native?
718
+ @h2_native_hpack_enabled
719
+ end
720
+
721
+ # True when the Rust crate loaded successfully, regardless of
722
+ # whether the operator opted in to wiring it into the wire path.
723
+ # Useful for diagnostics/health endpoints that want to surface
724
+ # "native is available but currently disabled".
725
+ def codec_available?
726
+ @h2_codec_available
421
727
  end
422
728
 
423
729
  def serve(socket)
@@ -431,8 +737,21 @@ module Hyperion
431
737
  framer = ::Protocol::HTTP2::Framer.new(send_io)
432
738
  server = build_server(framer)
433
739
 
740
+ # 2.10-G — connection entry timestamp. Captured before any framing
741
+ # work so the t0→t1 delta isolates "preface exchange + initial
742
+ # SETTINGS round-trip" from any pre-handler scheduling delay.
743
+ writer_ctx.t0_serve_entry = monotonic_now if @h2_timing_enabled
744
+
434
745
  task = ::Async::Task.current
435
746
 
747
+ # 2.11-A — extract the peer address BEFORE the preface exchange.
748
+ # Two wins: (1) the lookup runs in parallel with the writer fiber
749
+ # picking up the first scheduler slot, and (2) the first stream's
750
+ # dispatch fiber doesn't pay this `peeraddr` syscall on its hot
751
+ # path. The address is then captured by the worker closures
752
+ # below.
753
+ peer_addr = peer_address(socket)
754
+
436
755
  # Spawn the dedicated writer fiber BEFORE the preface exchange.
437
756
  # `Server#read_connection_preface` writes the server's SETTINGS frame
438
757
  # via the framer; if the writer isn't running, those bytes sit in the
@@ -441,14 +760,23 @@ module Hyperion
441
760
  # waits for our SETTINGS before sending more frames.
442
761
  writer_task = task.async { run_writer_loop(socket, writer_ctx) }
443
762
 
444
- server.read_connection_preface(initial_settings_payload)
763
+ # 2.11-A — pre-spawn the dispatch worker pool BEFORE the preface
764
+ # exchange. Workers park on `writer_ctx.dispatch_queue.dequeue`;
765
+ # by the time the first client HEADERS frame arrives the workers
766
+ # are already in the scheduler's runnable set. The first stream
767
+ # is just an enqueue + dequeue (microseconds) instead of a
768
+ # `task.async {}` cold spawn (was the dominant cost in the t1→t2_enc
769
+ # bucket per the 2.10-G timing breakdown).
770
+ warmup_dispatch_pool!(task, writer_ctx, peer_addr: peer_addr,
771
+ pool_size: @dispatch_pool_size)
445
772
 
446
- # Extract once — the same TCP peer drives every stream on this conn.
447
- peer_addr = peer_address(socket)
773
+ server.read_connection_preface(initial_settings_payload)
774
+ writer_ctx.t1_preface_done = monotonic_now if @h2_timing_enabled
448
775
 
449
- # Track in-flight per-stream dispatch fibers so we can drain them on
450
- # connection close.
451
- stream_tasks = []
776
+ # Track ad-hoc per-stream dispatch fibers (spilled when the pool is
777
+ # saturated). The pool handles the common case; we only fall back
778
+ # to `task.async {}` when more streams arrive than warm workers.
779
+ overflow_tasks = []
452
780
 
453
781
  until server.closed?
454
782
  ready_ids = []
@@ -467,14 +795,35 @@ module Hyperion
467
795
  # if subsequent frames (e.g. RST_STREAM races) arrive.
468
796
  stream.instance_variable_set(:@hyperion_dispatched, true)
469
797
 
470
- stream_tasks << task.async do
471
- dispatch_stream(stream, writer_ctx, peer_addr)
798
+ # 2.11-A — hand the stream to a warm worker via the dispatch
799
+ # queue. We use a simple "queue is empty" probe to decide:
800
+ #
801
+ # * Empty queue ⇒ at least one worker is parked on
802
+ # `dequeue`; the enqueue+dequeue handoff is microseconds
803
+ # and we avoid a `task.async {}` cold spawn. This is the
804
+ # hot path for the FIRST stream of a fresh connection
805
+ # (the case 2.11-A is targeting).
806
+ # * Non-empty queue ⇒ every parked worker has already
807
+ # pulled a stream; another worker won't pick this up
808
+ # until one finishes. To avoid head-of-line blocking
809
+ # behind the warmup pool, fall back to `task.async {}`.
810
+ # The overflow fiber re-uses `dispatch_stream` so the
811
+ # dispatch contract is identical between pool and
812
+ # overflow paths. Concurrency is never artificially
813
+ # capped; the operator-facing knob is
814
+ # `h2.max_concurrent_streams`.
815
+ if writer_ctx.dispatch_queue.size.zero?
816
+ writer_ctx.dispatch_queue.enqueue(stream)
817
+ else
818
+ overflow_tasks << task.async do
819
+ dispatch_stream(stream, writer_ctx, peer_addr)
820
+ end
472
821
  end
473
822
  end
474
823
  end
475
824
 
476
825
  # Drain in-flight stream dispatches before we close the socket.
477
- stream_tasks.each do |t|
826
+ overflow_tasks.each do |t|
478
827
  t.wait
479
828
  rescue StandardError
480
829
  nil
@@ -498,12 +847,30 @@ module Hyperion
498
847
  # socket before the writer drains would discard final RST_STREAM /
499
848
  # GOAWAY / END_STREAM frames in the queue.
500
849
  if writer_ctx
850
+ # 2.11-A — close the dispatch queue so any pre-spawned workers
851
+ # parked on `dequeue` fall through (Async::Queue#dequeue returns
852
+ # nil after close). Do this BEFORE waiting on the writer so
853
+ # pool workers can drain their in-flight stream dispatches and
854
+ # release the encode mutex; otherwise the writer might park
855
+ # waiting for bytes that the dispatch worker never gets to
856
+ # encode.
857
+ begin
858
+ writer_ctx.dispatch_queue.close unless writer_ctx.dispatch_queue.closed?
859
+ rescue StandardError
860
+ nil
861
+ end
501
862
  writer_ctx.shutdown!
502
863
  begin
503
864
  writer_task&.wait
504
865
  rescue StandardError
505
866
  nil
506
867
  end
868
+ # 2.10-G — emit one info-level timing line per connection when the
869
+ # opt-in instrumentation is enabled and we collected a full set of
870
+ # samples (a connection that died before serving any stream lacks
871
+ # t2_first_encode / t2_first_wire and gets skipped — there's no
872
+ # first-stream signal to report).
873
+ log_h2_first_stream_timing(writer_ctx) if @h2_timing_enabled
507
874
  end
508
875
  @metrics.decrement(:connections_active)
509
876
  socket.close unless socket.closed?
@@ -511,6 +878,63 @@ module Hyperion
511
878
 
512
879
  private
513
880
 
881
+ # 2.11-A — pre-spawn the per-connection dispatch worker pool.
882
+ #
883
+ # Each worker is a fiber that loops:
884
+ # 1. `dequeue` a stream from the per-connection dispatch queue
885
+ # (parks the fiber on the queue's internal notification when
886
+ # empty — zero CPU until a stream arrives).
887
+ # 2. Calls `dispatch_stream` with the stream + writer context +
888
+ # pre-resolved peer address.
889
+ # 3. Loops back to (1). Exits cleanly when `dequeue` returns nil
890
+ # (queue closed by `serve`'s ensure block on connection
891
+ # teardown).
892
+ #
893
+ # Why pre-spawn rather than `task.async {}` per stream:
894
+ # * Fiber startup under Async involves a few µs of allocation and
895
+ # scheduler bookkeeping. Per-stream that's negligible; on the
896
+ # CONNECTION COLD PATH (first request on a fresh TCP/TLS conn)
897
+ # it adds up to a measurable share of the t1→t2_enc bucket
898
+ # (the 2.10-G timing breakdown showed ~12-25 ms on h2load
899
+ # `-c 1 -m 100 -n 5000`).
900
+ # * Workers parked on `dequeue` are already in the scheduler's
901
+ # ready set; the first stream is just an enqueue + dequeue
902
+ # handoff (microseconds).
903
+ #
904
+ # Errors inside `dispatch_stream` are already caught + RST_STREAMed
905
+ # there, so the worker only needs to defend against truly
906
+ # unexpected failures (queue shutdown races, fiber kill on graceful
907
+ # shutdown). We swallow those defensively and unregister so the
908
+ # `dispatch_worker_count` introspection is truthful.
909
+ def warmup_dispatch_pool!(task, writer_ctx, peer_addr:, pool_size:)
910
+ pool_size.times do
911
+ task.async do
912
+ writer_ctx.register_dispatch_worker
913
+ begin
914
+ loop do
915
+ stream = writer_ctx.dispatch_queue.dequeue
916
+ break if stream.nil? # queue closed → graceful exit
917
+
918
+ begin
919
+ dispatch_stream(stream, writer_ctx, peer_addr)
920
+ rescue StandardError => e
921
+ # `dispatch_stream` already logs + RST_STREAMs internally;
922
+ # if anything escapes that net we log here and keep the
923
+ # worker alive — one bad stream must not poison the
924
+ # connection's worker pool.
925
+ @logger.error do
926
+ { message: 'h2 dispatch worker swallowed error',
927
+ error: e.message, error_class: e.class.name }
928
+ end
929
+ end
930
+ end
931
+ ensure
932
+ writer_ctx.unregister_dispatch_worker
933
+ end
934
+ end
935
+ end
936
+ end
937
+
514
938
  # Build the [setting_id, value] pairs that go in the connection-preface
515
939
  # SETTINGS frame. protocol-http2's Server#read_connection_preface accepts
516
940
  # this array and does the wire encoding for us. Empty array (no overrides
@@ -576,6 +1000,7 @@ module Hyperion
576
1000
 
577
1001
  def build_server(framer)
578
1002
  server = ::Protocol::HTTP2::Server.new(framer)
1003
+ install_native_hpack(server) if @h2_native_hpack_enabled
579
1004
  server.define_singleton_method(:accept_stream) do |stream_id, &block|
580
1005
  unless valid_remote_stream_id?(stream_id)
581
1006
  raise ::Protocol::HTTP2::ProtocolError, "Invalid stream id: #{stream_id}"
@@ -590,6 +1015,53 @@ module Hyperion
590
1015
  server
591
1016
  end
592
1017
 
1018
+ # Phase 10 (Phase 6c): swap the per-connection HPACK encode/decode
1019
+ # entry points to route through the Rust crate. We replace
1020
+ # `encode_headers` / `decode_headers` on the `Protocol::HTTP2::Server`
1021
+ # instance via singleton methods — protocol-http2's framer + stream
1022
+ # state machine call `connection.encode_headers(headers, buffer)` and
1023
+ # `connection.decode_headers(data)` whenever HEADERS / CONTINUATION
1024
+ # frames cross the wire, so this is exactly the boundary where the
1025
+ # native codec slots in. The adapter holds one Encoder + one Decoder
1026
+ # for this connection; their dynamic tables persist across all
1027
+ # HEADERS frames in their respective directions, matching RFC 7541's
1028
+ # per-direction HPACK context model.
1029
+ #
1030
+ # The Ruby `@encoder` / `@decoder` Context ivars on the
1031
+ # `Protocol::HTTP2::Connection` superclass remain in place but are
1032
+ # never consulted — the singleton-method overrides shortcut past
1033
+ # them. That's safe: protocol-http2 only touches those Contexts
1034
+ # through `encode_headers` / `decode_headers`, which we now own.
1035
+ #
1036
+ # If the substitution surface ever shifts in protocol-http2 (e.g.
1037
+ # a future version inlines the call), this method becomes a no-op
1038
+ # safely — `define_singleton_method` doesn't fail when the parent
1039
+ # method is absent, but downstream calls would. The codec-boot log
1040
+ # makes the substitution observable, so a regression would surface
1041
+ # quickly via the integration spec.
1042
+ def install_native_hpack(server)
1043
+ adapter = Hyperion::Http2::NativeHpackAdapter.new
1044
+ server.define_singleton_method(:encode_headers) do |headers, buffer = String.new.b|
1045
+ adapter.encode_headers(headers, buffer)
1046
+ end
1047
+ server.define_singleton_method(:decode_headers) do |data|
1048
+ adapter.decode_headers(data)
1049
+ end
1050
+ # Stash the adapter so introspection (and the encode-mutex synchronisation
1051
+ # boundary, since adapter state is mutated under it) can reach it.
1052
+ server.instance_variable_set(:@hyperion_native_hpack, adapter)
1053
+ adapter
1054
+ rescue StandardError => e
1055
+ # Defence in depth: if the adapter ctor fails for any reason, log and
1056
+ # fall back to protocol-http2's Ruby Compressor/Decompressor. Better
1057
+ # than crashing the connection on first HEADERS frame.
1058
+ @logger.warn do
1059
+ { message: 'h2 native hpack install failed; falling back to Ruby HPACK',
1060
+ error: e.class.name, detail: e.message }
1061
+ end
1062
+ nil
1063
+ end
1064
+
593
1065
  def dispatch_stream(stream, writer_ctx, peer_addr = nil)
594
1066
  # RFC 7540 §8.1.2 — header validation flagged this stream as malformed.
595
1067
  # Send RST_STREAM PROTOCOL_ERROR instead of invoking the app.
@@ -608,6 +1080,25 @@ module Hyperion
608
1080
  return
609
1081
  end
610
1082
 
1083
+ # RFC A7: process-wide stream admission control. nil admission =
1084
+ # unbounded (current behaviour). When the cap is hit we send
1085
+ # REFUSED_STREAM (RFC 7540 §11 / RFC 9113 §5.4.1) — the spec-
1086
+ # defined response for "this stream cannot be processed; client
1087
+ # may retry on a different stream id". Bumps a counter so
1088
+ # operators can alert on sustained refusal volume.
1089
+ if @h2_admission && !@h2_admission.admit
1090
+ @metrics.increment(:h2_streams_refused)
1091
+ begin
1092
+ writer_ctx.encode_mutex.synchronize do
1093
+ stream.send_reset_stream(::Protocol::HTTP2::Error::REFUSED_STREAM) unless stream.closed?
1094
+ end
1095
+ rescue StandardError
1096
+ nil
1097
+ end
1098
+ return
1099
+ end
1100
+ h2_admitted = [email protected]?
1101
+
611
1102
  pseudo, regular = partition_pseudo(stream.request_headers)
612
1103
 
613
1104
  method = pseudo[':method'] || 'GET'
@@ -630,11 +1121,24 @@ module Hyperion
630
1121
 
631
1122
  @metrics.increment(:requests_total)
632
1123
  @metrics.increment(:requests_in_flight)
1124
+ # 2.1.0 (WS-1): HTTP/2 hijack is intentionally NOT plumbed here.
1125
+ # Rack 3 hijack over HTTP/2 requires Extended CONNECT (RFC 8441 +
1126
+ # RFC 9220) — a separate feature with its own SETTINGS handshake,
1127
+ # :protocol pseudo-header, and stream lifetime semantics. The
1128
+ # 2.1.0 scope is HTTP/1.1 hijack only (env['rack.hijack?'] returns
1129
+ # false on h2 streams because we don't pass `connection:` here).
1130
+ # If a Rack app keys on rack.hijack? to choose a transport, the h2
1131
+ # branch will fall through to its non-hijack path. See WS-2..WS-5
1132
+ # for the full WebSocket roadmap.
633
1133
  status, response_headers, body_chunks = begin
634
1134
  if @thread_pool
635
1135
  @thread_pool.call(@app, request)
636
1136
  else
637
- Hyperion::Adapter::Rack.call(@app, request)
1137
+ # 2.5-C — pass the handler's Runtime so per-request hooks
1138
+ # fire on h2 streams too. Multi-tenant deployments rely on
1139
+ # this to keep tracing context per-server even on the h2
1140
+ # path that doesn't go through Connection#call_app.
1141
+ Hyperion::Adapter::Rack.call(@app, request, runtime: @runtime)
638
1142
  end
639
1143
  ensure
640
1144
  @metrics.decrement(:requests_in_flight)
@@ -655,8 +1159,27 @@ module Hyperion
655
1159
  body_chunks.each { |c| payload << c.to_s }
656
1160
  body_chunks.close if body_chunks.respond_to?(:close)
657
1161
 
658
- writer_ctx.encode_mutex.synchronize { stream.send_headers(out_headers) }
659
- send_body(stream, payload, writer_ctx)
1162
+ # Hotfix C2: empty-body responses (RFC 7230 §3.3.3 204/304 + HEAD)
1163
+ # MUST NOT carry a DATA frame. Folding END_STREAM onto the HEADERS
1164
+ # frame collapses the response to one encoder-mutex acquisition and
1165
+ # one writer-fiber wakeup instead of two. Any body the app returned
1166
+ # for HEAD is discarded here per spec (the bytes were already
1167
+ # built — that's a Rack-app smell, not our problem to fix).
1168
+ if body_suppressed?(method, status)
1169
+ writer_ctx.encode_mutex.synchronize do
1170
+ stream.send_headers(out_headers, ::Protocol::HTTP2::END_STREAM)
1171
+ end
1172
+ else
1173
+ writer_ctx.encode_mutex.synchronize { stream.send_headers(out_headers) }
1174
+ send_body(stream, payload, writer_ctx)
1175
+ end
1176
+ # 2.10-G — first stream's HEADERS+DATA encoded. Capture exactly once
1177
+ # per connection: the assignment is nil-guarded, so once set, later
1178
+ # fibers skip it (a rare simultaneous race can only double-write a
1179
+ # near-identical timestamp). For h2load `-c 1 -m 100 -n 5000` the
1180
+ # first stream is stream id 1, the only one that pays the
1181
+ # connection-setup cost; later streams see the guard fail and skip.
1182
+ writer_ctx.t2_first_encode = monotonic_now if @h2_timing_enabled && writer_ctx.t2_first_encode.nil?
660
1183
  @metrics.increment_status(status)
661
1184
  rescue StandardError => e
662
1185
  @metrics.increment(:app_errors)
@@ -675,6 +1198,26 @@ module Hyperion
675
1198
  rescue StandardError
676
1199
  nil
677
1200
  end
1201
+ ensure
1202
+ # Release the admission slot once the stream's served (success or
1203
+ # error). h2_admitted is local-set above the slot acquisition, so
1204
+ # the protocol-error / pre-admission early-returns above don't
1205
+ # double-release.
1206
+ @h2_admission.release if defined?(h2_admitted) && h2_admitted
1207
+ end
1208
+
1209
+ # RFC 7230 §3.3.3: status codes that prohibit a response body, plus
1210
+ # the HEAD method which always suppresses the body regardless of what
1211
+ # the application returned. The h2 dispatch path uses this to fold
1212
+ # END_STREAM onto the HEADERS frame and skip the DATA-frame write
1213
+ # entirely (see Hotfix C2).
1214
+ BODY_SUPPRESSED_STATUSES = [204, 304].freeze
1215
+
1216
+ def body_suppressed?(method, status)
1217
+ return true if BODY_SUPPRESSED_STATUSES.include?(status)
1218
+ return true if method == 'HEAD'
1219
+
1220
+ false
678
1221
  end
679
1222
 
680
1223
  # Send the response body, respecting the peer's max frame size and
@@ -731,6 +1274,13 @@ module Hyperion
731
1274
  while (chunk = writer_ctx.try_pop)
732
1275
  begin
733
1276
  socket.write(chunk)
1277
+ # 2.10-G — first byte on the wire. Capture exactly once per
1278
+ # connection (the first chunk drained is the server's
1279
+ # connection-preface SETTINGS frame; we want the t1→t2_wire
1280
+ # delta to bracket "preface bytes encoded → preface bytes on
1281
+ # the socket". The expensive HEADERS+DATA enqueue happens
1282
+ # later under t2_first_encode.)
1283
+ writer_ctx.t2_first_wire = monotonic_now if @h2_timing_enabled && writer_ctx.t2_first_wire.nil?
734
1284
  rescue EOFError, Errno::ECONNRESET, Errno::EPIPE, IOError, OpenSSL::SSL::SSLError
735
1285
  # Peer hung up. Release THIS chunk's byte budget, then drain the
736
1286
  # rest of the queue (without writing) so backpressured encoders
@@ -775,6 +1325,55 @@ module Hyperion
775
1325
  end
776
1326
  end
777
1327
 
1328
+ # 2.10-G — small helper so the four timing call sites in `serve`,
1329
+ # `dispatch_stream`, and `run_writer_loop` agree on the clock source.
1330
+ # CLOCK_MONOTONIC is unaffected by NTP jumps and is what the rest of
1331
+ # the gem uses for elapsed-time math (see Connection#serve).
1332
+ def monotonic_now
1333
+ Process.clock_gettime(Process::CLOCK_MONOTONIC)
1334
+ end
1335
+
1336
+ # 2.10-G — assemble + emit the per-connection timing breakdown that
1337
+ # the bench harness greps for. Three deltas are reported in
1338
+ # milliseconds:
1339
+ #
1340
+ # t0_to_t1_ms — preface exchange (read client preface + write
1341
+ # server SETTINGS into the framer queue)
1342
+ # t1_to_t2_enc_ms — gap between preface complete and first stream's
1343
+ # HEADERS+DATA encoded. If this is the dominant
1344
+ # bucket, the framer-fiber priming / first-stream
1345
+ # scheduling is the suspect.
1346
+ # t2_enc_to_t2_wire_ms — encode-complete to writer drained first
1347
+ # chunk on the wire. Should be near-zero on
1348
+ # a healthy connection (writer fiber is
1349
+ # already running, parked on @send_notify).
1350
+ # A large value here = writer-fiber
1351
+ # starvation under the Async scheduler.
1352
+ #
1353
+ # Skipped when any timestamp is missing (connection died before
1354
+ # serving a stream / instrumentation was disabled mid-flight).
1355
+ def log_h2_first_stream_timing(writer_ctx)
1356
+ t0 = writer_ctx.t0_serve_entry
1357
+ t1 = writer_ctx.t1_preface_done
1358
+ t2_enc = writer_ctx.t2_first_encode
1359
+ t2_wire = writer_ctx.t2_first_wire
1360
+ return if t0.nil? || t1.nil? || t2_enc.nil? || t2_wire.nil?
1361
+
1362
+ @logger.info do
1363
+ {
1364
+ message: 'h2 first-stream timing',
1365
+ t0_to_t1_ms: ((t1 - t0) * 1000).round(3),
1366
+ t1_to_t2_enc_ms: ((t2_enc - t1) * 1000).round(3),
1367
+ t2_enc_to_t2_wire_ms: ((t2_wire - t2_enc) * 1000).round(3),
1368
+ t0_to_t2_wire_ms: ((t2_wire - t0) * 1000).round(3)
1369
+ }
1370
+ end
1371
+ rescue StandardError
1372
+ # Logging the timing breakdown must never crash the connection
1373
+ # teardown path — instrumentation is best-effort.
1374
+ nil
1375
+ end
1376
+
778
1377
  # Mirrors Connection#peer_address — see the comment there. SSLSocket
779
1378
  # wraps a TCPSocket; both expose #peeraddr after handshake.
780
1379
  def peer_address(socket)