hyperion-rb 1.6.2 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4768 -0
  3. data/README.md +222 -13
  4. data/ext/hyperion_h2_codec/Cargo.lock +7 -0
  5. data/ext/hyperion_h2_codec/Cargo.toml +33 -0
  6. data/ext/hyperion_h2_codec/extconf.rb +73 -0
  7. data/ext/hyperion_h2_codec/src/frames.rs +140 -0
  8. data/ext/hyperion_h2_codec/src/hpack/huffman.rs +161 -0
  9. data/ext/hyperion_h2_codec/src/hpack.rs +457 -0
  10. data/ext/hyperion_h2_codec/src/lib.rs +296 -0
  11. data/ext/hyperion_http/extconf.rb +28 -0
  12. data/ext/hyperion_http/h2_codec_glue.c +408 -0
  13. data/ext/hyperion_http/page_cache.c +1125 -0
  14. data/ext/hyperion_http/parser.c +473 -38
  15. data/ext/hyperion_http/sendfile.c +982 -0
  16. data/ext/hyperion_http/websocket.c +493 -0
  17. data/ext/hyperion_io_uring/Cargo.lock +33 -0
  18. data/ext/hyperion_io_uring/Cargo.toml +34 -0
  19. data/ext/hyperion_io_uring/extconf.rb +74 -0
  20. data/ext/hyperion_io_uring/src/lib.rs +316 -0
  21. data/lib/hyperion/adapter/rack.rb +370 -42
  22. data/lib/hyperion/admin_listener.rb +207 -0
  23. data/lib/hyperion/admin_middleware.rb +36 -7
  24. data/lib/hyperion/cli.rb +310 -11
  25. data/lib/hyperion/config.rb +440 -14
  26. data/lib/hyperion/connection.rb +679 -22
  27. data/lib/hyperion/deprecations.rb +81 -0
  28. data/lib/hyperion/dispatch_mode.rb +165 -0
  29. data/lib/hyperion/fiber_local.rb +75 -13
  30. data/lib/hyperion/h2_admission.rb +77 -0
  31. data/lib/hyperion/h2_codec.rb +499 -0
  32. data/lib/hyperion/http/page_cache.rb +122 -0
  33. data/lib/hyperion/http/sendfile.rb +696 -0
  34. data/lib/hyperion/http2/native_hpack_adapter.rb +70 -0
  35. data/lib/hyperion/http2_handler.rb +618 -19
  36. data/lib/hyperion/io_uring.rb +317 -0
  37. data/lib/hyperion/lint_wrapper_pool.rb +126 -0
  38. data/lib/hyperion/master.rb +96 -9
  39. data/lib/hyperion/metrics/path_templater.rb +68 -0
  40. data/lib/hyperion/metrics.rb +256 -0
  41. data/lib/hyperion/prometheus_exporter.rb +150 -0
  42. data/lib/hyperion/request.rb +13 -0
  43. data/lib/hyperion/response_writer.rb +477 -16
  44. data/lib/hyperion/runtime.rb +195 -0
  45. data/lib/hyperion/server/route_table.rb +179 -0
  46. data/lib/hyperion/server.rb +519 -55
  47. data/lib/hyperion/static_preload.rb +133 -0
  48. data/lib/hyperion/thread_pool.rb +61 -7
  49. data/lib/hyperion/tls.rb +343 -1
  50. data/lib/hyperion/version.rb +1 -1
  51. data/lib/hyperion/websocket/close_codes.rb +71 -0
  52. data/lib/hyperion/websocket/connection.rb +876 -0
  53. data/lib/hyperion/websocket/frame.rb +356 -0
  54. data/lib/hyperion/websocket/handshake.rb +525 -0
  55. data/lib/hyperion/worker.rb +111 -9
  56. data/lib/hyperion.rb +137 -3
  57. metadata +50 -1
@@ -2,10 +2,13 @@
2
2
 
3
3
  require 'async'
4
4
  require 'async/notification'
5
+ require 'async/queue'
5
6
  require 'protocol/http2/server'
6
7
  require 'protocol/http2/framer'
7
8
  require 'protocol/http2/stream'
8
9
 
10
+ require_relative 'http2/native_hpack_adapter'
11
+
9
12
  module Hyperion
10
13
  # Real HTTP/2 dispatch driven by `protocol-http2`.
11
14
  #
@@ -131,7 +134,12 @@ module Hyperion
131
134
  #
132
135
  # Single instance per connection, lives for the lifetime of `serve`.
133
136
  class WriterContext
134
- attr_reader :encode_mutex
137
+ attr_reader :encode_mutex, :dispatch_queue
138
+ # 2.10-G — connection-lifecycle timing slots used by the optional h2
139
+ # latency-instrumentation path (gated by `HYPERION_H2_TIMING=1`).
140
+ # Each slot is a single CLOCK_MONOTONIC timestamp captured at most
141
+ # once per connection. nil = unset, set on first observation.
142
+ attr_accessor :t0_serve_entry, :t1_preface_done, :t2_first_encode, :t2_first_wire
135
143
 
136
144
  def initialize(max_pending_bytes: MAX_PER_CONN_PENDING_BYTES)
137
145
  @queue = ::Thread::Queue.new
@@ -142,6 +150,46 @@ module Hyperion
142
150
  @pending_bytes_lock = ::Mutex.new
143
151
  @max_pending_bytes = max_pending_bytes
144
152
  @writer_done = false
153
+ # 2.11-A — pre-spawned dispatch worker pool. The connection-loop
154
+ # fiber pushes ready streams onto `@dispatch_queue`; workers
155
+ # parked on `dequeue` grab them and call `dispatch_stream`. The
156
+ # queue is created here (cheap — wraps a Thread::Queue) so the
157
+ # WriterContext is fully self-contained and unit-testable without
158
+ # an Async reactor.
159
+ @dispatch_queue = ::Async::Queue.new
160
+ @dispatch_worker_count = 0
161
+ @dispatch_worker_lock = ::Mutex.new
162
+ # 2.10-G timing slots, all initially nil so capture is a single
163
+ # `||=` write under the encode mutex / writer fiber.
164
+ @t0_serve_entry = nil
165
+ @t1_preface_done = nil
166
+ @t2_first_encode = nil
167
+ @t2_first_wire = nil
168
+ end
169
+
170
+ # 2.11-A — bench/diagnostics introspection. Reads the live count
171
+ # of dispatch worker fibers parked on (or actively pulling from)
172
+ # `@dispatch_queue`. Reflects pre-spawned workers AND any ad-hoc
173
+ # workers spawned when the pool was saturated. Exposed as a method
174
+ # rather than `attr_reader` so the lock guards the counter.
175
+ def dispatch_worker_count
176
+ @dispatch_worker_lock.synchronize { @dispatch_worker_count }
177
+ end
178
+
179
+ # Called by a dispatch worker fiber when it enters its run loop.
180
+ # Pairs with `unregister_dispatch_worker` in an ensure block.
181
+ def register_dispatch_worker
182
+ @dispatch_worker_lock.synchronize { @dispatch_worker_count += 1 }
183
+ end
184
+
185
+ # Called by a dispatch worker fiber when it exits (queue closed,
186
+ # or unrecoverable error). Floors at 0 to defend against a stray
187
+ # double-unregister — instrumentation must never go negative.
188
+ def unregister_dispatch_worker
189
+ @dispatch_worker_lock.synchronize do
190
+ @dispatch_worker_count -= 1
191
+ @dispatch_worker_count = 0 if @dispatch_worker_count.negative?
192
+ end
145
193
  end
146
194
 
147
195
  # Called by SendQueueIO#write on the calling (encoder) fiber. Enforces
@@ -412,12 +460,270 @@ module Hyperion
412
460
  # MAXIMUM_ALLOWED_WINDOW_SIZE).
413
461
  H2_MAX_WINDOW_SIZE = 0x7FFFFFFF
414
462
 
415
- def initialize(app:, thread_pool: nil, h2_settings: nil)
416
- @app = app
417
- @thread_pool = thread_pool
418
- @h2_settings = h2_settings
419
- @metrics = Hyperion.metrics
420
- @logger = Hyperion.logger
463
+ # 1.7.0 added kwargs:
464
+ # * `runtime:` `Hyperion::Runtime` for metrics/logger
465
+ # isolation (default `Runtime.default`).
466
+ # * `h2_admission:` — Optional `Hyperion::H2Admission` for the
467
+ # per-process stream cap (RFC A7). nil keeps
468
+ # the 1.6.x unbounded behaviour.
469
+ #
470
+ # 2.0.0 (Phase 6b) probed `Hyperion::H2Codec.available?` at
471
+ # construction so the handler knew whether the native HPACK path
472
+ # was operational, but the connection state machine still drove
473
+ # encode/decode through `protocol-http2`'s pure-Ruby Compressor /
474
+ # Decompressor.
475
+ #
476
+ # 2.2.0 (Phase 10 / RFC §3 Phase 6c) ships the wiring infrastructure:
477
+ # {Hyperion::Http2::NativeHpackAdapter} + {#install_native_hpack}
478
+ # replace the per-connection HPACK encode/decode boundary with
479
+ # the Rust crate when AND ONLY WHEN both:
480
+ # 1. `Hyperion::H2Codec.available?` is true (cdylib loaded), AND
481
+ # 2. `ENV['HYPERION_H2_NATIVE_HPACK']` is one of `1`/`true`/`yes`/`on`.
482
+ #
483
+ # The default is OFF because local h2load benchmarking on macOS
484
+ # showed the Fiddle FFI per-call marshalling overhead dominates
485
+ # for typical 3–8-header HEADERS frames — the standalone microbench's
486
+ # 3.26× encode win does not translate to wire wins until the FFI
487
+ # marshalling layer is rewritten to amortize allocation. Keeping the
488
+ # default OFF preserves 2.0.0/2.1.0 behavior; flipping the env var
489
+ # gives operators the swap they want to A/B test in their own env.
490
+ # The framer + stream state machine + flow control + HEADERS /
491
+ # CONTINUATION framing all stay in `protocol-http2`; only the
492
+ # HPACK byte-pump is replaced when the swap is enabled. Frame ser/de
493
+ # in Rust (Phase 6d) is a separate, larger lift.
494
+ def initialize(app:, thread_pool: nil, h2_settings: nil, runtime: nil, h2_admission: nil)
495
+ @app = app
496
+ @thread_pool = thread_pool
497
+ @h2_settings = h2_settings
498
+ if runtime
499
+ @runtime = runtime
500
+ @metrics = runtime.metrics
501
+ @logger = runtime.logger
502
+ else
503
+ # 1.6.x compat path — see Connection#initialize for rationale.
504
+ @runtime = Hyperion::Runtime.default
505
+ @metrics = Hyperion.metrics
506
+ @logger = Hyperion.logger
507
+ end
508
+ @h2_admission = h2_admission
509
+ @h2_codec_available = Hyperion::H2Codec.available?
510
+ # 2.5-B [breaking-default-change]: native HPACK now defaults to ON
511
+ # when the Rust crate is available. The 2026-04-30 Rails-shape
512
+ # bench (`bench/h2_rails_shape.ru`, 25 response headers) measured
513
+ # native v3 at 1,418 r/s vs Ruby fallback 1,201 r/s — **+18.0%**
514
+ # on a header-heavy workload, comfortably above the +15% flip
515
+ # threshold. 2.4-A's hello-shape bench saw parity because HPACK
516
+ # is <1% of per-stream CPU on a 2-header response.
517
+ #
518
+ # 2.11-B — `HYPERION_H2_NATIVE_HPACK` extended with a native-mode
519
+ # axis (`auto` / `cglue` / `v2` / `off`). See `resolve_h2_native_hpack_state`.
520
+ # Operators who want the prior 2.4.x default (Ruby fallback, env
521
+ # var unset) can set `HYPERION_H2_NATIVE_HPACK=off` (or
522
+ # `0`/`false`/`no`/`off`/`ruby`). `HYPERION_H2_NATIVE_HPACK=1`
523
+ # / unset preserves the 2.5-B `auto` behavior. `=cglue`/`=v2`
524
+ # forces the corresponding native sub-path.
525
+ #
526
+ # When OFF (env-overridden): `protocol-http2`'s pure-Ruby HPACK
527
+ # Compressor / Decompressor handles everything as in 2.0.0–2.4.x.
528
+ @h2_native_mode = resolve_h2_native_hpack_state
529
+ @h2_native_hpack_enabled = @h2_codec_available && @h2_native_mode != :off
530
+ apply_h2_cglue_gate(@h2_native_mode)
531
+ @h2_codec_native = @h2_native_hpack_enabled # back-compat ivar — preserved for codec_native? readers
532
+ # 2.10-G — opt-in connection-setup timing instrumentation. When set,
533
+ # `serve` captures four monotonic timestamps per connection:
534
+ #
535
+ # t0 — entry to `serve` (post-TLS, post-ALPN — the socket is already
536
+ # the negotiated h2 SSLSocket by the time the handler sees it)
537
+ # t1 — `read_connection_preface` returned (server-side SETTINGS
538
+ # encoded + handed to the framer; client preface fully read)
539
+ # t2_encode — first stream's HEADERS frame finished encoding (bytes
540
+ # sit in the writer queue)
541
+ # t2_wire — writer fiber finished its first `socket.write` (bytes
542
+ # on the wire)
543
+ #
544
+ # When the connection's first response completes, the handler emits
545
+ # a single `'h2 first-stream timing'` info line with t0→t1, t1→t2_encode,
546
+ # t2_encode→t2_wire deltas in milliseconds. Off by default (zero hot-path
547
+ # cost when disabled — a single ivar read per stream branch). Used by
548
+ # 2.10-G to root-cause Hyperion's flat ~40 ms first-stream max-latency.
549
+ @h2_timing_enabled = env_flag_enabled?('HYPERION_H2_TIMING')
550
+ # 2.11-A — resolve the dispatch worker pool size once at handler
551
+ # construction so every `serve` call uses the same value (instead
552
+ # of re-parsing ENV per connection on the hot path). Cached as an
553
+ # ivar; bench/diagnostics can read it via the spec seam.
554
+ @dispatch_pool_size = resolve_dispatch_pool_size
555
+ record_codec_boot_state
556
+ end
557
+
558
+ # 2.11-A — pre-spawned dispatch worker pool sizing.
559
+ #
560
+ # Default `4` workers per connection — enough to absorb the typical
561
+ # HTTP/2 burst (2-8 concurrent streams) without paying any per-stream
562
+ # `task.async {}` cost on the hot path. Operators on long-lived
563
+ # high-fan-out connections (e.g. an aggregator backend that fans
564
+ # 30+ parallel streams) can bump this with `HYPERION_H2_DISPATCH_POOL`.
565
+ # Streams that arrive when the pool is saturated still get an ad-hoc
566
+ # fiber (see `serve` below) so concurrency is never artificially
567
+ # capped — the operator-facing limit is `h2.max_concurrent_streams`.
568
+ #
569
+ # Ceiling at 16 guards against a pathological config that would
570
+ # spawn hundreds of idle fibers per accepted connection. Anything
571
+ # malformed / non-positive falls back to the default rather than
572
+ # crashing the connection — this is a tuning knob, not a spec
573
+ # parameter.
574
+ DISPATCH_POOL_DEFAULT = 4
575
+ DISPATCH_POOL_MAX = 16
576
+
577
+ def resolve_dispatch_pool_size
578
+ raw = ENV['HYPERION_H2_DISPATCH_POOL']
579
+ return DISPATCH_POOL_DEFAULT if raw.nil? || raw.strip.empty?
580
+
581
+ n = Integer(raw.strip, 10)
582
+ return DISPATCH_POOL_DEFAULT unless n.positive?
583
+
584
+ [n, DISPATCH_POOL_MAX].min
585
+ rescue ArgumentError, TypeError
586
+ DISPATCH_POOL_DEFAULT
587
+ end
588
+
589
+ # Read an env-var flag with the usual truthiness rules (any of
590
+ # 1/true/yes/on, case-insensitive). Anything else → false.
591
+ def env_flag_enabled?(name)
592
+ v = ENV[name]
593
+ return false if v.nil? || v.empty?
594
+
595
+ %w[1 true yes on].include?(v.downcase)
596
+ end
597
+
598
+ # 2.11-B — resolve the operator-requested native-mode state from
599
+ # `HYPERION_H2_NATIVE_HPACK`.
600
+ #
601
+ # Returns one of:
602
+ # * `:auto` — native enabled, prefer cglue if available
603
+ # (unset / `1` / `true` / `yes` / `on` / `auto`)
604
+ # * `:cglue` — native enabled, force cglue (warn-fallback to v2
605
+ # if cglue is unavailable; native_mode log marker
606
+ # surfaces the divergence to the operator)
607
+ # * `:v2` — native enabled, force Fiddle (skip cglue even if
608
+ # available; this is the bench-isolation knob the
609
+ # 2.11-B Rails-shape harness needs)
610
+ # * `:off` — ruby fallback (`0` / `false` / `no` / `off` / `ruby`)
611
+ #
612
+ # Unknown values fall through to `:auto` rather than crashing the
613
+ # connection — same forgiving-default policy as the pre-2.11-B
614
+ # `resolve_h2_native_hpack_default`.
615
+ def resolve_h2_native_hpack_state
616
+ v = ENV['HYPERION_H2_NATIVE_HPACK']
617
+ return :auto if v.nil? || v.empty?
618
+
619
+ lc = v.downcase
620
+ return :off if %w[0 false no off ruby].include?(lc)
621
+ return :cglue if %w[cglue v3].include?(lc)
622
+ return :v2 if %w[v2 fiddle].include?(lc)
623
+
624
+ :auto
625
+ end
626
+
627
+ # 2.11-B — flip the global `H2Codec.cglue_disabled` gate based on
628
+ # the resolved native-mode state. The gate is per-process state
629
+ # (the codec module is a singleton) so reset it on every handler
630
+ # construction; otherwise a test that booted with `=v2` would leak
631
+ # the disable into a subsequent default-mode handler.
632
+ def apply_h2_cglue_gate(state)
633
+ Hyperion::H2Codec.cglue_disabled = (state == :v2)
634
+ end
635
+
636
+ # 2.0.0 Phase 6b: emit a single-shot boot log line per process
637
+ # describing the codec selection. Operators reading the boot log
638
+ # see whether the native HPACK path is in play. Idempotent across
639
+ # multiple Http2Handler constructions in the same process.
640
+ def record_codec_boot_state
641
+ return if Hyperion::Http2Handler.instance_variable_get(:@codec_state_logged)
642
+
643
+ Hyperion::Http2Handler.instance_variable_set(:@codec_state_logged, true)
644
+ # 2.11-B — `cglue_active` gates on the operator-controllable
645
+ # `cglue_active?` predicate (was `cglue_available?` pre-2.11-B).
646
+ # When the operator sets `=v2` we want the boot log to read
647
+ # `cglue_active: false` even though the C glue did install
648
+ # successfully — the bench harness inspects this field to
649
+ # differentiate the variants.
650
+ cglue_active = @h2_native_hpack_enabled && Hyperion::H2Codec.cglue_active?
651
+ cglue_requested_unavailable = @h2_native_mode == :cglue &&
652
+ @h2_native_hpack_enabled &&
653
+ !Hyperion::H2Codec.cglue_available?
654
+ mode = describe_codec_mode(cglue_active: cglue_active,
655
+ cglue_requested_unavailable: cglue_requested_unavailable)
656
+ native_mode_log = if !@h2_native_hpack_enabled
657
+ @h2_native_mode == :off ? 'off' : 'native-disabled'
658
+ elsif cglue_requested_unavailable
659
+ 'cglue-requested-unavailable'
660
+ else
661
+ @h2_native_mode.to_s
662
+ end
663
+ @logger.info do
664
+ {
665
+ message: 'h2 codec selected',
666
+ mode: mode,
667
+ native_available: @h2_codec_available,
668
+ native_enabled: @h2_native_hpack_enabled,
669
+ native_mode: native_mode_log,
670
+ cglue_active: cglue_active,
671
+ hpack_path: if @h2_native_hpack_enabled
672
+ cglue_active ? 'native-v3' : 'native-v2'
673
+ else
674
+ 'pure-ruby'
675
+ end
676
+ }
677
+ end
678
+ @metrics.increment(:h2_codec_native_selected) if @h2_native_hpack_enabled
679
+ @metrics.increment(:h2_codec_fallback_selected) unless @h2_native_hpack_enabled
680
+ end
681
+
682
+ # 2.11-B — boot-log mode descriptor (extracted for clarity since
683
+ # the matrix of native_mode × cglue_available × cglue_active grew
684
+ # past the point where an inline conditional was readable).
685
+ def describe_codec_mode(cglue_active:, cglue_requested_unavailable:)
686
+ if !@h2_native_hpack_enabled
687
+ if @h2_codec_available
688
+ 'fallback (protocol-http2 / pure Ruby HPACK) — native available but opted out via HYPERION_H2_NATIVE_HPACK=off'
689
+ else
690
+ 'fallback (protocol-http2 / pure Ruby HPACK) — native unavailable'
691
+ end
692
+ elsif cglue_active && @h2_native_mode == :cglue
693
+ 'native (Rust v3 / CGlue, forced) — HPACK on hot path, no Fiddle per call'
694
+ elsif cglue_active
695
+ # 2.11-B confirmed cglue as the firm default — the bench-measured
696
+ # delta vs the v2 (Fiddle) path is +33-43% on Rails-shape h2
697
+ # responses, which is the actual win the 2.5-B "+18% native vs
698
+ # ruby" headline was capturing (v2 alone is +1-5%, basically
699
+ # noise vs the ruby fallback at this header count).
700
+ 'native (Rust v3 / CGlue, default since 2.11-B) — HPACK on hot path, no Fiddle per call'
701
+ elsif @h2_native_mode == :v2
702
+ 'native (Rust v2 / Fiddle, forced) — HPACK on hot path, Fiddle marshalling per call'
703
+ elsif cglue_requested_unavailable
704
+ 'native (Rust v2 / Fiddle) — CGlue requested via HYPERION_H2_NATIVE_HPACK=cglue but unavailable, fell back'
705
+ else
706
+ 'native (Rust v2 / Fiddle) — HPACK on hot path, Fiddle marshalling per call'
707
+ end
708
+ end
709
+
710
+ # Read-only accessor used by tests + diagnostics. true = the
711
+ # `Hyperion::H2Codec` Rust extension loaded successfully AND
712
+ # `HYPERION_H2_NATIVE_HPACK=1` is set, so `build_server` will
713
+ # wire the native adapter onto every new connection's
714
+ # `encode_headers` / `decode_headers` boundary. The 2.2.0 default
715
+ # is false (opt-in) — see `#initialize` for the rationale and the
716
+ # bench numbers in CHANGELOG/docs that pinned the default off.
717
+ def codec_native?
718
+ @h2_native_hpack_enabled
719
+ end
720
+
721
+ # True when the Rust crate loaded successfully, regardless of
722
+ # whether the operator opted in to wiring it into the wire path.
723
+ # Useful for diagnostics/health endpoints that want to surface
724
+ # "native is available but currently disabled".
725
+ def codec_available?
726
+ @h2_codec_available
421
727
  end
422
728
 
423
729
  def serve(socket)
@@ -431,8 +737,21 @@ module Hyperion
431
737
  framer = ::Protocol::HTTP2::Framer.new(send_io)
432
738
  server = build_server(framer)
433
739
 
740
+ # 2.10-G — connection entry timestamp. Captured before any framing
741
+ # work so the t0→t1 delta isolates "preface exchange + initial
742
+ # SETTINGS round-trip" from any pre-handler scheduling delay.
743
+ writer_ctx.t0_serve_entry = monotonic_now if @h2_timing_enabled
744
+
434
745
  task = ::Async::Task.current
435
746
 
747
+ # 2.11-A — extract the peer address BEFORE the preface exchange.
748
+ # Two wins: (1) the lookup runs in parallel with the writer fiber
749
+ # picking up the first scheduler slot, and (2) the first stream's
750
+ # dispatch fiber doesn't pay this `peeraddr` syscall on its hot
751
+ # path. The address is then captured by the worker closures
752
+ # below.
753
+ peer_addr = peer_address(socket)
754
+
436
755
  # Spawn the dedicated writer fiber BEFORE the preface exchange.
437
756
  # `Server#read_connection_preface` writes the server's SETTINGS frame
438
757
  # via the framer; if the writer isn't running, those bytes sit in the
@@ -441,14 +760,23 @@ module Hyperion
441
760
  # waits for our SETTINGS before sending more frames.
442
761
  writer_task = task.async { run_writer_loop(socket, writer_ctx) }
443
762
 
444
- server.read_connection_preface(initial_settings_payload)
763
+ # 2.11-A — pre-spawn the dispatch worker pool BEFORE the preface
764
+ # exchange. Workers park on `writer_ctx.dispatch_queue.dequeue`;
765
+ # by the time the first client HEADERS frame arrives the workers
766
+ # are already in the scheduler's runnable set. The first stream
767
+ # is just an enqueue + dequeue (microseconds) instead of a
768
+ # `task.async {}` cold spawn (was the dominant cost in the t1→t2_enc
769
+ # bucket per the 2.10-G timing breakdown).
770
+ warmup_dispatch_pool!(task, writer_ctx, peer_addr: peer_addr,
771
+ pool_size: @dispatch_pool_size)
445
772
 
446
- # Extract once — the same TCP peer drives every stream on this conn.
447
- peer_addr = peer_address(socket)
773
+ server.read_connection_preface(initial_settings_payload)
774
+ writer_ctx.t1_preface_done = monotonic_now if @h2_timing_enabled
448
775
 
449
- # Track in-flight per-stream dispatch fibers so we can drain them on
450
- # connection close.
451
- stream_tasks = []
776
+ # Track ad-hoc per-stream dispatch fibers (spilled when the pool is
777
+ # saturated). The pool handles the common case; we only fall back
778
+ # to `task.async {}` when more streams arrive than warm workers.
779
+ overflow_tasks = []
452
780
 
453
781
  until server.closed?
454
782
  ready_ids = []
@@ -467,14 +795,35 @@ module Hyperion
467
795
  # if subsequent frames (e.g. RST_STREAM races) arrive.
468
796
  stream.instance_variable_set(:@hyperion_dispatched, true)
469
797
 
470
- stream_tasks << task.async do
471
- dispatch_stream(stream, writer_ctx, peer_addr)
798
+ # 2.11-A — hand the stream to a warm worker via the dispatch
799
+ # queue. We use a simple "queue is empty" probe to decide:
800
+ #
801
+ # * Empty queue ⇒ at least one worker is parked on
802
+ # `dequeue`; the enqueue+dequeue handoff is microseconds
803
+ # and we avoid a `task.async {}` cold spawn. This is the
804
+ # hot path for the FIRST stream of a fresh connection
805
+ # (the case 2.11-A is targeting).
806
+ # * Non-empty queue ⇒ every parked worker has already
807
+ # pulled a stream; another worker won't pick this up
808
+ # until one finishes. To avoid head-of-line blocking
809
+ # behind the warmup pool, fall back to `task.async {}`.
810
+ # The overflow fiber re-uses `dispatch_stream` so the
811
+ # dispatch contract is identical between pool and
812
+ # overflow paths. Concurrency is never artificially
813
+ # capped; the operator-facing knob is
814
+ # `h2.max_concurrent_streams`.
815
+ if writer_ctx.dispatch_queue.size.zero?
816
+ writer_ctx.dispatch_queue.enqueue(stream)
817
+ else
818
+ overflow_tasks << task.async do
819
+ dispatch_stream(stream, writer_ctx, peer_addr)
820
+ end
472
821
  end
473
822
  end
474
823
  end
475
824
 
476
825
  # Drain in-flight stream dispatches before we close the socket.
477
- stream_tasks.each do |t|
826
+ overflow_tasks.each do |t|
478
827
  t.wait
479
828
  rescue StandardError
480
829
  nil
@@ -498,12 +847,30 @@ module Hyperion
498
847
  # socket before the writer drains would discard final RST_STREAM /
499
848
  # GOAWAY / END_STREAM frames in the queue.
500
849
  if writer_ctx
850
+ # 2.11-A — close the dispatch queue so any pre-spawned workers
851
+ # parked on `dequeue` fall through (Async::Queue#dequeue returns
852
+ # nil after close). Do this BEFORE waiting on the writer so
853
+ # pool workers can drain their in-flight stream dispatches and
854
+ # release the encode mutex; otherwise the writer might park
855
+ # waiting for bytes that the dispatch worker never gets to
856
+ # encode.
857
+ begin
858
+ writer_ctx.dispatch_queue.close unless writer_ctx.dispatch_queue.closed?
859
+ rescue StandardError
860
+ nil
861
+ end
501
862
  writer_ctx.shutdown!
502
863
  begin
503
864
  writer_task&.wait
504
865
  rescue StandardError
505
866
  nil
506
867
  end
868
+ # 2.10-G — emit one info-level timing line per connection when the
869
+ # opt-in instrumentation is enabled and we collected a full set of
870
+ # samples (a connection that died before serving any stream lacks
871
+ # t2_first_encode / t2_first_wire and gets skipped — there's no
872
+ # first-stream signal to report).
873
+ log_h2_first_stream_timing(writer_ctx) if @h2_timing_enabled
507
874
  end
508
875
  @metrics.decrement(:connections_active)
509
876
  socket.close unless socket.closed?
@@ -511,6 +878,63 @@ module Hyperion
511
878
 
512
879
  private
513
880
 
881
+ # 2.11-A — pre-spawn the per-connection dispatch worker pool.
882
+ #
883
+ # Each worker is a fiber that loops:
884
+ # 1. `dequeue` a stream from the per-connection dispatch queue
885
+ # (parks the fiber on the queue's internal notification when
886
+ # empty — zero CPU until a stream arrives).
887
+ # 2. Calls `dispatch_stream` with the stream + writer context +
888
+ # pre-resolved peer address.
889
+ # 3. Loops back to (1). Exits cleanly when `dequeue` returns nil
890
+ # (queue closed by `serve`'s ensure block on connection
891
+ # teardown).
892
+ #
893
+ # Why pre-spawn rather than `task.async {}` per stream:
894
+ # * Fiber startup under Async involves a few µs of allocation and
895
+ # scheduler bookkeeping. Per-stream that's negligible; on the
896
+ # CONNECTION COLD PATH (first request on a fresh TCP/TLS conn)
897
+ # it adds up to a measurable share of the t1→t2_enc bucket
898
+ # (the 2.10-G timing breakdown showed ~12-25 ms on h2load
899
+ # `-c 1 -m 100 -n 5000`).
900
+ # * Workers parked on `dequeue` are already in the scheduler's
901
+ # ready set; the first stream is just an enqueue + dequeue
902
+ # handoff (microseconds).
903
+ #
904
+ # Errors inside `dispatch_stream` are already caught + RST_STREAMed
905
+ # there, so the worker only needs to defend against truly
906
+ # unexpected failures (queue shutdown races, fiber kill on graceful
907
+ # shutdown). We swallow those defensively and unregister so the
908
+ # `dispatch_worker_count` introspection is truthful.
909
+ def warmup_dispatch_pool!(task, writer_ctx, peer_addr:, pool_size:)
910
+ pool_size.times do
911
+ task.async do
912
+ writer_ctx.register_dispatch_worker
913
+ begin
914
+ loop do
915
+ stream = writer_ctx.dispatch_queue.dequeue
916
+ break if stream.nil? # queue closed → graceful exit
917
+
918
+ begin
919
+ dispatch_stream(stream, writer_ctx, peer_addr)
920
+ rescue StandardError => e
921
+ # `dispatch_stream` already logs + RST_STREAMs internally;
922
+ # if anything escapes that net we log here and keep the
923
+ # worker alive — one bad stream must not poison the
924
+ # connection's worker pool.
925
+ @logger.error do
926
+ { message: 'h2 dispatch worker swallowed error',
927
+ error: e.message, error_class: e.class.name }
928
+ end
929
+ end
930
+ end
931
+ ensure
932
+ writer_ctx.unregister_dispatch_worker
933
+ end
934
+ end
935
+ end
936
+ end
937
+
514
938
  # Build the [setting_id, value] pairs that go in the connection-preface
515
939
  # SETTINGS frame. protocol-http2's Server#read_connection_preface accepts
516
940
  # this array and does the wire encoding for us. Empty array (no overrides
@@ -576,6 +1000,7 @@ module Hyperion
576
1000
 
577
1001
  def build_server(framer)
578
1002
  server = ::Protocol::HTTP2::Server.new(framer)
1003
+ install_native_hpack(server) if @h2_native_hpack_enabled
579
1004
  server.define_singleton_method(:accept_stream) do |stream_id, &block|
580
1005
  unless valid_remote_stream_id?(stream_id)
581
1006
  raise ::Protocol::HTTP2::ProtocolError, "Invalid stream id: #{stream_id}"
@@ -590,6 +1015,53 @@ module Hyperion
590
1015
  server
591
1016
  end
592
1017
 
1018
+ # Phase 10 (Phase 6c): swap the per-connection HPACK encode/decode
1019
+ # entry points to route through the Rust crate. We replace
1020
+ # `encode_headers` / `decode_headers` on the `Protocol::HTTP2::Server`
1021
+ # instance via singleton methods — protocol-http2's framer + stream
1022
+ # state machine call `connection.encode_headers(headers, buffer)` and
1023
+ # `connection.decode_headers(data)` whenever HEADERS / CONTINUATION
1024
+ # frames cross the wire, so this is exactly the boundary where the
1025
+ # native codec slots in. The adapter holds one Encoder + one Decoder
1026
+ # for this connection; their dynamic tables persist across all
1027
+ # HEADERS frames in their respective directions, matching RFC 7541's
1028
+ # per-direction HPACK context model.
1029
+ #
1030
+ # The Ruby `@encoder` / `@decoder` Context ivars on the
1031
+ # `Protocol::HTTP2::Connection` superclass remain in place but are
1032
+ # never consulted — the singleton-method overrides shortcut past
1033
+ # them. That's safe: protocol-http2 only touches those Contexts
1034
+ # through `encode_headers` / `decode_headers`, which we now own.
1035
+ #
1036
+ # If the substitution surface ever shifts in protocol-http2 (e.g.
1037
+ # a future version inlines the call), this method becomes a no-op
1038
+ # safely — `define_singleton_method` doesn't fail when the parent
1039
+ # method is absent, but downstream calls would. The codec-boot log
1040
+ # makes the substitution observable, so a regression would surface
1041
+ # quickly via the integration spec.
1042
+ def install_native_hpack(server)
1043
+ adapter = Hyperion::Http2::NativeHpackAdapter.new
1044
+ server.define_singleton_method(:encode_headers) do |headers, buffer = String.new.b|
1045
+ adapter.encode_headers(headers, buffer)
1046
+ end
1047
+ server.define_singleton_method(:decode_headers) do |data|
1048
+ adapter.decode_headers(data)
1049
+ end
1050
+ # Stash the adapter so introspection (and the encode-mutex synchronisation
1051
+ # boundary, since adapter state is mutated under it) can reach it.
1052
+ server.instance_variable_set(:@hyperion_native_hpack, adapter)
1053
+ adapter
1054
+ rescue StandardError => e
1055
+ # Defence in depth: if the adapter ctor fails for any reason, log and
1056
+ # fall back to protocol-http2's Ruby Compressor/Decompressor. Better
1057
+ # than crashing the connection on first HEADERS frame.
1058
+ @logger.warn do
1059
+ { message: 'h2 native hpack install failed; falling back to Ruby HPACK',
1060
+ error: e.class.name, detail: e.message }
1061
+ end
1062
+ nil
1063
+ end
1064
+
593
1065
  def dispatch_stream(stream, writer_ctx, peer_addr = nil)
594
1066
  # RFC 7540 §8.1.2 — header validation flagged this stream as malformed.
595
1067
  # Send RST_STREAM PROTOCOL_ERROR instead of invoking the app.
@@ -608,6 +1080,25 @@ module Hyperion
608
1080
  return
609
1081
  end
610
1082
 
1083
+ # RFC A7: process-wide stream admission control. nil admission =
1084
+ # unbounded (current behaviour). When the cap is hit we send
1085
+ # REFUSED_STREAM (RFC 7540 §11 / RFC 9113 §5.4.1) — the spec-
1086
+ # defined response for "this stream cannot be processed; client
1087
+ # may retry on a different stream id". Bumps a counter so
1088
+ # operators can alert on sustained refusal volume.
1089
+ if @h2_admission && !@h2_admission.admit
1090
+ @metrics.increment(:h2_streams_refused)
1091
+ begin
1092
+ writer_ctx.encode_mutex.synchronize do
1093
+ stream.send_reset_stream(::Protocol::HTTP2::Error::REFUSED_STREAM) unless stream.closed?
1094
+ end
1095
+ rescue StandardError
1096
+ nil
1097
+ end
1098
+ return
1099
+ end
1100
+ h2_admitted = [email protected]?
1101
+
611
1102
  pseudo, regular = partition_pseudo(stream.request_headers)
612
1103
 
613
1104
  method = pseudo[':method'] || 'GET'
@@ -630,11 +1121,24 @@ module Hyperion
630
1121
 
631
1122
  @metrics.increment(:requests_total)
632
1123
  @metrics.increment(:requests_in_flight)
1124
+ # 2.1.0 (WS-1): HTTP/2 hijack is intentionally NOT plumbed here.
1125
+ # Rack 3 hijack over HTTP/2 requires Extended CONNECT (RFC 8441 +
1126
+ # RFC 9220) — a separate feature with its own SETTINGS handshake,
1127
+ # :protocol pseudo-header, and stream lifetime semantics. The
1128
+ # 2.1.0 scope is HTTP/1.1 hijack only (env['rack.hijack?'] returns
1129
+ # false on h2 streams because we don't pass `connection:` here).
1130
+ # If a Rack app keys on rack.hijack? to choose a transport, the h2
1131
+ # branch will fall through to its non-hijack path. See WS-2..WS-5
1132
+ # for the full WebSocket roadmap.
633
1133
  status, response_headers, body_chunks = begin
634
1134
  if @thread_pool
635
1135
  @thread_pool.call(@app, request)
636
1136
  else
637
- Hyperion::Adapter::Rack.call(@app, request)
1137
+ # 2.5-C — pass the handler's Runtime so per-request hooks
1138
+ # fire on h2 streams too. Multi-tenant deployments rely on
1139
+ # this to keep tracing context per-server even on the h2
1140
+ # path that doesn't go through Connection#call_app.
1141
+ Hyperion::Adapter::Rack.call(@app, request, runtime: @runtime)
638
1142
  end
639
1143
  ensure
640
1144
  @metrics.decrement(:requests_in_flight)
@@ -655,8 +1159,27 @@ module Hyperion
655
1159
  body_chunks.each { |c| payload << c.to_s }
656
1160
  body_chunks.close if body_chunks.respond_to?(:close)
657
1161
 
658
- writer_ctx.encode_mutex.synchronize { stream.send_headers(out_headers) }
659
- send_body(stream, payload, writer_ctx)
1162
+ # Hotfix C2: empty-body responses (RFC 7230 §3.3.3 204/304 + HEAD)
1163
+ # MUST NOT carry a DATA frame. Folding END_STREAM onto the HEADERS
1164
+ # frame collapses the response to one encoder-mutex acquisition and
1165
+ # one writer-fiber wakeup instead of two. Any body the app returned
1166
+ # for HEAD is discarded here per spec (the bytes were already
1167
+ # built — that's a Rack-app smell, not our problem to fix).
1168
+ if body_suppressed?(method, status)
1169
+ writer_ctx.encode_mutex.synchronize do
1170
+ stream.send_headers(out_headers, ::Protocol::HTTP2::END_STREAM)
1171
+ end
1172
+ else
1173
+ writer_ctx.encode_mutex.synchronize { stream.send_headers(out_headers) }
1174
+ send_body(stream, payload, writer_ctx)
1175
+ end
1176
+ # 2.10-G — first stream's HEADERS+DATA encoded. Capture exactly once
1177
+ # per connection: the assignment is nil-guarded, so once set, later
1178
+ # fibers skip it (a rare simultaneous race can only double-write a
1179
+ # near-identical timestamp). For h2load `-c 1 -m 100 -n 5000` the
1180
+ # first stream is stream id 1, the only one that pays the
1181
+ # connection-setup cost; later streams see the guard fail and skip.
1182
+ writer_ctx.t2_first_encode = monotonic_now if @h2_timing_enabled && writer_ctx.t2_first_encode.nil?
660
1183
  @metrics.increment_status(status)
661
1184
  rescue StandardError => e
662
1185
  @metrics.increment(:app_errors)
@@ -675,6 +1198,26 @@ module Hyperion
675
1198
  rescue StandardError
676
1199
  nil
677
1200
  end
1201
+ ensure
1202
+ # Release the admission slot once the stream's served (success or
1203
+ # error). h2_admitted is local-set above the slot acquisition, so
1204
+ # the protocol-error / pre-admission early-returns above don't
1205
+ # double-release.
1206
+ @h2_admission.release if defined?(h2_admitted) && h2_admitted
1207
+ end
1208
+
1209
+ # RFC 7230 §3.3.3: status codes that prohibit a response body, plus
1210
+ # the HEAD method which always suppresses the body regardless of what
1211
+ # the application returned. The h2 dispatch path uses this to fold
1212
+ # END_STREAM onto the HEADERS frame and skip the DATA-frame write
1213
+ # entirely (see Hotfix C2).
1214
+ BODY_SUPPRESSED_STATUSES = [204, 304].freeze
1215
+
1216
+ def body_suppressed?(method, status)
1217
+ return true if BODY_SUPPRESSED_STATUSES.include?(status)
1218
+ return true if method == 'HEAD'
1219
+
1220
+ false
678
1221
  end
679
1222
 
680
1223
  # Send the response body, respecting the peer's max frame size and
@@ -731,6 +1274,13 @@ module Hyperion
731
1274
  while (chunk = writer_ctx.try_pop)
732
1275
  begin
733
1276
  socket.write(chunk)
1277
+ # 2.10-G — first byte on the wire. Capture exactly once per
1278
+ # connection (the first chunk drained is the server's
1279
+ # connection-preface SETTINGS frame; we want the t1→t2_wire
1280
+ # delta to bracket "preface bytes encoded → preface bytes on
1281
+ # the socket". The expensive HEADERS+DATA enqueue happens
1282
+ # later under t2_first_encode.)
1283
+ writer_ctx.t2_first_wire = monotonic_now if @h2_timing_enabled && writer_ctx.t2_first_wire.nil?
734
1284
  rescue EOFError, Errno::ECONNRESET, Errno::EPIPE, IOError, OpenSSL::SSL::SSLError
735
1285
  # Peer hung up. Release THIS chunk's byte budget, then drain the
736
1286
  # rest of the queue (without writing) so backpressured encoders
@@ -775,6 +1325,55 @@ module Hyperion
775
1325
  end
776
1326
  end
777
1327
 
1328
+ # 2.10-G — small helper so the four timing call sites in `serve`,
1329
+ # `dispatch_stream`, and `run_writer_loop` agree on the clock source.
1330
+ # CLOCK_MONOTONIC is unaffected by NTP jumps and is what the rest of
1331
+ # the gem uses for elapsed-time math (see Connection#serve).
1332
+ def monotonic_now
1333
+ Process.clock_gettime(Process::CLOCK_MONOTONIC)
1334
+ end
1335
+
1336
+ # 2.10-G — assemble + emit the per-connection timing breakdown that
1337
+ # the bench harness greps for. Three deltas are reported in
1338
+ # milliseconds:
1339
+ #
1340
+ # t0_to_t1_ms — preface exchange (read client preface + write
1341
+ # server SETTINGS into the framer queue)
1342
+ # t1_to_t2_enc_ms — gap between preface complete and first stream's
1343
+ # HEADERS+DATA encoded. If this is the dominant
1344
+ # bucket, the framer-fiber priming / first-stream
1345
+ # scheduling is the suspect.
1346
+ # t2_enc_to_t2_wire_ms — encode-complete to writer drained first
1347
+ # chunk on the wire. Should be near-zero on
1348
+ # a healthy connection (writer fiber is
1349
+ # already running, parked on @send_notify).
1350
+ # A large value here = writer-fiber
1351
+ # starvation under the Async scheduler.
1352
+ #
1353
+ # Skipped when any timestamp is missing (connection died before
1354
+ # serving a stream / instrumentation was disabled mid-flight).
1355
+ def log_h2_first_stream_timing(writer_ctx)
1356
+ t0 = writer_ctx.t0_serve_entry
1357
+ t1 = writer_ctx.t1_preface_done
1358
+ t2_enc = writer_ctx.t2_first_encode
1359
+ t2_wire = writer_ctx.t2_first_wire
1360
+ return if t0.nil? || t1.nil? || t2_enc.nil? || t2_wire.nil?
1361
+
1362
+ @logger.info do
1363
+ {
1364
+ message: 'h2 first-stream timing',
1365
+ t0_to_t1_ms: ((t1 - t0) * 1000).round(3),
1366
+ t1_to_t2_enc_ms: ((t2_enc - t1) * 1000).round(3),
1367
+ t2_enc_to_t2_wire_ms: ((t2_wire - t2_enc) * 1000).round(3),
1368
+ t0_to_t2_wire_ms: ((t2_wire - t0) * 1000).round(3)
1369
+ }
1370
+ end
1371
+ rescue StandardError
1372
+ # Logging the timing breakdown must never crash the connection
1373
+ # teardown path — instrumentation is best-effort.
1374
+ nil
1375
+ end
1376
+
778
1377
  # Mirrors Connection#peer_address — see the comment there. SSLSocket
779
1378
  # wraps a TCPSocket; both expose #peeraddr after handshake.
780
1379
  def peer_address(socket)