hyperion-rb 2.10.1 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'async'
4
4
  require 'async/notification'
5
+ require 'async/queue'
5
6
  require 'protocol/http2/server'
6
7
  require 'protocol/http2/framer'
7
8
  require 'protocol/http2/stream'
@@ -133,7 +134,7 @@ module Hyperion
133
134
  #
134
135
  # Single instance per connection, lives for the lifetime of `serve`.
135
136
  class WriterContext
136
- attr_reader :encode_mutex
137
+ attr_reader :encode_mutex, :dispatch_queue
137
138
  # 2.10-G — connection-lifecycle timing slots used by the optional h2
138
139
  # latency-instrumentation path (gated by `HYPERION_H2_TIMING=1`).
139
140
  # Each slot is a single CLOCK_MONOTONIC timestamp captured at most
@@ -149,6 +150,15 @@ module Hyperion
149
150
  @pending_bytes_lock = ::Mutex.new
150
151
  @max_pending_bytes = max_pending_bytes
151
152
  @writer_done = false
153
+ # 2.11-A — pre-spawned dispatch worker pool. The connection-loop
154
+ # fiber pushes ready streams onto `@dispatch_queue`; workers
155
+ # parked on `dequeue` grab them and call `dispatch_stream`. The
156
+ # queue is created here (cheap — wraps a Thread::Queue) so the
157
+ # WriterContext is fully self-contained and unit-testable without
158
+ # an Async reactor.
159
+ @dispatch_queue = ::Async::Queue.new
160
+ @dispatch_worker_count = 0
161
+ @dispatch_worker_lock = ::Mutex.new
152
162
  # 2.10-G timing slots, all initially nil so capture is a single
153
163
  # `||=` write under the encode mutex / writer fiber.
154
164
  @t0_serve_entry = nil
@@ -157,6 +167,31 @@ module Hyperion
157
167
  @t2_first_wire = nil
158
168
  end
159
169
 
170
+ # 2.11-A — bench/diagnostics introspection. Reads the live count
171
+ # of dispatch worker fibers parked on (or actively pulling from)
172
+ # `@dispatch_queue`. Reflects pre-spawned workers AND any ad-hoc
173
+ # workers spawned when the pool was saturated. Exposed as a method
174
+ # rather than `attr_reader` so the lock guards the counter.
175
+ def dispatch_worker_count
176
+ @dispatch_worker_lock.synchronize { @dispatch_worker_count }
177
+ end
178
+
179
+ # Called by a dispatch worker fiber when it enters its run loop.
180
+ # Pairs with `unregister_dispatch_worker` in an ensure block.
181
+ def register_dispatch_worker
182
+ @dispatch_worker_lock.synchronize { @dispatch_worker_count += 1 }
183
+ end
184
+
185
+ # Called by a dispatch worker fiber when it exits (queue closed,
186
+ # or unrecoverable error). Floors at 0 to defend against a stray
187
+ # double-unregister — instrumentation must never go negative.
188
+ def unregister_dispatch_worker
189
+ @dispatch_worker_lock.synchronize do
190
+ @dispatch_worker_count -= 1
191
+ @dispatch_worker_count = 0 if @dispatch_worker_count.negative?
192
+ end
193
+ end
194
+
160
195
  # Called by SendQueueIO#write on the calling (encoder) fiber. Enforces
161
196
  # the per-connection backpressure cap before enqueuing.
162
197
  def enqueue(bytes)
@@ -240,7 +275,16 @@ module Hyperion
240
275
  def initialize(*)
241
276
  super
242
277
  @request_headers = []
243
- @request_body = +''
278
+ # 2.12-F — gRPC carries opaque protobuf bytes
279
+ # ([1-byte compressed flag][4-byte length-prefix][message bytes]) in the
280
+ # request body. The default UTF-8 encoding on a `+''` literal would
281
+ # break valid_encoding? on byte sequences that don't form UTF-8
282
+ # codepoints, leading to a Rack app reading `body.string` and getting
283
+ # a String that misreports its bytesize / corrupts when string-
284
+ # interpolated. ASCII_8BIT (binary) preserves bytes verbatim and is
285
+ # the encoding gRPC Ruby clients expect. Same change is applied to
286
+ # the HTTP/1.1 path as a separate concern; see Connection.
287
+ @request_body = String.new(encoding: Encoding::ASCII_8BIT)
244
288
  @request_body_bytes = 0
245
289
  @request_complete = false
246
290
  @window_available = ::Async::Notification.new
@@ -471,6 +515,10 @@ module Hyperion
471
515
  @logger = Hyperion.logger
472
516
  end
473
517
  @h2_admission = h2_admission
518
+ # 2.12-E — per-worker request counter label. Identical caching
519
+ # rationale to Connection#initialize: process-constant ID, looked
520
+ # up once and held in the ivar.
521
+ @worker_id = Process.pid.to_s
474
522
  @h2_codec_available = Hyperion::H2Codec.available?
475
523
  # 2.5-B [breaking-default-change]: native HPACK now defaults to ON
476
524
  # when the Rust crate is available. The 2026-04-30 Rails-shape
@@ -480,14 +528,19 @@ module Hyperion
480
528
  # threshold. 2.4-A's hello-shape bench saw parity because HPACK
481
529
  # is <1% of per-stream CPU on a 2-header response.
482
530
  #
531
+ # 2.11-B — `HYPERION_H2_NATIVE_HPACK` extended with a native-mode
532
+ # axis (`auto` / `cglue` / `v2` / `off`). See `resolve_h2_native_hpack_state`.
483
533
  # Operators who want the prior 2.4.x default (Ruby fallback, env
484
- # var unset) can now set `HYPERION_H2_NATIVE_HPACK=off` (or
485
- # `0`/`false`/`no`/`off`) explicitly. `HYPERION_H2_NATIVE_HPACK=1`
486
- # still works for explicit opt-in.
534
+ # var unset) can set `HYPERION_H2_NATIVE_HPACK=off` (or
535
+ # `0`/`false`/`no`/`off`/`ruby`). `HYPERION_H2_NATIVE_HPACK=1`
536
+ # / unset preserves the 2.5-B `auto` behavior. `=cglue`/`=v2`
537
+ # forces the corresponding native sub-path.
487
538
  #
488
539
  # When OFF (env-overridden): `protocol-http2`'s pure-Ruby HPACK
489
540
  # Compressor / Decompressor handles everything as in 2.0.0–2.4.x.
490
- @h2_native_hpack_enabled = @h2_codec_available && resolve_h2_native_hpack_default
541
+ @h2_native_mode = resolve_h2_native_hpack_state
542
+ @h2_native_hpack_enabled = @h2_codec_available && @h2_native_mode != :off
543
+ apply_h2_cglue_gate(@h2_native_mode)
491
544
  @h2_codec_native = @h2_native_hpack_enabled # back-compat ivar — preserved for codec_native? readers
492
545
  # 2.10-G — opt-in connection-setup timing instrumentation. When set,
493
546
  # `serve` captures four monotonic timestamps per connection:
@@ -507,9 +560,45 @@ module Hyperion
507
560
  # cost when disabled — a single ivar read per stream branch). Used by
508
561
  # 2.10-G to root-cause Hyperion's flat ~40 ms first-stream max-latency.
509
562
  @h2_timing_enabled = env_flag_enabled?('HYPERION_H2_TIMING')
563
+ # 2.11-A — resolve the dispatch worker pool size once at handler
564
+ # construction so every `serve` call uses the same value (instead
565
+ # of re-parsing ENV per connection on the hot path). Cached as an
566
+ # ivar; bench/diagnostics can read it via the spec seam.
567
+ @dispatch_pool_size = resolve_dispatch_pool_size
510
568
  record_codec_boot_state
511
569
  end
512
570
 
571
+ # 2.11-A — pre-spawned dispatch worker pool sizing.
572
+ #
573
+ # Default `4` workers per connection — enough to absorb the typical
574
+ # HTTP/2 burst (2-8 concurrent streams) without paying any per-stream
575
+ # `task.async {}` cost on the hot path. Operators on long-lived
576
+ # high-fan-out connections (e.g. an aggregator backend that fans
577
+ # 30+ parallel streams) can bump this with `HYPERION_H2_DISPATCH_POOL`.
578
+ # Streams that arrive when the pool is saturated still get an ad-hoc
579
+ # fiber (see `serve` below) so concurrency is never artificially
580
+ # capped — the operator-facing limit is `h2.max_concurrent_streams`.
581
+ #
582
+ # Ceiling at 16 guards against a pathological config that would
583
+ # spawn hundreds of idle fibers per accepted connection. Anything
584
+ # malformed / non-positive falls back to the default rather than
585
+ # crashing the connection — this is a tuning knob, not a spec
586
+ # parameter.
587
+ DISPATCH_POOL_DEFAULT = 4
588
+ DISPATCH_POOL_MAX = 16
589
+
590
+ def resolve_dispatch_pool_size
591
+ raw = ENV['HYPERION_H2_DISPATCH_POOL']
592
+ return DISPATCH_POOL_DEFAULT if raw.nil? || raw.strip.empty?
593
+
594
+ n = Integer(raw.strip, 10)
595
+ return DISPATCH_POOL_DEFAULT unless n.positive?
596
+
597
+ [n, DISPATCH_POOL_MAX].min
598
+ rescue ArgumentError, TypeError
599
+ DISPATCH_POOL_DEFAULT
600
+ end
601
+
513
602
  # Read an env-var flag with the usual truthiness rules (any of
514
603
  # 1/true/yes/on, case-insensitive). Anything else → false.
515
604
  def env_flag_enabled?(name)
@@ -519,21 +608,42 @@ module Hyperion
519
608
  %w[1 true yes on].include?(v.downcase)
520
609
  end
521
610
 
522
- # Read an env-var flag with explicit OFF support. Used by
523
- # `HYPERION_H2_NATIVE_HPACK` since 2.5-B flipped the default to ON.
524
- # Returns true if the env var is unset / empty / explicitly truthy;
525
- # returns false only when the operator sets it to a truthy-OFF
526
- # value (0/false/no/off, case-insensitive). Anything else falls
527
- # back to the default-on behavior so we don't surprise operators
528
- # who set typo'd values.
529
- def resolve_h2_native_hpack_default
611
+ # 2.11-B — resolve the operator-requested native-mode state from
612
+ # `HYPERION_H2_NATIVE_HPACK`.
613
+ #
614
+ # Returns one of:
615
+ # * `:auto` — native enabled, prefer cglue if available
616
+ # (unset / `1` / `true` / `yes` / `on` / `auto`)
617
+ # * `:cglue` — native enabled, force cglue (warn-fallback to v2
618
+ # if cglue is unavailable; native_mode log marker
619
+ # surfaces the divergence to the operator)
620
+ # * `:v2` — native enabled, force Fiddle (skip cglue even if
621
+ # available; this is the bench-isolation knob the
622
+ # 2.11-B Rails-shape harness needs)
623
+ # * `:off` — ruby fallback (`0` / `false` / `no` / `off` / `ruby`)
624
+ #
625
+ # Unknown values fall through to `:auto` rather than crashing the
626
+ # connection — same forgiving-default policy as the pre-2.11-B
627
+ # `resolve_h2_native_hpack_default`.
628
+ def resolve_h2_native_hpack_state
530
629
  v = ENV['HYPERION_H2_NATIVE_HPACK']
531
- return true if v.nil? || v.empty?
630
+ return :auto if v.nil? || v.empty?
532
631
 
533
632
  lc = v.downcase
534
- return false if %w[0 false no off].include?(lc)
633
+ return :off if %w[0 false no off ruby].include?(lc)
634
+ return :cglue if %w[cglue v3].include?(lc)
635
+ return :v2 if %w[v2 fiddle].include?(lc)
535
636
 
536
- true
637
+ :auto
638
+ end
639
+
640
+ # 2.11-B — flip the global `H2Codec.cglue_disabled` gate based on
641
+ # the resolved native-mode state. The gate is per-process state
642
+ # (the codec module is a singleton) so reset it on every handler
643
+ # construction; otherwise a test that booted with `=v2` would leak
644
+ # the disable into a subsequent default-mode handler.
645
+ def apply_h2_cglue_gate(state)
646
+ Hyperion::H2Codec.cglue_disabled = (state == :v2)
537
647
  end
538
648
 
539
649
  # 2.0.0 Phase 6b: emit a single-shot boot log line per process
@@ -544,23 +654,32 @@ module Hyperion
544
654
  return if Hyperion::Http2Handler.instance_variable_get(:@codec_state_logged)
545
655
 
546
656
  Hyperion::Http2Handler.instance_variable_set(:@codec_state_logged, true)
547
- cglue_active = @h2_native_hpack_enabled && Hyperion::H2Codec.cglue_available?
548
- mode =
549
- if @h2_native_hpack_enabled && cglue_active
550
- 'native (Rust v3 / CGlue) HPACK on hot path, no Fiddle per call'
551
- elsif @h2_native_hpack_enabled
552
- 'native (Rust v2 / Fiddle) — HPACK on hot path, Fiddle marshalling per call'
553
- elsif @h2_codec_available
554
- 'fallback (protocol-http2 / pure Ruby HPACK) — native available but opted out via HYPERION_H2_NATIVE_HPACK=off'
555
- else
556
- 'fallback (protocol-http2 / pure Ruby HPACK) — native unavailable'
557
- end
657
+ # 2.11-B — `cglue_active` gates on the operator-controllable
658
+ # `cglue_active?` predicate (was `cglue_available?` pre-2.11-B).
659
+ # When the operator sets `=v2` we want the boot log to read
660
+ # `cglue_active: false` even though the C glue did install
661
+ # successfully — the bench harness inspects this field to
662
+ # differentiate the variants.
663
+ cglue_active = @h2_native_hpack_enabled && Hyperion::H2Codec.cglue_active?
664
+ cglue_requested_unavailable = @h2_native_mode == :cglue &&
665
+ @h2_native_hpack_enabled &&
666
+ !Hyperion::H2Codec.cglue_available?
667
+ mode = describe_codec_mode(cglue_active: cglue_active,
668
+ cglue_requested_unavailable: cglue_requested_unavailable)
669
+ native_mode_log = if !@h2_native_hpack_enabled
670
+ @h2_native_mode == :off ? 'off' : 'native-disabled'
671
+ elsif cglue_requested_unavailable
672
+ 'cglue-requested-unavailable'
673
+ else
674
+ @h2_native_mode.to_s
675
+ end
558
676
  @logger.info do
559
677
  {
560
678
  message: 'h2 codec selected',
561
679
  mode: mode,
562
680
  native_available: @h2_codec_available,
563
681
  native_enabled: @h2_native_hpack_enabled,
682
+ native_mode: native_mode_log,
564
683
  cglue_active: cglue_active,
565
684
  hpack_path: if @h2_native_hpack_enabled
566
685
  cglue_active ? 'native-v3' : 'native-v2'
@@ -573,6 +692,34 @@ module Hyperion
573
692
  @metrics.increment(:h2_codec_fallback_selected) unless @h2_native_hpack_enabled
574
693
  end
575
694
 
695
+ # 2.11-B — boot-log mode descriptor (extracted for clarity since
696
+ # the matrix of native_mode × cglue_available × cglue_active grew
697
+ # past the point where an inline conditional was readable).
698
+ def describe_codec_mode(cglue_active:, cglue_requested_unavailable:)
699
+ if !@h2_native_hpack_enabled
700
+ if @h2_codec_available
701
+ 'fallback (protocol-http2 / pure Ruby HPACK) — native available but opted out via HYPERION_H2_NATIVE_HPACK=off'
702
+ else
703
+ 'fallback (protocol-http2 / pure Ruby HPACK) — native unavailable'
704
+ end
705
+ elsif cglue_active && @h2_native_mode == :cglue
706
+ 'native (Rust v3 / CGlue, forced) — HPACK on hot path, no Fiddle per call'
707
+ elsif cglue_active
708
+ # 2.11-B confirmed cglue as the firm default — the bench-measured
709
+ # delta vs the v2 (Fiddle) path is +33-43% on Rails-shape h2
710
+ # responses, which is the actual win the 2.5-B "+18% native vs
711
+ # ruby" headline was capturing (v2 alone is +1-5%, basically
712
+ # noise vs the ruby fallback at this header count).
713
+ 'native (Rust v3 / CGlue, default since 2.11-B) — HPACK on hot path, no Fiddle per call'
714
+ elsif @h2_native_mode == :v2
715
+ 'native (Rust v2 / Fiddle, forced) — HPACK on hot path, Fiddle marshalling per call'
716
+ elsif cglue_requested_unavailable
717
+ 'native (Rust v2 / Fiddle) — CGlue requested via HYPERION_H2_NATIVE_HPACK=cglue but unavailable, fell back'
718
+ else
719
+ 'native (Rust v2 / Fiddle) — HPACK on hot path, Fiddle marshalling per call'
720
+ end
721
+ end
722
+
576
723
  # Read-only accessor used by tests + diagnostics. true = the
577
724
  # `Hyperion::H2Codec` Rust extension loaded successfully AND
578
725
  # `HYPERION_H2_NATIVE_HPACK=1` is set, so `build_server` will
@@ -610,6 +757,14 @@ module Hyperion
610
757
 
611
758
  task = ::Async::Task.current
612
759
 
760
+ # 2.11-A — extract the peer address BEFORE the preface exchange.
761
+ # Two wins: (1) the lookup runs in parallel with the writer fiber
762
+ # picking up the first scheduler slot, and (2) the first stream's
763
+ # dispatch fiber doesn't pay this `peeraddr` syscall on its hot
764
+ # path. The address is then captured by the worker closures
765
+ # below.
766
+ peer_addr = peer_address(socket)
767
+
613
768
  # Spawn the dedicated writer fiber BEFORE the preface exchange.
614
769
  # `Server#read_connection_preface` writes the server's SETTINGS frame
615
770
  # via the framer; if the writer isn't running, those bytes sit in the
@@ -618,15 +773,23 @@ module Hyperion
618
773
  # waits for our SETTINGS before sending more frames.
619
774
  writer_task = task.async { run_writer_loop(socket, writer_ctx) }
620
775
 
776
+ # 2.11-A — pre-spawn the dispatch worker pool BEFORE the preface
777
+ # exchange. Workers park on `writer_ctx.dispatch_queue.dequeue`;
778
+ # by the time the first client HEADERS frame arrives the workers
779
+ # are already in the scheduler's runnable set. The first stream
780
+ # is just an enqueue + dequeue (microseconds) instead of a
781
+ # `task.async {}` cold spawn (was the dominant cost in the t1→t2_enc
782
+ # bucket per the 2.10-G timing breakdown).
783
+ warmup_dispatch_pool!(task, writer_ctx, peer_addr: peer_addr,
784
+ pool_size: @dispatch_pool_size)
785
+
621
786
  server.read_connection_preface(initial_settings_payload)
622
787
  writer_ctx.t1_preface_done = monotonic_now if @h2_timing_enabled
623
788
 
624
- # Extract once the same TCP peer drives every stream on this conn.
625
- peer_addr = peer_address(socket)
626
-
627
- # Track in-flight per-stream dispatch fibers so we can drain them on
628
- # connection close.
629
- stream_tasks = []
789
+ # Track ad-hoc per-stream dispatch fibers (spilled when the pool is
790
+ # saturated). The pool handles the common case; we only fall back
791
+ # to `task.async {}` when more streams arrive than warm workers.
792
+ overflow_tasks = []
630
793
 
631
794
  until server.closed?
632
795
  ready_ids = []
@@ -645,14 +808,35 @@ module Hyperion
645
808
  # if subsequent frames (e.g. RST_STREAM races) arrive.
646
809
  stream.instance_variable_set(:@hyperion_dispatched, true)
647
810
 
648
- stream_tasks << task.async do
649
- dispatch_stream(stream, writer_ctx, peer_addr)
811
+ # 2.11-A — hand the stream to a warm worker via the dispatch
812
+ # queue. We use a simple "queue is empty" probe to decide:
813
+ #
814
+ # * Empty queue ⇒ at least one worker is parked on
815
+ # `dequeue`; the enqueue+dequeue handoff is microseconds
816
+ # and we avoid a `task.async {}` cold spawn. This is the
817
+ # hot path for the FIRST stream of a fresh connection
818
+ # (the case 2.11-A is targeting).
819
+ # * Non-empty queue ⇒ every parked worker has already
820
+ # pulled a stream; another worker won't pick this up
821
+ # until one finishes. To avoid head-of-line blocking
822
+ # behind the warmup pool, fall back to `task.async {}`.
823
+ # The overflow fiber re-uses `dispatch_stream` so the
824
+ # dispatch contract is identical between pool and
825
+ # overflow paths. Concurrency is never artificially
826
+ # capped; the operator-facing knob is
827
+ # `h2.max_concurrent_streams`.
828
+ if writer_ctx.dispatch_queue.size.zero?
829
+ writer_ctx.dispatch_queue.enqueue(stream)
830
+ else
831
+ overflow_tasks << task.async do
832
+ dispatch_stream(stream, writer_ctx, peer_addr)
833
+ end
650
834
  end
651
835
  end
652
836
  end
653
837
 
654
838
  # Drain in-flight stream dispatches before we close the socket.
655
- stream_tasks.each do |t|
839
+ overflow_tasks.each do |t|
656
840
  t.wait
657
841
  rescue StandardError
658
842
  nil
@@ -676,6 +860,18 @@ module Hyperion
676
860
  # socket before the writer drains would discard final RST_STREAM /
677
861
  # GOAWAY / END_STREAM frames in the queue.
678
862
  if writer_ctx
863
+ # 2.11-A — close the dispatch queue so any pre-spawned workers
864
+ # parked on `dequeue` fall through (Async::Queue#dequeue returns
865
+ # nil after close). Do this BEFORE waiting on the writer so
866
+ # pool workers can drain their in-flight stream dispatches and
867
+ # release the encode mutex; otherwise the writer might park
868
+ # waiting for bytes that the dispatch worker never gets to
869
+ # encode.
870
+ begin
871
+ writer_ctx.dispatch_queue.close unless writer_ctx.dispatch_queue.closed?
872
+ rescue StandardError
873
+ nil
874
+ end
679
875
  writer_ctx.shutdown!
680
876
  begin
681
877
  writer_task&.wait
@@ -695,6 +891,63 @@ module Hyperion
695
891
 
696
892
  private
697
893
 
894
+ # 2.11-A — pre-spawn the per-connection dispatch worker pool.
895
+ #
896
+ # Each worker is a fiber that loops:
897
+ # 1. `dequeue` a stream from the per-connection dispatch queue
898
+ # (parks the fiber on the queue's internal notification when
899
+ # empty — zero CPU until a stream arrives).
900
+ # 2. Calls `dispatch_stream` with the stream + writer context +
901
+ # pre-resolved peer address.
902
+ # 3. Loops back to (1). Exits cleanly when `dequeue` returns nil
903
+ # (queue closed by `serve`'s ensure block on connection
904
+ # teardown).
905
+ #
906
+ # Why pre-spawn rather than `task.async {}` per stream:
907
+ # * Fiber startup under Async involves a few µs of allocation and
908
+ # scheduler bookkeeping. Per-stream that's negligible; on the
909
+ # CONNECTION COLD PATH (first request on a fresh TCP/TLS conn)
910
+ # it adds up to a measurable share of the t1→t2_enc bucket
911
+ # (the 2.10-G timing breakdown showed ~12-25 ms on h2load
912
+ # `-c 1 -m 100 -n 5000`).
913
+ # * Workers parked on `dequeue` are already in the scheduler's
914
+ # ready set; the first stream is just an enqueue + dequeue
915
+ # handoff (microseconds).
916
+ #
917
+ # Errors inside `dispatch_stream` are already caught + RST_STREAMed
918
+ # there, so the worker only needs to defend against truly
919
+ # unexpected failures (queue shutdown races, fiber kill on graceful
920
+ # shutdown). We swallow those defensively and unregister so the
921
+ # `dispatch_worker_count` introspection is truthful.
922
+ def warmup_dispatch_pool!(task, writer_ctx, peer_addr:, pool_size:)
923
+ pool_size.times do
924
+ task.async do
925
+ writer_ctx.register_dispatch_worker
926
+ begin
927
+ loop do
928
+ stream = writer_ctx.dispatch_queue.dequeue
929
+ break if stream.nil? # queue closed → graceful exit
930
+
931
+ begin
932
+ dispatch_stream(stream, writer_ctx, peer_addr)
933
+ rescue StandardError => e
934
+ # `dispatch_stream` already logs + RST_STREAMs internally;
935
+ # if anything escapes that net we log here and keep the
936
+ # worker alive — one bad stream must not poison the
937
+ # connection's worker pool.
938
+ @logger.error do
939
+ { message: 'h2 dispatch worker swallowed error',
940
+ error: e.message, error_class: e.class.name }
941
+ end
942
+ end
943
+ end
944
+ ensure
945
+ writer_ctx.unregister_dispatch_worker
946
+ end
947
+ end
948
+ end
949
+ end
950
+
698
951
  # Build the [setting_id, value] pairs that go in the connection-preface
699
952
  # SETTINGS frame. protocol-http2's Server#read_connection_preface accepts
700
953
  # this array and does the wire encoding for us. Empty array (no overrides
@@ -881,6 +1134,11 @@ module Hyperion
881
1134
 
882
1135
  @metrics.increment(:requests_total)
883
1136
  @metrics.increment(:requests_in_flight)
1137
+ # 2.12-E — per-worker request counter, ticked once per h2 stream.
1138
+ # Same family as Connection#serve so the audit metric reflects
1139
+ # cluster distribution across BOTH transports without operators
1140
+ # needing to alert on two separate counters.
1141
+ @metrics.tick_worker_request(@worker_id)
884
1142
  # 2.1.0 (WS-1): HTTP/2 hijack is intentionally NOT plumbed here.
885
1143
  # Rack 3 hijack over HTTP/2 requires Extended CONNECT (RFC 8441 +
886
1144
  # RFC 9220) — a separate feature with its own SETTINGS handshake,
@@ -915,8 +1173,17 @@ module Hyperion
915
1173
  end
916
1174
  end
917
1175
 
918
- payload = +''
1176
+ # 2.12-F — gRPC support: bodies that respond to `:trailers` carry a
1177
+ # final HEADERS frame (with END_STREAM=1) right after the DATA frames.
1178
+ # The Rack 3 contract is "iterate body first, then call body.trailers"
1179
+ # — so we materialise the payload, then *before* `body.close`
1180
+ # (`Rack::BodyProxy` clears state on close) snapshot the trailers Hash.
1181
+ # `nil` / empty Hash → no trailing frame. Non-Hash values are coerced
1182
+ # to a Hash defensively; a misbehaving app must not be able to crash
1183
+ # the connection.
1184
+ payload = String.new(encoding: Encoding::ASCII_8BIT)
919
1185
  body_chunks.each { |c| payload << c.to_s }
1186
+ response_trailers = collect_response_trailers(body_chunks)
920
1187
  body_chunks.close if body_chunks.respond_to?(:close)
921
1188
 
922
1189
  # Hotfix C2: empty-body responses (RFC 7230 §3.3.3 — 204/304 + HEAD)
@@ -925,10 +1192,21 @@ module Hyperion
925
1192
  # one writer-fiber wakeup instead of two. Any body the app returned
926
1193
  # for HEAD is discarded here per spec (the bytes were already
927
1194
  # built — that's a Rack-app smell, not our problem to fix).
1195
+ #
1196
+ # Trailers on body-suppressed responses (HEAD/204/304) are dropped:
1197
+ # the response is end-of-stream after HEADERS, with no place to put
1198
+ # a trailing HEADERS frame. This matches what curl --http2 / grpc
1199
+ # clients do (HEAD + gRPC isn't a meaningful combination).
928
1200
  if body_suppressed?(method, status)
929
1201
  writer_ctx.encode_mutex.synchronize do
930
1202
  stream.send_headers(out_headers, ::Protocol::HTTP2::END_STREAM)
931
1203
  end
1204
+ elsif have_trailers?(response_trailers)
1205
+ # gRPC / Rack-3-trailers path: HEADERS (no END_STREAM), DATA frames
1206
+ # (no END_STREAM on last DATA), final HEADERS with END_STREAM=1.
1207
+ writer_ctx.encode_mutex.synchronize { stream.send_headers(out_headers) }
1208
+ send_body(stream, payload, writer_ctx, end_stream: false)
1209
+ send_trailers(stream, response_trailers, writer_ctx)
932
1210
  else
933
1211
  writer_ctx.encode_mutex.synchronize { stream.send_headers(out_headers) }
934
1212
  send_body(stream, payload, writer_ctx)
@@ -989,9 +1267,24 @@ module Hyperion
989
1267
  #
990
1268
  # The encode_mutex protects HPACK state and per-stream frame ordering;
991
1269
  # the actual socket write happens off-fiber via the writer task.
992
- def send_body(stream, payload, writer_ctx)
1270
+ #
1271
+ # 2.12-F — `end_stream:` controls whether the LAST DATA frame carries
1272
+ # the END_STREAM flag. The default `true` preserves pre-2.12-F semantics
1273
+ # (final DATA frame closes the stream). Callers that intend to send a
1274
+ # trailing HEADERS frame after the body pass `end_stream: false` so the
1275
+ # final DATA frame leaves the stream half-open from the server side
1276
+ # and the trailer HEADERS frame can carry END_STREAM=1.
1277
+ def send_body(stream, payload, writer_ctx, end_stream: true)
993
1278
  if payload.empty?
994
- writer_ctx.encode_mutex.synchronize { stream.send_data('', ::Protocol::HTTP2::END_STREAM) }
1279
+ if end_stream
1280
+ writer_ctx.encode_mutex.synchronize do
1281
+ stream.send_data('', ::Protocol::HTTP2::END_STREAM)
1282
+ end
1283
+ end
1284
+ # When end_stream is false AND payload is empty, we deliberately
1285
+ # send NO DATA frame at all — gRPC trailers-only responses (the
1286
+ # error-without-payload shape) are HEADERS → trailer-HEADERS, no
1287
+ # DATA in between. send_trailers handles the closing END_STREAM.
995
1288
  return
996
1289
  end
997
1290
 
@@ -1010,12 +1303,77 @@ module Hyperion
1010
1303
 
1011
1304
  chunk = payload.byteslice(offset, available)
1012
1305
  offset += chunk.bytesize
1013
- flags = offset >= bytesize ? ::Protocol::HTTP2::END_STREAM : 0
1306
+ last_chunk = offset >= bytesize
1307
+ flags = last_chunk && end_stream ? ::Protocol::HTTP2::END_STREAM : 0
1014
1308
 
1015
1309
  writer_ctx.encode_mutex.synchronize { stream.send_data(chunk, flags) }
1016
1310
  end
1017
1311
  end
1018
1312
 
1313
+ # 2.12-F — pull a trailers Hash off the response body if Rack 3
1314
+ # `body.trailers` is implemented. Called AFTER the body has been
1315
+ # fully iterated (Rack 3 contract: trailers are computed by the body
1316
+ # while it streams; reading them before iteration is undefined).
1317
+ # Returns nil when the body doesn't expose trailers, when the call
1318
+ # raises, or when the result isn't a Hash-coercible map. Defensive
1319
+ # by design: a misbehaving app must not crash the dispatch loop.
1320
+ def collect_response_trailers(body)
1321
+ return nil unless body.respond_to?(:trailers)
1322
+
1323
+ raw = body.trailers
1324
+ return nil if raw.nil?
1325
+ return raw if raw.is_a?(Hash)
1326
+ return raw.to_h if raw.respond_to?(:to_h)
1327
+
1328
+ nil
1329
+ rescue StandardError => e
1330
+ @logger.warn do
1331
+ { message: 'h2 body.trailers raised; ignoring',
1332
+ error: e.message, error_class: e.class.name }
1333
+ end
1334
+ nil
1335
+ end
1336
+
1337
+ # 2.12-F — predicate for "we have trailers worth sending". Defined as
1338
+ # a method (rather than the more idiomatic `!h.nil? && !h.empty?` /
1339
+ # `h&.any?`) because rubocop-rails on the hot path autocorrects both
1340
+ # of those forms to `h.present?`, which raises NoMethodError on a
1341
+ # plain Hash outside ActiveSupport. Hyperion is a stand-alone gem;
1342
+ # we don't depend on ActiveSupport, so we route through this helper
1343
+ # to keep the rubocop-rails formatter quiet without adding a Cop
1344
+ # disable comment everywhere a nil-or-empty Hash check appears.
1345
+ def have_trailers?(trailers)
1346
+ return false if trailers.nil?
1347
+ return false if trailers.respond_to?(:empty?) && trailers.empty?
1348
+
1349
+ true
1350
+ end
1351
+
1352
+ # 2.12-F — emit the final HEADERS frame carrying response trailers.
1353
+ # The wire shape is one HEADERS frame with END_STREAM=1; HPACK
1354
+ # encodes the trailer block exactly like a regular HEADERS frame.
1355
+ # Trailer keys MUST be lowercased (RFC 7540 §8.1.2) — same rule as
1356
+ # regular HTTP/2 headers. We strip CR/LF from values defensively
1357
+ # (a header-injection guard) and split multi-line values on \n the
1358
+ # same way the regular response-header path does.
1359
+ def send_trailers(stream, trailers, writer_ctx)
1360
+ pairs = []
1361
+ trailers.each do |k, v|
1362
+ name = k.to_s.downcase
1363
+ # Pseudo-headers and forbidden names cannot appear in trailers.
1364
+ next if name.empty?
1365
+ next if name.start_with?(':')
1366
+ next if RequestStream::FORBIDDEN_HEADERS.include?(name)
1367
+
1368
+ Array(v).each do |val|
1369
+ val.to_s.split("\n").each { |line| pairs << [name, line] }
1370
+ end
1371
+ end
1372
+ writer_ctx.encode_mutex.synchronize do
1373
+ stream.send_headers(pairs, ::Protocol::HTTP2::END_STREAM)
1374
+ end
1375
+ end
1376
+
1019
1377
  # Drain bytes off the per-connection send queue onto the real socket.
1020
1378
  # This fiber is the SOLE writer to `socket` for the connection's
1021
1379
  # lifetime, which satisfies SSLSocket's "no concurrent writes from
@@ -113,6 +113,44 @@ module Hyperion
113
113
  increment(:"responses_#{code}")
114
114
  end
115
115
 
116
+ # 2.12-E — labeled counter family that observes which worker
117
+ # process a given request landed on. Ticks once per dispatched
118
+ # request from every dispatch shape (Connection#serve, h2 streams,
119
+ # the C accept4 + io_uring loops; see PrometheusExporter for the
120
+ # C-loop fold-in at scrape time).
121
+ #
122
+ # `worker_id` is conventionally `Process.pid.to_s` — matches the
123
+ # 2.4-C `hyperion_io_uring_workers_active` and
124
+ # `hyperion_per_conn_rejections_total` labeling convention; lets
125
+ # operators correlate distribution rows with `ps`/`/proc` data
126
+ # without a separate worker_id <-> pid mapping table.
127
+ #
128
+ # Hot-path cost: one `@hg_mutex` acquisition per tick. That's
129
+ # acceptable for the audit metric: contention shows up only on
130
+ # the `tick + render` overlap, never inside the C accept loop
131
+ # (which uses its own atomic counter folded in at scrape time).
132
+ # Worth the simplicity over an extra lock-free per-thread cache.
133
+ REQUESTS_DISPATCH_TOTAL = :hyperion_requests_dispatch_total
134
+ WORKER_ID_LABEL_KEYS = %w[worker_id].freeze
135
+
136
+ def tick_worker_request(worker_id)
137
+ label = worker_id.nil? || worker_id.to_s.empty? ? '0' : worker_id.to_s
138
+ ensure_worker_request_family_registered!
139
+ increment_labeled_counter(REQUESTS_DISPATCH_TOTAL, [label])
140
+ end
141
+
142
+ # 2.12-E — Idempotently register the labeled-counter family. Public
143
+ # so `Server#run_c_accept_loop` can register at boot — the
144
+ # PrometheusExporter's C-loop fold-in is gated on the family being
145
+ # in the snapshot, and a 100% C-loop worker never goes through
146
+ # `tick_worker_request` to register lazily.
147
+ def ensure_worker_request_family_registered!
148
+ return if @worker_request_family_registered
149
+
150
+ register_labeled_counter(REQUESTS_DISPATCH_TOTAL, label_keys: WORKER_ID_LABEL_KEYS)
151
+ @worker_request_family_registered = true
152
+ end
153
+
116
154
  def snapshot
117
155
  result = Hash.new(0)
118
156
  counters_snapshot = @counters_mutex.synchronize { @thread_counters.dup }