hyperion-rb 2.10.1 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,132 @@
1
+ /* ----------------------------------------------------------------------
2
+ * page_cache_internal.h — internal C-ext sharing surface.
3
+ *
4
+ * 2.12-D — exposes the request-parsing + lookup + write helpers built by
5
+ * `page_cache.c`'s C accept loop so the io_uring sibling
6
+ * (`io_uring_loop.c`) can reuse them rather than copy-pasting. The
7
+ * helpers stay `static` inside `page_cache.c` and the symbols below are
8
+ * thin extern wrappers — one extra function call per use, but the
9
+ * io_uring loop calls them at most once per request, so the cost
10
+ * (a single direct call) is negligible compared to the syscall
11
+ * savings the loop delivers.
12
+ *
13
+ * NOT public surface. NOT installed in any include path. The header
14
+ * lives next to the .c files and is included only by the in-tree C
15
+ * sources.
16
+ * ---------------------------------------------------------------------- */
17
+ #ifndef HYP_PAGE_CACHE_INTERNAL_H
18
+ #define HYP_PAGE_CACHE_INTERNAL_H
19
+
20
+ #include <stddef.h>
21
+ #include <sys/types.h>
22
+
23
+ #ifdef __cplusplus
24
+ extern "C" {
25
+ #endif
26
+
27
+ /* Method classification (mirrors `hyp_pc_method_t` in page_cache.c). The
28
+ * io_uring loop uses this via `pc_internal_classify_method` to decide
29
+ * how much of the cached response to write (HEAD = headers only, GET =
30
+ * full response). */
31
+ typedef enum {
32
+ PC_INTERNAL_METHOD_GET = 0,
33
+ PC_INTERNAL_METHOD_HEAD = 1,
34
+ PC_INTERNAL_METHOD_OTHER = 2
35
+ } pc_internal_method_t;
36
+
37
+ /* End-of-headers scanner. Returns the byte offset PAST the trailing
38
+ * CRLFCRLF, or -1 if not found. */
39
+ long pc_internal_find_eoh(const char *buf, size_t len);
40
+
41
+ /* Request-line parser. On success fills *m_off, *m_len, *p_off, *p_len
42
+ * with offsets/lengths of METHOD and PATH inside `buf`, and returns the
43
+ * length of the request line including the trailing CRLF. Returns -1
44
+ * on malformed input or non-HTTP/1.1 versions (HTTP/1.0 differs in
45
+ * keep-alive defaults; the caller must hand it off to Ruby). */
46
+ long pc_internal_parse_request_line(const char *buf, size_t len,
47
+ size_t *m_off, size_t *m_len,
48
+ size_t *p_off, size_t *p_len);
49
+
50
+ /* Header-block scanner. `start` and `end` bracket the headers section
51
+ * (between request-line end and the closing CRLFCRLF). Reports:
52
+ * *connection_close — Connection: close seen
53
+ * *has_body — non-zero Content-Length seen, OR Transfer-Encoding seen
54
+ * *upgrade_seen — Upgrade or HTTP2-Settings header seen
55
+ * Returns 0 on success, -1 on malformed framing. */
56
+ int pc_internal_scan_headers(const char *buf, size_t start, size_t end,
57
+ int *connection_close, int *has_body,
58
+ int *upgrade_seen);
59
+
60
+ /* Method classifier. Returns GET / HEAD / OTHER. */
61
+ pc_internal_method_t pc_internal_classify_method(const char *m, size_t len);
62
+
63
+ /* Snapshot the response bytes for `(path, kind)` into a freshly malloc'd
64
+ * buffer. On hit: returns the malloc'd buffer (caller must `free()` it)
65
+ * and writes the byte length into *out_len. On miss: returns NULL and
66
+ * sets *out_len = 0. The buffer is whatever the page cache's lookup
67
+ * picks given the recheck/staleness rules; the io_uring loop writes it
68
+ * verbatim. Takes the C-side cache lock briefly; releases it before
69
+ * returning. Returns NULL on OOM as well — the caller treats both as
70
+ * "couldn't serve from C, hand off to Ruby". */
71
+ char *pc_internal_snapshot_response(const char *path, size_t path_len,
72
+ pc_internal_method_t kind,
73
+ size_t *out_len);
74
+
75
+ /* Apply TCP_NODELAY to an accepted fd (best-effort; failures swallowed). */
76
+ void pc_internal_apply_tcp_nodelay(int fd);
77
+
78
+ /* Lifecycle hook fire wrapper. The io_uring loop calls this AFTER the
79
+ * write completion arrives so observers see a finished request. The
80
+ * C-side gate (`lifecycle_active`) is checked inside; the wrapper is
81
+ * a no-op when no callback is registered or the gate is off. Must be
82
+ * called under the GVL. */
83
+ void pc_internal_fire_lifecycle(const char *method, size_t mlen,
84
+ const char *path, size_t plen);
85
+
86
+ /* Whether the lifecycle gate is currently on. The io_uring loop reads
87
+ * this BEFORE re-acquiring the GVL — when it's off, the loop skips
88
+ * the rb_thread_call_with_gvl round-trip entirely. */
89
+ int pc_internal_lifecycle_active(void);
90
+
91
+ /* Handoff wrapper — invokes the registered Ruby callback with
92
+ * (fd, partial_buffer_or_nil). Must be called under the GVL. Closes
93
+ * the fd locally if no callback is registered or if the callback
94
+ * raised. */
95
+ void pc_internal_handoff(int client_fd, const char *partial, size_t partial_len);
96
+
97
+ /* Read the stop flag flipped by `PageCache.stop_accept_loop`. Both the
98
+ * 2.12-C accept4 loop AND the 2.12-D io_uring loop honour it as a
99
+ * graceful-shutdown signal. */
100
+ int pc_internal_stop_requested(void);
101
+
102
+ /* Reset the stop flag to 0. Called by the loop entry points
103
+ * (`run_static_accept_loop`, `run_static_io_uring_loop`) so a previous
104
+ * invocation's `stop_accept_loop` doesn't immediately tear down a
105
+ * fresh loop. Specs hammer this path between examples — the 2.12-C
106
+ * loop resets inline; the io_uring sibling needs the same surface. */
107
+ void pc_internal_reset_stop(void);
108
+
109
+ /* 2.12-E — bump the per-process served-request counter (atomic; safe
110
+ * to call from any thread / fiber / accept-loop context). Both the
111
+ * 2.12-C accept4 loop and the 2.12-D io_uring loop call this after
112
+ * a successful response write so the SO_REUSEPORT distribution audit
113
+ * (`PageCache.c_loop_requests_total`) sees ticks regardless of which
114
+ * loop variant is active. */
115
+ void pc_internal_tick_request(void);
116
+
117
+ /* 2.12-E — reset the per-process served-request counter. Mirrors the
118
+ * stop-flag reset rationale: loop entry points call this so a prior
119
+ * invocation's count doesn't bleed into the new loop's snapshot. */
120
+ void pc_internal_reset_requests_served(void);
121
+
122
+ /* The 64 KiB header-cap shared with `page_cache.c`. Re-declared here
123
+ * so io_uring_loop.c doesn't need to mirror the magic number. */
124
+ #ifndef PC_INTERNAL_MAX_HEADER_BYTES
125
+ #define PC_INTERNAL_MAX_HEADER_BYTES 65536
126
+ #endif
127
+
128
+ #ifdef __cplusplus
129
+ }
130
+ #endif
131
+
132
+ #endif /* HYP_PAGE_CACHE_INTERNAL_H */
@@ -135,6 +135,12 @@ module Hyperion
135
135
  # keep the existing pattern of caching boot-time refs as ivars so
136
136
  # the per-request observe stays a single Hash lookup.
137
137
  @path_templater = path_templater || Hyperion::Metrics.default_path_templater
138
+ # 2.12-E — per-worker request counter label. Cached once per
139
+ # Connection: Process.pid is constant for the process lifetime, so
140
+ # calling `Process.pid.to_s` per request would allocate a fresh
141
+ # String each time for the same label. Each Connection lives in
142
+ # exactly one process, so the cached value never goes stale.
143
+ @worker_id = Process.pid.to_s
138
144
  # 2.10-D — direct-dispatch route table. The hot-path lookup
139
145
  # is `@route_table&.lookup(method, path)` so the nil-default
140
146
  # case (no operator-registered direct routes — the
@@ -307,6 +313,14 @@ module Hyperion
307
313
  @metrics.increment(:bytes_read, body_end)
308
314
  @metrics.increment(:requests_total)
309
315
  @metrics.increment(:requests_in_flight)
316
+ # 2.12-E — per-worker request counter for the SO_REUSEPORT
317
+ # load-balancing audit. `worker_id` is the OS pid (matches the
318
+ # 2.4-C `hyperion_io_uring_workers_active` convention). Single
319
+ # location for every Ruby-side dispatch shape: regular Rack
320
+ # via `dispatch_request`, direct dispatch via `dispatch_direct!`,
321
+ # and the StaticEntry fast path via `dispatch_direct_static!`
322
+ # all flow through this point in `serve`.
323
+ @metrics.tick_worker_request(@worker_id)
310
324
  # 2.4-C: capture start time for the per-route duration histogram.
311
325
  # Same Process.clock_gettime that the access-log path was already
312
326
  # paying — at default-ON log_requests the second call here is
@@ -46,8 +46,26 @@ module Hyperion
46
46
  # escape hatch via `env['hyperion.dispatch_mode']
47
47
  # = :inline_blocking` for routes the auto-
48
48
  # detect doesn't catch.
49
+ # 2.12-C — `:c_accept_loop_h1` is a connection-wide mode (NOT a
50
+ # per-response override): the entire accept-and-serve loop runs in
51
+ # C via `Hyperion::Http::PageCache.run_static_accept_loop`. Engaged
52
+ # only when the operator's route table is composed entirely of
53
+ # `Server.handle_static`-registered routes AND the listener is
54
+ # plain TCP. Counted under `:requests_dispatch_c_accept_loop_h1`
55
+ # at engage time (one bump per worker boot) so operators can see
56
+ # the path is on without scraping the per-request `:c_accept_loop_requests`
57
+ # counter.
58
+ # 2.12-D — `:c_accept_loop_io_uring_h1` is a sibling of
59
+ # `:c_accept_loop_h1`. Engaged when the operator opts into
60
+ # `HYPERION_IO_URING_ACCEPT=1` AND the C ext was compiled with
61
+ # liburing AND the runtime probe succeeded. Same eligibility gates
62
+ # as `:c_accept_loop_h1` (handle_static-only routes, plain TCP),
63
+ # different syscall shape (single `io_uring_enter` per N requests
64
+ # vs. N×3 syscalls). Counted under
65
+ # `:requests_dispatch_c_accept_loop_io_uring_h1` so operators can
66
+ # confirm the path is on without scraping logs.
49
67
  MODES = %i[tls_h2 tls_h1_inline async_io_h1_inline threadpool_h1 inline_h1_no_pool
50
- inline_blocking].freeze
68
+ inline_blocking c_accept_loop_h1 c_accept_loop_io_uring_h1].freeze
51
69
 
52
70
  INLINE_MODES = %i[tls_h1_inline async_io_h1_inline inline_h1_no_pool inline_blocking].freeze
53
71
 
@@ -45,11 +45,36 @@ module Hyperion
45
45
  @cglue_available == true
46
46
  end
47
47
 
48
+ # 2.11-B — operator-controllable gate that overlays CGlue
49
+ # availability. The Encoder/Decoder hot paths probe this (NOT
50
+ # `cglue_available?`) so a `HYPERION_H2_NATIVE_HPACK=v2` boot can
51
+ # force the Fiddle path even on a host where the C glue loaded
52
+ # successfully. This is the bench-isolation knob 2.11-B's
53
+ # `bench/h2_rails_shape.sh` needs to compare native-v2 against
54
+ # native-v3 honestly — without it, "native" and "cglue" variants
55
+ # would always pick the same physical path.
56
+ #
57
+ # `Http2Handler#initialize` writes the gate based on the env var;
58
+ # tests can flip `@cglue_disabled` directly. Default false (i.e.,
59
+ # gate is OPEN — same physical behavior as 2.4-A through 2.10).
60
+ def self.cglue_active?
61
+ cglue_available? && !@cglue_disabled
62
+ end
63
+
64
+ def self.cglue_disabled=(value)
65
+ @cglue_disabled = value ? true : false
66
+ end
67
+
68
+ def self.cglue_disabled
69
+ @cglue_disabled == true
70
+ end
71
+
48
72
  # Force a reload (test seam). Unsets the memoized state so the next
49
73
  # `available?` call probes the filesystem again.
50
74
  def self.reset!
51
75
  @available = nil
52
76
  @cglue_available = nil
77
+ @cglue_disabled = false
53
78
  @lib = nil
54
79
  end
55
80
 
@@ -126,7 +151,13 @@ module Hyperion
126
151
  # into a new owned String — that's the contract callers rely
127
152
  # on (`protocol-http2`'s Compressor#encode returns a String,
128
153
  # not a slice into shared mutable memory).
129
- if H2Codec.cglue_available?
154
+ #
155
+ # 2.11-B — probe `cglue_active?` (NOT `cglue_available?`) so an
156
+ # operator-set `HYPERION_H2_NATIVE_HPACK=v2` boot routes through
157
+ # Fiddle even when the C glue is physically present. Same
158
+ # branch shape; one extra ivar read on the hot path which
159
+ # disappears under YJIT inlining.
160
+ if H2Codec.cglue_active?
130
161
  # Pad the scratch String with zero bytes so its length matches
131
162
  # capacity — the C ext writes into RSTRING_PTR up to RSTRING_LEN
132
163
  # and then truncates back via rb_str_set_len after encoding.
@@ -272,7 +303,8 @@ module Hyperion
272
303
  # 2.4-A — fast path: reuse a per-decoder scratch and dispatch
273
304
  # through the C glue. The Rust ABI writes `[u32 name_len][name]
274
305
  # [u32 val_len][val]` repeated; we unpack that in Ruby.
275
- if H2Codec.cglue_available?
306
+ # 2.11-B — `cglue_active?` overlays an operator-set v2 force.
307
+ if H2Codec.cglue_active?
276
308
  if capacity > @scratch_out_capacity
277
309
  new_cap = @scratch_out_capacity
278
310
  new_cap *= 2 while new_cap < capacity
@@ -412,9 +444,24 @@ module Hyperion
412
444
  def self.candidate_paths
413
445
  gem_lib = File.expand_path('../hyperion_h2_codec', __dir__)
414
446
  ext_target = File.expand_path('../../ext/hyperion_h2_codec/target/release', __dir__)
415
- %w[libhyperion_h2_codec.dylib libhyperion_h2_codec.so].flat_map do |name|
416
- [File.join(gem_lib, name), File.join(ext_target, name)]
417
- end
447
+ # 2.11-B fix: order suffixes by host OS. Pre-2.11-B this was a
448
+ # static `[dylib, so]` order, which broke on Linux hosts that
449
+ # had a stale macOS `.dylib` on the path (e.g. a developer rsync
450
+ # leaking the `target/release` artifact across platforms). Fiddle
451
+ # would try the `.dylib` first, choke on the Mach-O binary with
452
+ # `ArgumentError: invalid byte sequence in UTF-8` from libffi,
453
+ # and the rescue in `load!` would silently fall back to the Ruby
454
+ # HPACK path with no warning visible to bench harnesses.
455
+ #
456
+ # Ordering by `host_os` makes Linux pick `.so` first and ignore
457
+ # any orphan `.dylib`; macOS keeps the `.dylib`-first behavior
458
+ # for back-compat with existing dev environments.
459
+ suffixes = if /darwin|mac/i.match?(RbConfig::CONFIG['host_os'])
460
+ %w[libhyperion_h2_codec.dylib libhyperion_h2_codec.so]
461
+ else
462
+ %w[libhyperion_h2_codec.so libhyperion_h2_codec.dylib]
463
+ end
464
+ suffixes.flat_map { |name| [File.join(gem_lib, name), File.join(ext_target, name)] }
418
465
  end
419
466
 
420
467
  # FFI wrappers — kept thin so callers don't see Fiddle::Pointer