hyperion-rb 2.10.1 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +771 -0
- data/README.md +135 -5
- data/ext/hyperion_http/extconf.rb +41 -0
- data/ext/hyperion_http/io_uring_loop.c +710 -0
- data/ext/hyperion_http/page_cache.c +1032 -0
- data/ext/hyperion_http/page_cache_internal.h +132 -0
- data/lib/hyperion/connection.rb +14 -0
- data/lib/hyperion/dispatch_mode.rb +19 -1
- data/lib/hyperion/h2_codec.rb +52 -5
- data/lib/hyperion/http2_handler.rb +399 -41
- data/lib/hyperion/metrics.rb +38 -0
- data/lib/hyperion/prometheus_exporter.rb +76 -1
- data/lib/hyperion/server/connection_loop.rb +159 -0
- data/lib/hyperion/server.rb +183 -0
- data/lib/hyperion/thread_pool.rb +23 -7
- data/lib/hyperion/version.rb +1 -1
- metadata +4 -1
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/* ----------------------------------------------------------------------
|
|
2
|
+
* page_cache_internal.h — internal C-ext sharing surface.
|
|
3
|
+
*
|
|
4
|
+
* 2.12-D — exposes the request-parsing + lookup + write helpers built by
|
|
5
|
+
* `page_cache.c`'s C accept loop so the io_uring sibling
|
|
6
|
+
* (`io_uring_loop.c`) can reuse them rather than copy-pasting. The
|
|
7
|
+
* helpers stay `static` inside `page_cache.c` and the symbols below are
|
|
8
|
+
* thin extern wrappers — one indirection per call, but the io_uring
|
|
9
|
+
* loop calls them at most once per request, so the cost is negligible
|
|
10
|
+
* (a single direct call) compared to the syscall savings the loop
|
|
11
|
+
* delivers.
|
|
12
|
+
*
|
|
13
|
+
* NOT public surface. NOT installed in any include path. The header
|
|
14
|
+
* lives next to the .c files and is included only by the in-tree C
|
|
15
|
+
* sources.
|
|
16
|
+
* ---------------------------------------------------------------------- */
|
|
17
|
+
#ifndef HYP_PAGE_CACHE_INTERNAL_H
|
|
18
|
+
#define HYP_PAGE_CACHE_INTERNAL_H
|
|
19
|
+
|
|
20
|
+
#include <stddef.h>
|
|
21
|
+
#include <sys/types.h>
|
|
22
|
+
|
|
23
|
+
#ifdef __cplusplus
|
|
24
|
+
extern "C" {
|
|
25
|
+
#endif
|
|
26
|
+
|
|
27
|
+
/* Method classification (mirrors `hyp_pc_method_t` in page_cache.c). The
|
|
28
|
+
* io_uring loop uses this via `pc_internal_classify_method` to decide
|
|
29
|
+
* how much of the cached response to write (HEAD = headers only, GET =
|
|
30
|
+
* full response). */
|
|
31
|
+
typedef enum {
|
|
32
|
+
PC_INTERNAL_METHOD_GET = 0,
|
|
33
|
+
PC_INTERNAL_METHOD_HEAD = 1,
|
|
34
|
+
PC_INTERNAL_METHOD_OTHER = 2
|
|
35
|
+
} pc_internal_method_t;
|
|
36
|
+
|
|
37
|
+
/* End-of-headers scanner. Returns the byte offset PAST the trailing
|
|
38
|
+
* CRLFCRLF, or -1 if not found. */
|
|
39
|
+
long pc_internal_find_eoh(const char *buf, size_t len);
|
|
40
|
+
|
|
41
|
+
/* Request-line parser. On success fills *m_off, *m_len, *p_off, *p_len
|
|
42
|
+
* with offsets/lengths of METHOD and PATH inside `buf`, and returns the
|
|
43
|
+
* length of the request line including the trailing CRLF. Returns -1
|
|
44
|
+
* on malformed input or non-HTTP/1.1 versions (HTTP/1.0 differs in
|
|
45
|
+
* keep-alive defaults; the caller must hand it off to Ruby). */
|
|
46
|
+
long pc_internal_parse_request_line(const char *buf, size_t len,
|
|
47
|
+
size_t *m_off, size_t *m_len,
|
|
48
|
+
size_t *p_off, size_t *p_len);
|
|
49
|
+
|
|
50
|
+
/* Header-block scanner. `start` and `end` bracket the headers section
|
|
51
|
+
* (between request-line end and the closing CRLFCRLF). Reports:
|
|
52
|
+
* *connection_close — Connection: close seen
|
|
53
|
+
* *has_body — non-zero Content-Length OR Transfer-Encoding
|
|
54
|
+
* *upgrade_seen — Upgrade or HTTP2-Settings seen
|
|
55
|
+
* Returns 0 on success, -1 on malformed framing. */
|
|
56
|
+
int pc_internal_scan_headers(const char *buf, size_t start, size_t end,
|
|
57
|
+
int *connection_close, int *has_body,
|
|
58
|
+
int *upgrade_seen);
|
|
59
|
+
|
|
60
|
+
/* Method classifier. Returns GET / HEAD / OTHER. */
|
|
61
|
+
pc_internal_method_t pc_internal_classify_method(const char *m, size_t len);
|
|
62
|
+
|
|
63
|
+
/* Snapshot the response bytes for `(path, kind)` into a freshly malloc'd
|
|
64
|
+
* buffer. On hit: returns the malloc'd buffer (caller must `free()` it)
|
|
65
|
+
* and writes the byte length into *out_len. On miss: returns NULL and
|
|
66
|
+
* sets *out_len = 0. The buffer is whatever the page cache's lookup
|
|
67
|
+
* picks given the recheck/staleness rules; the io_uring loop writes it
|
|
68
|
+
* verbatim. Takes the C-side cache lock briefly; releases it before
|
|
69
|
+
* returning. Returns NULL on OOM as well — the caller treats both as
|
|
70
|
+
* "couldn't serve from C, hand off to Ruby". */
|
|
71
|
+
char *pc_internal_snapshot_response(const char *path, size_t path_len,
|
|
72
|
+
pc_internal_method_t kind,
|
|
73
|
+
size_t *out_len);
|
|
74
|
+
|
|
75
|
+
/* Apply TCP_NODELAY to an accepted fd (best-effort; failures swallowed). */
|
|
76
|
+
void pc_internal_apply_tcp_nodelay(int fd);
|
|
77
|
+
|
|
78
|
+
/* Lifecycle hook fire wrapper. The io_uring loop calls this AFTER the
|
|
79
|
+
* write completion arrives so observers see a finished request. The
|
|
80
|
+
* C-side gate (`lifecycle_active`) is checked inside; the wrapper is
|
|
81
|
+
* a no-op when no callback is registered or the gate is off. Must be
|
|
82
|
+
* called under the GVL. */
|
|
83
|
+
void pc_internal_fire_lifecycle(const char *method, size_t mlen,
|
|
84
|
+
const char *path, size_t plen);
|
|
85
|
+
|
|
86
|
+
/* Whether the lifecycle gate is currently on. The io_uring loop reads
|
|
87
|
+
* this BEFORE re-acquiring the GVL — when it's off, the loop skips
|
|
88
|
+
* the rb_thread_call_with_gvl round-trip entirely. */
|
|
89
|
+
int pc_internal_lifecycle_active(void);
|
|
90
|
+
|
|
91
|
+
/* Handoff wrapper — invokes the registered Ruby callback with
|
|
92
|
+
* (fd, partial_buffer_or_nil). Must be called under the GVL. Closes
|
|
93
|
+
* the fd locally if no callback is registered or if the callback
|
|
94
|
+
* raised. */
|
|
95
|
+
void pc_internal_handoff(int client_fd, const char *partial, size_t partial_len);
|
|
96
|
+
|
|
97
|
+
/* Read the stop flag flipped by `PageCache.stop_accept_loop`. Both the
|
|
98
|
+
* 2.12-C accept4 loop AND the 2.12-D io_uring loop honour it as a
|
|
99
|
+
* graceful-shutdown signal. */
|
|
100
|
+
int pc_internal_stop_requested(void);
|
|
101
|
+
|
|
102
|
+
/* Reset the stop flag to 0. Called by the loop entry points
|
|
103
|
+
* (`run_static_accept_loop`, `run_static_io_uring_loop`) so a previous
|
|
104
|
+
* invocation's `stop_accept_loop` doesn't immediately tear down a
|
|
105
|
+
* fresh loop. Specs hammer this path between examples — the 2.12-C
|
|
106
|
+
* loop resets inline; the io_uring sibling needs the same surface. */
|
|
107
|
+
void pc_internal_reset_stop(void);
|
|
108
|
+
|
|
109
|
+
/* 2.12-E — bump the per-process served-request counter (atomic; safe
|
|
110
|
+
* to call from any thread / fiber / accept-loop context). Both the
|
|
111
|
+
* 2.12-C accept4 loop and the 2.12-D io_uring loop call this after
|
|
112
|
+
* a successful response write so the SO_REUSEPORT distribution audit
|
|
113
|
+
* (`PageCache.c_loop_requests_total`) sees ticks regardless of which
|
|
114
|
+
* loop variant is active. */
|
|
115
|
+
void pc_internal_tick_request(void);
|
|
116
|
+
|
|
117
|
+
/* 2.12-E — reset the per-process served-request counter. Mirrors the
|
|
118
|
+
* stop-flag reset rationale: loop entry points call this so a prior
|
|
119
|
+
* invocation's count doesn't bleed into the new loop's snapshot. */
|
|
120
|
+
void pc_internal_reset_requests_served(void);
|
|
121
|
+
|
|
122
|
+
/* The 64 KiB header-cap shared with `page_cache.c`. Re-declared here
|
|
123
|
+
* so io_uring_loop.c doesn't need to mirror the magic number. */
|
|
124
|
+
#ifndef PC_INTERNAL_MAX_HEADER_BYTES
|
|
125
|
+
#define PC_INTERNAL_MAX_HEADER_BYTES 65536
|
|
126
|
+
#endif
|
|
127
|
+
|
|
128
|
+
#ifdef __cplusplus
|
|
129
|
+
}
|
|
130
|
+
#endif
|
|
131
|
+
|
|
132
|
+
#endif /* HYP_PAGE_CACHE_INTERNAL_H */
|
data/lib/hyperion/connection.rb
CHANGED
|
@@ -135,6 +135,12 @@ module Hyperion
|
|
|
135
135
|
# keep the existing pattern of caching boot-time refs as ivars so
|
|
136
136
|
# the per-request observe stays a single Hash lookup.
|
|
137
137
|
@path_templater = path_templater || Hyperion::Metrics.default_path_templater
|
|
138
|
+
# 2.12-E — per-worker request counter label. Cached once per
|
|
139
|
+
# Connection (Process.pid is process-constant — re-reading it per
|
|
140
|
+
# request would allocate a new to_s String each time just
|
|
141
|
+
# to build the metric label). Each Connection lives in
|
|
142
|
+
# exactly one process, so the cached value is never stale.
|
|
143
|
+
@worker_id = Process.pid.to_s
|
|
138
144
|
# 2.10-D — direct-dispatch route table. The hot-path lookup
|
|
139
145
|
# is `@route_table&.lookup(method, path)` so the nil-default
|
|
140
146
|
# case (no operator-registered direct routes — the
|
|
@@ -307,6 +313,14 @@ module Hyperion
|
|
|
307
313
|
@metrics.increment(:bytes_read, body_end)
|
|
308
314
|
@metrics.increment(:requests_total)
|
|
309
315
|
@metrics.increment(:requests_in_flight)
|
|
316
|
+
# 2.12-E — per-worker request counter for the SO_REUSEPORT
|
|
317
|
+
# load-balancing audit. `worker_id` is the OS pid (matches the
|
|
318
|
+
# 2.4-C `hyperion_io_uring_workers_active` convention). Single
|
|
319
|
+
# location for every Ruby-side dispatch shape: regular Rack
|
|
320
|
+
# via `dispatch_request`, direct dispatch via `dispatch_direct!`,
|
|
321
|
+
# and the StaticEntry fast path via `dispatch_direct_static!`
|
|
322
|
+
# all flow through this point in `serve`.
|
|
323
|
+
@metrics.tick_worker_request(@worker_id)
|
|
310
324
|
# 2.4-C: capture start time for the per-route duration histogram.
|
|
311
325
|
# Same Process.clock_gettime that the access-log path was already
|
|
312
326
|
# paying — at default-ON log_requests the second call here is
|
|
@@ -46,8 +46,26 @@ module Hyperion
|
|
|
46
46
|
# escape hatch via `env['hyperion.dispatch_mode']
|
|
47
47
|
# = :inline_blocking` for routes the auto-
|
|
48
48
|
# detect doesn't catch.
|
|
49
|
+
# 2.12-C — `:c_accept_loop_h1` is a connection-wide mode (NOT a
|
|
50
|
+
# per-response override): the entire accept-and-serve loop runs in
|
|
51
|
+
# C via `Hyperion::Http::PageCache.run_static_accept_loop`. Engaged
|
|
52
|
+
# only when the operator's route table is composed entirely of
|
|
53
|
+
# `Server.handle_static`-registered routes AND the listener is
|
|
54
|
+
# plain TCP. Counted under `:requests_dispatch_c_accept_loop_h1`
|
|
55
|
+
# at engage time (one bump per worker boot) so operators can see
|
|
56
|
+
# the path is on without scraping the per-request `:c_accept_loop_requests`
|
|
57
|
+
# counter.
|
|
58
|
+
# 2.12-D — `:c_accept_loop_io_uring_h1` is a sibling of
|
|
59
|
+
# `:c_accept_loop_h1`. Engaged when the operator opts into
|
|
60
|
+
# `HYPERION_IO_URING_ACCEPT=1` AND the C ext was compiled with
|
|
61
|
+
# liburing AND the runtime probe succeeded. Same eligibility gates
|
|
62
|
+
# as `:c_accept_loop_h1` (handle_static-only routes, plain TCP),
|
|
63
|
+
# different syscall shape (single `io_uring_enter` per N requests
|
|
64
|
+
# vs. N×3 syscalls). Counted under
|
|
65
|
+
# `:requests_dispatch_c_accept_loop_io_uring_h1` so operators can
|
|
66
|
+
# confirm the path is on without scraping logs.
|
|
49
67
|
MODES = %i[tls_h2 tls_h1_inline async_io_h1_inline threadpool_h1 inline_h1_no_pool
|
|
50
|
-
inline_blocking].freeze
|
|
68
|
+
inline_blocking c_accept_loop_h1 c_accept_loop_io_uring_h1].freeze
|
|
51
69
|
|
|
52
70
|
INLINE_MODES = %i[tls_h1_inline async_io_h1_inline inline_h1_no_pool inline_blocking].freeze
|
|
53
71
|
|
data/lib/hyperion/h2_codec.rb
CHANGED
|
@@ -45,11 +45,36 @@ module Hyperion
|
|
|
45
45
|
@cglue_available == true
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
+
# 2.11-B — operator-controllable gate that overlays CGlue
|
|
49
|
+
# availability. The Encoder/Decoder hot paths probe this (NOT
|
|
50
|
+
# `cglue_available?`) so a `HYPERION_H2_NATIVE_HPACK=v2` boot can
|
|
51
|
+
# force the Fiddle path even on a host where the C glue loaded
|
|
52
|
+
# successfully. This is the bench-isolation knob 2.11-B's
|
|
53
|
+
# `bench/h2_rails_shape.sh` needs to compare native-v2 against
|
|
54
|
+
# native-v3 honestly — without it, "native" and "cglue" variants
|
|
55
|
+
# would always pick the same physical path.
|
|
56
|
+
#
|
|
57
|
+
# `Http2Handler#initialize` writes the gate based on the env var;
|
|
58
|
+
# tests can flip `@cglue_disabled` directly. Default false (i.e.,
|
|
59
|
+
# gate is OPEN — same physical behavior as 2.4-A through 2.10).
|
|
60
|
+
def self.cglue_active?
|
|
61
|
+
cglue_available? && !@cglue_disabled
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def self.cglue_disabled=(value)
|
|
65
|
+
@cglue_disabled = value ? true : false
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def self.cglue_disabled
|
|
69
|
+
@cglue_disabled == true
|
|
70
|
+
end
|
|
71
|
+
|
|
48
72
|
# Force a reload (test seam). Unsets the memoized state so the next
|
|
49
73
|
# `available?` call probes the filesystem again.
|
|
50
74
|
def self.reset!
|
|
51
75
|
@available = nil
|
|
52
76
|
@cglue_available = nil
|
|
77
|
+
@cglue_disabled = false
|
|
53
78
|
@lib = nil
|
|
54
79
|
end
|
|
55
80
|
|
|
@@ -126,7 +151,13 @@ module Hyperion
|
|
|
126
151
|
# into a new owned String — that's the contract callers rely
|
|
127
152
|
# on (`protocol-http2`'s Compressor#encode returns a String,
|
|
128
153
|
# not a slice into shared mutable memory).
|
|
129
|
-
|
|
154
|
+
#
|
|
155
|
+
# 2.11-B — probe `cglue_active?` (NOT `cglue_available?`) so an
|
|
156
|
+
# operator-set `HYPERION_H2_NATIVE_HPACK=v2` boot routes through
|
|
157
|
+
# Fiddle even when the C glue is physically present. Same
|
|
158
|
+
# branch shape; one extra ivar read on the hot path which
|
|
159
|
+
# disappears under YJIT inlining.
|
|
160
|
+
if H2Codec.cglue_active?
|
|
130
161
|
# Pad the scratch String with zero bytes so its length matches
|
|
131
162
|
# capacity — the C ext writes into RSTRING_PTR up to RSTRING_LEN
|
|
132
163
|
# and then truncates back via rb_str_set_len after encoding.
|
|
@@ -272,7 +303,8 @@ module Hyperion
|
|
|
272
303
|
# 2.4-A — fast path: reuse a per-decoder scratch and dispatch
|
|
273
304
|
# through the C glue. The Rust ABI writes `[u32 name_len][name]
|
|
274
305
|
# [u32 val_len][val]` repeated; we unpack that in Ruby.
|
|
275
|
-
|
|
306
|
+
# 2.11-B — `cglue_active?` overlays an operator-set v2 force.
|
|
307
|
+
if H2Codec.cglue_active?
|
|
276
308
|
if capacity > @scratch_out_capacity
|
|
277
309
|
new_cap = @scratch_out_capacity
|
|
278
310
|
new_cap *= 2 while new_cap < capacity
|
|
@@ -412,9 +444,24 @@ module Hyperion
|
|
|
412
444
|
def self.candidate_paths
|
|
413
445
|
gem_lib = File.expand_path('../hyperion_h2_codec', __dir__)
|
|
414
446
|
ext_target = File.expand_path('../../ext/hyperion_h2_codec/target/release', __dir__)
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
447
|
+
# 2.11-B fix: order suffixes by host OS. Pre-2.11-B this was a
|
|
448
|
+
# static `[dylib, so]` order, which broke on Linux hosts that
|
|
449
|
+
# had a stale macOS `.dylib` on the path (e.g. a developer rsync
|
|
450
|
+
# leaking the `target/release` artifact across platforms). Fiddle
|
|
451
|
+
# would try the `.dylib` first, choke on the Mach-O binary with
|
|
452
|
+
# `ArgumentError: invalid byte sequence in UTF-8` from libffi,
|
|
453
|
+
# and the rescue in `load!` would silently fall back to the Ruby
|
|
454
|
+
# HPACK path with no warning visible to bench harnesses.
|
|
455
|
+
#
|
|
456
|
+
# Ordering by `host_os` makes Linux pick `.so` first and ignore
|
|
457
|
+
# any orphan `.dylib`; macOS keeps the `.dylib`-first behavior
|
|
458
|
+
# for back-compat with existing dev environments.
|
|
459
|
+
suffixes = if /darwin|mac/i.match?(RbConfig::CONFIG['host_os'])
|
|
460
|
+
%w[libhyperion_h2_codec.dylib libhyperion_h2_codec.so]
|
|
461
|
+
else
|
|
462
|
+
%w[libhyperion_h2_codec.so libhyperion_h2_codec.dylib]
|
|
463
|
+
end
|
|
464
|
+
suffixes.flat_map { |name| [File.join(gem_lib, name), File.join(ext_target, name)] }
|
|
418
465
|
end
|
|
419
466
|
|
|
420
467
|
# FFI wrappers — kept thin so callers don't see Fiddle::Pointer
|