hyperion-rb 2.11.0 → 2.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1079 -0
- data/README.md +220 -5
- data/ext/hyperion_http/extconf.rb +41 -0
- data/ext/hyperion_http/io_uring_loop.c +710 -0
- data/ext/hyperion_http/page_cache.c +1032 -0
- data/ext/hyperion_http/page_cache_internal.h +132 -0
- data/ext/hyperion_http/parser.c +382 -51
- data/lib/hyperion/adapter/rack.rb +18 -4
- data/lib/hyperion/connection.rb +78 -3
- data/lib/hyperion/dispatch_mode.rb +19 -1
- data/lib/hyperion/http2_handler.rb +458 -13
- data/lib/hyperion/metrics.rb +212 -38
- data/lib/hyperion/prometheus_exporter.rb +76 -1
- data/lib/hyperion/server/connection_loop.rb +159 -0
- data/lib/hyperion/server.rb +183 -0
- data/lib/hyperion/thread_pool.rb +23 -7
- data/lib/hyperion/version.rb +1 -1
- metadata +4 -1
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/* ----------------------------------------------------------------------
|
|
2
|
+
* page_cache_internal.h — internal C-ext sharing surface.
|
|
3
|
+
*
|
|
4
|
+
* 2.12-D — exposes the request-parsing + lookup + write helpers built by
|
|
5
|
+
* `page_cache.c`'s C accept loop so the io_uring sibling
|
|
6
|
+
* (`io_uring_loop.c`) can reuse them rather than copy-pasting. The
|
|
7
|
+
* helpers stay `static` inside `page_cache.c` and the symbols below are
|
|
8
|
+
* thin extern wrappers — one indirection per call, but the io_uring
|
|
9
|
+
* loop calls them at most once per request, so the cost is negligible
|
|
10
|
+
* (single-direct-call jump) compared to the syscall savings the loop
|
|
11
|
+
* delivers.
|
|
12
|
+
*
|
|
13
|
+
* NOT public surface. NOT installed in any include path. The header
|
|
14
|
+
* lives next to the .c files and is included only by the in-tree C
|
|
15
|
+
* sources.
|
|
16
|
+
* ---------------------------------------------------------------------- */
|
|
17
|
+
#ifndef HYP_PAGE_CACHE_INTERNAL_H
|
|
18
|
+
#define HYP_PAGE_CACHE_INTERNAL_H
|
|
19
|
+
|
|
20
|
+
#include <stddef.h>
|
|
21
|
+
#include <sys/types.h>
|
|
22
|
+
|
|
23
|
+
#ifdef __cplusplus
|
|
24
|
+
extern "C" {
|
|
25
|
+
#endif
|
|
26
|
+
|
|
27
|
+
/* Method classification (mirrors `hyp_pc_method_t` in page_cache.c). The
|
|
28
|
+
* io_uring loop uses this via `pc_internal_classify_method` to decide
|
|
29
|
+
* how much of the cached response to write (HEAD = headers only, GET =
|
|
30
|
+
* full response). */
|
|
31
|
+
typedef enum {
|
|
32
|
+
PC_INTERNAL_METHOD_GET = 0,
|
|
33
|
+
PC_INTERNAL_METHOD_HEAD = 1,
|
|
34
|
+
PC_INTERNAL_METHOD_OTHER = 2
|
|
35
|
+
} pc_internal_method_t;
|
|
36
|
+
|
|
37
|
+
/* End-of-headers scanner. Returns the byte offset PAST the trailing
|
|
38
|
+
* CRLFCRLF, or -1 if not found. */
|
|
39
|
+
long pc_internal_find_eoh(const char *buf, size_t len);
|
|
40
|
+
|
|
41
|
+
/* Request-line parser. On success fills *m_off, *m_len, *p_off, *p_len
|
|
42
|
+
* with offsets/lengths of METHOD and PATH inside `buf`, and returns the
|
|
43
|
+
* length of the request line including the trailing CRLF. Returns -1
|
|
44
|
+
* on malformed input or non-HTTP/1.1 versions (HTTP/1.0 differs in
|
|
45
|
+
* keep-alive defaults; the caller must hand it off to Ruby). */
|
|
46
|
+
long pc_internal_parse_request_line(const char *buf, size_t len,
|
|
47
|
+
size_t *m_off, size_t *m_len,
|
|
48
|
+
size_t *p_off, size_t *p_len);
|
|
49
|
+
|
|
50
|
+
/* Header-block scanner. `start` and `end` bracket the headers section
|
|
51
|
+
* (between request-line end and the closing CRLFCRLF). Reports:
|
|
52
|
+
* *connection_close — Connection: close seen
|
|
53
|
+
* *has_body — non-zero Content-Length OR Transfer-Encoding
|
|
54
|
+
* *upgrade_seen — Upgrade or HTTP2-Settings seen
|
|
55
|
+
* Returns 0 on success, -1 on malformed framing. */
|
|
56
|
+
int pc_internal_scan_headers(const char *buf, size_t start, size_t end,
|
|
57
|
+
int *connection_close, int *has_body,
|
|
58
|
+
int *upgrade_seen);
|
|
59
|
+
|
|
60
|
+
/* Method classifier. Returns GET / HEAD / OTHER. */
|
|
61
|
+
pc_internal_method_t pc_internal_classify_method(const char *m, size_t len);
|
|
62
|
+
|
|
63
|
+
/* Snapshot the response bytes for `(path, kind)` into a freshly malloc'd
|
|
64
|
+
* buffer. On hit: returns the malloc'd buffer (caller must `free()` it)
|
|
65
|
+
* and writes the byte length into *out_len. On miss: returns NULL and
|
|
66
|
+
* sets *out_len = 0. The buffer is whatever the page cache's lookup
|
|
67
|
+
* picks given the recheck/staleness rules; the io_uring loop writes it
|
|
68
|
+
* verbatim. Takes the C-side cache lock briefly; releases it before
|
|
69
|
+
* returning. Returns NULL on OOM as well — the caller treats both as
|
|
70
|
+
* "couldn't serve from C, hand off to Ruby". */
|
|
71
|
+
char *pc_internal_snapshot_response(const char *path, size_t path_len,
|
|
72
|
+
pc_internal_method_t kind,
|
|
73
|
+
size_t *out_len);
|
|
74
|
+
|
|
75
|
+
/* Apply TCP_NODELAY to an accepted fd (best-effort; failures swallowed). */
|
|
76
|
+
void pc_internal_apply_tcp_nodelay(int fd);
|
|
77
|
+
|
|
78
|
+
/* Lifecycle hook fire wrapper. The io_uring loop calls this AFTER the
|
|
79
|
+
* write completion arrives so observers see a finished request. The
|
|
80
|
+
* C-side gate (`lifecycle_active`) is checked inside; the wrapper is
|
|
81
|
+
* a no-op when no callback is registered or the gate is off. Must be
|
|
82
|
+
* called under the GVL. */
|
|
83
|
+
void pc_internal_fire_lifecycle(const char *method, size_t mlen,
|
|
84
|
+
const char *path, size_t plen);
|
|
85
|
+
|
|
86
|
+
/* Whether the lifecycle gate is currently on. The io_uring loop reads
|
|
87
|
+
* this BEFORE re-acquiring the GVL — when it's off, the loop skips
|
|
88
|
+
* the rb_thread_call_with_gvl round-trip entirely. */
|
|
89
|
+
int pc_internal_lifecycle_active(void);
|
|
90
|
+
|
|
91
|
+
/* Handoff wrapper — invokes the registered Ruby callback with
|
|
92
|
+
* (fd, partial_buffer_or_nil). Must be called under the GVL. Closes
|
|
93
|
+
* the fd locally if no callback is registered or if the callback
|
|
94
|
+
* raised. */
|
|
95
|
+
void pc_internal_handoff(int client_fd, const char *partial, size_t partial_len);
|
|
96
|
+
|
|
97
|
+
/* Read the stop flag flipped by `PageCache.stop_accept_loop`. Both the
|
|
98
|
+
* 2.12-C accept4 loop AND the 2.12-D io_uring loop honour it as a
|
|
99
|
+
* graceful-shutdown signal. */
|
|
100
|
+
int pc_internal_stop_requested(void);
|
|
101
|
+
|
|
102
|
+
/* Reset the stop flag to 0. Called by the loop entry points
|
|
103
|
+
* (`run_static_accept_loop`, `run_static_io_uring_loop`) so a previous
|
|
104
|
+
* invocation's `stop_accept_loop` doesn't immediately tear down a
|
|
105
|
+
* fresh loop. Specs hammer this path between examples — the 2.12-C
|
|
106
|
+
* loop resets inline; the io_uring sibling needs the same surface. */
|
|
107
|
+
void pc_internal_reset_stop(void);
|
|
108
|
+
|
|
109
|
+
/* 2.12-E — bump the per-process served-request counter (atomic; safe
|
|
110
|
+
* to call from any thread / fiber / accept-loop context). Both the
|
|
111
|
+
* 2.12-C accept4 loop and the 2.12-D io_uring loop call this after
|
|
112
|
+
* a successful response write so the SO_REUSEPORT distribution audit
|
|
113
|
+
* (`PageCache.c_loop_requests_total`) sees ticks regardless of which
|
|
114
|
+
* loop variant is active. */
|
|
115
|
+
void pc_internal_tick_request(void);
|
|
116
|
+
|
|
117
|
+
/* 2.12-E — reset the per-process served-request counter. Mirrors the
|
|
118
|
+
* stop-flag reset rationale: loop entry points call this so a prior
|
|
119
|
+
* invocation's count doesn't bleed into the new loop's snapshot. */
|
|
120
|
+
void pc_internal_reset_requests_served(void);
|
|
121
|
+
|
|
122
|
+
/* The 64 KiB header-cap shared with `page_cache.c`. Re-declared here
|
|
123
|
+
* so io_uring_loop.c doesn't need to mirror the magic number. */
|
|
124
|
+
#ifndef PC_INTERNAL_MAX_HEADER_BYTES
|
|
125
|
+
#define PC_INTERNAL_MAX_HEADER_BYTES 65536
|
|
126
|
+
#endif
|
|
127
|
+
|
|
128
|
+
#ifdef __cplusplus
|
|
129
|
+
}
|
|
130
|
+
#endif
|
|
131
|
+
|
|
132
|
+
#endif /* HYP_PAGE_CACHE_INTERNAL_H */
|
data/ext/hyperion_http/parser.c
CHANGED
|
@@ -442,6 +442,327 @@ static VALUE cparser_parse(VALUE self, VALUE buffer) {
|
|
|
442
442
|
return rb_ary_new_from_args(2, request, ULONG2NUM((unsigned long)consumed));
|
|
443
443
|
}
|
|
444
444
|
|
|
445
|
+
/* 2.13-B — pre-baked status-line table for the most common HTTP status codes.
|
|
446
|
+
* The full "HTTP/1.1 NNN <reason>\r\n" line is a constant for any (status,
|
|
447
|
+
* reason) pair the server emits on the hot path, so we sidestep the
|
|
448
|
+
* per-request `snprintf("HTTP/1.1 %d ", status)` + reason-cat by switching
|
|
449
|
+
* on `status` and emitting a single literal-bytes cat. A non-cached status
|
|
450
|
+
* (or a non-default reason — operator override) still falls through to the
|
|
451
|
+
* generic snprintf path below. The table covers every code in
|
|
452
|
+
* `Hyperion::ResponseWriter::REASONS`. */
|
|
453
|
+
struct status_line {
    int status;         /* numeric HTTP status code */
    const char *bytes;  /* complete "HTTP/1.1 NNN <reason>\r\n" wire line */
    long len;           /* strlen of bytes — computed at compile time by STATUS_LINE */
};

/* Layout of every `bytes` string in the table:
 *   "HTTP/1.1 " (9 bytes) + 3-digit status + " " (1 byte) + reason + CRLF.
 * The reason phrase therefore starts at offset 13 and is followed by the
 * 2-byte CRLF. */
enum {
    STATUS_LINE_REASON_OFFSET = 13,
    STATUS_LINE_CRLF_LEN = 2
};

/* The length is taken with sizeof on the concatenated literal, so each entry
 * is complete as soon as the object is loaded — lookups do not depend on the
 * extension's init code having filled `len` first. (Init may still store
 * strlen(bytes) into `len`; that is the same value.) */
#define STATUS_LINE_TEXT(code, reason) "HTTP/1.1 " #code " " reason "\r\n"
#define STATUS_LINE(code, reason) \
    { (code), STATUS_LINE_TEXT(code, reason), \
      (long)(sizeof(STATUS_LINE_TEXT(code, reason)) - 1) }
static struct status_line k_status_lines[] = {
    STATUS_LINE(200, "OK"),
    STATUS_LINE(201, "Created"),
    STATUS_LINE(204, "No Content"),
    STATUS_LINE(301, "Moved Permanently"),
    STATUS_LINE(302, "Found"),
    STATUS_LINE(304, "Not Modified"),
    STATUS_LINE(400, "Bad Request"),
    STATUS_LINE(401, "Unauthorized"),
    STATUS_LINE(403, "Forbidden"),
    STATUS_LINE(404, "Not Found"),
    STATUS_LINE(405, "Method Not Allowed"),
    STATUS_LINE(408, "Request Timeout"),
    STATUS_LINE(409, "Conflict"),
    STATUS_LINE(410, "Gone"),
    STATUS_LINE(413, "Payload Too Large"),
    STATUS_LINE(414, "URI Too Long"),
    STATUS_LINE(422, "Unprocessable Entity"),
    STATUS_LINE(429, "Too Many Requests"),
    STATUS_LINE(500, "Internal Server Error"),
    STATUS_LINE(501, "Not Implemented"),
    STATUS_LINE(502, "Bad Gateway"),
    STATUS_LINE(503, "Service Unavailable"),
    STATUS_LINE(504, "Gateway Timeout"),
    { 0, NULL, 0 }      /* sentinel: bytes == NULL terminates the scan */
};
#undef STATUS_LINE
#undef STATUS_LINE_TEXT

/* Look up a pre-baked status line by (status, reason).
 *
 * status      numeric status code to find.
 * reason_ptr  reason phrase bytes (need not be NUL-terminated).
 * reason_len  number of bytes at reason_ptr.
 *
 * Returns the table entry when `status` is present AND the supplied reason
 * is byte-for-byte equal to the table's default phrase. Returns NULL when
 * the status is not in the table or the reason differs (including a
 * different casing — the comparison is memcmp, not case-insensitive), so the
 * caller can fall back to formatting the line itself. */
static const struct status_line *lookup_status_line(int status,
                                                    const char *reason_ptr,
                                                    long reason_len) {
    for (const struct status_line *e = k_status_lines; e->bytes != NULL; e++) {
        if (e->status != status) {
            continue;
        }
        /* Status codes are unique in the table, so a reason mismatch on the
         * matching row is final. */
        long table_reason_len = e->len - STATUS_LINE_REASON_OFFSET - STATUS_LINE_CRLF_LEN;
        if (table_reason_len != reason_len) {
            return NULL;
        }
        if (memcmp(e->bytes + STATUS_LINE_REASON_OFFSET, reason_ptr,
                   (size_t)reason_len) != 0) {
            return NULL;
        }
        return e;
    }
    return NULL;
}
|
|
509
|
+
|
|
510
|
+
/* 2.13-B — hand-rolled non-negative-integer-to-decimal-ASCII writer, used in
 * place of snprintf for the content-length value.
 *
 * n         value to format; expected to be >= 0.
 * out       scratch buffer the digits are written into.
 * out_size  size of `out` in bytes.
 *
 * Digits are written right-aligned: the last digit lands in
 * out[out_size - 1] and earlier digits fill toward the front. No NUL
 * terminator is written.
 *
 * Returns the offset of the first digit; the text is
 * out[offset .. out_size - 1], i.e. `out_size - offset` bytes. If the buffer
 * is too small the most significant digits are dropped. A negative `n`
 * produces no digits (offset == out_size). A non-positive `out_size` writes
 * nothing and returns `out_size` unchanged. */
static int itoa_positive_decimal(long n, char *out, int out_size) {
    /* No room at all: never touch out[-1]. */
    if (out_size <= 0) {
        return out_size;
    }

    int pos = out_size; /* one past the slot the next digit goes into */
    if (n == 0) {
        out[--pos] = '0';
        return pos;
    }
    while (n > 0 && pos > 0) {
        out[--pos] = (char)('0' + (n % 10));
        n /= 10;
    }
    return pos;
}
|
|
529
|
+
|
|
530
|
+
/* 2.13-B — per-key downcase result cache. Operators overwhelmingly call
|
|
531
|
+
* `build_response_head` with a fixed set of frozen-literal header keys
|
|
532
|
+
* (`'content-type'`, `'cache-control'`, etc.) — same String VALUE every
|
|
533
|
+
* request. Re-running `String#downcase` per call allocates a fresh
|
|
534
|
+
* lowercase String + crosses the FFI boundary; for `n_headers=4` that's
|
|
535
|
+
* 4 allocs + 4 method dispatches per response. The cache keys on the
|
|
536
|
+
* input String's object_id and stores the lowercase VALUE, the
|
|
537
|
+
* pre-built `lc + ": "` prefix line, and the cached length. Cap at 64
|
|
538
|
+
* entries so a misbehaving app emitting a unique `x-trace-<uuid>` key
|
|
539
|
+
* per request can't grow the cache without bound — it just falls
|
|
540
|
+
* through to the slow path on overflow.
|
|
541
|
+
*
|
|
542
|
+
* Pinning: each cached VALUE is anchored in a Ruby Array (`rb_aHeaderKeyAnchor`)
|
|
543
|
+
* registered as a global. The cache itself is an `st_table` keyed by
|
|
544
|
+
* VALUE bits (the input frozen String's id, since it's frozen and safe
|
|
545
|
+
* to reference forever). */
|
|
546
|
+
#define HEADER_KEY_CACHE_MAX 64
|
|
547
|
+
typedef struct {
|
|
548
|
+
VALUE key; /* original frozen input String */
|
|
549
|
+
VALUE lc; /* lowercase form (may be == key when already lowercase) */
|
|
550
|
+
VALUE prefix; /* "<lc>: " — pre-built byte buffer ready to cat */
|
|
551
|
+
long lc_len;
|
|
552
|
+
} header_key_cache_entry_t;
|
|
553
|
+
|
|
554
|
+
static st_table *g_header_key_cache = NULL;
|
|
555
|
+
static VALUE rb_aHeaderKeyAnchor; /* keeps cached VALUEs alive */
|
|
556
|
+
|
|
557
|
+
/* 2.13-B — full header-line cache. When BOTH the key AND the value of a
|
|
558
|
+
* header are frozen-literal Strings (the overwhelmingly common case for
|
|
559
|
+
* fixed Rack apps: `'cache-control' => 'no-store'`,
|
|
560
|
+
* `'content-type' => 'application/json'`), the entire wire line
|
|
561
|
+
* `"<lc-key>: <value>\r\n"` is identical every request. Cache it keyed
|
|
562
|
+
* on `(key.object_id, value.object_id)`; on hit the entire emit is one
|
|
563
|
+
* `rb_str_cat`. 256-entry cap (HEADER_LINE_CACHE_MAX) + same anchor-Array pinning as the
|
|
564
|
+
* key cache. The `value` slot pins the original value VALUE so the
|
|
565
|
+
* frozen literal isn't reclaimed. */
|
|
566
|
+
#define HEADER_LINE_CACHE_MAX 256
|
|
567
|
+
typedef struct {
|
|
568
|
+
/* Two-word key: input key VALUE bits + value VALUE bits. */
|
|
569
|
+
VALUE key_v;
|
|
570
|
+
VALUE val_v;
|
|
571
|
+
VALUE line; /* "<lc-key>: <value>\r\n" buffer */
|
|
572
|
+
long line_len;
|
|
573
|
+
int is_date; /* 1 if lc-key == "date" — caller skips the date tail */
|
|
574
|
+
} header_line_cache_entry_t;
|
|
575
|
+
|
|
576
|
+
static st_table *g_header_line_cache = NULL;
|
|
577
|
+
static VALUE rb_aHeaderLineAnchor;
|
|
578
|
+
|
|
579
|
+
static st_index_t header_line_cache_hash(st_data_t a) {
|
|
580
|
+
/* Combine the two VALUEs via a simple xor+mul mix. The VALUEs are
|
|
581
|
+
* pointers to frozen Strings — the low 3 bits are alignment so we
|
|
582
|
+
* shift before mixing to avoid trivial collisions. */
|
|
583
|
+
const header_line_cache_entry_t *e = (const header_line_cache_entry_t *)a;
|
|
584
|
+
st_data_t x = ((st_data_t)e->key_v >> 3) * 0x9E3779B97F4A7C15ULL;
|
|
585
|
+
st_data_t y = ((st_data_t)e->val_v >> 3) * 0xBF58476D1CE4E5B9ULL;
|
|
586
|
+
return (st_index_t)(x ^ y);
|
|
587
|
+
}
|
|
588
|
+
static int header_line_cache_cmp(st_data_t a, st_data_t b) {
|
|
589
|
+
const header_line_cache_entry_t *ea = (const header_line_cache_entry_t *)a;
|
|
590
|
+
const header_line_cache_entry_t *eb = (const header_line_cache_entry_t *)b;
|
|
591
|
+
/* st returns 0 on match (same as memcmp). */
|
|
592
|
+
return !(ea->key_v == eb->key_v && ea->val_v == eb->val_v);
|
|
593
|
+
}
|
|
594
|
+
static const struct st_hash_type header_line_cache_type = {
|
|
595
|
+
header_line_cache_cmp,
|
|
596
|
+
header_line_cache_hash
|
|
597
|
+
};
|
|
598
|
+
|
|
599
|
+
/* Reuse the same cap-and-anchor strategy from the key cache. Look up by
|
|
600
|
+
* a stack-allocated probe entry; on miss + room, allocate a new entry
|
|
601
|
+
* and st_insert. */
|
|
602
|
+
static const header_line_cache_entry_t *header_line_cache_lookup(VALUE key, VALUE val) {
|
|
603
|
+
if (g_header_line_cache == NULL) return NULL;
|
|
604
|
+
header_line_cache_entry_t probe = { key, val, Qnil, 0, 0 };
|
|
605
|
+
st_data_t found_data;
|
|
606
|
+
if (st_lookup(g_header_line_cache, (st_data_t)&probe, &found_data)) {
|
|
607
|
+
return (const header_line_cache_entry_t *)found_data;
|
|
608
|
+
}
|
|
609
|
+
return NULL;
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
/* Lookup-or-build for the per-key downcase cache. Fast path: st hit, return
|
|
613
|
+
* the cached entry. Slow path: cap-bound check, freeze + lowercase the key,
|
|
614
|
+
* build the "<lc>: " prefix String, anchor both in rb_aHeaderKeyAnchor,
|
|
615
|
+
* st_insert. The anchor Array keeps the VALUEs alive across GC.
|
|
616
|
+
*
|
|
617
|
+
* Returns NULL when the cache is full AND the input key isn't already
|
|
618
|
+
* lowercase + already short — caller falls through to the per-call
|
|
619
|
+
* downcase path. */
|
|
620
|
+
static const header_key_cache_entry_t *header_key_cache_lookup(VALUE key_v) {
|
|
621
|
+
if (g_header_key_cache != NULL) {
|
|
622
|
+
st_data_t found_data;
|
|
623
|
+
if (st_lookup(g_header_key_cache, (st_data_t)key_v, &found_data)) {
|
|
624
|
+
return (const header_key_cache_entry_t *)found_data;
|
|
625
|
+
}
|
|
626
|
+
if (g_header_key_cache->num_entries >= HEADER_KEY_CACHE_MAX) {
|
|
627
|
+
return NULL; /* don't grow past cap */
|
|
628
|
+
}
|
|
629
|
+
} else {
|
|
630
|
+
g_header_key_cache = st_init_numtable();
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
/* Build the entry. Coerce to String, downcase, freeze, build prefix. */
|
|
634
|
+
VALUE k_s = rb_obj_as_string(key_v);
|
|
635
|
+
VALUE k_lower = rb_funcall(k_s, id_downcase, 0);
|
|
636
|
+
if (!OBJ_FROZEN(k_lower)) k_lower = rb_obj_freeze(k_lower);
|
|
637
|
+
|
|
638
|
+
long lc_len = RSTRING_LEN(k_lower);
|
|
639
|
+
VALUE prefix = rb_str_buf_new(lc_len + 2);
|
|
640
|
+
rb_str_cat(prefix, RSTRING_PTR(k_lower), lc_len);
|
|
641
|
+
rb_str_cat(prefix, ": ", 2);
|
|
642
|
+
rb_obj_freeze(prefix);
|
|
643
|
+
|
|
644
|
+
header_key_cache_entry_t *e = ALLOC(header_key_cache_entry_t);
|
|
645
|
+
e->key = key_v;
|
|
646
|
+
e->lc = k_lower;
|
|
647
|
+
e->prefix = prefix;
|
|
648
|
+
e->lc_len = lc_len;
|
|
649
|
+
|
|
650
|
+
/* Pin the VALUEs (key isn't ours to extend lifetime of, but lc/prefix
|
|
651
|
+
* are; rooting all three in the anchor Array is simplest + safest). */
|
|
652
|
+
rb_ary_push(rb_aHeaderKeyAnchor, key_v);
|
|
653
|
+
rb_ary_push(rb_aHeaderKeyAnchor, k_lower);
|
|
654
|
+
rb_ary_push(rb_aHeaderKeyAnchor, prefix);
|
|
655
|
+
|
|
656
|
+
st_insert(g_header_key_cache, (st_data_t)key_v, (st_data_t)e);
|
|
657
|
+
return e;
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
/* foreach state for the response-head builder. Threads the response buffer
|
|
661
|
+
* + framing flags through `rb_hash_foreach`. Errors propagate via
|
|
662
|
+
* `rb_raise` (longjmp-safe; the foreach unwinds and the buffer's RBasic
|
|
663
|
+
* pinning lets GC reclaim it). */
|
|
664
|
+
typedef struct {
|
|
665
|
+
VALUE buf;
|
|
666
|
+
int has_date;
|
|
667
|
+
} build_head_state_t;
|
|
668
|
+
|
|
669
|
+
static int build_head_each(VALUE k, VALUE v, VALUE arg) {
|
|
670
|
+
build_head_state_t *st = (build_head_state_t *)arg;
|
|
671
|
+
|
|
672
|
+
/* Full-line cache fast path: BOTH key AND value are frozen-literal
|
|
673
|
+
* Strings AND the (key, value) pair is already cached. ONE rb_str_cat
|
|
674
|
+
* consumes the entire prebuilt "<lc-key>: <value>\r\n" line. */
|
|
675
|
+
if (TYPE(k) == T_STRING && TYPE(v) == T_STRING &&
|
|
676
|
+
OBJ_FROZEN_RAW(k) && OBJ_FROZEN_RAW(v)) {
|
|
677
|
+
const header_line_cache_entry_t *line_e = header_line_cache_lookup(k, v);
|
|
678
|
+
if (line_e != NULL) {
|
|
679
|
+
rb_str_cat(st->buf, RSTRING_PTR(line_e->line), line_e->line_len);
|
|
680
|
+
if (line_e->is_date) st->has_date = 1;
|
|
681
|
+
return ST_CONTINUE;
|
|
682
|
+
}
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
/* Cached prefix path: lowercase form + "<lc>: " bytes already built. */
|
|
686
|
+
const header_key_cache_entry_t *e = header_key_cache_lookup(k);
|
|
687
|
+
VALUE lc;
|
|
688
|
+
const char *lc_ptr;
|
|
689
|
+
long lc_len;
|
|
690
|
+
VALUE prefix; /* always the cached "<lc>: " when e != NULL */
|
|
691
|
+
if (e != NULL) {
|
|
692
|
+
lc = e->lc;
|
|
693
|
+
lc_ptr = RSTRING_PTR(lc);
|
|
694
|
+
lc_len = e->lc_len;
|
|
695
|
+
prefix = e->prefix;
|
|
696
|
+
} else {
|
|
697
|
+
/* Cap exceeded: fall through to the per-call downcase. Still
|
|
698
|
+
* cheaper than the legacy path because we skip the keys-Array
|
|
699
|
+
* iteration overhead. */
|
|
700
|
+
VALUE k_s = rb_obj_as_string(k);
|
|
701
|
+
lc = rb_funcall(k_s, id_downcase, 0);
|
|
702
|
+
lc_ptr = RSTRING_PTR(lc);
|
|
703
|
+
lc_len = RSTRING_LEN(lc);
|
|
704
|
+
prefix = Qnil;
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
VALUE v_s = rb_obj_as_string(v);
|
|
708
|
+
const char *v_ptr = RSTRING_PTR(v_s);
|
|
709
|
+
long v_len = RSTRING_LEN(v_s);
|
|
710
|
+
|
|
711
|
+
/* CRLF injection guard on value. */
|
|
712
|
+
for (long j = 0; j < v_len; j++) {
|
|
713
|
+
if (v_ptr[j] == '\r' || v_ptr[j] == '\n') {
|
|
714
|
+
rb_raise(rb_eArgError, "header %s contains CR/LF",
|
|
715
|
+
RSTRING_PTR(rb_inspect(lc)));
|
|
716
|
+
}
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
/* Drop user-supplied content-length / connection — we always set
|
|
720
|
+
* these unconditionally below. */
|
|
721
|
+
if (lc_len == 14 && memcmp(lc_ptr, "content-length", 14) == 0) return ST_CONTINUE;
|
|
722
|
+
if (lc_len == 10 && memcmp(lc_ptr, "connection", 10) == 0) return ST_CONTINUE;
|
|
723
|
+
|
|
724
|
+
if (lc_len == 4 && memcmp(lc_ptr, "date", 4) == 0) st->has_date = 1;
|
|
725
|
+
|
|
726
|
+
if (prefix != Qnil) {
|
|
727
|
+
rb_str_cat(st->buf, RSTRING_PTR(prefix), lc_len + 2);
|
|
728
|
+
} else {
|
|
729
|
+
rb_str_cat(st->buf, lc_ptr, lc_len);
|
|
730
|
+
rb_str_cat(st->buf, ": ", 2);
|
|
731
|
+
}
|
|
732
|
+
rb_str_cat(st->buf, v_ptr, v_len);
|
|
733
|
+
rb_str_cat(st->buf, "\r\n", 2);
|
|
734
|
+
|
|
735
|
+
/* Populate the line cache for next time when both sides are frozen
|
|
736
|
+
* literals and we have room. */
|
|
737
|
+
if (g_header_line_cache != NULL &&
|
|
738
|
+
TYPE(k) == T_STRING && TYPE(v) == T_STRING &&
|
|
739
|
+
OBJ_FROZEN_RAW(k) && OBJ_FROZEN_RAW(v) &&
|
|
740
|
+
g_header_line_cache->num_entries < HEADER_LINE_CACHE_MAX) {
|
|
741
|
+
long line_len = lc_len + 2 + v_len + 2;
|
|
742
|
+
VALUE line = rb_str_buf_new(line_len);
|
|
743
|
+
rb_str_cat(line, lc_ptr, lc_len);
|
|
744
|
+
rb_str_cat(line, ": ", 2);
|
|
745
|
+
rb_str_cat(line, v_ptr, v_len);
|
|
746
|
+
rb_str_cat(line, "\r\n", 2);
|
|
747
|
+
rb_obj_freeze(line);
|
|
748
|
+
|
|
749
|
+
header_line_cache_entry_t *ne = ALLOC(header_line_cache_entry_t);
|
|
750
|
+
ne->key_v = k;
|
|
751
|
+
ne->val_v = v;
|
|
752
|
+
ne->line = line;
|
|
753
|
+
ne->line_len = line_len;
|
|
754
|
+
ne->is_date = (lc_len == 4 && memcmp(lc_ptr, "date", 4) == 0) ? 1 : 0;
|
|
755
|
+
|
|
756
|
+
rb_ary_push(rb_aHeaderLineAnchor, k);
|
|
757
|
+
rb_ary_push(rb_aHeaderLineAnchor, v);
|
|
758
|
+
rb_ary_push(rb_aHeaderLineAnchor, line);
|
|
759
|
+
|
|
760
|
+
st_insert(g_header_line_cache, (st_data_t)ne, (st_data_t)ne);
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
return ST_CONTINUE;
|
|
764
|
+
}
|
|
765
|
+
|
|
445
766
|
/* Hyperion::CParser.build_response_head(status, reason, headers, body_size,
|
|
446
767
|
* keep_alive, date_str) -> String
|
|
447
768
|
*
|
|
@@ -459,6 +780,24 @@ static VALUE cparser_parse(VALUE self, VALUE buffer) {
|
|
|
459
780
|
* Header values containing CR/LF raise ArgumentError (response-splitting
|
|
460
781
|
* guard). Bypasses Ruby Hash#each + per-line String#<< allocation; the
|
|
461
782
|
* status line, framing headers, and join slices live in C buffers.
|
|
783
|
+
*
|
|
784
|
+
* 2.13-B — four CPU savings over the rc17 baseline:
|
|
785
|
+
* 1. Common (status, reason) pairs hit a static table of pre-baked
|
|
786
|
+
* "HTTP/1.1 NNN <reason>\r\n" lines — one rb_str_cat replaces the
|
|
787
|
+
* per-request snprintf + reason-cat + CRLF-cat triple.
|
|
788
|
+
* 2. Header iteration uses rb_hash_foreach instead of
|
|
789
|
+
* `rb_funcall(:keys)` + per-key `rb_hash_aref` — eliminates the
|
|
790
|
+
* keys-Array allocation and the N hash lookups per call.
|
|
791
|
+
* 3. Per-key downcase result + "<lc>: " prefix is cached on the
|
|
792
|
+
* input frozen String's identity (capped at 64 entries; a
|
|
793
|
+
* misbehaving app emitting unique keys per request just falls
|
|
794
|
+
* back to the slow path on overflow). For the canonical Rack-3
|
|
795
|
+
* app emitting `'content-type' / 'cache-control' / ...` from
|
|
796
|
+
* frozen literals, every header lookup is a single st hit.
|
|
797
|
+
* 4. (key, value) full-line cache: both sides are frozen-literal
|
|
798
|
+
* Strings (e.g. `'cache-control' => 'no-store'`) — entire
|
|
799
|
+
* "<lc-key>: <value>\r\n" line is one rb_str_cat after the first
|
|
800
|
+
* request populates the cache. Capped at 256 entries.
|
|
462
801
|
*/
|
|
463
802
|
static VALUE cbuild_response_head(VALUE self, VALUE rb_status, VALUE rb_reason,
|
|
464
803
|
VALUE rb_headers, VALUE rb_body_size,
|
|
@@ -475,59 +814,35 @@ static VALUE cbuild_response_head(VALUE self, VALUE rb_status, VALUE rb_reason,
|
|
|
475
814
|
/* Most heads fit in 1 KiB; rb_str_cat grows on demand. */
|
|
476
815
|
VALUE buf = rb_str_buf_new(1024);
|
|
477
816
|
|
|
478
|
-
/* Status line:
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
for (long i = 0; i < n_keys; i++) {
|
|
491
|
-
VALUE k = rb_ary_entry(keys, i);
|
|
492
|
-
VALUE v = rb_hash_aref(rb_headers, k);
|
|
493
|
-
|
|
494
|
-
VALUE k_s = rb_obj_as_string(k);
|
|
495
|
-
VALUE v_s = rb_obj_as_string(v);
|
|
496
|
-
VALUE k_lower = rb_funcall(k_s, id_downcase, 0);
|
|
497
|
-
|
|
498
|
-
const char *k_ptr = RSTRING_PTR(k_lower);
|
|
499
|
-
long k_len = RSTRING_LEN(k_lower);
|
|
500
|
-
const char *v_ptr = RSTRING_PTR(v_s);
|
|
501
|
-
long v_len = RSTRING_LEN(v_s);
|
|
502
|
-
|
|
503
|
-
/* CRLF injection guard on value. */
|
|
504
|
-
for (long j = 0; j < v_len; j++) {
|
|
505
|
-
if (v_ptr[j] == '\r' || v_ptr[j] == '\n') {
|
|
506
|
-
rb_raise(rb_eArgError, "header %s contains CR/LF",
|
|
507
|
-
RSTRING_PTR(rb_inspect(k_lower)));
|
|
508
|
-
}
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
/* Drop user-supplied content-length / connection — we always set
|
|
512
|
-
* these unconditionally below (matches rc16 Ruby behaviour where
|
|
513
|
-
* the normalized hash overwrites in place). */
|
|
514
|
-
if (k_len == 14 && memcmp(k_ptr, "content-length", 14) == 0) continue;
|
|
515
|
-
if (k_len == 10 && memcmp(k_ptr, "connection", 10) == 0) continue;
|
|
516
|
-
|
|
517
|
-
if (k_len == 4 && memcmp(k_ptr, "date", 4) == 0) {
|
|
518
|
-
has_date = 1;
|
|
519
|
-
}
|
|
520
|
-
|
|
521
|
-
rb_str_cat(buf, k_ptr, k_len);
|
|
522
|
-
rb_str_cat(buf, ": ", 2);
|
|
523
|
-
rb_str_cat(buf, v_ptr, v_len);
|
|
817
|
+
/* Status line: pre-baked when (status, reason) is one of the well-known
|
|
818
|
+
* pairs in `Hyperion::ResponseWriter::REASONS`; falls back to
|
|
819
|
+
* `snprintf("HTTP/1.1 %d ", status)` + reason-cat for unknowns. */
|
|
820
|
+
const struct status_line *sline =
|
|
821
|
+
lookup_status_line(status, RSTRING_PTR(rb_reason), RSTRING_LEN(rb_reason));
|
|
822
|
+
if (sline != NULL) {
|
|
823
|
+
rb_str_cat(buf, sline->bytes, sline->len);
|
|
824
|
+
} else {
|
|
825
|
+
char status_line_buf[48];
|
|
826
|
+
int n = snprintf(status_line_buf, sizeof(status_line_buf), "HTTP/1.1 %d ", status);
|
|
827
|
+
rb_str_cat(buf, status_line_buf, n);
|
|
828
|
+
rb_str_cat(buf, RSTRING_PTR(rb_reason), RSTRING_LEN(rb_reason));
|
|
524
829
|
rb_str_cat(buf, "\r\n", 2);
|
|
525
830
|
}
|
|
526
831
|
|
|
527
|
-
/*
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
832
|
+
/* Iterate user headers — lowercase key, validate value, skip framing.
|
|
833
|
+
* Threaded through rb_hash_foreach so we can reuse the per-key
|
|
834
|
+
* downcase cache and skip the per-call `keys` Array allocation. */
|
|
835
|
+
build_head_state_t state = { buf, 0 };
|
|
836
|
+
rb_hash_foreach(rb_headers, build_head_each, (VALUE)&state);
|
|
837
|
+
|
|
838
|
+
/* Framing headers — always emitted. content-length uses a hand-rolled
|
|
839
|
+
* itoa rather than snprintf (vfprintf was 1 % of CPU on the
|
|
840
|
+
* CPU-JSON profile). */
|
|
841
|
+
char itoa_scratch[24];
|
|
842
|
+
int cl_off = itoa_positive_decimal(body_size, itoa_scratch, (int)sizeof(itoa_scratch));
|
|
843
|
+
rb_str_cat(buf, "content-length: ", 16);
|
|
844
|
+
rb_str_cat(buf, itoa_scratch + cl_off, sizeof(itoa_scratch) - cl_off);
|
|
845
|
+
rb_str_cat(buf, "\r\n", 2);
|
|
531
846
|
|
|
532
847
|
if (keep_alive) {
|
|
533
848
|
rb_str_cat(buf, "connection: keep-alive\r\n", 24);
|
|
@@ -535,7 +850,7 @@ static VALUE cbuild_response_head(VALUE self, VALUE rb_status, VALUE rb_reason,
|
|
|
535
850
|
rb_str_cat(buf, "connection: close\r\n", 19);
|
|
536
851
|
}
|
|
537
852
|
|
|
538
|
-
if (!has_date) {
|
|
853
|
+
if (!state.has_date) {
|
|
539
854
|
rb_str_cat(buf, "date: ", 6);
|
|
540
855
|
rb_str_cat(buf, RSTRING_PTR(rb_date), RSTRING_LEN(rb_date));
|
|
541
856
|
rb_str_cat(buf, "\r\n", 2);
|
|
@@ -1287,6 +1602,22 @@ void Init_hyperion_http(void) {
|
|
|
1287
1602
|
rb_obj_freeze(rb_aHeaderTable);
|
|
1288
1603
|
rb_define_const(rb_cCParser, "PREINTERNED_HEADERS", rb_aHeaderTable);
|
|
1289
1604
|
|
|
1605
|
+
/* 2.13-B — status-line, header-key, header-line caches used by
|
|
1606
|
+
* cbuild_response_head. The status-line table is fixed-size (no GC
|
|
1607
|
+
* concerns; bytes are .rodata). The two header caches are
|
|
1608
|
+
* GC-aware: their contents pin VALUEs through globally-rooted
|
|
1609
|
+
* Anchor Arrays, while the actual st_table maps live for the
|
|
1610
|
+
* extension lifetime (one per process; never freed). */
|
|
1611
|
+
for (struct status_line *e = k_status_lines; e->bytes != NULL; e++) {
|
|
1612
|
+
e->len = (long)strlen(e->bytes);
|
|
1613
|
+
}
|
|
1614
|
+
rb_aHeaderKeyAnchor = rb_ary_new();
|
|
1615
|
+
rb_aHeaderLineAnchor = rb_ary_new();
|
|
1616
|
+
rb_global_variable(&rb_aHeaderKeyAnchor);
|
|
1617
|
+
rb_global_variable(&rb_aHeaderLineAnchor);
|
|
1618
|
+
g_header_key_cache = st_init_numtable();
|
|
1619
|
+
g_header_line_cache = st_init_table(&header_line_cache_type);
|
|
1620
|
+
|
|
1290
1621
|
/* Phase 1 (1.7.0) — sibling C unit owns Hyperion::Http::Sendfile.
|
|
1291
1622
|
* Defined in sendfile.c; both objects link into the same .bundle/.so
|
|
1292
1623
|
* so a single `require 'hyperion_http/hyperion_http'` brings up the
|
|
@@ -362,9 +362,24 @@ module Hyperion
|
|
|
362
362
|
server_name, server_port = split_host(host_header)
|
|
363
363
|
|
|
364
364
|
env = ENV_POOL.acquire
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
365
|
+
# 2.13-D — gRPC streaming requests pass a non-String IO-shaped
|
|
366
|
+
# body (Hyperion::Http2Handler::StreamingInput) and must NOT go
|
|
367
|
+
# through the StringIO pool: the StringIO would `string=` consume
|
|
368
|
+
# it as a String and lose the streaming-read semantic. Fall back
|
|
369
|
+
# to the legacy buffered path only when `request.body` is a
|
|
370
|
+
# String — covers HTTP/1.1 (always String) and HTTP/2 unary
|
|
371
|
+
# (String per RequestStream#@request_body). The streaming path
|
|
372
|
+
# tags `input` as nil so the ensure-block release skips the
|
|
373
|
+
# pool return for this request.
|
|
374
|
+
if request.body.is_a?(String)
|
|
375
|
+
input = INPUT_POOL.acquire
|
|
376
|
+
input.string = request.body
|
|
377
|
+
input.rewind
|
|
378
|
+
env['rack.input'] = input
|
|
379
|
+
else
|
|
380
|
+
input = nil
|
|
381
|
+
env['rack.input'] = request.body
|
|
382
|
+
end
|
|
368
383
|
|
|
369
384
|
# Adapter-owned (non-header, non-request-line) env. SERVER_NAME/PORT
|
|
370
385
|
# need split_host, REMOTE_ADDR needs peer info, the rack.* keys are
|
|
@@ -379,7 +394,6 @@ module Hyperion
|
|
|
379
394
|
# without a backing socket.
|
|
380
395
|
env['REMOTE_ADDR'] = request.peer_address || '127.0.0.1'
|
|
381
396
|
env['rack.url_scheme'] = 'http'
|
|
382
|
-
env['rack.input'] = input
|
|
383
397
|
env['rack.errors'] = $stderr
|
|
384
398
|
if connection
|
|
385
399
|
# 2.1.0 (WS-1) — Rack 3 full-hijack. The proc captures the
|