hyperion-rb 1.6.2 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4563 -0
  3. data/README.md +189 -13
  4. data/ext/hyperion_h2_codec/Cargo.lock +7 -0
  5. data/ext/hyperion_h2_codec/Cargo.toml +33 -0
  6. data/ext/hyperion_h2_codec/extconf.rb +73 -0
  7. data/ext/hyperion_h2_codec/src/frames.rs +140 -0
  8. data/ext/hyperion_h2_codec/src/hpack/huffman.rs +161 -0
  9. data/ext/hyperion_h2_codec/src/hpack.rs +457 -0
  10. data/ext/hyperion_h2_codec/src/lib.rs +296 -0
  11. data/ext/hyperion_http/extconf.rb +28 -0
  12. data/ext/hyperion_http/h2_codec_glue.c +408 -0
  13. data/ext/hyperion_http/page_cache.c +1125 -0
  14. data/ext/hyperion_http/parser.c +473 -38
  15. data/ext/hyperion_http/sendfile.c +982 -0
  16. data/ext/hyperion_http/websocket.c +493 -0
  17. data/ext/hyperion_io_uring/Cargo.lock +33 -0
  18. data/ext/hyperion_io_uring/Cargo.toml +34 -0
  19. data/ext/hyperion_io_uring/extconf.rb +74 -0
  20. data/ext/hyperion_io_uring/src/lib.rs +316 -0
  21. data/lib/hyperion/adapter/rack.rb +370 -42
  22. data/lib/hyperion/admin_listener.rb +207 -0
  23. data/lib/hyperion/admin_middleware.rb +36 -7
  24. data/lib/hyperion/cli.rb +310 -11
  25. data/lib/hyperion/config.rb +440 -14
  26. data/lib/hyperion/connection.rb +679 -22
  27. data/lib/hyperion/deprecations.rb +81 -0
  28. data/lib/hyperion/dispatch_mode.rb +165 -0
  29. data/lib/hyperion/fiber_local.rb +75 -13
  30. data/lib/hyperion/h2_admission.rb +77 -0
  31. data/lib/hyperion/h2_codec.rb +452 -0
  32. data/lib/hyperion/http/page_cache.rb +122 -0
  33. data/lib/hyperion/http/sendfile.rb +696 -0
  34. data/lib/hyperion/http2/native_hpack_adapter.rb +70 -0
  35. data/lib/hyperion/http2_handler.rb +368 -9
  36. data/lib/hyperion/io_uring.rb +317 -0
  37. data/lib/hyperion/lint_wrapper_pool.rb +126 -0
  38. data/lib/hyperion/master.rb +96 -9
  39. data/lib/hyperion/metrics/path_templater.rb +68 -0
  40. data/lib/hyperion/metrics.rb +256 -0
  41. data/lib/hyperion/prometheus_exporter.rb +150 -0
  42. data/lib/hyperion/request.rb +13 -0
  43. data/lib/hyperion/response_writer.rb +477 -16
  44. data/lib/hyperion/runtime.rb +195 -0
  45. data/lib/hyperion/server/route_table.rb +179 -0
  46. data/lib/hyperion/server.rb +519 -55
  47. data/lib/hyperion/static_preload.rb +133 -0
  48. data/lib/hyperion/thread_pool.rb +61 -7
  49. data/lib/hyperion/tls.rb +343 -1
  50. data/lib/hyperion/version.rb +1 -1
  51. data/lib/hyperion/websocket/close_codes.rb +71 -0
  52. data/lib/hyperion/websocket/connection.rb +876 -0
  53. data/lib/hyperion/websocket/frame.rb +356 -0
  54. data/lib/hyperion/websocket/handshake.rb +525 -0
  55. data/lib/hyperion/worker.rb +111 -9
  56. data/lib/hyperion.rb +137 -3
  57. metadata +50 -1
@@ -1,5 +1,6 @@
1
1
  #include <ruby.h>
2
2
  #include <ruby/encoding.h>
3
+ #include <ruby/st.h>
3
4
  #include <string.h>
4
5
  #include "llhttp.h"
5
6
 
@@ -35,6 +36,90 @@ static ID id_http_version_kw;
35
36
  static ID id_headers_kw;
36
37
  static ID id_body_kw;
37
38
 
39
+ /* Phase 3a (1.7.1) — pre-built frozen Strings for the fixed Rack env keys
40
+ * we set on every request (REQUEST_METHOD, PATH_INFO, QUERY_STRING,
41
+ * HTTP_VERSION, SERVER_PROTOCOL) plus the two non-HTTP_ promotions
42
+ * (CONTENT_TYPE, CONTENT_LENGTH). Allocated once at extension load and
43
+ * reused as hash keys forever — saves an alloc per key per request. */
44
+ static VALUE rb_kREQUEST_METHOD;
45
+ static VALUE rb_kPATH_INFO;
46
+ static VALUE rb_kQUERY_STRING;
47
+ static VALUE rb_kHTTP_VERSION;
48
+ static VALUE rb_kSERVER_PROTOCOL;
49
+ static VALUE rb_kCONTENT_TYPE;
50
+ static VALUE rb_kCONTENT_LENGTH;
51
+
52
+ /* 2.4-B (S2): pre-interned frozen Strings used by on_headers_complete
53
+ * for the smuggling-defense lookups. Pre-2.4-B these were freshly
54
+ * allocated via rb_str_new_cstr on every parse(); promoting them to
55
+ * static globals removes 2 String allocations per parse. */
56
+ static VALUE rb_kHCONTENT_LENGTH_LC;
57
+ static VALUE rb_kHTRANSFER_ENCODING_LC;
58
+
59
+ /* Request ivar IDs, looked up once at extension load. Request is a frozen
60
+ * struct-like value so reading via rb_ivar_get is safe — no dispatch cost,
61
+ * no method-cache invalidation. */
62
+ static ID id_iv_method;
63
+ static ID id_iv_path;
64
+ static ID id_iv_query_string;
65
+ static ID id_iv_http_version;
66
+ static ID id_iv_headers;
67
+
68
+ /* Phase 2c (1.7.1) — pre-interned frozen lowercase keys for the 30 most
69
+ * common production HTTP request headers. When llhttp finishes a header
70
+ * the parser's `stash_pending_header` does a case-insensitive O(N=30)
71
+ * scan against this table; on hit, it stores the pre-frozen lowercase
72
+ * VALUE as the hash key instead of allocating a fresh `name.downcase`
73
+ * String per request. The table doubles as an exposure point for the
74
+ * Ruby-side adapter so HTTP_KEY_CACHE can widen to the same 30 names.
75
+ *
76
+ * Memory layout: a flat array of strings, even indices are lowercase
77
+ * names, odd indices are the corresponding "HTTP_<UPCASED_UNDERSCORED>"
78
+ * Rack env keys. Both halves are deeply frozen at extension load.
79
+ */
80
+ #define HEADER_TABLE_PAIRS 30
81
+ static VALUE rb_aHeaderTable; /* Array(2*30) — name+http key pairs */
82
+ static const char *header_table_lc[HEADER_TABLE_PAIRS] = {
83
+ "host", "user-agent", "accept", "accept-encoding",
84
+ "accept-language", "cache-control", "connection", "cookie",
85
+ "content-length", "content-type", "authorization", "referer",
86
+ "origin", "upgrade", "x-forwarded-for","x-forwarded-proto",
87
+ "x-forwarded-host", "x-real-ip", "x-request-id", "if-none-match",
88
+ "if-modified-since","if-match", "etag", "range",
89
+ "pragma", "dnt", "sec-ch-ua", "sec-fetch-dest",
90
+ "sec-fetch-mode", "sec-fetch-site"
91
+ };
92
+ static const char *header_table_http[HEADER_TABLE_PAIRS] = {
93
+ "HTTP_HOST", "HTTP_USER_AGENT", "HTTP_ACCEPT", "HTTP_ACCEPT_ENCODING",
94
+ "HTTP_ACCEPT_LANGUAGE", "HTTP_CACHE_CONTROL", "HTTP_CONNECTION", "HTTP_COOKIE",
95
+ "HTTP_CONTENT_LENGTH", "HTTP_CONTENT_TYPE", "HTTP_AUTHORIZATION", "HTTP_REFERER",
96
+ "HTTP_ORIGIN", "HTTP_UPGRADE", "HTTP_X_FORWARDED_FOR","HTTP_X_FORWARDED_PROTO",
97
+ "HTTP_X_FORWARDED_HOST","HTTP_X_REAL_IP", "HTTP_X_REQUEST_ID", "HTTP_IF_NONE_MATCH",
98
+ "HTTP_IF_MODIFIED_SINCE","HTTP_IF_MATCH", "HTTP_ETAG", "HTTP_RANGE",
99
+ "HTTP_PRAGMA", "HTTP_DNT", "HTTP_SEC_CH_UA", "HTTP_SEC_FETCH_DEST",
100
+ "HTTP_SEC_FETCH_MODE", "HTTP_SEC_FETCH_SITE"
101
+ };
102
+ static VALUE header_table_lc_v[HEADER_TABLE_PAIRS]; /* parallel cached frozen Strings */
103
+ static long header_table_lc_len[HEADER_TABLE_PAIRS]; /* cached strlen for fast compare */
104
+
105
+ /* Case-insensitive lookup against the pre-interned header table. Returns
106
+ * the table index on hit, -1 on miss. Bounded O(30) — vastly faster than
107
+ * spawning a `String#downcase` allocation per header. */
108
+ static int header_table_lookup(const char *name, long len) {
109
+ for (int i = 0; i < HEADER_TABLE_PAIRS; i++) {
110
+ if (header_table_lc_len[i] != len) continue;
111
+ const char *cand = header_table_lc[i];
112
+ int match = 1;
113
+ for (long j = 0; j < len; j++) {
114
+ unsigned char c = (unsigned char)name[j];
115
+ if (c >= 'A' && c <= 'Z') c |= 0x20;
116
+ if (c != (unsigned char)cand[j]) { match = 0; break; }
117
+ }
118
+ if (match) return i;
119
+ }
120
+ return -1;
121
+ }
122
+
38
123
  typedef struct {
39
124
  /* Request line + headers */
40
125
  VALUE method;
@@ -57,15 +142,25 @@ typedef struct {
57
142
  const char *error_message;
58
143
  } parser_state_t;
59
144
 
145
+ /* 2.4-B (S1): defer per-field String allocations until the relevant
146
+ * llhttp callback actually fires. A typical GET request never sends a
147
+ * body and may have no query_string; allocating empty placeholder
148
+ * Strings up-front cost 6 allocations per parse() before this change.
149
+ * After: allocate Qnil sentinels; the on_* callbacks lazy-allocate
150
+ * via the APPEND_OR_FAIL macro below; the Request build at the bottom
151
+ * coerces any remaining Qnil to the shared frozen defaults declared here. */
152
+ static VALUE rb_kEMPTY_STR; /* frozen empty ASCII-8BIT String */
153
+ static VALUE rb_kHTTP_1_1; /* frozen "HTTP/1.1" String */
154
+
60
155
  static void state_init(parser_state_t *s) {
61
156
  s->method = Qnil;
62
- s->path = rb_str_new_cstr("");
63
- s->query_string = rb_str_new_cstr("");
64
- s->http_version = rb_str_new_cstr("HTTP/1.1");
157
+ s->path = Qnil; /* allocated in on_url first call */
158
+ s->query_string = Qnil; /* allocated in on_url_complete only if '?' present */
159
+ s->http_version = Qnil; /* allocated in on_version */
65
160
  s->headers = rb_hash_new();
66
- s->body = rb_str_new_cstr("");
67
- s->current_header_name = rb_str_new_cstr("");
68
- s->current_header_value = rb_str_new_cstr("");
161
+ s->body = Qnil; /* allocated in on_body first call */
162
+ s->current_header_name = Qnil; /* allocated in on_header_field */
163
+ s->current_header_value = Qnil; /* allocated in on_header_value */
69
164
  s->message_complete = 0;
70
165
  s->has_content_length = 0;
71
166
  s->has_transfer_encoding = 0;
@@ -78,21 +173,51 @@ static void state_init(parser_state_t *s) {
78
173
  #define MAX_FIELD_BYTES (64 * 1024)
79
174
  #define MAX_BODY_BYTES (16 * 1024 * 1024)
80
175
 
176
/* 2.4-B (S1): lazy field append.
 *
 * Appends `length` bytes starting at `at` to the String slot `dst`, which
 * may still be Qnil (state_init leaves slots nil to avoid allocating empty
 * placeholder Strings). A nil slot counts as zero bytes already stored.
 *
 * If the resulting size would exceed `cap`, the macro records the failure
 * on the enclosing function's `s` (parse_error = 1, error_message =
 * who " too large") and makes that function return -1. Otherwise a nil
 * slot is materialised with rb_str_new and a populated slot is extended
 * with rb_str_cat.
 *
 * Requirements on the call site: a `parser_state_t *s` local must be in
 * scope, the enclosing function must return int, `dst` must be an
 * assignable expression, and `who` must be a string literal (it is
 * concatenated at compile time). */
#define APPEND_OR_FAIL(dst, at, length, cap, who) do {                      \
    long append_or_fail_have_ = NIL_P(dst) ? 0 : RSTRING_LEN(dst);          \
    if (append_or_fail_have_ + (long)(length) > (long)(cap)) {              \
        s->parse_error = 1;                                                 \
        s->error_message = (who " too large");                              \
        return -1;                                                          \
    }                                                                       \
    if (NIL_P(dst)) {                                                       \
        (dst) = rb_str_new(at, length);                                     \
    } else {                                                                \
        rb_str_cat(dst, at, length);                                        \
    }                                                                       \
} while (0)
89
196
 
90
197
  static void stash_pending_header(parser_state_t *s) {
91
- if (RSTRING_LEN(s->current_header_name) > 0) {
92
- VALUE downcased = rb_funcall(s->current_header_name, id_downcase, 0);
93
- rb_hash_aset(s->headers, downcased, s->current_header_value);
94
- s->current_header_name = rb_str_new_cstr("");
95
- s->current_header_value = rb_str_new_cstr("");
198
+ /* 2.4-B (S1): the name/value slots are Qnil between headers; only
199
+ * an actually-populated header pair triggers the stash. Reset to
200
+ * Qnil after stash (not a fresh empty String) — the next
201
+ * on_header_field allocates lazily via APPEND_OR_FAIL. */
202
+ if (!NIL_P(s->current_header_name) && RSTRING_LEN(s->current_header_name) > 0) {
203
+ /* Phase 2c (1.7.1): try the pre-interned table first. On a hit
204
+ * we reuse the frozen lowercase VALUE — saves a String allocation
205
+ * per common header. On a miss, fall back to the original
206
+ * `String#downcase` path so unusual / vendor-specific headers
207
+ * still flow through unmolested. */
208
+ const char *name_ptr = RSTRING_PTR(s->current_header_name);
209
+ long name_len = RSTRING_LEN(s->current_header_name);
210
+ int idx = header_table_lookup(name_ptr, name_len);
211
+ VALUE key;
212
+ if (idx >= 0) {
213
+ key = header_table_lc_v[idx];
214
+ } else {
215
+ key = rb_funcall(s->current_header_name, id_downcase, 0);
216
+ }
217
+ VALUE val = NIL_P(s->current_header_value) ? rb_kEMPTY_STR : s->current_header_value;
218
+ rb_hash_aset(s->headers, key, val);
219
+ s->current_header_name = Qnil;
220
+ s->current_header_value = Qnil;
96
221
  }
97
222
  }
98
223
 
@@ -104,6 +229,11 @@ static int on_url(llhttp_t *p, const char *at, size_t length) {
104
229
 
105
230
  static int on_url_complete(llhttp_t *p) {
106
231
  parser_state_t *s = (parser_state_t *)p->data;
232
+ /* 2.4-B (S1): path is Qnil for the (rare) zero-length-URL pathological
233
+ * case; nothing to split. */
234
+ if (NIL_P(s->path)) {
235
+ return 0;
236
+ }
107
237
  /* Split path?query. */
108
238
  char *full = RSTRING_PTR(s->path);
109
239
  long full_len = RSTRING_LEN(s->path);
@@ -131,6 +261,13 @@ static int on_method(llhttp_t *p, const char *at, size_t length) {
131
261
  static int on_version(llhttp_t *p, const char *at, size_t length) {
132
262
  /* llhttp gives us "1.1"; we prepend "HTTP/" ourselves. */
133
263
  parser_state_t *s = (parser_state_t *)p->data;
264
+ /* 2.4-B (S1): fast path the common "1.1" case to the frozen
265
+ * "HTTP/1.1" constant; saves an allocation on the overwhelming
266
+ * majority of HTTP/1.1 requests. */
267
+ if (length == 3 && at[0] == '1' && at[1] == '.' && at[2] == '1') {
268
+ s->http_version = rb_kHTTP_1_1;
269
+ return 0;
270
+ }
134
271
  s->http_version = rb_str_new_cstr("HTTP/");
135
272
  rb_str_cat(s->http_version, at, length);
136
273
  return 0;
@@ -138,25 +275,18 @@ static int on_version(llhttp_t *p, const char *at, size_t length) {
138
275
 
139
276
  static int on_header_field(llhttp_t *p, const char *at, size_t length) {
140
277
  parser_state_t *s = (parser_state_t *)p->data;
141
- /* If current_header_value is non-empty, we just finished a header. */
142
- if (RSTRING_LEN(s->current_header_value) > 0) {
278
+ /* 2.4-B (S1): name/value are Qnil between headers. A pending value
279
+ * is the signal that we just finished one and need to stash. */
280
+ if (!NIL_P(s->current_header_value) && RSTRING_LEN(s->current_header_value) > 0) {
143
281
  stash_pending_header(s);
144
282
  }
145
- if (RSTRING_LEN(s->current_header_name) == 0) {
146
- s->current_header_name = rb_str_new(at, length);
147
- } else {
148
- APPEND_OR_FAIL(s->current_header_name, at, length, MAX_FIELD_BYTES, "header name");
149
- }
283
+ APPEND_OR_FAIL(s->current_header_name, at, length, MAX_FIELD_BYTES, "header name");
150
284
  return 0;
151
285
  }
152
286
 
153
287
  /* llhttp header-value span callback.
   *
   * Appends `length` bytes at `at` to the pending header value. The slot is
   * Qnil until the first call for a header; APPEND_OR_FAIL creates the String
   * then and extends it on later calls.
   *
   * Returns 0 on success. Returns -1 (with s->parse_error set and
   * s->error_message = "header value too large") when the accumulated value
   * would exceed MAX_FIELD_BYTES. */
  static int on_header_value(llhttp_t *p, const char *at, size_t length) {
      /* APPEND_OR_FAIL refers to the parser state by the name `s`. */
      parser_state_t *s = (parser_state_t *)p->data;
      APPEND_OR_FAIL(s->current_header_value, at, length, MAX_FIELD_BYTES, "header value");
      return 0;
  }
162
292
 
@@ -164,11 +294,14 @@ static int on_headers_complete(llhttp_t *p) {
164
294
  parser_state_t *s = (parser_state_t *)p->data;
165
295
  stash_pending_header(s);
166
296
 
167
- /* Smuggling defense: both Content-Length and Transfer-Encoding present. */
168
- VALUE cl_key = rb_str_new_cstr("content-length");
169
- VALUE te_key = rb_str_new_cstr("transfer-encoding");
170
- VALUE cl = rb_hash_aref(s->headers, cl_key);
171
- VALUE te = rb_hash_aref(s->headers, te_key);
297
+ /* 2.4-B (S2): the lookup keys are pre-interned frozen Strings
298
+ * registered as globals in Init_hyperion_http. Pre-2.4-B these were
299
+ * freshly allocated via rb_str_new_cstr on every parse(); the new
300
+ * statics save 2 String allocations per parse(). The lookup still
301
+ * matches by String content, not identity: "content-length" is stored
302
+ * under the header-table VALUE, "transfer-encoding" under a fresh downcase. */
303
+ VALUE cl = rb_hash_aref(s->headers, rb_kHCONTENT_LENGTH_LC);
304
+ VALUE te = rb_hash_aref(s->headers, rb_kHTRANSFER_ENCODING_LC);
172
305
  s->has_content_length = !NIL_P(cl);
173
306
  s->has_transfer_encoding = !NIL_P(te);
174
307
  if (s->has_content_length && s->has_transfer_encoding) {
@@ -284,14 +417,24 @@ static VALUE cparser_parse(VALUE self, VALUE buffer) {
284
417
  consumed = len;
285
418
  }
286
419
 
420
+ /* 2.4-B (S1): Qnil-to-empty-String coercion for fields that the
421
+ * llhttp callbacks never touched (e.g. zero-length URL, GET with
422
+ * no body, HTTP/1.0 with no version detail). The frozen empty
423
+ * String is shared across every nil-coerced field — no allocation. */
424
+ VALUE method = NIL_P(s.method) ? rb_kEMPTY_STR : s.method;
425
+ VALUE path = NIL_P(s.path) ? rb_kEMPTY_STR : s.path;
426
+ VALUE query_string = NIL_P(s.query_string) ? rb_kEMPTY_STR : s.query_string;
427
+ VALUE http_version = NIL_P(s.http_version) ? rb_kHTTP_1_1 : s.http_version;
428
+ VALUE body = NIL_P(s.body) ? rb_kEMPTY_STR : s.body;
429
+
287
430
  /* Build the Request. */
288
431
  VALUE kwargs = rb_hash_new();
289
- rb_hash_aset(kwargs, ID2SYM(id_method_kw), s.method);
290
- rb_hash_aset(kwargs, ID2SYM(id_path_kw), s.path);
291
- rb_hash_aset(kwargs, ID2SYM(id_query_string_kw), s.query_string);
292
- rb_hash_aset(kwargs, ID2SYM(id_http_version_kw), s.http_version);
432
+ rb_hash_aset(kwargs, ID2SYM(id_method_kw), method);
433
+ rb_hash_aset(kwargs, ID2SYM(id_path_kw), path);
434
+ rb_hash_aset(kwargs, ID2SYM(id_query_string_kw), query_string);
435
+ rb_hash_aset(kwargs, ID2SYM(id_http_version_kw), http_version);
293
436
  rb_hash_aset(kwargs, ID2SYM(id_headers_kw), s.headers);
294
- rb_hash_aset(kwargs, ID2SYM(id_body_kw), s.body);
437
+ rb_hash_aset(kwargs, ID2SYM(id_body_kw), body);
295
438
 
296
439
  VALUE args[1] = { kwargs };
297
440
  VALUE request = rb_funcallv_kw(rb_cRequest, id_new, 1, args, RB_PASS_KEYWORDS);
@@ -859,6 +1002,197 @@ static VALUE cchunked_body_complete(VALUE self, VALUE rb_buffer, VALUE rb_body_s
859
1002
  }
860
1003
  }
861
1004
 
1005
+ /* Look up the pre-interned "HTTP_<UPCASED_UNDERSCORED>" Rack key for a
1006
+ * lowercase header name, or build a fresh one bytewise if it's not on the
1007
+ * 30-entry table. The fresh-build path mirrors cupcase_underscore exactly
1008
+ * — a single Ruby String allocation, US-ASCII encoded.
1009
+ *
1010
+ * Returns the (frozen, table-owned) VALUE on a hit; the freshly-built
1011
+ * (mutable, US-ASCII) VALUE on a miss. Both are safe as Hash keys: Ruby
1012
+ * Hash dups+freezes mutable String keys on insertion. */
1013
+ static VALUE http_key_for(VALUE name_str) {
1014
+ const char *src = RSTRING_PTR(name_str);
1015
+ long src_len = RSTRING_LEN(name_str);
1016
+
1017
+ /* The lowercase keys come straight from the parser's own
1018
+ * stash_pending_header — for the 30 pre-interned entries those
1019
+ * Strings are literally the same VALUE as header_table_lc_v[i],
1020
+ * so we can short-circuit with a pointer compare before falling
1021
+ * back to the byte-equality scan. */
1022
+ for (int i = 0; i < HEADER_TABLE_PAIRS; i++) {
1023
+ if (header_table_lc_v[i] == name_str) {
1024
+ return rb_ary_entry(rb_aHeaderTable, (i * 2) + 1);
1025
+ }
1026
+ }
1027
+ /* Fallback for headers that came in via a non-parser path (e.g.
1028
+ * adapter receives an artificially constructed Request in specs)
1029
+ * — case-insensitive scan against the same table. */
1030
+ int idx = header_table_lookup(src, src_len);
1031
+ if (idx >= 0) {
1032
+ return rb_ary_entry(rb_aHeaderTable, (idx * 2) + 1);
1033
+ }
1034
+
1035
+ /* Not on the table — build "HTTP_<UPCASED_UNDERSCORED>" in one alloc. */
1036
+ VALUE out = rb_str_new(NULL, 5 + src_len);
1037
+ char *dst = RSTRING_PTR(out);
1038
+ dst[0] = 'H'; dst[1] = 'T'; dst[2] = 'T'; dst[3] = 'P'; dst[4] = '_';
1039
+ for (long i = 0; i < src_len; i++) {
1040
+ unsigned char c = (unsigned char)src[i];
1041
+ if (c >= 'a' && c <= 'z') {
1042
+ dst[5 + i] = (char)(c - 32);
1043
+ } else if (c == '-') {
1044
+ dst[5 + i] = '_';
1045
+ } else {
1046
+ dst[5 + i] = (char)c;
1047
+ }
1048
+ }
1049
+ rb_enc_associate(out, rb_usascii_encoding());
1050
+ RB_GC_GUARD(name_str);
1051
+ return out;
1052
+ }
1053
+
1054
+ /* Iteration callback for the headers Hash in cbuild_env. `arg` is the env
1055
+ * Hash; we map the lowercase header name to its HTTP_* Rack key (via the
1056
+ * pre-interned table or a one-allocation upcase) and store the value. */
1057
+ static int build_env_iter(VALUE name, VALUE value, VALUE arg) {
1058
+ VALUE env = arg;
1059
+ if (TYPE(name) != T_STRING) return ST_CONTINUE;
1060
+
1061
+ VALUE http_key = http_key_for(name);
1062
+ rb_hash_aset(env, http_key, value);
1063
+
1064
+ /* Promote the two RFC-mandated non-HTTP_ env keys. We compare against
1065
+ * the pre-interned VALUEs first (pointer compare, common case) and
1066
+ * fall back to byte compare for off-table-but-still-named matches. */
1067
+ if (name == header_table_lc_v[8] /* "content-length" */ ||
1068
+ (RSTRING_LEN(name) == 14 &&
1069
+ memcmp(RSTRING_PTR(name), "content-length", 14) == 0)) {
1070
+ rb_hash_aset(env, rb_kCONTENT_LENGTH, value);
1071
+ } else if (name == header_table_lc_v[9] /* "content-type" */ ||
1072
+ (RSTRING_LEN(name) == 12 &&
1073
+ memcmp(RSTRING_PTR(name), "content-type", 12) == 0)) {
1074
+ rb_hash_aset(env, rb_kCONTENT_TYPE, value);
1075
+ }
1076
+ return ST_CONTINUE;
1077
+ }
1078
+
1079
+ /* Hyperion::CParser.build_env(env, request) -> env
1080
+ *
1081
+ * Phase 3a (1.7.1) — populate the Rack env hash with REQUEST_METHOD,
1082
+ * PATH_INFO, QUERY_STRING, HTTP_VERSION, SERVER_PROTOCOL, CONTENT_TYPE,
1083
+ * CONTENT_LENGTH, and HTTP_<UPCASED_UNDERSCORED> for every parsed header.
1084
+ *
1085
+ * The Ruby caller (Hyperion::Adapter::Rack#build_env) sets the rest of the
1086
+ * Rack-required keys (rack.input, REMOTE_ADDR, SERVER_NAME/PORT, …) since
1087
+ * those need a StringIO from a pool and a peer-address split. The header
1088
+ * loop is the bytewise-bound piece and the only thing worth pulling into
1089
+ * C — moving the full env build would mean threading the pool, host
1090
+ * splitter, and version constant through the FFI boundary for ~no extra
1091
+ * win.
1092
+ *
1093
+ * Returns the same env Hash (callers can either chain or ignore).
1094
+ */
1095
+ static VALUE cbuild_env(VALUE self, VALUE env, VALUE request) {
1096
+ (void)self;
1097
+ Check_Type(env, T_HASH);
1098
+
1099
+ /* Read Request ivars directly — Request is a frozen value object set
1100
+ * up in initialize; no risk of stale reads, no method-dispatch cost. */
1101
+ VALUE method = rb_ivar_get(request, id_iv_method);
1102
+ VALUE path = rb_ivar_get(request, id_iv_path);
1103
+ VALUE query_string = rb_ivar_get(request, id_iv_query_string);
1104
+ VALUE http_version = rb_ivar_get(request, id_iv_http_version);
1105
+ VALUE headers = rb_ivar_get(request, id_iv_headers);
1106
+
1107
+ rb_hash_aset(env, rb_kREQUEST_METHOD, method);
1108
+ rb_hash_aset(env, rb_kPATH_INFO, path);
1109
+ rb_hash_aset(env, rb_kQUERY_STRING, query_string);
1110
+ rb_hash_aset(env, rb_kSERVER_PROTOCOL, http_version);
1111
+ rb_hash_aset(env, rb_kHTTP_VERSION, http_version);
1112
+
1113
+ if (TYPE(headers) == T_HASH) {
1114
+ rb_hash_foreach(headers, build_env_iter, env);
1115
+ }
1116
+
1117
+ return env;
1118
+ }
1119
+
1120
+ /* Hyperion::CParser.parse_cookie_header(cookie_str) -> Hash
1121
+ *
1122
+ * Phase 3b (1.7.1) — split a single Cookie header value into its
1123
+ * { "name" => "value" } pairs.
1124
+ *
1125
+ * Standard format: "name1=val1; name2=val2; name3=val3".
1126
+ * Leading/trailing ASCII whitespace is trimmed around each pair and
1127
+ * around each key. Empty values are valid. Pairs without `=` are skipped
1128
+ * (RFC 6265 calls them ignorable). Repeated names are last-wins —
1129
+ * middlewares that need RFC-strict merge can override.
1130
+ *
1131
+ * Cookies are NOT URL-decoded by spec; values are opaque octets. We
1132
+ * leave them verbatim. The returned Hash is mutable so the caller can
1133
+ * extend it (e.g. for session-cookie hot-swaps).
1134
+ */
1135
+ static VALUE cparse_cookie_header(VALUE self, VALUE rb_cookie) {
1136
+ (void)self;
1137
+ Check_Type(rb_cookie, T_STRING);
1138
+
1139
+ VALUE result = rb_hash_new();
1140
+
1141
+ const char *src = RSTRING_PTR(rb_cookie);
1142
+ long src_len = RSTRING_LEN(rb_cookie);
1143
+ long i = 0;
1144
+
1145
+ while (i < src_len) {
1146
+ /* Skip leading whitespace and stray semicolons. */
1147
+ while (i < src_len && (src[i] == ' ' || src[i] == '\t' ||
1148
+ src[i] == ';')) {
1149
+ i++;
1150
+ }
1151
+ if (i >= src_len) break;
1152
+
1153
+ /* Pair runs to next ';' (or end of string). */
1154
+ long pair_start = i;
1155
+ while (i < src_len && src[i] != ';') i++;
1156
+ long pair_end = i;
1157
+
1158
+ /* Trim trailing whitespace inside the pair. */
1159
+ while (pair_end > pair_start &&
1160
+ (src[pair_end - 1] == ' ' || src[pair_end - 1] == '\t')) {
1161
+ pair_end--;
1162
+ }
1163
+ if (pair_end == pair_start) continue;
1164
+
1165
+ /* Find '=' inside [pair_start, pair_end). */
1166
+ long eq = -1;
1167
+ for (long j = pair_start; j < pair_end; j++) {
1168
+ if (src[j] == '=') { eq = j; break; }
1169
+ }
1170
+ if (eq < 0) continue; /* malformed — no '=' — skip per RFC 6265. */
1171
+
1172
+ /* Trim trailing ws on key (between pair_start and eq). */
1173
+ long key_end = eq;
1174
+ while (key_end > pair_start &&
1175
+ (src[key_end - 1] == ' ' || src[key_end - 1] == '\t')) {
1176
+ key_end--;
1177
+ }
1178
+ if (key_end == pair_start) continue; /* empty name — skip. */
1179
+
1180
+ /* Skip leading ws on value (between eq+1 and pair_end). */
1181
+ long val_start = eq + 1;
1182
+ while (val_start < pair_end &&
1183
+ (src[val_start] == ' ' || src[val_start] == '\t')) {
1184
+ val_start++;
1185
+ }
1186
+
1187
+ VALUE key = rb_str_new(src + pair_start, key_end - pair_start);
1188
+ VALUE val = rb_str_new(src + val_start, pair_end - val_start);
1189
+ rb_hash_aset(result, key, val);
1190
+ }
1191
+
1192
+ RB_GC_GUARD(rb_cookie);
1193
+ return result;
1194
+ }
1195
+
862
1196
  void Init_hyperion_http(void) {
863
1197
  install_settings();
864
1198
 
@@ -879,6 +1213,10 @@ void Init_hyperion_http(void) {
879
1213
  cupcase_underscore, 1);
880
1214
  rb_define_singleton_method(rb_cCParser, "chunked_body_complete?",
881
1215
  cchunked_body_complete, 2);
1216
+ rb_define_singleton_method(rb_cCParser, "build_env",
1217
+ cbuild_env, 2);
1218
+ rb_define_singleton_method(rb_cCParser, "parse_cookie_header",
1219
+ cparse_cookie_header, 1);
882
1220
 
883
1221
  id_new = rb_intern("new");
884
1222
  id_downcase = rb_intern("downcase");
@@ -888,4 +1226,101 @@ void Init_hyperion_http(void) {
888
1226
  id_http_version_kw = rb_intern("http_version");
889
1227
  id_headers_kw = rb_intern("headers");
890
1228
  id_body_kw = rb_intern("body");
1229
+
1230
+ /* Phase 3a (1.7.1) — Request ivars + fixed env-key Strings. The
1231
+ * env-key Strings are deeply frozen and registered via rb_global_variable
1232
+ * so the GC doesn't reclaim them; reusing a single VALUE per fixed key
1233
+ * eliminates a per-request String allocation on the hot path. */
1234
+ id_iv_method = rb_intern("@method");
1235
+ id_iv_path = rb_intern("@path");
1236
+ id_iv_query_string = rb_intern("@query_string");
1237
+ id_iv_http_version = rb_intern("@http_version");
1238
+ id_iv_headers = rb_intern("@headers");
1239
+
1240
+ rb_kREQUEST_METHOD = rb_obj_freeze(rb_str_new_cstr("REQUEST_METHOD"));
1241
+ rb_kPATH_INFO = rb_obj_freeze(rb_str_new_cstr("PATH_INFO"));
1242
+ rb_kQUERY_STRING = rb_obj_freeze(rb_str_new_cstr("QUERY_STRING"));
1243
+ rb_kHTTP_VERSION = rb_obj_freeze(rb_str_new_cstr("HTTP_VERSION"));
1244
+ rb_kSERVER_PROTOCOL = rb_obj_freeze(rb_str_new_cstr("SERVER_PROTOCOL"));
1245
+ rb_kCONTENT_TYPE = rb_obj_freeze(rb_str_new_cstr("CONTENT_TYPE"));
1246
+ rb_kCONTENT_LENGTH = rb_obj_freeze(rb_str_new_cstr("CONTENT_LENGTH"));
1247
+ rb_global_variable(&rb_kREQUEST_METHOD);
1248
+ rb_global_variable(&rb_kPATH_INFO);
1249
+ rb_global_variable(&rb_kQUERY_STRING);
1250
+ rb_global_variable(&rb_kHTTP_VERSION);
1251
+ rb_global_variable(&rb_kSERVER_PROTOCOL);
1252
+ rb_global_variable(&rb_kCONTENT_TYPE);
1253
+ rb_global_variable(&rb_kCONTENT_LENGTH);
1254
+
1255
+ /* 2.4-B (S1, S2): the Qnil-coercion sentinel + the smuggling-defense
1256
+ * lookup keys, all interned + frozen once at module init. They show
1257
+ * up in every parse() call. */
1258
+ rb_kEMPTY_STR = rb_obj_freeze(rb_str_new("", 0));
1259
+ rb_kHTTP_1_1 = rb_obj_freeze(rb_str_new_cstr("HTTP/1.1"));
1260
+ rb_kHCONTENT_LENGTH_LC = rb_obj_freeze(rb_str_new_cstr("content-length"));
1261
+ rb_kHTRANSFER_ENCODING_LC = rb_obj_freeze(rb_str_new_cstr("transfer-encoding"));
1262
+ rb_global_variable(&rb_kEMPTY_STR);
1263
+ rb_global_variable(&rb_kHTTP_1_1);
1264
+ rb_global_variable(&rb_kHCONTENT_LENGTH_LC);
1265
+ rb_global_variable(&rb_kHTRANSFER_ENCODING_LC);
1266
+
1267
+ /* Phase 2c (1.7.1): build the 30-entry pre-interned header table.
1268
+ * Each entry caches the frozen lowercase header name (used as the
1269
+ * env-hash key by stash_pending_header) and the corresponding frozen
1270
+ * "HTTP_<UPCASED_UNDERSCORED>" Rack key (consumed by the Ruby-side
1271
+ * Hyperion::Adapter::Rack via a class-level constant lookup, so all
1272
+ * three layers — parser, adapter, env hash — share string identity).
1273
+ * `rb_aHeaderTable` is registered as a global so the GC doesn't
1274
+ * reclaim its members. */
1275
+ rb_aHeaderTable = rb_ary_new_capa(HEADER_TABLE_PAIRS * 2);
1276
+ rb_global_variable(&rb_aHeaderTable);
1277
+ for (int i = 0; i < HEADER_TABLE_PAIRS; i++) {
1278
+ VALUE lc = rb_str_new_cstr(header_table_lc[i]);
1279
+ VALUE http = rb_str_new_cstr(header_table_http[i]);
1280
+ rb_obj_freeze(lc);
1281
+ rb_obj_freeze(http);
1282
+ header_table_lc_v[i] = lc;
1283
+ header_table_lc_len[i] = (long)strlen(header_table_lc[i]);
1284
+ rb_ary_push(rb_aHeaderTable, lc);
1285
+ rb_ary_push(rb_aHeaderTable, http);
1286
+ }
1287
+ rb_obj_freeze(rb_aHeaderTable);
1288
+ rb_define_const(rb_cCParser, "PREINTERNED_HEADERS", rb_aHeaderTable);
1289
+
1290
+ /* Phase 1 (1.7.0) — sibling C unit owns Hyperion::Http::Sendfile.
1291
+ * Defined in sendfile.c; both objects link into the same .bundle/.so
1292
+ * so a single `require 'hyperion_http/hyperion_http'` brings up the
1293
+ * full surface. */
1294
+ extern void Init_hyperion_sendfile(void);
1295
+ Init_hyperion_sendfile();
1296
+
1297
+ /* 2.10-C — sibling C unit owns Hyperion::Http::PageCache.
1298
+ * Pre-built static-response cache mirrored on agoo's agooPage:
1299
+ * each cached asset's full HTTP/1.1 response (status line +
1300
+ * Content-Type + Content-Length + body) lives in ONE contiguous
1301
+ * heap buffer. The hot path issues a single write() syscall
1302
+ * with zero Ruby-side allocation. See ext/hyperion_http/page_cache.c
1303
+ * for design notes; the Ruby façade lives in
1304
+ * lib/hyperion/http/page_cache.rb. */
1305
+ extern void Init_hyperion_page_cache(void);
1306
+ Init_hyperion_page_cache();
1307
+
1308
+ /* WS-3 (2.1.0) — sibling C unit owns Hyperion::WebSocket::CFrame.
1309
+ * RFC 6455 frame parse/build + GVL-releasing unmask. Same single-.so
1310
+ * link arrangement as sendfile. */
1311
+ extern void Init_hyperion_websocket(void);
1312
+ Init_hyperion_websocket();
1313
+
1314
+ /* 2.4-A (2.4.0) — sibling C unit owns Hyperion::H2Codec::CGlue.
1315
+ * Direct C → Rust HPACK encode/decode bridge that bypasses the
1316
+ * Fiddle layer on the per-call hot path. Defines the v3 ABI;
1317
+ * Ruby's H2Codec::Encoder/Decoder dispatch through CGlue when
1318
+ * `available?` returns true and fall back to the v2 (Fiddle) path
1319
+ * transparently otherwise. The `dlopen` of the Rust cdylib is
1320
+ * deferred to `H2Codec.load!`'s call to `CGlue.install(path)` —
1321
+ * we don't want to probe the filesystem from Init time because
1322
+ * the Rust crate may not have built (no cargo on host) and a
1323
+ * fail-fast crash here would break parser.c entirely. */
1324
+ extern void Init_hyperion_h2_codec_glue(void);
1325
+ Init_hyperion_h2_codec_glue();
891
1326
  }