picohttp 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8a11d937822d71f87cd78734c038f1a73a25af902b142958208af81b3397f3be
4
- data.tar.gz: 21d3337883257c321b37c602d52c7ab68896343d856bfafaec329476b43c35fe
3
+ metadata.gz: cd749cf10a5d5718c50fee68b7598b44cf01e3709789931d2ed8ee6c8d69adb0
4
+ data.tar.gz: 664f012d7379f4c4ae09213f6fc68aa7152ce86f1ba1e7138eddbb17a7e56864
5
5
  SHA512:
6
- metadata.gz: 5e73eead0ddf6378830a445562e827a77f235e1f26d37d18c7956ec790652c9f8efb23b82385d90c512dc47a864b796b8f9f7c2826e5f724dcaae63d45c39d43
7
- data.tar.gz: 92893c74ac27005fc38891c49ce4c2a5c72e0f7fd453fce38871ab5b626db8cc10f000263c01573b7cceac331dbf4f5089ee413ec71f3839cede7fa0bc29693e
6
+ metadata.gz: 5c3af0a35ffd06aefd5bb57f993394c775030aecd60c9dea69e9deb9c0342d1c4d8db89e3c722b68c7a7839fd02c0458f92613e66dcd115ac0153525f3695828
7
+ data.tar.gz: 719f7c6ded60d90a21e14cf9f3c9220fda942f79a2d2a73be1a16dfb90a14ee95dabddf55b37ca176460abd33874064a2f0838992b44f2781676106172a489c2
@@ -2,7 +2,8 @@
2
2
  #include "picohttpparser.h"
3
3
 
4
4
  #define MAX_HEADER_NAME_LEN 256
5
- #define ENV_HASH_INITIAL_CAPACITY 64
5
+ #define MAX_HTTP_HEADERS 100
6
+ #define EXTRA_RACK_HEADERS 8
6
7
 
7
8
  VALUE rb_mPicohttp;
8
9
  VALUE rb_ePicohttpParseError;
@@ -12,10 +13,16 @@ static VALUE rb_str_request_method;
12
13
  static VALUE rb_str_server_protocol;
13
14
  static VALUE rb_str_path_info;
14
15
  static VALUE rb_str_query_string;
16
+ static VALUE rb_str_request_uri;
17
+ static VALUE rb_str_script_name;
18
+ static VALUE rb_str_server_name;
19
+ static VALUE rb_str_server_port;
15
20
  static VALUE rb_str_empty;
16
21
  static VALUE rb_str_http_1_0;
17
22
  static VALUE rb_str_http_1_1;
18
23
 
24
+ #include "string_lookup.inc"
25
+
19
26
  static VALUE
20
27
  http_version_string(int minor_version)
21
28
  {
@@ -28,6 +35,14 @@ http_version_string(int minor_version)
28
35
  }
29
36
  }
30
37
 
38
+ static VALUE
39
+ http_method_string(const char *method, size_t method_len)
40
+ {
41
+ VALUE str = lookup_method(method, method_len);
42
+ if (str == Qnil) str = rb_str_new(method, method_len);
43
+ return str;
44
+ }
45
+
31
46
  static VALUE
32
47
  header_name_to_env_key(const char *name, size_t name_len)
33
48
  {
@@ -35,12 +50,9 @@ header_name_to_env_key(const char *name, size_t name_len)
35
50
  rb_raise(rb_ePicohttpParseError, "Header name too long");
36
51
  }
37
52
 
38
- // Special cases for Content-Type and Content-Length (no HTTP_ prefix)
39
- if (name_len == 12 && strncasecmp(name, "content-type", 12) == 0) {
40
- return rb_interned_str_cstr("CONTENT_TYPE");
41
- }
42
- if (name_len == 14 && strncasecmp(name, "content-length", 14) == 0) {
43
- return rb_interned_str_cstr("CONTENT_LENGTH");
53
+ VALUE str = lookup_header(name, name_len);
54
+ if (str != Qnil) {
55
+ return str;
44
56
  }
45
57
 
46
58
  char env_name[MAX_HEADER_NAME_LEN + 6]; // "HTTP_" + name + null terminator
@@ -71,7 +83,7 @@ picohttp_parse_request(VALUE self, VALUE str)
71
83
 
72
84
  const char *method, *path;
73
85
  int minor_version;
74
- struct phr_header headers[100];
86
+ struct phr_header headers[MAX_HTTP_HEADERS];
75
87
  size_t method_len, path_len, num_headers = sizeof(headers) / sizeof(headers[0]);
76
88
 
77
89
  int result = phr_parse_request(buf, len, &method, &method_len, &path, &path_len,
@@ -102,6 +114,24 @@ picohttp_parse_request(VALUE self, VALUE str)
102
114
  INT2FIX(result));
103
115
  }
104
116
 
117
+ static VALUE
118
+ build_hash_with_combined_duplicates(VALUE *header_values, int count)
119
+ {
120
+ VALUE env = rb_hash_new();
121
+ for (int i = 0; i < count; i += 2) {
122
+ VALUE key = header_values[i];
123
+ VALUE val = header_values[i + 1];
124
+ VALUE existing = rb_hash_aref(env, key);
125
+ if (existing == Qnil) {
126
+ rb_hash_aset(env, key, val);
127
+ } else {
128
+ rb_str_cat2(existing, ", ");
129
+ rb_str_cat(existing, RSTRING_PTR(val), RSTRING_LEN(val));
130
+ }
131
+ }
132
+ return env;
133
+ }
134
+
105
135
  static VALUE
106
136
  picohttp_parse_request_env(VALUE self, VALUE str)
107
137
  {
@@ -112,7 +142,7 @@ picohttp_parse_request_env(VALUE self, VALUE str)
112
142
 
113
143
  const char *method, *path;
114
144
  int minor_version;
115
- struct phr_header headers[100];
145
+ struct phr_header headers[MAX_HTTP_HEADERS];
116
146
  size_t method_len, path_len, num_headers = sizeof(headers) / sizeof(headers[0]);
117
147
 
118
148
  int result = phr_parse_request(buf, len, &method, &method_len, &path, &path_len,
@@ -125,42 +155,98 @@ picohttp_parse_request_env(VALUE self, VALUE str)
125
155
  rb_raise(rb_ePicohttpParseError, "Invalid HTTP request");
126
156
  }
127
157
 
128
- #ifdef HAVE_RB_HASH_NEW_CAPA
129
- VALUE env = rb_hash_new_capa(ENV_HASH_INITIAL_CAPACITY);
130
- #else
131
- VALUE env = rb_hash_new();
132
- #endif
158
+ VALUE header_values[(MAX_HTTP_HEADERS + EXTRA_RACK_HEADERS) * 2];
159
+ int idx = 0;
133
160
 
134
161
  // Standard CGI/Rack environment variables
135
- rb_hash_aset(env, rb_str_request_method, rb_str_new(method, method_len));
136
- rb_hash_aset(env, rb_str_server_protocol, http_version_string(minor_version));
162
+ header_values[idx++] = rb_str_request_method;
163
+ header_values[idx++] = http_method_string(method, method_len);
164
+
165
+ header_values[idx++] = rb_str_server_protocol;
166
+ header_values[idx++] = http_version_string(minor_version);
137
167
 
138
168
  // Parse path and query string in C
139
169
  const char *query_start = memchr(path, '?', path_len);
140
170
  if (query_start) {
141
171
  size_t path_info_len = query_start - path;
142
172
  size_t query_len = path_len - path_info_len - 1;
143
- rb_hash_aset(env, rb_str_path_info, rb_str_new(path, path_info_len));
144
- rb_hash_aset(env, rb_str_query_string, rb_str_new(query_start + 1, query_len));
173
+
174
+ header_values[idx++] = rb_str_path_info;
175
+ header_values[idx++] = rb_str_new(path, path_info_len);
176
+
177
+ header_values[idx++] = rb_str_query_string;
178
+ header_values[idx++] = rb_str_new(query_start + 1, query_len);
145
179
  } else {
146
- rb_hash_aset(env, rb_str_path_info, rb_str_new(path, path_len));
147
- rb_hash_aset(env, rb_str_query_string, rb_str_empty);
180
+ header_values[idx++] = rb_str_path_info;
181
+ header_values[idx++] = rb_str_new(path, path_len);
182
+
183
+ header_values[idx++] = rb_str_query_string;
184
+ header_values[idx++] = rb_str_empty;
148
185
  }
149
186
 
187
+ // REQUEST_URI is the full path including query string
188
+ header_values[idx++] = rb_str_request_uri;
189
+ header_values[idx++] = rb_str_new(path, path_len);
190
+
191
+ // SCRIPT_NAME is always empty
192
+ header_values[idx++] = rb_str_script_name;
193
+ header_values[idx++] = rb_str_empty;
194
+
150
195
  // Convert headers to HTTP_ prefixed environment variables
151
196
  for (size_t i = 0; i < num_headers; i++) {
152
197
  if (headers[i].name == NULL) {
153
198
  rb_raise(rb_ePicohttpParseError, "HTTP line folding not supported");
154
199
  }
155
200
 
156
- VALUE header_name = header_name_to_env_key(headers[i].name, headers[i].name_len);
157
- VALUE header_value = rb_str_new(headers[i].value, headers[i].value_len);
158
- rb_hash_aset(env, header_name, header_value);
201
+ header_values[idx++] = header_name_to_env_key(headers[i].name, headers[i].name_len);
202
+ header_values[idx++] = rb_str_new(headers[i].value, headers[i].value_len);
203
+
204
+ // Extract SERVER_NAME/SERVER_PORT from Host header
205
+ if (headers[i].name_len == 4 &&
206
+ (headers[i].name[0] | 0x20) == 'h' &&
207
+ (headers[i].name[1] | 0x20) == 'o' &&
208
+ (headers[i].name[2] | 0x20) == 's' &&
209
+ (headers[i].name[3] | 0x20) == 't') {
210
+ const char *host = headers[i].value;
211
+ size_t host_len = headers[i].value_len;
212
+ const char *colon = memchr(host, ':', host_len);
213
+
214
+ if (colon) {
215
+ header_values[idx++] = rb_str_server_name;
216
+ header_values[idx++] = rb_str_new(host, colon - host);
217
+ header_values[idx++] = rb_str_server_port;
218
+ header_values[idx++] = rb_str_new(colon + 1, host_len - (colon - host) - 1);
219
+ } else {
220
+ header_values[idx++] = rb_str_server_name;
221
+ header_values[idx++] = rb_str_new(host, host_len);
222
+ }
223
+ }
224
+ }
225
+
226
+ #ifdef HAVE_RB_HASH_NEW_CAPA
227
+ VALUE env = rb_hash_new_capa(idx / 2);
228
+ #else
229
+ VALUE env = rb_hash_new();
230
+ #endif
231
+
232
+ rb_hash_bulk_insert(idx, header_values, env);
233
+
234
+ // Handle duplicate headers per RFC 7230
235
+ if (RHASH_SIZE(env) != (size_t)(idx / 2)) {
236
+ return build_hash_with_combined_duplicates(header_values, idx);
159
237
  }
160
238
 
161
239
  return env;
162
240
  }
163
241
 
242
+ static VALUE
243
+ register_interned_string(const char *str)
244
+ {
245
+ VALUE val = rb_interned_str_cstr(str);
246
+ rb_gc_register_mark_object(val);
247
+ return val;
248
+ }
249
+
164
250
  RUBY_FUNC_EXPORTED void
165
251
  Init_picohttp(void)
166
252
  {
@@ -174,20 +260,17 @@ Init_picohttp(void)
174
260
  rb_define_module_function(rb_mPicohttp, "parse_request_env", picohttp_parse_request_env, 1);
175
261
 
176
262
  // Initialize interned string constants
177
- rb_str_request_method = rb_interned_str_cstr("REQUEST_METHOD");
178
- rb_str_server_protocol = rb_interned_str_cstr("SERVER_PROTOCOL");
179
- rb_str_path_info = rb_interned_str_cstr("PATH_INFO");
180
- rb_str_query_string = rb_interned_str_cstr("QUERY_STRING");
181
- rb_str_empty = rb_interned_str_cstr("");
182
- rb_str_http_1_0 = rb_interned_str_cstr("HTTP/1.0");
183
- rb_str_http_1_1 = rb_interned_str_cstr("HTTP/1.1");
184
-
185
- // Prevent garbage collection of constants
186
- rb_gc_register_address(&rb_str_request_method);
187
- rb_gc_register_address(&rb_str_server_protocol);
188
- rb_gc_register_address(&rb_str_path_info);
189
- rb_gc_register_address(&rb_str_query_string);
190
- rb_gc_register_address(&rb_str_empty);
191
- rb_gc_register_address(&rb_str_http_1_0);
192
- rb_gc_register_address(&rb_str_http_1_1);
263
+ init_string_lookup();
264
+
265
+ rb_str_request_method = register_interned_string("REQUEST_METHOD");
266
+ rb_str_server_protocol = register_interned_string("SERVER_PROTOCOL");
267
+ rb_str_path_info = register_interned_string("PATH_INFO");
268
+ rb_str_query_string = register_interned_string("QUERY_STRING");
269
+ rb_str_request_uri = register_interned_string("REQUEST_URI");
270
+ rb_str_script_name = register_interned_string("SCRIPT_NAME");
271
+ rb_str_server_name = register_interned_string("SERVER_NAME");
272
+ rb_str_server_port = register_interned_string("SERVER_PORT");
273
+ rb_str_empty = register_interned_string("");
274
+ rb_str_http_1_0 = register_interned_string("HTTP/1.0");
275
+ rb_str_http_1_1 = register_interned_string("HTTP/1.1");
193
276
  }
@@ -0,0 +1,166 @@
1
+ /* This file is auto-generated by tool/generate_lookup.rb */
2
+
3
+ static struct string_lookup_t {
4
+ VALUE http_host; /* "HTTP_HOST" */
5
+ VALUE http_accept; /* "HTTP_ACCEPT" */
6
+ VALUE http_cookie; /* "HTTP_COOKIE" */
7
+ VALUE http_referer; /* "HTTP_REFERER" */
8
+ VALUE http_user_agent; /* "HTTP_USER_AGENT" */
9
+ VALUE http_connection; /* "HTTP_CONNECTION" */
10
+ VALUE content_type; /* "CONTENT_TYPE" */
11
+ VALUE http_cache_control; /* "HTTP_CACHE_CONTROL" */
12
+ VALUE http_authorization; /* "HTTP_AUTHORIZATION" */
13
+ VALUE content_length; /* "CONTENT_LENGTH" */
14
+ VALUE http_accept_encoding; /* "HTTP_ACCEPT_ENCODING" */
15
+ VALUE http_accept_language; /* "HTTP_ACCEPT_LANGUAGE" */
16
+ VALUE get; /* "GET" */
17
+ VALUE put; /* "PUT" */
18
+ VALUE post; /* "POST" */
19
+ VALUE head; /* "HEAD" */
20
+ VALUE patch; /* "PATCH" */
21
+ VALUE delete; /* "DELETE" */
22
+ VALUE options; /* "OPTIONS" */
23
+ } string_lookup;
24
+
25
+ static VALUE lookup_header(const char *s, size_t len) {
26
+ switch (len) {
27
+ case 4:
28
+ if ((s[0] | 32) == 'h' && (s[1] | 32) == 'o' && (s[2] | 32) == 's' && (s[3] | 32) == 't') return string_lookup.http_host;
29
+ break;
30
+ case 6:
31
+ if ((s[0] | 32) == 'a' && (s[1] | 32) == 'c' && (s[2] | 32) == 'c' && (s[3] | 32) == 'e' && (s[4] | 32) == 'p' && (s[5] | 32) == 't') return string_lookup.http_accept;
32
+ if ((s[0] | 32) == 'c' && (s[1] | 32) == 'o' && (s[2] | 32) == 'o' && (s[3] | 32) == 'k' && (s[4] | 32) == 'i' && (s[5] | 32) == 'e') return string_lookup.http_cookie;
33
+ break;
34
+ case 7:
35
+ if ((s[0] | 32) == 'r' && (s[1] | 32) == 'e' && (s[2] | 32) == 'f' && (s[3] | 32) == 'e' && (s[4] | 32) == 'r' && (s[5] | 32) == 'e' && (s[6] | 32) == 'r') return string_lookup.http_referer;
36
+ break;
37
+ case 10:
38
+ if ((s[0] | 32) == 'u' && (s[1] | 32) == 's' && (s[2] | 32) == 'e' && (s[3] | 32) == 'r' && (s[4] | 32) == '-' && (s[5] | 32) == 'a' && (s[6] | 32) == 'g' && (s[7] | 32) == 'e' && (s[8] | 32) == 'n' && (s[9] | 32) == 't') return string_lookup.http_user_agent;
39
+ if ((s[0] | 32) == 'c' && (s[1] | 32) == 'o' && (s[2] | 32) == 'n' && (s[3] | 32) == 'n' && (s[4] | 32) == 'e' && (s[5] | 32) == 'c' && (s[6] | 32) == 't' && (s[7] | 32) == 'i' && (s[8] | 32) == 'o' && (s[9] | 32) == 'n') return string_lookup.http_connection;
40
+ break;
41
+ case 12:
42
+ if ((s[0] | 32) == 'c' && (s[1] | 32) == 'o' && (s[2] | 32) == 'n' && (s[3] | 32) == 't' && (s[4] | 32) == 'e' && (s[5] | 32) == 'n' && (s[6] | 32) == 't' && (s[7] | 32) == '-' && (s[8] | 32) == 't' && (s[9] | 32) == 'y' && (s[10] | 32) == 'p' && (s[11] | 32) == 'e') return string_lookup.content_type;
43
+ break;
44
+ case 13:
45
+ if ((s[0] | 32) == 'c' && (s[1] | 32) == 'a' && (s[2] | 32) == 'c' && (s[3] | 32) == 'h' && (s[4] | 32) == 'e' && (s[5] | 32) == '-' && (s[6] | 32) == 'c' && (s[7] | 32) == 'o' && (s[8] | 32) == 'n' && (s[9] | 32) == 't' && (s[10] | 32) == 'r' && (s[11] | 32) == 'o' && (s[12] | 32) == 'l') return string_lookup.http_cache_control;
46
+ if ((s[0] | 32) == 'a' && (s[1] | 32) == 'u' && (s[2] | 32) == 't' && (s[3] | 32) == 'h' && (s[4] | 32) == 'o' && (s[5] | 32) == 'r' && (s[6] | 32) == 'i' && (s[7] | 32) == 'z' && (s[8] | 32) == 'a' && (s[9] | 32) == 't' && (s[10] | 32) == 'i' && (s[11] | 32) == 'o' && (s[12] | 32) == 'n') return string_lookup.http_authorization;
47
+ break;
48
+ case 14:
49
+ if ((s[0] | 32) == 'c' && (s[1] | 32) == 'o' && (s[2] | 32) == 'n' && (s[3] | 32) == 't' && (s[4] | 32) == 'e' && (s[5] | 32) == 'n' && (s[6] | 32) == 't' && (s[7] | 32) == '-' && (s[8] | 32) == 'l' && (s[9] | 32) == 'e' && (s[10] | 32) == 'n' && (s[11] | 32) == 'g' && (s[12] | 32) == 't' && (s[13] | 32) == 'h') return string_lookup.content_length;
50
+ break;
51
+ case 15:
52
+ if ((s[0] | 32) == 'a' && (s[1] | 32) == 'c' && (s[2] | 32) == 'c' && (s[3] | 32) == 'e' && (s[4] | 32) == 'p' && (s[5] | 32) == 't' && (s[6] | 32) == '-' && (s[7] | 32) == 'e' && (s[8] | 32) == 'n' && (s[9] | 32) == 'c' && (s[10] | 32) == 'o' && (s[11] | 32) == 'd' && (s[12] | 32) == 'i' && (s[13] | 32) == 'n' && (s[14] | 32) == 'g') return string_lookup.http_accept_encoding;
53
+ if ((s[0] | 32) == 'a' && (s[1] | 32) == 'c' && (s[2] | 32) == 'c' && (s[3] | 32) == 'e' && (s[4] | 32) == 'p' && (s[5] | 32) == 't' && (s[6] | 32) == '-' && (s[7] | 32) == 'l' && (s[8] | 32) == 'a' && (s[9] | 32) == 'n' && (s[10] | 32) == 'g' && (s[11] | 32) == 'u' && (s[12] | 32) == 'a' && (s[13] | 32) == 'g' && (s[14] | 32) == 'e') return string_lookup.http_accept_language;
54
+ break;
55
+ }
56
+ return Qnil;
57
+ }
58
+
59
+ static VALUE lookup_method(const char *s, size_t len) {
60
+ switch (len) {
61
+ case 3:
62
+ if (s[0] == 'G' && s[1] == 'E' && s[2] == 'T') return string_lookup.get;
63
+ if (s[0] == 'P' && s[1] == 'U' && s[2] == 'T') return string_lookup.put;
64
+ break;
65
+ case 4:
66
+ if (s[0] == 'P' && s[1] == 'O' && s[2] == 'S' && s[3] == 'T') return string_lookup.post;
67
+ if (s[0] == 'H' && s[1] == 'E' && s[2] == 'A' && s[3] == 'D') return string_lookup.head;
68
+ break;
69
+ case 5:
70
+ if (s[0] == 'P' && s[1] == 'A' && s[2] == 'T' && s[3] == 'C' && s[4] == 'H') return string_lookup.patch;
71
+ break;
72
+ case 6:
73
+ if (s[0] == 'D' && s[1] == 'E' && s[2] == 'L' && s[3] == 'E' && s[4] == 'T' && s[5] == 'E') return string_lookup.delete;
74
+ break;
75
+ case 7:
76
+ if (s[0] == 'O' && s[1] == 'P' && s[2] == 'T' && s[3] == 'I' && s[4] == 'O' && s[5] == 'N' && s[6] == 'S') return string_lookup.options;
77
+ break;
78
+ }
79
+ return Qnil;
80
+ }
81
+
82
+ static void string_lookup_mark(void *ptr) {
83
+ struct string_lookup_t *s = ptr;
84
+ rb_gc_mark_movable(s->http_host);
85
+ rb_gc_mark_movable(s->http_accept);
86
+ rb_gc_mark_movable(s->http_cookie);
87
+ rb_gc_mark_movable(s->http_referer);
88
+ rb_gc_mark_movable(s->http_user_agent);
89
+ rb_gc_mark_movable(s->http_connection);
90
+ rb_gc_mark_movable(s->content_type);
91
+ rb_gc_mark_movable(s->http_cache_control);
92
+ rb_gc_mark_movable(s->http_authorization);
93
+ rb_gc_mark_movable(s->content_length);
94
+ rb_gc_mark_movable(s->http_accept_encoding);
95
+ rb_gc_mark_movable(s->http_accept_language);
96
+ rb_gc_mark_movable(s->get);
97
+ rb_gc_mark_movable(s->put);
98
+ rb_gc_mark_movable(s->post);
99
+ rb_gc_mark_movable(s->head);
100
+ rb_gc_mark_movable(s->patch);
101
+ rb_gc_mark_movable(s->delete);
102
+ rb_gc_mark_movable(s->options);
103
+ }
104
+
105
+ static void string_lookup_compact(void *ptr) {
106
+ struct string_lookup_t *s = ptr;
107
+ s->http_host = rb_gc_location(s->http_host);
108
+ s->http_accept = rb_gc_location(s->http_accept);
109
+ s->http_cookie = rb_gc_location(s->http_cookie);
110
+ s->http_referer = rb_gc_location(s->http_referer);
111
+ s->http_user_agent = rb_gc_location(s->http_user_agent);
112
+ s->http_connection = rb_gc_location(s->http_connection);
113
+ s->content_type = rb_gc_location(s->content_type);
114
+ s->http_cache_control = rb_gc_location(s->http_cache_control);
115
+ s->http_authorization = rb_gc_location(s->http_authorization);
116
+ s->content_length = rb_gc_location(s->content_length);
117
+ s->http_accept_encoding = rb_gc_location(s->http_accept_encoding);
118
+ s->http_accept_language = rb_gc_location(s->http_accept_language);
119
+ s->get = rb_gc_location(s->get);
120
+ s->put = rb_gc_location(s->put);
121
+ s->post = rb_gc_location(s->post);
122
+ s->head = rb_gc_location(s->head);
123
+ s->patch = rb_gc_location(s->patch);
124
+ s->delete = rb_gc_location(s->delete);
125
+ s->options = rb_gc_location(s->options);
126
+ }
127
+
128
+ static const rb_data_type_t string_lookup_type = {
129
+ .wrap_struct_name = "picohttp_string_lookup",
130
+ .function = {
131
+ .dmark = string_lookup_mark,
132
+ .dfree = NULL,
133
+ .dsize = NULL,
134
+ .dcompact = string_lookup_compact,
135
+ },
136
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
137
+ };
138
+
139
+ static void intern_str(VALUE wrapper, VALUE *field, const char *str) {
140
+ RB_OBJ_WRITE(wrapper, field, rb_interned_str_cstr(str));
141
+ }
142
+
143
+ static void init_string_lookup(void) {
144
+ VALUE wrapper = TypedData_Wrap_Struct(0, &string_lookup_type, &string_lookup);
145
+ rb_gc_register_mark_object(wrapper);
146
+ intern_str(wrapper, &string_lookup.http_host, "HTTP_HOST");
147
+ intern_str(wrapper, &string_lookup.http_accept, "HTTP_ACCEPT");
148
+ intern_str(wrapper, &string_lookup.http_cookie, "HTTP_COOKIE");
149
+ intern_str(wrapper, &string_lookup.http_referer, "HTTP_REFERER");
150
+ intern_str(wrapper, &string_lookup.http_user_agent, "HTTP_USER_AGENT");
151
+ intern_str(wrapper, &string_lookup.http_connection, "HTTP_CONNECTION");
152
+ intern_str(wrapper, &string_lookup.content_type, "CONTENT_TYPE");
153
+ intern_str(wrapper, &string_lookup.http_cache_control, "HTTP_CACHE_CONTROL");
154
+ intern_str(wrapper, &string_lookup.http_authorization, "HTTP_AUTHORIZATION");
155
+ intern_str(wrapper, &string_lookup.content_length, "CONTENT_LENGTH");
156
+ intern_str(wrapper, &string_lookup.http_accept_encoding, "HTTP_ACCEPT_ENCODING");
157
+ intern_str(wrapper, &string_lookup.http_accept_language, "HTTP_ACCEPT_LANGUAGE");
158
+ intern_str(wrapper, &string_lookup.get, "GET");
159
+ intern_str(wrapper, &string_lookup.put, "PUT");
160
+ intern_str(wrapper, &string_lookup.post, "POST");
161
+ intern_str(wrapper, &string_lookup.head, "HEAD");
162
+ intern_str(wrapper, &string_lookup.patch, "PATCH");
163
+ intern_str(wrapper, &string_lookup.delete, "DELETE");
164
+ intern_str(wrapper, &string_lookup.options, "OPTIONS");
165
+ }
166
+
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Picohttp
4
- VERSION = "0.1.1"
4
+ VERSION = "0.3.0"
5
5
  end
@@ -0,0 +1,138 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Generates fast C lookup code for known string sets
5
+
6
+ puts "/* This file is auto-generated by #{__FILE__} */"
7
+ puts
8
+
9
+ class StringLookup
10
+ def initialize
11
+ @string_table = Hash.new do |h, k|
12
+ h[k] = name_for_string(k)
13
+ end
14
+ @functions = []
15
+ end
16
+
17
+ def generate
18
+ # Generate the struct to hold all strings
19
+ puts "static struct string_lookup_t {"
20
+ @string_table.each do |v, name|
21
+ puts " VALUE #{name}; /* #{v.inspect} */"
22
+ end
23
+ puts "} string_lookup;"
24
+ puts
25
+
26
+ @functions.each do |code|
27
+ puts code
28
+ puts
29
+ end
30
+
31
+ # Generate mark function
32
+ puts "static void string_lookup_mark(void *ptr) {"
33
+ puts " struct string_lookup_t *s = ptr;"
34
+ @string_table.each do |v, name|
35
+ puts " rb_gc_mark_movable(s->#{name});"
36
+ end
37
+ puts "}"
38
+ puts
39
+
40
+ # Generate compact function
41
+ puts "static void string_lookup_compact(void *ptr) {"
42
+ puts " struct string_lookup_t *s = ptr;"
43
+ @string_table.each do |v, name|
44
+ puts " s->#{name} = rb_gc_location(s->#{name});"
45
+ end
46
+ puts "}"
47
+ puts
48
+
49
+ # Generate TypedData type
50
+ puts "static const rb_data_type_t string_lookup_type = {"
51
+ puts " .wrap_struct_name = \"picohttp_string_lookup\","
52
+ puts " .function = {"
53
+ puts " .dmark = string_lookup_mark,"
54
+ puts " .dfree = NULL,"
55
+ puts " .dsize = NULL,"
56
+ puts " .dcompact = string_lookup_compact,"
57
+ puts " },"
58
+ puts " .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,"
59
+ puts "};"
60
+ puts
61
+
62
+ puts "static void intern_str(VALUE wrapper, VALUE *field, const char *str) {"
63
+ puts " RB_OBJ_WRITE(wrapper, field, rb_interned_str_cstr(str));"
64
+ puts "}"
65
+ puts
66
+
67
+ puts "static void init_string_lookup(void) {"
68
+ puts " VALUE wrapper = TypedData_Wrap_Struct(0, &string_lookup_type, &string_lookup);"
69
+ puts " rb_gc_register_mark_object(wrapper);"
70
+ @string_table.each do |v, name|
71
+ puts " intern_str(wrapper, &string_lookup.#{name}, #{v.dump});"
72
+ end
73
+ puts "}"
74
+ puts
75
+ end
76
+
77
+ def name_for_string(string)
78
+ string.gsub(/[^a-z0-9]/i, "_").downcase
79
+ end
80
+
81
+ def generate_comparison(var, string, ignore_case: false)
82
+ 0.upto(string.length - 1).map do |i|
83
+ if ignore_case
84
+ "(#{var}[#{i}] | 32) == '#{string[i].downcase}'"
85
+ else
86
+ "#{var}[#{i}] == '#{string[i]}'"
87
+ end
88
+ end.join(" && ")
89
+ end
90
+
91
+ def add_function(function, mapping, ignore_case: false)
92
+ if mapping.is_a?(Array)
93
+ mapping = mapping.map do |str|
94
+ [str, str]
95
+ end.to_h
96
+ end
97
+ by_length = mapping.group_by do |(k,v)|
98
+ k.length
99
+ end
100
+
101
+ code = []
102
+ code << "static VALUE lookup_#{function}(const char *s, size_t len) {"
103
+ code << " switch (len) {"
104
+
105
+ by_length.sort.each do |len, strs|
106
+ code << " case #{len}:"
107
+ strs.each do |match, target|
108
+ name = @string_table[target]
109
+ code << " if (#{generate_comparison("s", match, ignore_case:)}) return string_lookup.#{name};"
110
+ end
111
+ code << " break;"
112
+ end
113
+
114
+ code << " }"
115
+ code << " return Qnil;"
116
+ code << "}"
117
+
118
+ @functions << code.join("\n")
119
+ end
120
+ end
121
+
122
+ HEADERS = %w[Content-Type Content-Length Host User-Agent Accept Accept-Encoding
123
+ Accept-Language Connection Cache-Control Cookie Authorization Referer]
124
+
125
+ METHODS = %w[GET POST PUT DELETE HEAD OPTIONS PATCH]
126
+
127
+ lookup = StringLookup.new
128
+ header_mapping = HEADERS.map do |header|
129
+ header = header.downcase
130
+ rack_key = header.upcase.gsub("-", "_")
131
+ unless header == "content-type" || header == "content-length"
132
+ rack_key = "HTTP_#{rack_key}"
133
+ end
134
+ [header, rack_key]
135
+ end.to_h
136
+ lookup.add_function("header", header_mapping, ignore_case: true)
137
+ lookup.add_function("method", METHODS)
138
+ lookup.generate
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picohttp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Hawthorn
@@ -27,8 +27,10 @@ files:
27
27
  - ext/picohttp/picohttp.h
28
28
  - ext/picohttp/picohttpparser.c
29
29
  - ext/picohttp/picohttpparser.h
30
+ - ext/picohttp/string_lookup.inc
30
31
  - lib/picohttp.rb
31
32
  - lib/picohttp/version.rb
33
+ - tool/generate_lookup.rb
32
34
  homepage: https://github.com/jhawthorn/picohttp
33
35
  licenses:
34
36
  - MIT
@@ -52,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
52
54
  - !ruby/object:Gem::Version
53
55
  version: '0'
54
56
  requirements: []
55
- rubygems_version: 3.6.9
57
+ rubygems_version: 4.0.3
56
58
  specification_version: 4
57
59
  summary: Fast HTTP request parser using picohttpparser
58
60
  test_files: []