http-parser-lite 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG ADDED
@@ -0,0 +1,3 @@
1
+ == 0.1.0 (2012-06-23)
2
+
3
+ * Initial version.
data/README.md ADDED
@@ -0,0 +1,77 @@
1
+ # HTTP Parser Lite
2
+
3
+ A Lite™ wrapper around the Joyent http-parser goodness for Ruby
4
+
5
+ ## Install
6
+
7
+ ```
8
+ gem install http-parser-lite
9
+ ```
10
+
11
+ ## Example
12
+
13
+ ```ruby
14
+ parser = HTTP::Parser.new
15
+
16
+ parser.on_message_begin do
17
+ puts "message begin"
18
+ end
19
+
20
+ parser.on_message_complete do
21
+ puts "message complete"
22
+ end
23
+
24
+ parser.on_url do |url|
25
+ puts "url: #{url}"
26
+ end
27
+
28
+ parser.on_header_field do |name|
29
+ puts "field: #{name}"
30
+ end
31
+
32
+ parser.on_header_value do |value|
33
+ puts "value: #{value}"
34
+ end
35
+
36
+ parser.on_body do |body|
37
+ puts "body: #{body}"
38
+ end
39
+
40
+ parser << "HTTP/1.1 200 OK\r\n"
41
+ parser << "Content-Type: text/plain;charset=utf-8\r\n"
42
+ parser << "Content-Length: 5\r\n"
43
+ parser << "Connection: close\r\n\r\n"
44
+ parser << "hello"
45
+
46
+ parser.reset
47
+
48
+ parser << "GET http://www.google.com/ HTTP/1.1\r\n\r\n"
49
+ ```
50
+
51
+ ## API
52
+
53
+ ```
54
+ HTTP::Parser
55
+ .new
56
+
57
+ #reset
58
+
59
+ #parse(data)
60
+ #<<(data)
61
+
62
+ #on_message_begin(&block)
63
+ #on_message_complete(&block)
64
+ #on_url(&block)
65
+ #on_header_field(&block)
66
+ #on_header_value(&block)
67
+ #on_body(&block)
68
+
69
+ #http_status
70
+ #http_method
71
+ #http_version
72
+ #http_content_length
73
+ ```
74
+
75
+ ## License
76
+
77
+ [Creative Commons Attribution - CC BY](http://creativecommons.org/licenses/by/3.0)
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'mkmf'
4
+ create_makefile 'http_parser'
@@ -0,0 +1,2059 @@
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to
8
+ * deal in the Software without restriction, including without limitation the
9
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ * sell copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ * IN THE SOFTWARE.
23
+ */
24
+ #include "http_parser.h"
25
+ #include <assert.h>
26
+ #include <stddef.h>
27
+ #include <ctype.h>
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <limits.h>
31
+
32
/* Fallback for toolchains whose <limits.h> does not provide ULLONG_MAX. */
#if !defined(ULLONG_MAX)
# define ULLONG_MAX (~(uint64_t) 0) /* 2^64-1 */
#endif

/* Smaller of two values. An argument may be evaluated twice, so do not pass
 * expressions with side effects.
 */
#if !defined(MIN)
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
39
+
40
+
41
/* Store error code `e` in the `parser` that is in scope at the call site.
 * When HTTP_PARSER_DEBUG is non-zero the source line of the call site is
 * stored in parser->error_lineno as well.
 */
#if !HTTP_PARSER_DEBUG
# define SET_ERRNO(e)                                                \
do {                                                                 \
  parser->http_errno = (e);                                          \
} while (0)
#else
# define SET_ERRNO(e)                                                \
do {                                                                 \
  parser->http_errno = (e);                                          \
  parser->error_lineno = __LINE__;                                   \
} while (0)
#endif
53
+
54
+
55
/* Invoke the notify callback settings->on_<FOR>, if one is installed.
 *
 * A non-zero return from the callback is recorded as HPE_CB_<FOR>. After the
 * callback has run, any parser errno other than HPE_OK (set just above, or
 * set by the callback itself, e.g. to pause) makes the enclosing function
 * return ER. Expects `parser` and `settings` to be in scope.
 */
#define CALLBACK_NOTIFY_(FOR, ER)                                    \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (settings->on_##FOR != NULL) {                                  \
    if (settings->on_##FOR(parser) != 0) {                           \
      SET_ERRNO(HPE_CB_##FOR);                                       \
    }                                                                \
                                                                     \
    /* Callback failed or requested a pause: stop parsing here */    \
    if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                       \
      return (ER);                                                   \
    }                                                                \
  }                                                                  \
} while (0)

/* Notify variant that counts the current byte as consumed on early return */
#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)

/* Notify variant that leaves the current byte unconsumed on early return */
#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
77
+
78
/* Invoke the data callback settings->on_<FOR> on the LEN bytes that start at
 * <FOR>_mark. Nothing happens when the mark is unset.
 *
 * A non-zero return from the callback is recorded as HPE_CB_<FOR>. After the
 * callback has run, any parser errno other than HPE_OK makes the enclosing
 * function return ER (the mark is left untouched in that case). Otherwise
 * the mark is cleared, whether or not a callback was installed.
 */
#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
do {                                                                 \
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
                                                                     \
  if (FOR##_mark != NULL) {                                          \
    if (settings->on_##FOR != NULL) {                                \
      if (settings->on_##FOR(parser, FOR##_mark, (LEN)) != 0) {      \
        SET_ERRNO(HPE_CB_##FOR);                                     \
      }                                                              \
                                                                     \
      /* Callback failed or requested a pause: stop parsing here */  \
      if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                     \
        return (ER);                                                 \
      }                                                              \
    }                                                                \
    FOR##_mark = NULL;                                               \
  }                                                                  \
} while (0)

/* Data variant that counts the current byte as consumed on early return */
#define CALLBACK_DATA(FOR)                                           \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)

/* Data variant that leaves the current byte unconsumed on early return */
#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)

/* Point <FOR>_mark at the current byte unless a mark is already set */
#define MARK(FOR)                                                    \
do {                                                                 \
  if (FOR##_mark == NULL) {                                          \
    FOR##_mark = p;                                                  \
  }                                                                  \
} while (0)
113
+
114
+
115
/* Lower-case spellings of the header names and header values referenced by
 * name in this file.
 */

/* header names */
#define PROXY_CONNECTION    "proxy-connection"
#define CONNECTION          "connection"
#define CONTENT_LENGTH      "content-length"
#define TRANSFER_ENCODING   "transfer-encoding"
#define UPGRADE             "upgrade"

/* header values */
#define CHUNKED             "chunked"
#define KEEP_ALIVE          "keep-alive"
#define CLOSE               "close"
123
+
124
+
125
+ static const char *method_strings[] =
126
+ {
127
+ #define XX(num, name, string) #string,
128
+ HTTP_METHOD_MAP(XX)
129
+ #undef XX
130
+ };
131
+
132
+
133
/* Tokens as defined by rfc 2616, indexed by byte value. A token character
 * maps to its lower-case form; every other byte maps to 0.
 *
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Only the first 128 entries are listed; the remaining ones are
 * zero-initialized, i.e. bytes 0x80-0xFF are never token characters.
 */
static const char tokens[256] = {
/* 0x00  nul .. si   (control characters) */
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x10  dle .. us   (control characters) */
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
/* 0x20  sp   !    "    #    $    %    &    '    (    )    *    +    ,    -    .    / */
    0,  '!',  0,  '#', '$', '%', '&', '\'', 0,   0,  '*', '+',  0,  '-', '.',  0,
/* 0x30  0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ? */
   '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',  0,   0,   0,   0,   0,   0,
/* 0x40  @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O */
    0,  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x50  P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _ */
   'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,   0,   0,  '^', '_',
/* 0x60  `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o */
   '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70  p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~  del */
   'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',  0,  '|',  0,  '~',  0 };
173
+
174
+
175
/* Value of a hexadecimal digit, indexed by byte value: '0'-'9' map to 0-9,
 * 'A'-'F' and 'a'-'f' map to 10-15, and every other byte maps to -1.
 *
 * All 256 entries are spelled out. With only the first 128 listed, the
 * remaining entries would be zero-initialized and bytes 0x80-0xFF would be
 * indistinguishable from the digit '0'.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1  /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xa0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xb0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xc0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xd0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xe0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0xf0 */
  };
185
+
186
+
187
/* T marks the two entries (horizontal tab and form feed) that count as URL
 * characters only when the parser is built without HTTP_PARSER_STRICT.
 */
#if HTTP_PARSER_STRICT
# define T 0
#else
# define T 1
#endif


/* Non-zero for bytes that may appear in a URL without changing the URL
 * state, indexed by byte value. Space, '#', '?', DEL and the control
 * characters (apart from the two T entries) are 0. Only the first 128
 * entries are listed; the remaining ones are zero-initialized.
 */
static const uint8_t normal_url_char[256] = {
/* 0x00  nul soh stx etx eot enq ack bel  bs  ht  nl  vt  np  cr  so  si */
          0,  0,  0,  0,  0,  0,  0,  0,  0,  T,  0,  0,  T,  0,  0,  0,
/* 0x10  dle dc1 dc2 dc3 dc4 nak syn etb can  em sub esc  fs  gs  rs  us */
          0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
/* 0x20   sp  !   "   #   $   %   &   '   (   )   *   +   ,   -   .   / */
          0,  1,  1,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
/* 0x30   0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ? */
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,
/* 0x40   @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O */
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
/* 0x50   P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _ */
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
/* 0x60   `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o */
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
/* 0x70   p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~ del */
          1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0, };

#undef T
229
+
230
/* Parser states. The numeric order matters: see the note next to
 * s_headers_done.
 */
enum state {
  s_dead = 1, /* important that this is > 0 */

  /* message type not known yet */
  s_start_req_or_res,
  s_res_or_resp_H,

  /* response status line */
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status,
  s_res_line_almost_done,

  /* request line */
  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_host_start,
  s_req_host_v6_start,
  s_req_host_v6,
  s_req_host_v6_end,
  s_req_host,
  s_req_port_start,
  s_req_port,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  /* header lines */
  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,
  s_header_value_lws,

  s_header_almost_done,

  /* chunk size line */
  s_chunk_size_start,
  s_chunk_size,
  s_chunk_parameters,
  s_chunk_size_almost_done,

  s_headers_almost_done,
  s_headers_done,

  /* Important: 's_headers_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  s_body_identity,
  s_body_identity_eof,

  s_message_done
};
309
+
310
+
311
/* True while `state` is at or before s_headers_done, i.e. still a 'header'
 * state. The argument is parenthesized so that compound expressions (a
 * ternary, a bitwise AND, ...) are compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
312
+
313
+
314
/* States used while recognising the header names and header values listed
 * in the string constants of this file. h_general is 0.
 */
enum header_states {
  h_general = 0,
  h_C,
  h_CO,
  h_CON,

  /* header name partially matched */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* header name fully matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* header value partially matched */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* header value fully matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
339
+
340
+
341
/* Macros for character classes; TOKEN, IS_URL_CHAR and IS_HOST_CHAR depend
 * on strict-mode.
 *
 * Every macro parenthesizes its argument and its own expansion, so compound
 * arguments (e.g. a ternary) and surrounding operators cannot regroup the
 * expression. Arguments may be evaluated more than once; do not pass
 * expressions with side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Sets bit 0x20, which folds 'A'-'Z' onto 'a'-'z' (other bytes change too) */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))

#if HTTP_PARSER_STRICT
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (normal_url_char[(unsigned char) (c)])
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Lenient mode: a space counts as a token character, bytes with the high
 * bit set count as URL characters, and '_' is accepted in host names.
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
361
+
362
+
363
/* First state of a message for the in-scope `parser`: s_start_req when its
 * type is HTTP_REQUEST, s_start_res otherwise.
 */
#define start_state                                                  \
  ((parser->type == HTTP_REQUEST) ? s_start_req : s_start_res)


#if !HTTP_PARSER_STRICT
/* Lenient build: strict checks expand to nothing and the parser always
 * restarts at start_state for the next message.
 */
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#else
/* Strict build: a true `cond` records HPE_STRICT and jumps to the `error`
 * label of the enclosing function.
 */
# define STRICT_CHECK(cond)                                          \
do {                                                                 \
  if (cond) {                                                        \
    SET_ERRNO(HPE_STRICT);                                           \
    goto error;                                                      \
  }                                                                  \
} while (0)
/* Strict build: restart only when http_should_keep_alive() says so;
 * otherwise move to s_dead.
 */
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#endif
379
+
380
+
381
/* Map errno values to strings for human-readable output.
 *
 * One { name, description } entry is generated per HTTP_ERRNO_MAP item, in
 * map order; the name is the item's identifier with an "HPE_" prefix. The
 * table is a read-only lookup and is therefore declared const.
 */
#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
static const struct {
  const char *name;
  const char *description;
} http_strerror_tab[] = {
  HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
};
#undef HTTP_STRERROR_GEN
390
+
391
+ int http_message_needs_eof(http_parser *parser);
392
+
393
+ /* Our URL parser.
394
+ *
395
+ * This is designed to be shared by http_parser_execute() for URL validation,
396
+ * hence it has a state transition + byte-for-byte interface. In addition, it
397
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
398
+ * work of turning state transitions URL components for its API.
399
+ *
400
+ * This function should only be invoked with non-space characters. It is
401
+ * assumed that the caller cares about (and can detect) the transition between
402
+ * URL and non-URL states by looking for these.
403
+ */
404
+ static enum state
405
+ parse_url_char(enum state s, const char ch)
406
+ {
407
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
408
+ return s_dead;
409
+ }
410
+
411
+ #if HTTP_PARSER_STRICT
412
+ if (ch == '\t' || ch == '\f') {
413
+ return s_dead;
414
+ }
415
+ #endif
416
+
417
+ switch (s) {
418
+ case s_req_spaces_before_url:
419
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
420
+ * All methods except CONNECT are followed by '/' or '*'.
421
+ */
422
+
423
+ if (ch == '/' || ch == '*') {
424
+ return s_req_path;
425
+ }
426
+
427
+ if (IS_ALPHA(ch)) {
428
+ return s_req_schema;
429
+ }
430
+
431
+ break;
432
+
433
+ case s_req_schema:
434
+ if (IS_ALPHA(ch)) {
435
+ return s;
436
+ }
437
+
438
+ if (ch == ':') {
439
+ return s_req_schema_slash;
440
+ }
441
+
442
+ break;
443
+
444
+ case s_req_schema_slash:
445
+ if (ch == '/') {
446
+ return s_req_schema_slash_slash;
447
+ }
448
+
449
+ break;
450
+
451
+ case s_req_schema_slash_slash:
452
+ if (ch == '/') {
453
+ return s_req_host_start;
454
+ }
455
+
456
+ break;
457
+
458
+ case s_req_host_start:
459
+ if (ch == '[') {
460
+ return s_req_host_v6_start;
461
+ }
462
+
463
+ if (IS_HOST_CHAR(ch)) {
464
+ return s_req_host;
465
+ }
466
+
467
+ break;
468
+
469
+ case s_req_host:
470
+ if (IS_HOST_CHAR(ch)) {
471
+ return s_req_host;
472
+ }
473
+
474
+ /* FALLTHROUGH */
475
+ case s_req_host_v6_end:
476
+ switch (ch) {
477
+ case ':':
478
+ return s_req_port_start;
479
+
480
+ case '/':
481
+ return s_req_path;
482
+
483
+ case '?':
484
+ return s_req_query_string_start;
485
+ }
486
+
487
+ break;
488
+
489
+ case s_req_host_v6:
490
+ if (ch == ']') {
491
+ return s_req_host_v6_end;
492
+ }
493
+
494
+ /* FALLTHROUGH */
495
+ case s_req_host_v6_start:
496
+ if (IS_HEX(ch) || ch == ':') {
497
+ return s_req_host_v6;
498
+ }
499
+ break;
500
+
501
+ case s_req_port:
502
+ switch (ch) {
503
+ case '/':
504
+ return s_req_path;
505
+
506
+ case '?':
507
+ return s_req_query_string_start;
508
+ }
509
+
510
+ /* FALLTHROUGH */
511
+ case s_req_port_start:
512
+ if (IS_NUM(ch)) {
513
+ return s_req_port;
514
+ }
515
+
516
+ break;
517
+
518
+ case s_req_path:
519
+ if (IS_URL_CHAR(ch)) {
520
+ return s;
521
+ }
522
+
523
+ switch (ch) {
524
+ case '?':
525
+ return s_req_query_string_start;
526
+
527
+ case '#':
528
+ return s_req_fragment_start;
529
+ }
530
+
531
+ break;
532
+
533
+ case s_req_query_string_start:
534
+ case s_req_query_string:
535
+ if (IS_URL_CHAR(ch)) {
536
+ return s_req_query_string;
537
+ }
538
+
539
+ switch (ch) {
540
+ case '?':
541
+ /* allow extra '?' in query string */
542
+ return s_req_query_string;
543
+
544
+ case '#':
545
+ return s_req_fragment_start;
546
+ }
547
+
548
+ break;
549
+
550
+ case s_req_fragment_start:
551
+ if (IS_URL_CHAR(ch)) {
552
+ return s_req_fragment;
553
+ }
554
+
555
+ switch (ch) {
556
+ case '?':
557
+ return s_req_fragment;
558
+
559
+ case '#':
560
+ return s;
561
+ }
562
+
563
+ break;
564
+
565
+ case s_req_fragment:
566
+ if (IS_URL_CHAR(ch)) {
567
+ return s;
568
+ }
569
+
570
+ switch (ch) {
571
+ case '?':
572
+ case '#':
573
+ return s;
574
+ }
575
+
576
+ break;
577
+
578
+ default:
579
+ break;
580
+ }
581
+
582
+ /* We should never fall out of the switch above unless there's an error */
583
+ return s_dead;
584
+ }
585
+
586
+ size_t http_parser_execute (http_parser *parser,
587
+ const http_parser_settings *settings,
588
+ const char *data,
589
+ size_t len)
590
+ {
591
+ char c, ch;
592
+ int8_t unhex_val;
593
+ const char *p = data;
594
+ const char *header_field_mark = 0;
595
+ const char *header_value_mark = 0;
596
+ const char *url_mark = 0;
597
+ const char *body_mark = 0;
598
+
599
+ /* We're in an error state. Don't bother doing anything. */
600
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
601
+ return 0;
602
+ }
603
+
604
+ if (len == 0) {
605
+ switch (parser->state) {
606
+ case s_body_identity_eof:
607
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
608
+ * we got paused.
609
+ */
610
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
611
+ return 0;
612
+
613
+ case s_dead:
614
+ case s_start_req_or_res:
615
+ case s_start_res:
616
+ case s_start_req:
617
+ return 0;
618
+
619
+ default:
620
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
621
+ return 1;
622
+ }
623
+ }
624
+
625
+
626
+ if (parser->state == s_header_field)
627
+ header_field_mark = data;
628
+ if (parser->state == s_header_value)
629
+ header_value_mark = data;
630
+ switch (parser->state) {
631
+ case s_req_path:
632
+ case s_req_schema:
633
+ case s_req_schema_slash:
634
+ case s_req_schema_slash_slash:
635
+ case s_req_host_start:
636
+ case s_req_host_v6_start:
637
+ case s_req_host_v6:
638
+ case s_req_host_v6_end:
639
+ case s_req_host:
640
+ case s_req_port_start:
641
+ case s_req_port:
642
+ case s_req_query_string_start:
643
+ case s_req_query_string:
644
+ case s_req_fragment_start:
645
+ case s_req_fragment:
646
+ url_mark = data;
647
+ break;
648
+ }
649
+
650
+ for (p=data; p != data + len; p++) {
651
+ ch = *p;
652
+
653
+ if (PARSING_HEADER(parser->state)) {
654
+ ++parser->nread;
655
+ /* Buffer overflow attack */
656
+ if (parser->nread > HTTP_MAX_HEADER_SIZE) {
657
+ SET_ERRNO(HPE_HEADER_OVERFLOW);
658
+ goto error;
659
+ }
660
+ }
661
+
662
+ reexecute_byte:
663
+ switch (parser->state) {
664
+
665
+ case s_dead:
666
+ /* this state is used after a 'Connection: close' message
667
+ * the parser will error out if it reads another message
668
+ */
669
+ if (ch == CR || ch == LF)
670
+ break;
671
+
672
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
673
+ goto error;
674
+
675
+ case s_start_req_or_res:
676
+ {
677
+ if (ch == CR || ch == LF)
678
+ break;
679
+ parser->flags = 0;
680
+ parser->content_length = ULLONG_MAX;
681
+
682
+ if (ch == 'H') {
683
+ parser->state = s_res_or_resp_H;
684
+
685
+ CALLBACK_NOTIFY(message_begin);
686
+ } else {
687
+ parser->type = HTTP_REQUEST;
688
+ parser->state = s_start_req;
689
+ goto reexecute_byte;
690
+ }
691
+
692
+ break;
693
+ }
694
+
695
+ case s_res_or_resp_H:
696
+ if (ch == 'T') {
697
+ parser->type = HTTP_RESPONSE;
698
+ parser->state = s_res_HT;
699
+ } else {
700
+ if (ch != 'E') {
701
+ SET_ERRNO(HPE_INVALID_CONSTANT);
702
+ goto error;
703
+ }
704
+
705
+ parser->type = HTTP_REQUEST;
706
+ parser->method = HTTP_HEAD;
707
+ parser->index = 2;
708
+ parser->state = s_req_method;
709
+ }
710
+ break;
711
+
712
+ case s_start_res:
713
+ {
714
+ parser->flags = 0;
715
+ parser->content_length = ULLONG_MAX;
716
+
717
+ switch (ch) {
718
+ case 'H':
719
+ parser->state = s_res_H;
720
+ break;
721
+
722
+ case CR:
723
+ case LF:
724
+ break;
725
+
726
+ default:
727
+ SET_ERRNO(HPE_INVALID_CONSTANT);
728
+ goto error;
729
+ }
730
+
731
+ CALLBACK_NOTIFY(message_begin);
732
+ break;
733
+ }
734
+
735
+ case s_res_H:
736
+ STRICT_CHECK(ch != 'T');
737
+ parser->state = s_res_HT;
738
+ break;
739
+
740
+ case s_res_HT:
741
+ STRICT_CHECK(ch != 'T');
742
+ parser->state = s_res_HTT;
743
+ break;
744
+
745
+ case s_res_HTT:
746
+ STRICT_CHECK(ch != 'P');
747
+ parser->state = s_res_HTTP;
748
+ break;
749
+
750
+ case s_res_HTTP:
751
+ STRICT_CHECK(ch != '/');
752
+ parser->state = s_res_first_http_major;
753
+ break;
754
+
755
+ case s_res_first_http_major:
756
+ if (ch < '0' || ch > '9') {
757
+ SET_ERRNO(HPE_INVALID_VERSION);
758
+ goto error;
759
+ }
760
+
761
+ parser->http_major = ch - '0';
762
+ parser->state = s_res_http_major;
763
+ break;
764
+
765
+ /* major HTTP version or dot */
766
+ case s_res_http_major:
767
+ {
768
+ if (ch == '.') {
769
+ parser->state = s_res_first_http_minor;
770
+ break;
771
+ }
772
+
773
+ if (!IS_NUM(ch)) {
774
+ SET_ERRNO(HPE_INVALID_VERSION);
775
+ goto error;
776
+ }
777
+
778
+ parser->http_major *= 10;
779
+ parser->http_major += ch - '0';
780
+
781
+ if (parser->http_major > 999) {
782
+ SET_ERRNO(HPE_INVALID_VERSION);
783
+ goto error;
784
+ }
785
+
786
+ break;
787
+ }
788
+
789
+ /* first digit of minor HTTP version */
790
+ case s_res_first_http_minor:
791
+ if (!IS_NUM(ch)) {
792
+ SET_ERRNO(HPE_INVALID_VERSION);
793
+ goto error;
794
+ }
795
+
796
+ parser->http_minor = ch - '0';
797
+ parser->state = s_res_http_minor;
798
+ break;
799
+
800
+ /* minor HTTP version or end of request line */
801
+ case s_res_http_minor:
802
+ {
803
+ if (ch == ' ') {
804
+ parser->state = s_res_first_status_code;
805
+ break;
806
+ }
807
+
808
+ if (!IS_NUM(ch)) {
809
+ SET_ERRNO(HPE_INVALID_VERSION);
810
+ goto error;
811
+ }
812
+
813
+ parser->http_minor *= 10;
814
+ parser->http_minor += ch - '0';
815
+
816
+ if (parser->http_minor > 999) {
817
+ SET_ERRNO(HPE_INVALID_VERSION);
818
+ goto error;
819
+ }
820
+
821
+ break;
822
+ }
823
+
824
+ case s_res_first_status_code:
825
+ {
826
+ if (!IS_NUM(ch)) {
827
+ if (ch == ' ') {
828
+ break;
829
+ }
830
+
831
+ SET_ERRNO(HPE_INVALID_STATUS);
832
+ goto error;
833
+ }
834
+ parser->status_code = ch - '0';
835
+ parser->state = s_res_status_code;
836
+ break;
837
+ }
838
+
839
+ case s_res_status_code:
840
+ {
841
+ if (!IS_NUM(ch)) {
842
+ switch (ch) {
843
+ case ' ':
844
+ parser->state = s_res_status;
845
+ break;
846
+ case CR:
847
+ parser->state = s_res_line_almost_done;
848
+ break;
849
+ case LF:
850
+ parser->state = s_header_field_start;
851
+ break;
852
+ default:
853
+ SET_ERRNO(HPE_INVALID_STATUS);
854
+ goto error;
855
+ }
856
+ break;
857
+ }
858
+
859
+ parser->status_code *= 10;
860
+ parser->status_code += ch - '0';
861
+
862
+ if (parser->status_code > 999) {
863
+ SET_ERRNO(HPE_INVALID_STATUS);
864
+ goto error;
865
+ }
866
+
867
+ break;
868
+ }
869
+
870
+ case s_res_status:
871
+ /* the human readable status. e.g. "NOT FOUND"
872
+ * we are not humans so just ignore this */
873
+ if (ch == CR) {
874
+ parser->state = s_res_line_almost_done;
875
+ break;
876
+ }
877
+
878
+ if (ch == LF) {
879
+ parser->state = s_header_field_start;
880
+ break;
881
+ }
882
+ break;
883
+
884
+ case s_res_line_almost_done:
885
+ STRICT_CHECK(ch != LF);
886
+ parser->state = s_header_field_start;
887
+ break;
888
+
889
+ case s_start_req:
890
+ {
891
+ if (ch == CR || ch == LF)
892
+ break;
893
+ parser->flags = 0;
894
+ parser->content_length = ULLONG_MAX;
895
+
896
+ if (!IS_ALPHA(ch)) {
897
+ SET_ERRNO(HPE_INVALID_METHOD);
898
+ goto error;
899
+ }
900
+
901
+ parser->method = (enum http_method) 0;
902
+ parser->index = 1;
903
+ switch (ch) {
904
+ case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
905
+ case 'D': parser->method = HTTP_DELETE; break;
906
+ case 'G': parser->method = HTTP_GET; break;
907
+ case 'H': parser->method = HTTP_HEAD; break;
908
+ case 'L': parser->method = HTTP_LOCK; break;
909
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
910
+ case 'N': parser->method = HTTP_NOTIFY; break;
911
+ case 'O': parser->method = HTTP_OPTIONS; break;
912
+ case 'P': parser->method = HTTP_POST;
913
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
914
+ break;
915
+ case 'R': parser->method = HTTP_REPORT; break;
916
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
917
+ case 'T': parser->method = HTTP_TRACE; break;
918
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
919
+ default:
920
+ SET_ERRNO(HPE_INVALID_METHOD);
921
+ goto error;
922
+ }
923
+ parser->state = s_req_method;
924
+
925
+ CALLBACK_NOTIFY(message_begin);
926
+
927
+ break;
928
+ }
929
+
930
+ case s_req_method:
931
+ {
932
+ const char *matcher;
933
+ if (ch == '\0') {
934
+ SET_ERRNO(HPE_INVALID_METHOD);
935
+ goto error;
936
+ }
937
+
938
+ matcher = method_strings[parser->method];
939
+ if (ch == ' ' && matcher[parser->index] == '\0') {
940
+ parser->state = s_req_spaces_before_url;
941
+ } else if (ch == matcher[parser->index]) {
942
+ ; /* nada */
943
+ } else if (parser->method == HTTP_CONNECT) {
944
+ if (parser->index == 1 && ch == 'H') {
945
+ parser->method = HTTP_CHECKOUT;
946
+ } else if (parser->index == 2 && ch == 'P') {
947
+ parser->method = HTTP_COPY;
948
+ } else {
949
+ goto error;
950
+ }
951
+ } else if (parser->method == HTTP_MKCOL) {
952
+ if (parser->index == 1 && ch == 'O') {
953
+ parser->method = HTTP_MOVE;
954
+ } else if (parser->index == 1 && ch == 'E') {
955
+ parser->method = HTTP_MERGE;
956
+ } else if (parser->index == 1 && ch == '-') {
957
+ parser->method = HTTP_MSEARCH;
958
+ } else if (parser->index == 2 && ch == 'A') {
959
+ parser->method = HTTP_MKACTIVITY;
960
+ } else {
961
+ goto error;
962
+ }
963
+ } else if (parser->method == HTTP_SUBSCRIBE) {
964
+ if (parser->index == 1 && ch == 'E') {
965
+ parser->method = HTTP_SEARCH;
966
+ } else {
967
+ goto error;
968
+ }
969
+ } else if (parser->index == 1 && parser->method == HTTP_POST) {
970
+ if (ch == 'R') {
971
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
972
+ } else if (ch == 'U') {
973
+ parser->method = HTTP_PUT; /* or HTTP_PURGE */
974
+ } else if (ch == 'A') {
975
+ parser->method = HTTP_PATCH;
976
+ } else {
977
+ goto error;
978
+ }
979
+ } else if (parser->index == 2) {
980
+ if (parser->method == HTTP_PUT) {
981
+ if (ch == 'R') parser->method = HTTP_PURGE;
982
+ } else if (parser->method == HTTP_UNLOCK) {
983
+ if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
984
+ }
985
+ } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
986
+ parser->method = HTTP_PROPPATCH;
987
+ } else {
988
+ SET_ERRNO(HPE_INVALID_METHOD);
989
+ goto error;
990
+ }
991
+
992
+ ++parser->index;
993
+ break;
994
+ }
995
+
996
+ case s_req_spaces_before_url:
997
+ {
998
+ if (ch == ' ') break;
999
+
1000
+ MARK(url);
1001
+ if (parser->method == HTTP_CONNECT) {
1002
+ parser->state = s_req_host_start;
1003
+ }
1004
+
1005
+ parser->state = parse_url_char((enum state)parser->state, ch);
1006
+ if (parser->state == s_dead) {
1007
+ SET_ERRNO(HPE_INVALID_URL);
1008
+ goto error;
1009
+ }
1010
+
1011
+ break;
1012
+ }
1013
+
1014
+ case s_req_schema:
1015
+ case s_req_schema_slash:
1016
+ case s_req_schema_slash_slash:
1017
+ case s_req_host_start:
1018
+ case s_req_host_v6_start:
1019
+ case s_req_host_v6:
1020
+ case s_req_port_start:
1021
+ {
1022
+ switch (ch) {
1023
+ /* No whitespace allowed here */
1024
+ case ' ':
1025
+ case CR:
1026
+ case LF:
1027
+ SET_ERRNO(HPE_INVALID_URL);
1028
+ goto error;
1029
+ default:
1030
+ parser->state = parse_url_char((enum state)parser->state, ch);
1031
+ if (parser->state == s_dead) {
1032
+ SET_ERRNO(HPE_INVALID_URL);
1033
+ goto error;
1034
+ }
1035
+ }
1036
+
1037
+ break;
1038
+ }
1039
+
1040
+ case s_req_host:
1041
+ case s_req_host_v6_end:
1042
+ case s_req_port:
1043
+ case s_req_path:
1044
+ case s_req_query_string_start:
1045
+ case s_req_query_string:
1046
+ case s_req_fragment_start:
1047
+ case s_req_fragment:
1048
+ {
1049
+ switch (ch) {
1050
+ case ' ':
1051
+ parser->state = s_req_http_start;
1052
+ CALLBACK_DATA(url);
1053
+ break;
1054
+ case CR:
1055
+ case LF:
1056
+ parser->http_major = 0;
1057
+ parser->http_minor = 9;
1058
+ parser->state = (ch == CR) ?
1059
+ s_req_line_almost_done :
1060
+ s_header_field_start;
1061
+ CALLBACK_DATA(url);
1062
+ break;
1063
+ default:
1064
+ parser->state = parse_url_char((enum state)parser->state, ch);
1065
+ if (parser->state == s_dead) {
1066
+ SET_ERRNO(HPE_INVALID_URL);
1067
+ goto error;
1068
+ }
1069
+ }
1070
+ break;
1071
+ }
1072
+
1073
+ case s_req_http_start:
1074
+ switch (ch) {
1075
+ case 'H':
1076
+ parser->state = s_req_http_H;
1077
+ break;
1078
+ case ' ':
1079
+ break;
1080
+ default:
1081
+ SET_ERRNO(HPE_INVALID_CONSTANT);
1082
+ goto error;
1083
+ }
1084
+ break;
1085
+
1086
+ case s_req_http_H:
1087
+ STRICT_CHECK(ch != 'T');
1088
+ parser->state = s_req_http_HT;
1089
+ break;
1090
+
1091
+ case s_req_http_HT:
1092
+ STRICT_CHECK(ch != 'T');
1093
+ parser->state = s_req_http_HTT;
1094
+ break;
1095
+
1096
+ case s_req_http_HTT:
1097
+ STRICT_CHECK(ch != 'P');
1098
+ parser->state = s_req_http_HTTP;
1099
+ break;
1100
+
1101
+ case s_req_http_HTTP:
1102
+ STRICT_CHECK(ch != '/');
1103
+ parser->state = s_req_first_http_major;
1104
+ break;
1105
+
1106
+ /* first digit of major HTTP version */
1107
+ case s_req_first_http_major:
1108
+ if (ch < '1' || ch > '9') {
1109
+ SET_ERRNO(HPE_INVALID_VERSION);
1110
+ goto error;
1111
+ }
1112
+
1113
+ parser->http_major = ch - '0';
1114
+ parser->state = s_req_http_major;
1115
+ break;
1116
+
1117
+ /* major HTTP version or dot */
1118
+ case s_req_http_major:
1119
+ {
1120
+ if (ch == '.') {
1121
+ parser->state = s_req_first_http_minor;
1122
+ break;
1123
+ }
1124
+
1125
+ if (!IS_NUM(ch)) {
1126
+ SET_ERRNO(HPE_INVALID_VERSION);
1127
+ goto error;
1128
+ }
1129
+
1130
+ parser->http_major *= 10;
1131
+ parser->http_major += ch - '0';
1132
+
1133
+ if (parser->http_major > 999) {
1134
+ SET_ERRNO(HPE_INVALID_VERSION);
1135
+ goto error;
1136
+ }
1137
+
1138
+ break;
1139
+ }
1140
+
1141
+ /* first digit of minor HTTP version */
1142
+ case s_req_first_http_minor:
1143
+ if (!IS_NUM(ch)) {
1144
+ SET_ERRNO(HPE_INVALID_VERSION);
1145
+ goto error;
1146
+ }
1147
+
1148
+ parser->http_minor = ch - '0';
1149
+ parser->state = s_req_http_minor;
1150
+ break;
1151
+
1152
+ /* minor HTTP version or end of request line */
1153
+ case s_req_http_minor:
1154
+ {
1155
+ if (ch == CR) {
1156
+ parser->state = s_req_line_almost_done;
1157
+ break;
1158
+ }
1159
+
1160
+ if (ch == LF) {
1161
+ parser->state = s_header_field_start;
1162
+ break;
1163
+ }
1164
+
1165
+ /* XXX allow spaces after digit? */
1166
+
1167
+ if (!IS_NUM(ch)) {
1168
+ SET_ERRNO(HPE_INVALID_VERSION);
1169
+ goto error;
1170
+ }
1171
+
1172
+ parser->http_minor *= 10;
1173
+ parser->http_minor += ch - '0';
1174
+
1175
+ if (parser->http_minor > 999) {
1176
+ SET_ERRNO(HPE_INVALID_VERSION);
1177
+ goto error;
1178
+ }
1179
+
1180
+ break;
1181
+ }
1182
+
1183
+ /* end of request line */
1184
+ case s_req_line_almost_done:
1185
+ {
1186
+ if (ch != LF) {
1187
+ SET_ERRNO(HPE_LF_EXPECTED);
1188
+ goto error;
1189
+ }
1190
+
1191
+ parser->state = s_header_field_start;
1192
+ break;
1193
+ }
1194
+
1195
+ case s_header_field_start:
1196
+ {
1197
+ if (ch == CR) {
1198
+ parser->state = s_headers_almost_done;
1199
+ break;
1200
+ }
1201
+
1202
+ if (ch == LF) {
1203
+ /* they might be just sending \n instead of \r\n so this would be
1204
+ * the second \n to denote the end of headers*/
1205
+ parser->state = s_headers_almost_done;
1206
+ goto reexecute_byte;
1207
+ }
1208
+
1209
+ c = TOKEN(ch);
1210
+
1211
+ if (!c) {
1212
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1213
+ goto error;
1214
+ }
1215
+
1216
+ MARK(header_field);
1217
+
1218
+ parser->index = 0;
1219
+ parser->state = s_header_field;
1220
+
1221
+ switch (c) {
1222
+ case 'c':
1223
+ parser->header_state = h_C;
1224
+ break;
1225
+
1226
+ case 'p':
1227
+ parser->header_state = h_matching_proxy_connection;
1228
+ break;
1229
+
1230
+ case 't':
1231
+ parser->header_state = h_matching_transfer_encoding;
1232
+ break;
1233
+
1234
+ case 'u':
1235
+ parser->header_state = h_matching_upgrade;
1236
+ break;
1237
+
1238
+ default:
1239
+ parser->header_state = h_general;
1240
+ break;
1241
+ }
1242
+ break;
1243
+ }
1244
+
1245
+ case s_header_field:
1246
+ {
1247
+ c = TOKEN(ch);
1248
+
1249
+ if (c) {
1250
+ switch (parser->header_state) {
1251
+ case h_general:
1252
+ break;
1253
+
1254
+ case h_C:
1255
+ parser->index++;
1256
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1257
+ break;
1258
+
1259
+ case h_CO:
1260
+ parser->index++;
1261
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1262
+ break;
1263
+
1264
+ case h_CON:
1265
+ parser->index++;
1266
+ switch (c) {
1267
+ case 'n':
1268
+ parser->header_state = h_matching_connection;
1269
+ break;
1270
+ case 't':
1271
+ parser->header_state = h_matching_content_length;
1272
+ break;
1273
+ default:
1274
+ parser->header_state = h_general;
1275
+ break;
1276
+ }
1277
+ break;
1278
+
1279
+ /* connection */
1280
+
1281
+ case h_matching_connection:
1282
+ parser->index++;
1283
+ if (parser->index > sizeof(CONNECTION)-1
1284
+ || c != CONNECTION[parser->index]) {
1285
+ parser->header_state = h_general;
1286
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1287
+ parser->header_state = h_connection;
1288
+ }
1289
+ break;
1290
+
1291
+ /* proxy-connection */
1292
+
1293
+ case h_matching_proxy_connection:
1294
+ parser->index++;
1295
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1296
+ || c != PROXY_CONNECTION[parser->index]) {
1297
+ parser->header_state = h_general;
1298
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1299
+ parser->header_state = h_connection;
1300
+ }
1301
+ break;
1302
+
1303
+ /* content-length */
1304
+
1305
+ case h_matching_content_length:
1306
+ parser->index++;
1307
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1308
+ || c != CONTENT_LENGTH[parser->index]) {
1309
+ parser->header_state = h_general;
1310
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1311
+ parser->header_state = h_content_length;
1312
+ }
1313
+ break;
1314
+
1315
+ /* transfer-encoding */
1316
+
1317
+ case h_matching_transfer_encoding:
1318
+ parser->index++;
1319
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1320
+ || c != TRANSFER_ENCODING[parser->index]) {
1321
+ parser->header_state = h_general;
1322
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1323
+ parser->header_state = h_transfer_encoding;
1324
+ }
1325
+ break;
1326
+
1327
+ /* upgrade */
1328
+
1329
+ case h_matching_upgrade:
1330
+ parser->index++;
1331
+ if (parser->index > sizeof(UPGRADE)-1
1332
+ || c != UPGRADE[parser->index]) {
1333
+ parser->header_state = h_general;
1334
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1335
+ parser->header_state = h_upgrade;
1336
+ }
1337
+ break;
1338
+
1339
+ case h_connection:
1340
+ case h_content_length:
1341
+ case h_transfer_encoding:
1342
+ case h_upgrade:
1343
+ if (ch != ' ') parser->header_state = h_general;
1344
+ break;
1345
+
1346
+ default:
1347
+ assert(0 && "Unknown header_state");
1348
+ break;
1349
+ }
1350
+ break;
1351
+ }
1352
+
1353
+ if (ch == ':') {
1354
+ parser->state = s_header_value_start;
1355
+ CALLBACK_DATA(header_field);
1356
+ break;
1357
+ }
1358
+
1359
+ if (ch == CR) {
1360
+ parser->state = s_header_almost_done;
1361
+ CALLBACK_DATA(header_field);
1362
+ break;
1363
+ }
1364
+
1365
+ if (ch == LF) {
1366
+ parser->state = s_header_field_start;
1367
+ CALLBACK_DATA(header_field);
1368
+ break;
1369
+ }
1370
+
1371
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1372
+ goto error;
1373
+ }
1374
+
1375
+ case s_header_value_start:
1376
+ {
1377
+ if (ch == ' ' || ch == '\t') break;
1378
+
1379
+ MARK(header_value);
1380
+
1381
+ parser->state = s_header_value;
1382
+ parser->index = 0;
1383
+
1384
+ if (ch == CR) {
1385
+ parser->header_state = h_general;
1386
+ parser->state = s_header_almost_done;
1387
+ CALLBACK_DATA(header_value);
1388
+ break;
1389
+ }
1390
+
1391
+ if (ch == LF) {
1392
+ parser->state = s_header_field_start;
1393
+ CALLBACK_DATA(header_value);
1394
+ break;
1395
+ }
1396
+
1397
+ c = LOWER(ch);
1398
+
1399
+ switch (parser->header_state) {
1400
+ case h_upgrade:
1401
+ parser->flags |= F_UPGRADE;
1402
+ parser->header_state = h_general;
1403
+ break;
1404
+
1405
+ case h_transfer_encoding:
1406
+ /* looking for 'Transfer-Encoding: chunked' */
1407
+ if ('c' == c) {
1408
+ parser->header_state = h_matching_transfer_encoding_chunked;
1409
+ } else {
1410
+ parser->header_state = h_general;
1411
+ }
1412
+ break;
1413
+
1414
+ case h_content_length:
1415
+ if (!IS_NUM(ch)) {
1416
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1417
+ goto error;
1418
+ }
1419
+
1420
+ parser->content_length = ch - '0';
1421
+ break;
1422
+
1423
+ case h_connection:
1424
+ /* looking for 'Connection: keep-alive' */
1425
+ if (c == 'k') {
1426
+ parser->header_state = h_matching_connection_keep_alive;
1427
+ /* looking for 'Connection: close' */
1428
+ } else if (c == 'c') {
1429
+ parser->header_state = h_matching_connection_close;
1430
+ } else {
1431
+ parser->header_state = h_general;
1432
+ }
1433
+ break;
1434
+
1435
+ default:
1436
+ parser->header_state = h_general;
1437
+ break;
1438
+ }
1439
+ break;
1440
+ }
1441
+
1442
+ case s_header_value:
1443
+ {
1444
+
1445
+ if (ch == CR) {
1446
+ parser->state = s_header_almost_done;
1447
+ CALLBACK_DATA(header_value);
1448
+ break;
1449
+ }
1450
+
1451
+ if (ch == LF) {
1452
+ parser->state = s_header_almost_done;
1453
+ CALLBACK_DATA_NOADVANCE(header_value);
1454
+ goto reexecute_byte;
1455
+ }
1456
+
1457
+ c = LOWER(ch);
1458
+
1459
+ switch (parser->header_state) {
1460
+ case h_general:
1461
+ break;
1462
+
1463
+ case h_connection:
1464
+ case h_transfer_encoding:
1465
+ assert(0 && "Shouldn't get here.");
1466
+ break;
1467
+
1468
+ case h_content_length:
1469
+ {
1470
+ uint64_t t;
1471
+
1472
+ if (ch == ' ') break;
1473
+
1474
+ if (!IS_NUM(ch)) {
1475
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1476
+ goto error;
1477
+ }
1478
+
1479
+ t = parser->content_length;
1480
+ t *= 10;
1481
+ t += ch - '0';
1482
+
1483
+ /* Overflow? */
1484
+ if (t < parser->content_length || t == ULLONG_MAX) {
1485
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1486
+ goto error;
1487
+ }
1488
+
1489
+ parser->content_length = t;
1490
+ break;
1491
+ }
1492
+
1493
+ /* Transfer-Encoding: chunked */
1494
+ case h_matching_transfer_encoding_chunked:
1495
+ parser->index++;
1496
+ if (parser->index > sizeof(CHUNKED)-1
1497
+ || c != CHUNKED[parser->index]) {
1498
+ parser->header_state = h_general;
1499
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1500
+ parser->header_state = h_transfer_encoding_chunked;
1501
+ }
1502
+ break;
1503
+
1504
+ /* looking for 'Connection: keep-alive' */
1505
+ case h_matching_connection_keep_alive:
1506
+ parser->index++;
1507
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1508
+ || c != KEEP_ALIVE[parser->index]) {
1509
+ parser->header_state = h_general;
1510
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1511
+ parser->header_state = h_connection_keep_alive;
1512
+ }
1513
+ break;
1514
+
1515
+ /* looking for 'Connection: close' */
1516
+ case h_matching_connection_close:
1517
+ parser->index++;
1518
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1519
+ parser->header_state = h_general;
1520
+ } else if (parser->index == sizeof(CLOSE)-2) {
1521
+ parser->header_state = h_connection_close;
1522
+ }
1523
+ break;
1524
+
1525
+ case h_transfer_encoding_chunked:
1526
+ case h_connection_keep_alive:
1527
+ case h_connection_close:
1528
+ if (ch != ' ') parser->header_state = h_general;
1529
+ break;
1530
+
1531
+ default:
1532
+ parser->state = s_header_value;
1533
+ parser->header_state = h_general;
1534
+ break;
1535
+ }
1536
+ break;
1537
+ }
1538
+
1539
+ case s_header_almost_done:
1540
+ {
1541
+ STRICT_CHECK(ch != LF);
1542
+
1543
+ parser->state = s_header_value_lws;
1544
+
1545
+ switch (parser->header_state) {
1546
+ case h_connection_keep_alive:
1547
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1548
+ break;
1549
+ case h_connection_close:
1550
+ parser->flags |= F_CONNECTION_CLOSE;
1551
+ break;
1552
+ case h_transfer_encoding_chunked:
1553
+ parser->flags |= F_CHUNKED;
1554
+ break;
1555
+ default:
1556
+ break;
1557
+ }
1558
+
1559
+ break;
1560
+ }
1561
+
1562
+ case s_header_value_lws:
1563
+ {
1564
+ if (ch == ' ' || ch == '\t')
1565
+ parser->state = s_header_value_start;
1566
+ else
1567
+ {
1568
+ parser->state = s_header_field_start;
1569
+ goto reexecute_byte;
1570
+ }
1571
+ break;
1572
+ }
1573
+
1574
+ case s_headers_almost_done:
1575
+ {
1576
+ STRICT_CHECK(ch != LF);
1577
+
1578
+ if (parser->flags & F_TRAILING) {
1579
+ /* End of a chunked request */
1580
+ parser->state = NEW_MESSAGE();
1581
+ CALLBACK_NOTIFY(message_complete);
1582
+ break;
1583
+ }
1584
+
1585
+ parser->state = s_headers_done;
1586
+
1587
+ /* Set this here so that on_headers_complete() callbacks can see it */
1588
+ parser->upgrade =
1589
+ (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1590
+
1591
+ /* Here we call the headers_complete callback. This is somewhat
1592
+ * different than other callbacks because if the user returns 1, we
1593
+ * will interpret that as saying that this message has no body. This
1594
+ * is needed for the annoying case of receiving a response to a HEAD
1595
+ * request.
1596
+ *
1597
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1598
+ * we have to simulate it by handling a change in errno below.
1599
+ */
1600
+ if (settings->on_headers_complete) {
1601
+ switch (settings->on_headers_complete(parser)) {
1602
+ case 0:
1603
+ break;
1604
+
1605
+ case 1:
1606
+ parser->flags |= F_SKIPBODY;
1607
+ break;
1608
+
1609
+ default:
1610
+ SET_ERRNO(HPE_CB_headers_complete);
1611
+ return p - data; /* Error */
1612
+ }
1613
+ }
1614
+
1615
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1616
+ return p - data;
1617
+ }
1618
+
1619
+ goto reexecute_byte;
1620
+ }
1621
+
1622
+ case s_headers_done:
1623
+ {
1624
+ STRICT_CHECK(ch != LF);
1625
+
1626
+ parser->nread = 0;
1627
+
1628
+ /* Exit, the rest of the connect is in a different protocol. */
1629
+ if (parser->upgrade) {
1630
+ parser->state = NEW_MESSAGE();
1631
+ CALLBACK_NOTIFY(message_complete);
1632
+ return (p - data) + 1;
1633
+ }
1634
+
1635
+ if (parser->flags & F_SKIPBODY) {
1636
+ parser->state = NEW_MESSAGE();
1637
+ CALLBACK_NOTIFY(message_complete);
1638
+ } else if (parser->flags & F_CHUNKED) {
1639
+ /* chunked encoding - ignore Content-Length header */
1640
+ parser->state = s_chunk_size_start;
1641
+ } else {
1642
+ if (parser->content_length == 0) {
1643
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1644
+ parser->state = NEW_MESSAGE();
1645
+ CALLBACK_NOTIFY(message_complete);
1646
+ } else if (parser->content_length != ULLONG_MAX) {
1647
+ /* Content-Length header given and non-zero */
1648
+ parser->state = s_body_identity;
1649
+ } else {
1650
+ if (parser->type == HTTP_REQUEST ||
1651
+ !http_message_needs_eof(parser)) {
1652
+ /* Assume content-length 0 - read the next */
1653
+ parser->state = NEW_MESSAGE();
1654
+ CALLBACK_NOTIFY(message_complete);
1655
+ } else {
1656
+ /* Read body until EOF */
1657
+ parser->state = s_body_identity_eof;
1658
+ }
1659
+ }
1660
+ }
1661
+
1662
+ break;
1663
+ }
1664
+
1665
+ case s_body_identity:
1666
+ {
1667
+ uint64_t to_read = MIN(parser->content_length,
1668
+ (uint64_t) ((data + len) - p));
1669
+
1670
+ assert(parser->content_length != 0
1671
+ && parser->content_length != ULLONG_MAX);
1672
+
1673
+ /* The difference between advancing content_length and p is because
1674
+ * the latter will automatically advance on the next loop iteration.
1675
+ * Further, if content_length ends up at 0, we want to see the last
1676
+ * byte again for our message complete callback.
1677
+ */
1678
+ MARK(body);
1679
+ parser->content_length -= to_read;
1680
+ p += to_read - 1;
1681
+
1682
+ if (parser->content_length == 0) {
1683
+ parser->state = s_message_done;
1684
+
1685
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1686
+ *
1687
+ * The alternative to doing this is to wait for the next byte to
1688
+ * trigger the data callback, just as in every other case. The
1689
+ * problem with this is that this makes it difficult for the test
1690
+ * harness to distinguish between complete-on-EOF and
1691
+ * complete-on-length. It's not clear that this distinction is
1692
+ * important for applications, but let's keep it for now.
1693
+ */
1694
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1695
+ goto reexecute_byte;
1696
+ }
1697
+
1698
+ break;
1699
+ }
1700
+
1701
+ /* read until EOF */
1702
+ case s_body_identity_eof:
1703
+ MARK(body);
1704
+ p = data + len - 1;
1705
+
1706
+ break;
1707
+
1708
+ case s_message_done:
1709
+ parser->state = NEW_MESSAGE();
1710
+ CALLBACK_NOTIFY(message_complete);
1711
+ break;
1712
+
1713
+ case s_chunk_size_start:
1714
+ {
1715
+ assert(parser->nread == 1);
1716
+ assert(parser->flags & F_CHUNKED);
1717
+
1718
+ unhex_val = unhex[(unsigned char)ch];
1719
+ if (unhex_val == -1) {
1720
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1721
+ goto error;
1722
+ }
1723
+
1724
+ parser->content_length = unhex_val;
1725
+ parser->state = s_chunk_size;
1726
+ break;
1727
+ }
1728
+
1729
+ case s_chunk_size:
1730
+ {
1731
+ uint64_t t;
1732
+
1733
+ assert(parser->flags & F_CHUNKED);
1734
+
1735
+ if (ch == CR) {
1736
+ parser->state = s_chunk_size_almost_done;
1737
+ break;
1738
+ }
1739
+
1740
+ unhex_val = unhex[(unsigned char)ch];
1741
+
1742
+ if (unhex_val == -1) {
1743
+ if (ch == ';' || ch == ' ') {
1744
+ parser->state = s_chunk_parameters;
1745
+ break;
1746
+ }
1747
+
1748
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1749
+ goto error;
1750
+ }
1751
+
1752
+ t = parser->content_length;
1753
+ t *= 16;
1754
+ t += unhex_val;
1755
+
1756
+ /* Overflow? */
1757
+ if (t < parser->content_length || t == ULLONG_MAX) {
1758
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1759
+ goto error;
1760
+ }
1761
+
1762
+ parser->content_length = t;
1763
+ break;
1764
+ }
1765
+
1766
+ case s_chunk_parameters:
1767
+ {
1768
+ assert(parser->flags & F_CHUNKED);
1769
+ /* just ignore this shit. TODO check for overflow */
1770
+ if (ch == CR) {
1771
+ parser->state = s_chunk_size_almost_done;
1772
+ break;
1773
+ }
1774
+ break;
1775
+ }
1776
+
1777
+ case s_chunk_size_almost_done:
1778
+ {
1779
+ assert(parser->flags & F_CHUNKED);
1780
+ STRICT_CHECK(ch != LF);
1781
+
1782
+ parser->nread = 0;
1783
+
1784
+ if (parser->content_length == 0) {
1785
+ parser->flags |= F_TRAILING;
1786
+ parser->state = s_header_field_start;
1787
+ } else {
1788
+ parser->state = s_chunk_data;
1789
+ }
1790
+ break;
1791
+ }
1792
+
1793
+ case s_chunk_data:
1794
+ {
1795
+ uint64_t to_read = MIN(parser->content_length,
1796
+ (uint64_t) ((data + len) - p));
1797
+
1798
+ assert(parser->flags & F_CHUNKED);
1799
+ assert(parser->content_length != 0
1800
+ && parser->content_length != ULLONG_MAX);
1801
+
1802
+ /* See the explanation in s_body_identity for why the content
1803
+ * length and data pointers are managed this way.
1804
+ */
1805
+ MARK(body);
1806
+ parser->content_length -= to_read;
1807
+ p += to_read - 1;
1808
+
1809
+ if (parser->content_length == 0) {
1810
+ parser->state = s_chunk_data_almost_done;
1811
+ }
1812
+
1813
+ break;
1814
+ }
1815
+
1816
+ case s_chunk_data_almost_done:
1817
+ assert(parser->flags & F_CHUNKED);
1818
+ assert(parser->content_length == 0);
1819
+ STRICT_CHECK(ch != CR);
1820
+ parser->state = s_chunk_data_done;
1821
+ CALLBACK_DATA(body);
1822
+ break;
1823
+
1824
+ case s_chunk_data_done:
1825
+ assert(parser->flags & F_CHUNKED);
1826
+ STRICT_CHECK(ch != LF);
1827
+ parser->nread = 0;
1828
+ parser->state = s_chunk_size_start;
1829
+ break;
1830
+
1831
+ default:
1832
+ assert(0 && "unhandled state");
1833
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1834
+ goto error;
1835
+ }
1836
+ }
1837
+
1838
+ /* Run callbacks for any marks that we have leftover after we ran out of
1839
+ * bytes. There should be at most one of these set, so it's OK to invoke
1840
+ * them in series (unset marks will not result in callbacks).
1841
+ *
1842
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
1843
+ * overflowed 'data' and this allows us to correct for the off-by-one that
1844
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1845
+ * value that's in-bounds).
1846
+ */
1847
+
1848
+ assert(((header_field_mark ? 1 : 0) +
1849
+ (header_value_mark ? 1 : 0) +
1850
+ (url_mark ? 1 : 0) +
1851
+ (body_mark ? 1 : 0)) <= 1);
1852
+
1853
+ CALLBACK_DATA_NOADVANCE(header_field);
1854
+ CALLBACK_DATA_NOADVANCE(header_value);
1855
+ CALLBACK_DATA_NOADVANCE(url);
1856
+ CALLBACK_DATA_NOADVANCE(body);
1857
+
1858
+ return len;
1859
+
1860
+ error:
1861
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1862
+ SET_ERRNO(HPE_UNKNOWN);
1863
+ }
1864
+
1865
+ return (p - data);
1866
+ }
1867
+
1868
+
1869
+ /* Does the parser need to see an EOF to find the end of the message? */
1870
+ int
1871
+ http_message_needs_eof (http_parser *parser)
1872
+ {
1873
+ if (parser->type == HTTP_REQUEST) {
1874
+ return 0;
1875
+ }
1876
+
1877
+ /* See RFC 2616 section 4.4 */
1878
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1879
+ parser->status_code == 204 || /* No Content */
1880
+ parser->status_code == 304 || /* Not Modified */
1881
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1882
+ return 0;
1883
+ }
1884
+
1885
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1886
+ return 0;
1887
+ }
1888
+
1889
+ return 1;
1890
+ }
1891
+
1892
+
1893
+ int
1894
+ http_should_keep_alive (http_parser *parser)
1895
+ {
1896
+ if (parser->http_major > 0 && parser->http_minor > 0) {
1897
+ /* HTTP/1.1 */
1898
+ if (parser->flags & F_CONNECTION_CLOSE) {
1899
+ return 0;
1900
+ }
1901
+ } else {
1902
+ /* HTTP/1.0 or earlier */
1903
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1904
+ return 0;
1905
+ }
1906
+ }
1907
+
1908
+ return !http_message_needs_eof(parser);
1909
+ }
1910
+
1911
+
1912
+ const char * http_method_str (enum http_method m)
1913
+ {
1914
+ return method_strings[m];
1915
+ }
1916
+
1917
+
1918
+ void
1919
+ http_parser_init (http_parser *parser, enum http_parser_type t)
1920
+ {
1921
+ void *data = parser->data; /* preserve application data */
1922
+ memset(parser, 0, sizeof(*parser));
1923
+ parser->data = data;
1924
+ parser->type = t;
1925
+ parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1926
+ parser->http_errno = HPE_OK;
1927
+ }
1928
+
1929
+ const char *
1930
+ http_errno_name(enum http_errno err) {
1931
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1932
+ return http_strerror_tab[err].name;
1933
+ }
1934
+
1935
+ const char *
1936
+ http_errno_description(enum http_errno err) {
1937
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1938
+ return http_strerror_tab[err].description;
1939
+ }
1940
+
1941
+ int
1942
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1943
+ struct http_parser_url *u)
1944
+ {
1945
+ enum state s;
1946
+ const char *p;
1947
+ enum http_parser_url_fields uf, old_uf;
1948
+
1949
+ u->port = u->field_set = 0;
1950
+ s = is_connect ? s_req_host_start : s_req_spaces_before_url;
1951
+ uf = old_uf = UF_MAX;
1952
+
1953
+ for (p = buf; p < buf + buflen; p++) {
1954
+ s = parse_url_char(s, *p);
1955
+
1956
+ /* Figure out the next field that we're operating on */
1957
+ switch (s) {
1958
+ case s_dead:
1959
+ return 1;
1960
+
1961
+ /* Skip delimeters */
1962
+ case s_req_schema_slash:
1963
+ case s_req_schema_slash_slash:
1964
+ case s_req_host_start:
1965
+ case s_req_host_v6_start:
1966
+ case s_req_host_v6_end:
1967
+ case s_req_port_start:
1968
+ case s_req_query_string_start:
1969
+ case s_req_fragment_start:
1970
+ continue;
1971
+
1972
+ case s_req_schema:
1973
+ uf = UF_SCHEMA;
1974
+ break;
1975
+
1976
+ case s_req_host:
1977
+ case s_req_host_v6:
1978
+ uf = UF_HOST;
1979
+ break;
1980
+
1981
+ case s_req_port:
1982
+ uf = UF_PORT;
1983
+ break;
1984
+
1985
+ case s_req_path:
1986
+ uf = UF_PATH;
1987
+ break;
1988
+
1989
+ case s_req_query_string:
1990
+ uf = UF_QUERY;
1991
+ break;
1992
+
1993
+ case s_req_fragment:
1994
+ uf = UF_FRAGMENT;
1995
+ break;
1996
+
1997
+ default:
1998
+ assert(!"Unexpected state");
1999
+ return 1;
2000
+ }
2001
+
2002
+ /* Nothing's changed; soldier on */
2003
+ if (uf == old_uf) {
2004
+ u->field_data[uf].len++;
2005
+ continue;
2006
+ }
2007
+
2008
+ u->field_data[uf].off = p - buf;
2009
+ u->field_data[uf].len = 1;
2010
+
2011
+ u->field_set |= (1 << uf);
2012
+ old_uf = uf;
2013
+ }
2014
+
2015
+ /* CONNECT requests can only contain "hostname:port" */
2016
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2017
+ return 1;
2018
+ }
2019
+
2020
+ /* Make sure we don't end somewhere unexpected */
2021
+ switch (s) {
2022
+ case s_req_host_v6_start:
2023
+ case s_req_host_v6:
2024
+ case s_req_host_v6_end:
2025
+ case s_req_host:
2026
+ case s_req_port_start:
2027
+ return 1;
2028
+ default:
2029
+ break;
2030
+ }
2031
+
2032
+ if (u->field_set & (1 << UF_PORT)) {
2033
+ /* Don't bother with endp; we've already validated the string */
2034
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2035
+
2036
+ /* Ports have a max value of 2^16 */
2037
+ if (v > 0xffff) {
2038
+ return 1;
2039
+ }
2040
+
2041
+ u->port = (uint16_t) v;
2042
+ }
2043
+
2044
+ return 0;
2045
+ }
2046
+
2047
+ void
2048
+ http_parser_pause(http_parser *parser, int paused) {
2049
+ /* Users should only be pausing/unpausing a parser that is not in an error
2050
+ * state. In non-debug builds, there's not much that we can do about this
2051
+ * other than ignore it.
2052
+ */
2053
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2054
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2055
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2056
+ } else {
2057
+ assert(0 && "Attempting to pause parser in error state");
2058
+ }
2059
+ }