http_parser.rb 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,128 @@
1
+ /* Copyright Fedor Indutny. All rights reserved.
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to
5
+ * deal in the Software without restriction, including without limitation the
6
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
+ * sell copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
+ * IN THE SOFTWARE.
20
+ */
21
+ #include "http_parser.h"
22
+ #include <assert.h>
23
+ #include <stdint.h>
24
+ #include <stdio.h>
25
+ #include <string.h>
26
+ #include <sys/time.h>
27
+
28
+ /* 8 gb */
29
+ static const int64_t kBytes = 8LL << 30;
30
+
31
+ static const char data[] =
32
+ "POST /joyent/http-parser HTTP/1.1\r\n"
33
+ "Host: github.com\r\n"
34
+ "DNT: 1\r\n"
35
+ "Accept-Encoding: gzip, deflate, sdch\r\n"
36
+ "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n"
37
+ "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
38
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
39
+ "Chrome/39.0.2171.65 Safari/537.36\r\n"
40
+ "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,"
41
+ "image/webp,*/*;q=0.8\r\n"
42
+ "Referer: https://github.com/joyent/http-parser\r\n"
43
+ "Connection: keep-alive\r\n"
44
+ "Transfer-Encoding: chunked\r\n"
45
+ "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n";
46
+ static const size_t data_len = sizeof(data) - 1;
47
+
48
+ static int on_info(http_parser* p) {
49
+ return 0;
50
+ }
51
+
52
+
53
+ static int on_data(http_parser* p, const char *at, size_t length) {
54
+ return 0;
55
+ }
56
+
57
+ static http_parser_settings settings = {
58
+ .on_message_begin = on_info,
59
+ .on_headers_complete = on_info,
60
+ .on_message_complete = on_info,
61
+ .on_header_field = on_data,
62
+ .on_header_value = on_data,
63
+ .on_url = on_data,
64
+ .on_status = on_data,
65
+ .on_body = on_data
66
+ };
67
+
68
+ int bench(int iter_count, int silent) {
69
+ struct http_parser parser;
70
+ int i;
71
+ int err;
72
+ struct timeval start;
73
+ struct timeval end;
74
+
75
+ if (!silent) {
76
+ err = gettimeofday(&start, NULL);
77
+ assert(err == 0);
78
+ }
79
+
80
+ fprintf(stderr, "req_len=%d\n", (int) data_len);
81
+ for (i = 0; i < iter_count; i++) {
82
+ size_t parsed;
83
+ http_parser_init(&parser, HTTP_REQUEST);
84
+
85
+ parsed = http_parser_execute(&parser, &settings, data, data_len);
86
+ assert(parsed == data_len);
87
+ }
88
+
89
+ if (!silent) {
90
+ double elapsed;
91
+ double bw;
92
+ double total;
93
+
94
+ err = gettimeofday(&end, NULL);
95
+ assert(err == 0);
96
+
97
+ fprintf(stdout, "Benchmark result:\n");
98
+
99
+ elapsed = (double) (end.tv_sec - start.tv_sec) +
100
+ (end.tv_usec - start.tv_usec) * 1e-6f;
101
+
102
+ total = (double) iter_count * data_len;
103
+ bw = (double) total / elapsed;
104
+
105
+ fprintf(stdout, "%.2f mb | %.2f mb/s | %.2f req/sec | %.2f s\n",
106
+ (double) total / (1024 * 1024),
107
+ bw / (1024 * 1024),
108
+ (double) iter_count / elapsed,
109
+ elapsed);
110
+
111
+ fflush(stdout);
112
+ }
113
+
114
+ return 0;
115
+ }
116
+
117
+ int main(int argc, char** argv) {
118
+ int64_t iterations;
119
+
120
+ iterations = kBytes / (int64_t) data_len;
121
+ if (argc == 2 && strcmp(argv[1], "infinite") == 0) {
122
+ for (;;)
123
+ bench(iterations, 1);
124
+ return 0;
125
+ } else {
126
+ return bench(iterations, 0);
127
+ }
128
+ }
@@ -0,0 +1,157 @@
1
+ /* Copyright Joyent, Inc. and other Node contributors.
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to
5
+ * deal in the Software without restriction, including without limitation the
6
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
+ * sell copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
+ * IN THE SOFTWARE.
20
+ */
21
+
22
+ /* Dump what the parser finds to stdout as it happens */
23
+
24
+ #include "http_parser.h"
25
+ #include <stdio.h>
26
+ #include <stdlib.h>
27
+ #include <string.h>
28
+
29
+ int on_message_begin(http_parser* _) {
30
+ (void)_;
31
+ printf("\n***MESSAGE BEGIN***\n\n");
32
+ return 0;
33
+ }
34
+
35
+ int on_headers_complete(http_parser* _) {
36
+ (void)_;
37
+ printf("\n***HEADERS COMPLETE***\n\n");
38
+ return 0;
39
+ }
40
+
41
+ int on_message_complete(http_parser* _) {
42
+ (void)_;
43
+ printf("\n***MESSAGE COMPLETE***\n\n");
44
+ return 0;
45
+ }
46
+
47
+ int on_url(http_parser* _, const char* at, size_t length) {
48
+ (void)_;
49
+ printf("Url: %.*s\n", (int)length, at);
50
+ return 0;
51
+ }
52
+
53
+ int on_header_field(http_parser* _, const char* at, size_t length) {
54
+ (void)_;
55
+ printf("Header field: %.*s\n", (int)length, at);
56
+ return 0;
57
+ }
58
+
59
+ int on_header_value(http_parser* _, const char* at, size_t length) {
60
+ (void)_;
61
+ printf("Header value: %.*s\n", (int)length, at);
62
+ return 0;
63
+ }
64
+
65
+ int on_body(http_parser* _, const char* at, size_t length) {
66
+ (void)_;
67
+ printf("Body: %.*s\n", (int)length, at);
68
+ return 0;
69
+ }
70
+
71
+ void usage(const char* name) {
72
+ fprintf(stderr,
73
+ "Usage: %s $type $filename\n"
74
+ " type: -x, where x is one of {r,b,q}\n"
75
+ " parses file as a Response, reQuest, or Both\n",
76
+ name);
77
+ exit(EXIT_FAILURE);
78
+ }
79
+
80
+ int main(int argc, char* argv[]) {
81
+ enum http_parser_type file_type;
82
+
83
+ if (argc != 3) {
84
+ usage(argv[0]);
85
+ }
86
+
87
+ char* type = argv[1];
88
+ if (type[0] != '-') {
89
+ usage(argv[0]);
90
+ }
91
+
92
+ switch (type[1]) {
93
+ /* in the case of "-", type[1] will be NUL */
94
+ case 'r':
95
+ file_type = HTTP_RESPONSE;
96
+ break;
97
+ case 'q':
98
+ file_type = HTTP_REQUEST;
99
+ break;
100
+ case 'b':
101
+ file_type = HTTP_BOTH;
102
+ break;
103
+ default:
104
+ usage(argv[0]);
105
+ }
106
+
107
+ char* filename = argv[2];
108
+ FILE* file = fopen(filename, "r");
109
+ if (file == NULL) {
110
+ perror("fopen");
111
+ goto fail;
112
+ }
113
+
114
+ fseek(file, 0, SEEK_END);
115
+ long file_length = ftell(file);
116
+ if (file_length == -1) {
117
+ perror("ftell");
118
+ goto fail;
119
+ }
120
+ fseek(file, 0, SEEK_SET);
121
+
122
+ char* data = malloc(file_length);
123
+ if (fread(data, 1, file_length, file) != (size_t)file_length) {
124
+ fprintf(stderr, "couldn't read entire file\n");
125
+ free(data);
126
+ goto fail;
127
+ }
128
+
129
+ http_parser_settings settings;
130
+ memset(&settings, 0, sizeof(settings));
131
+ settings.on_message_begin = on_message_begin;
132
+ settings.on_url = on_url;
133
+ settings.on_header_field = on_header_field;
134
+ settings.on_header_value = on_header_value;
135
+ settings.on_headers_complete = on_headers_complete;
136
+ settings.on_body = on_body;
137
+ settings.on_message_complete = on_message_complete;
138
+
139
+ http_parser parser;
140
+ http_parser_init(&parser, file_type);
141
+ size_t nparsed = http_parser_execute(&parser, &settings, data, file_length);
142
+ free(data);
143
+
144
+ if (nparsed != (size_t)file_length) {
145
+ fprintf(stderr,
146
+ "Error: %s (%s)\n",
147
+ http_errno_description(HTTP_PARSER_ERRNO(&parser)),
148
+ http_errno_name(HTTP_PARSER_ERRNO(&parser)));
149
+ goto fail;
150
+ }
151
+
152
+ return EXIT_SUCCESS;
153
+
154
+ fail:
155
+ fclose(file);
156
+ return EXIT_FAILURE;
157
+ }
@@ -0,0 +1,47 @@
1
+ #include "http_parser.h"
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ void
6
+ dump_url (const char *url, const struct http_parser_url *u)
7
+ {
8
+ unsigned int i;
9
+
10
+ printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port);
11
+ for (i = 0; i < UF_MAX; i++) {
12
+ if ((u->field_set & (1 << i)) == 0) {
13
+ printf("\tfield_data[%u]: unset\n", i);
14
+ continue;
15
+ }
16
+
17
+ printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n",
18
+ i,
19
+ u->field_data[i].off,
20
+ u->field_data[i].len,
21
+ u->field_data[i].len,
22
+ url + u->field_data[i].off);
23
+ }
24
+ }
25
+
26
+ int main(int argc, char ** argv) {
27
+ struct http_parser_url u;
28
+ int len, connect, result;
29
+
30
+ if (argc != 3) {
31
+ printf("Syntax : %s connect|get url\n", argv[0]);
32
+ return 1;
33
+ }
34
+ len = strlen(argv[2]);
35
+ connect = strcmp("connect", argv[1]) == 0 ? 1 : 0;
36
+ printf("Parsing %s, connect %d\n", argv[2], connect);
37
+
38
+ http_parser_url_init(&u);
39
+ result = http_parser_parse_url(argv[2], len, connect, &u);
40
+ if (result != 0) {
41
+ printf("Parse error : %d\n", result);
42
+ return result;
43
+ }
44
+ printf("Parse ok, result : \n");
45
+ dump_url(argv[2], &u);
46
+ return 0;
47
+ }
@@ -1,7 +1,4 @@
1
- /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
- *
3
- * Additional changes are licensed under the same terms as NGINX and
4
- * copyright Joyent, Inc. and other Node contributors. All rights reserved.
1
+ /* Copyright Joyent, Inc. and other Node contributors.
5
2
  *
6
3
  * Permission is hereby granted, free of charge, to any person obtaining a copy
7
4
  * of this software and associated documentation files (the "Software"), to
@@ -25,7 +22,6 @@
25
22
  #include <assert.h>
26
23
  #include <stddef.h>
27
24
  #include <ctype.h>
28
- #include <stdlib.h>
29
25
  #include <string.h>
30
26
  #include <limits.h>
31
27
 
@@ -37,18 +33,42 @@
37
33
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
34
  #endif
39
35
 
36
+ #ifndef ARRAY_SIZE
37
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
38
+ #endif
39
+
40
+ #ifndef BIT_AT
41
+ # define BIT_AT(a, i) \
42
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
43
+ (1 << ((unsigned int) (i) & 7))))
44
+ #endif
45
+
46
+ #ifndef ELEM_AT
47
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
48
+ #endif
40
49
 
41
- #if HTTP_PARSER_DEBUG
42
- #define SET_ERRNO(e) \
43
- do { \
44
- parser->http_errno = (e); \
45
- parser->error_lineno = __LINE__; \
46
- } while (0)
47
- #else
48
50
  #define SET_ERRNO(e) \
49
51
  do { \
50
52
  parser->http_errno = (e); \
51
53
  } while(0)
54
+
55
+ #define CURRENT_STATE() p_state
56
+ #define UPDATE_STATE(V) p_state = (enum state) (V);
57
+ #define RETURN(V) \
58
+ do { \
59
+ parser->state = CURRENT_STATE(); \
60
+ return (V); \
61
+ } while (0);
62
+ #define REEXECUTE() \
63
+ goto reexecute; \
64
+
65
+
66
+ #ifdef __GNUC__
67
+ # define LIKELY(X) __builtin_expect(!!(X), 1)
68
+ # define UNLIKELY(X) __builtin_expect(!!(X), 0)
69
+ #else
70
+ # define LIKELY(X) (X)
71
+ # define UNLIKELY(X) (X)
52
72
  #endif
53
73
 
54
74
 
@@ -57,13 +77,15 @@ do { \
57
77
  do { \
58
78
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
59
79
  \
60
- if (settings->on_##FOR) { \
61
- if (0 != settings->on_##FOR(parser)) { \
80
+ if (LIKELY(settings->on_##FOR)) { \
81
+ parser->state = CURRENT_STATE(); \
82
+ if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
62
83
  SET_ERRNO(HPE_CB_##FOR); \
63
84
  } \
85
+ UPDATE_STATE(parser->state); \
64
86
  \
65
87
  /* We either errored above or got paused; get out */ \
66
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
88
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
67
89
  return (ER); \
68
90
  } \
69
91
  } \
@@ -81,20 +103,23 @@ do { \
81
103
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
82
104
  \
83
105
  if (FOR##_mark) { \
84
- if (settings->on_##FOR) { \
85
- if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
106
+ if (LIKELY(settings->on_##FOR)) { \
107
+ parser->state = CURRENT_STATE(); \
108
+ if (UNLIKELY(0 != \
109
+ settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
86
110
  SET_ERRNO(HPE_CB_##FOR); \
87
111
  } \
112
+ UPDATE_STATE(parser->state); \
88
113
  \
89
114
  /* We either errored above or got paused; get out */ \
90
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
115
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
91
116
  return (ER); \
92
117
  } \
93
118
  } \
94
119
  FOR##_mark = NULL; \
95
120
  } \
96
121
  } while (0)
97
-
122
+
98
123
  /* Run the data callback FOR and consume the current byte */
99
124
  #define CALLBACK_DATA(FOR) \
100
125
  CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
@@ -111,6 +136,26 @@ do { \
111
136
  } \
112
137
  } while (0)
113
138
 
139
+ /* Don't allow the total size of the HTTP headers (including the status
140
+ * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
141
+ * embedders against denial-of-service attacks where the attacker feeds
142
+ * us a never-ending header that the embedder keeps buffering.
143
+ *
144
+ * This check is arguably the responsibility of embedders but we're doing
145
+ * it on the embedder's behalf because most won't bother and this way we
146
+ * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
147
+ * than any reasonable request or response so this should never affect
148
+ * day-to-day operation.
149
+ */
150
+ #define COUNT_HEADER_SIZE(V) \
151
+ do { \
152
+ parser->nread += (V); \
153
+ if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \
154
+ SET_ERRNO(HPE_HEADER_OVERFLOW); \
155
+ goto error; \
156
+ } \
157
+ } while (0)
158
+
114
159
 
115
160
  #define PROXY_CONNECTION "proxy-connection"
116
161
  #define CONNECTION "connection"
@@ -123,31 +168,10 @@ do { \
123
168
 
124
169
 
125
170
  static const char *method_strings[] =
126
- { "DELETE"
127
- , "GET"
128
- , "HEAD"
129
- , "POST"
130
- , "PUT"
131
- , "CONNECT"
132
- , "OPTIONS"
133
- , "TRACE"
134
- , "COPY"
135
- , "LOCK"
136
- , "MKCOL"
137
- , "MOVE"
138
- , "PROPFIND"
139
- , "PROPPATCH"
140
- , "UNLOCK"
141
- , "REPORT"
142
- , "MKACTIVITY"
143
- , "CHECKOUT"
144
- , "MERGE"
145
- , "M-SEARCH"
146
- , "NOTIFY"
147
- , "SUBSCRIBE"
148
- , "UNSUBSCRIBE"
149
- , "PATCH"
150
- , "PURGE"
171
+ {
172
+ #define XX(num, name, string) #string,
173
+ HTTP_METHOD_MAP(XX)
174
+ #undef XX
151
175
  };
152
176
 
153
177
 
@@ -205,40 +229,48 @@ static const int8_t unhex[256] =
205
229
  };
206
230
 
207
231
 
208
- static const uint8_t normal_url_char[256] = {
232
+ #if HTTP_PARSER_STRICT
233
+ # define T(v) 0
234
+ #else
235
+ # define T(v) v
236
+ #endif
237
+
238
+
239
+ static const uint8_t normal_url_char[32] = {
209
240
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
210
- 0, 0, 0, 0, 0, 0, 0, 0,
241
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
211
242
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
212
- 0, 0, 0, 0, 0, 0, 0, 0,
243
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
213
244
  /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
214
- 0, 0, 0, 0, 0, 0, 0, 0,
245
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
215
246
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
216
- 0, 0, 0, 0, 0, 0, 0, 0,
247
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
217
248
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
218
- 0, 1, 1, 0, 1, 1, 1, 1,
249
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
219
250
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
220
- 1, 1, 1, 1, 1, 1, 1, 1,
251
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
221
252
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
222
- 1, 1, 1, 1, 1, 1, 1, 1,
253
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
223
254
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
224
- 1, 1, 1, 1, 1, 1, 1, 0,
255
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
225
256
  /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
226
- 1, 1, 1, 1, 1, 1, 1, 1,
257
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
227
258
  /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
228
- 1, 1, 1, 1, 1, 1, 1, 1,
259
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
229
260
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
230
- 1, 1, 1, 1, 1, 1, 1, 1,
261
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
231
262
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
232
- 1, 1, 1, 1, 1, 1, 1, 1,
263
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
233
264
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
234
- 1, 1, 1, 1, 1, 1, 1, 1,
265
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
235
266
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
236
- 1, 1, 1, 1, 1, 1, 1, 1,
267
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
237
268
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
238
- 1, 1, 1, 1, 1, 1, 1, 1,
269
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
239
270
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
240
- 1, 1, 1, 1, 1, 1, 1, 0, };
271
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
241
272
 
273
+ #undef T
242
274
 
243
275
  enum state
244
276
  { s_dead = 1 /* important that this is > 0 */
@@ -250,12 +282,13 @@ enum state
250
282
  , s_res_HT
251
283
  , s_res_HTT
252
284
  , s_res_HTTP
253
- , s_res_first_http_major
254
285
  , s_res_http_major
255
- , s_res_first_http_minor
286
+ , s_res_http_dot
256
287
  , s_res_http_minor
288
+ , s_res_http_end
257
289
  , s_res_first_status_code
258
290
  , s_res_status_code
291
+ , s_res_status_start
259
292
  , s_res_status
260
293
  , s_res_line_almost_done
261
294
 
@@ -266,13 +299,9 @@ enum state
266
299
  , s_req_schema
267
300
  , s_req_schema_slash
268
301
  , s_req_schema_slash_slash
269
- , s_req_host_start
270
- , s_req_host_v6_start
271
- , s_req_host_v6
272
- , s_req_host_v6_end
273
- , s_req_host
274
- , s_req_port_start
275
- , s_req_port
302
+ , s_req_server_start
303
+ , s_req_server
304
+ , s_req_server_with_at
276
305
  , s_req_path
277
306
  , s_req_query_string_start
278
307
  , s_req_query_string
@@ -283,14 +312,17 @@ enum state
283
312
  , s_req_http_HT
284
313
  , s_req_http_HTT
285
314
  , s_req_http_HTTP
286
- , s_req_first_http_major
287
315
  , s_req_http_major
288
- , s_req_first_http_minor
316
+ , s_req_http_dot
289
317
  , s_req_http_minor
318
+ , s_req_http_end
290
319
  , s_req_line_almost_done
291
320
 
292
321
  , s_header_field_start
293
322
  , s_header_field
323
+ , s_header_value_discard_ws
324
+ , s_header_value_discard_ws_almost_done
325
+ , s_header_value_discard_lws
294
326
  , s_header_value_start
295
327
  , s_header_value
296
328
  , s_header_value_lws
@@ -338,18 +370,39 @@ enum header_states
338
370
 
339
371
  , h_connection
340
372
  , h_content_length
373
+ , h_content_length_num
374
+ , h_content_length_ws
341
375
  , h_transfer_encoding
342
376
  , h_upgrade
343
377
 
344
378
  , h_matching_transfer_encoding_chunked
379
+ , h_matching_connection_token_start
345
380
  , h_matching_connection_keep_alive
346
381
  , h_matching_connection_close
382
+ , h_matching_connection_upgrade
383
+ , h_matching_connection_token
347
384
 
348
385
  , h_transfer_encoding_chunked
349
386
  , h_connection_keep_alive
350
387
  , h_connection_close
388
+ , h_connection_upgrade
351
389
  };
352
390
 
391
+ enum http_host_state
392
+ {
393
+ s_http_host_dead = 1
394
+ , s_http_userinfo_start
395
+ , s_http_userinfo
396
+ , s_http_host_start
397
+ , s_http_host_v6_start
398
+ , s_http_host
399
+ , s_http_host_v6
400
+ , s_http_host_v6_end
401
+ , s_http_host_v6_zone_start
402
+ , s_http_host_v6_zone
403
+ , s_http_host_port_start
404
+ , s_http_host_port
405
+ };
353
406
 
354
407
  /* Macros for character classes; depends on strict-mode */
355
408
  #define CR '\r'
@@ -359,19 +412,33 @@ enum header_states
359
412
  #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
360
413
  #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
361
414
  #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
415
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
416
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
417
+ (c) == ')')
418
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
419
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
420
+ (c) == '$' || (c) == ',')
421
+
422
+ #define STRICT_TOKEN(c) (tokens[(unsigned char)c])
362
423
 
363
424
  #if HTTP_PARSER_STRICT
364
425
  #define TOKEN(c) (tokens[(unsigned char)c])
365
- #define IS_URL_CHAR(c) (normal_url_char[(unsigned char) (c)])
426
+ #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
366
427
  #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
367
428
  #else
368
429
  #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
369
430
  #define IS_URL_CHAR(c) \
370
- (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
431
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
371
432
  #define IS_HOST_CHAR(c) \
372
433
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
373
434
  #endif
374
435
 
436
+ /**
437
+ * Verify that a char is CR, LF, HT, or a printable US-ASCII character
438
+ * (space included, DEL excluded), or any byte in %x80-FF
439
+ **/
440
+ #define IS_HEADER_CHAR(ch) \
441
+ (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
375
442
 
376
443
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
377
444
 
@@ -401,7 +468,7 @@ static struct {
401
468
  };
402
469
  #undef HTTP_STRERROR_GEN
403
470
 
404
- int http_message_needs_eof(http_parser *parser);
471
+ int http_message_needs_eof(const http_parser *parser);
405
472
 
406
473
  /* Our URL parser.
407
474
  *
@@ -417,7 +484,15 @@ int http_message_needs_eof(http_parser *parser);
417
484
  static enum state
418
485
  parse_url_char(enum state s, const char ch)
419
486
  {
420
- assert(!isspace(ch));
487
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
488
+ return s_dead;
489
+ }
490
+
491
+ #if HTTP_PARSER_STRICT
492
+ if (ch == '\t' || ch == '\f') {
493
+ return s_dead;
494
+ }
495
+ #endif
421
496
 
422
497
  switch (s) {
423
498
  case s_req_spaces_before_url:
@@ -455,67 +530,33 @@ parse_url_char(enum state s, const char ch)
455
530
 
456
531
  case s_req_schema_slash_slash:
457
532
  if (ch == '/') {
458
- return s_req_host_start;
533
+ return s_req_server_start;
459
534
  }
460
535
 
461
536
  break;
462
537
 
463
- case s_req_host_start:
464
- if (ch == '[') {
465
- return s_req_host_v6_start;
466
- }
467
-
468
- if (IS_HOST_CHAR(ch)) {
469
- return s_req_host;
470
- }
471
-
472
- break;
473
-
474
- case s_req_host:
475
- if (IS_HOST_CHAR(ch)) {
476
- return s_req_host;
477
- }
478
-
479
- /* FALLTHROUGH */
480
- case s_req_host_v6_end:
481
- switch (ch) {
482
- case ':':
483
- return s_req_port_start;
484
-
485
- case '/':
486
- return s_req_path;
487
-
488
- case '?':
489
- return s_req_query_string_start;
538
+ case s_req_server_with_at:
539
+ if (ch == '@') {
540
+ return s_dead;
490
541
  }
491
542
 
492
- break;
493
-
494
- case s_req_host_v6:
495
- if (ch == ']') {
496
- return s_req_host_v6_end;
543
+ /* FALLTHROUGH */
544
+ case s_req_server_start:
545
+ case s_req_server:
546
+ if (ch == '/') {
547
+ return s_req_path;
497
548
  }
498
549
 
499
- /* FALLTHROUGH */
500
- case s_req_host_v6_start:
501
- if (IS_HEX(ch) || ch == ':') {
502
- return s_req_host_v6;
550
+ if (ch == '?') {
551
+ return s_req_query_string_start;
503
552
  }
504
- break;
505
553
 
506
- case s_req_port:
507
- switch (ch) {
508
- case '/':
509
- return s_req_path;
510
-
511
- case '?':
512
- return s_req_query_string_start;
554
+ if (ch == '@') {
555
+ return s_req_server_with_at;
513
556
  }
514
557
 
515
- /* FALLTHROUGH */
516
- case s_req_port_start:
517
- if (IS_NUM(ch)) {
518
- return s_req_port;
558
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
559
+ return s_req_server;
519
560
  }
520
561
 
521
562
  break;
@@ -600,6 +641,9 @@ size_t http_parser_execute (http_parser *parser,
600
641
  const char *header_value_mark = 0;
601
642
  const char *url_mark = 0;
602
643
  const char *body_mark = 0;
644
+ const char *status_mark = 0;
645
+ enum state p_state = (enum state) parser->state;
646
+ const unsigned int lenient = parser->lenient_http_headers;
603
647
 
604
648
  /* We're in an error state. Don't bother doing anything. */
605
649
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
@@ -607,7 +651,7 @@ size_t http_parser_execute (http_parser *parser,
607
651
  }
608
652
 
609
653
  if (len == 0) {
610
- switch (parser->state) {
654
+ switch (CURRENT_STATE()) {
611
655
  case s_body_identity_eof:
612
656
  /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
613
657
  * we got paused.
@@ -628,50 +672,45 @@ size_t http_parser_execute (http_parser *parser,
628
672
  }
629
673
 
630
674
 
631
- if (parser->state == s_header_field)
675
+ if (CURRENT_STATE() == s_header_field)
632
676
  header_field_mark = data;
633
- if (parser->state == s_header_value)
677
+ if (CURRENT_STATE() == s_header_value)
634
678
  header_value_mark = data;
635
- switch (parser->state) {
679
+ switch (CURRENT_STATE()) {
636
680
  case s_req_path:
637
681
  case s_req_schema:
638
682
  case s_req_schema_slash:
639
683
  case s_req_schema_slash_slash:
640
- case s_req_host_start:
641
- case s_req_host_v6_start:
642
- case s_req_host_v6:
643
- case s_req_host_v6_end:
644
- case s_req_host:
645
- case s_req_port_start:
646
- case s_req_port:
684
+ case s_req_server_start:
685
+ case s_req_server:
686
+ case s_req_server_with_at:
647
687
  case s_req_query_string_start:
648
688
  case s_req_query_string:
649
689
  case s_req_fragment_start:
650
690
  case s_req_fragment:
651
691
  url_mark = data;
652
692
  break;
693
+ case s_res_status:
694
+ status_mark = data;
695
+ break;
696
+ default:
697
+ break;
653
698
  }
654
699
 
655
700
  for (p=data; p != data + len; p++) {
656
701
  ch = *p;
657
702
 
658
- if (PARSING_HEADER(parser->state)) {
659
- ++parser->nread;
660
- /* Buffer overflow attack */
661
- if (parser->nread > HTTP_MAX_HEADER_SIZE) {
662
- SET_ERRNO(HPE_HEADER_OVERFLOW);
663
- goto error;
664
- }
665
- }
703
+ if (PARSING_HEADER(CURRENT_STATE()))
704
+ COUNT_HEADER_SIZE(1);
666
705
 
667
- reexecute_byte:
668
- switch (parser->state) {
706
+ reexecute:
707
+ switch (CURRENT_STATE()) {
669
708
 
670
709
  case s_dead:
671
710
  /* this state is used after a 'Connection: close' message
672
711
  * the parser will error out if it reads another message
673
712
  */
674
- if (ch == CR || ch == LF)
713
+ if (LIKELY(ch == CR || ch == LF))
675
714
  break;
676
715
 
677
716
  SET_ERRNO(HPE_CLOSED_CONNECTION);
@@ -685,13 +724,13 @@ size_t http_parser_execute (http_parser *parser,
685
724
  parser->content_length = ULLONG_MAX;
686
725
 
687
726
  if (ch == 'H') {
688
- parser->state = s_res_or_resp_H;
727
+ UPDATE_STATE(s_res_or_resp_H);
689
728
 
690
729
  CALLBACK_NOTIFY(message_begin);
691
730
  } else {
692
731
  parser->type = HTTP_REQUEST;
693
- parser->state = s_start_req;
694
- goto reexecute_byte;
732
+ UPDATE_STATE(s_start_req);
733
+ REEXECUTE();
695
734
  }
696
735
 
697
736
  break;
@@ -700,9 +739,9 @@ size_t http_parser_execute (http_parser *parser,
700
739
  case s_res_or_resp_H:
701
740
  if (ch == 'T') {
702
741
  parser->type = HTTP_RESPONSE;
703
- parser->state = s_res_HT;
742
+ UPDATE_STATE(s_res_HT);
704
743
  } else {
705
- if (ch != 'E') {
744
+ if (UNLIKELY(ch != 'E')) {
706
745
  SET_ERRNO(HPE_INVALID_CONSTANT);
707
746
  goto error;
708
747
  }
@@ -710,7 +749,7 @@ size_t http_parser_execute (http_parser *parser,
710
749
  parser->type = HTTP_REQUEST;
711
750
  parser->method = HTTP_HEAD;
712
751
  parser->index = 2;
713
- parser->state = s_req_method;
752
+ UPDATE_STATE(s_req_method);
714
753
  }
715
754
  break;
716
755
 
@@ -721,7 +760,7 @@ size_t http_parser_execute (http_parser *parser,
721
760
 
722
761
  switch (ch) {
723
762
  case 'H':
724
- parser->state = s_res_H;
763
+ UPDATE_STATE(s_res_H);
725
764
  break;
726
765
 
727
766
  case CR:
@@ -739,90 +778,63 @@ size_t http_parser_execute (http_parser *parser,
739
778
 
740
779
  case s_res_H:
741
780
  STRICT_CHECK(ch != 'T');
742
- parser->state = s_res_HT;
781
+ UPDATE_STATE(s_res_HT);
743
782
  break;
744
783
 
745
784
  case s_res_HT:
746
785
  STRICT_CHECK(ch != 'T');
747
- parser->state = s_res_HTT;
786
+ UPDATE_STATE(s_res_HTT);
748
787
  break;
749
788
 
750
789
  case s_res_HTT:
751
790
  STRICT_CHECK(ch != 'P');
752
- parser->state = s_res_HTTP;
791
+ UPDATE_STATE(s_res_HTTP);
753
792
  break;
754
793
 
755
794
  case s_res_HTTP:
756
795
  STRICT_CHECK(ch != '/');
757
- parser->state = s_res_first_http_major;
796
+ UPDATE_STATE(s_res_http_major);
758
797
  break;
759
798
 
760
- case s_res_first_http_major:
761
- if (ch < '0' || ch > '9') {
799
+ case s_res_http_major:
800
+ if (UNLIKELY(!IS_NUM(ch))) {
762
801
  SET_ERRNO(HPE_INVALID_VERSION);
763
802
  goto error;
764
803
  }
765
804
 
766
805
  parser->http_major = ch - '0';
767
- parser->state = s_res_http_major;
806
+ UPDATE_STATE(s_res_http_dot);
768
807
  break;
769
808
 
770
- /* major HTTP version or dot */
771
- case s_res_http_major:
809
+ case s_res_http_dot:
772
810
  {
773
- if (ch == '.') {
774
- parser->state = s_res_first_http_minor;
775
- break;
776
- }
777
-
778
- if (!IS_NUM(ch)) {
779
- SET_ERRNO(HPE_INVALID_VERSION);
780
- goto error;
781
- }
782
-
783
- parser->http_major *= 10;
784
- parser->http_major += ch - '0';
785
-
786
- if (parser->http_major > 999) {
811
+ if (UNLIKELY(ch != '.')) {
787
812
  SET_ERRNO(HPE_INVALID_VERSION);
788
813
  goto error;
789
814
  }
790
815
 
816
+ UPDATE_STATE(s_res_http_minor);
791
817
  break;
792
818
  }
793
819
 
794
- /* first digit of minor HTTP version */
795
- case s_res_first_http_minor:
796
- if (!IS_NUM(ch)) {
820
+ case s_res_http_minor:
821
+ if (UNLIKELY(!IS_NUM(ch))) {
797
822
  SET_ERRNO(HPE_INVALID_VERSION);
798
823
  goto error;
799
824
  }
800
825
 
801
826
  parser->http_minor = ch - '0';
802
- parser->state = s_res_http_minor;
827
+ UPDATE_STATE(s_res_http_end);
803
828
  break;
804
829
 
805
- /* minor HTTP version or end of request line */
806
- case s_res_http_minor:
830
+ case s_res_http_end:
807
831
  {
808
- if (ch == ' ') {
809
- parser->state = s_res_first_status_code;
810
- break;
811
- }
812
-
813
- if (!IS_NUM(ch)) {
814
- SET_ERRNO(HPE_INVALID_VERSION);
815
- goto error;
816
- }
817
-
818
- parser->http_minor *= 10;
819
- parser->http_minor += ch - '0';
820
-
821
- if (parser->http_minor > 999) {
832
+ if (UNLIKELY(ch != ' ')) {
822
833
  SET_ERRNO(HPE_INVALID_VERSION);
823
834
  goto error;
824
835
  }
825
836
 
837
+ UPDATE_STATE(s_res_first_status_code);
826
838
  break;
827
839
  }
828
840
 
@@ -837,7 +849,7 @@ size_t http_parser_execute (http_parser *parser,
837
849
  goto error;
838
850
  }
839
851
  parser->status_code = ch - '0';
840
- parser->state = s_res_status_code;
852
+ UPDATE_STATE(s_res_status_code);
841
853
  break;
842
854
  }
843
855
 
@@ -846,13 +858,12 @@ size_t http_parser_execute (http_parser *parser,
846
858
  if (!IS_NUM(ch)) {
847
859
  switch (ch) {
848
860
  case ' ':
849
- parser->state = s_res_status;
861
+ UPDATE_STATE(s_res_status_start);
850
862
  break;
851
863
  case CR:
852
- parser->state = s_res_line_almost_done;
853
- break;
854
864
  case LF:
855
- parser->state = s_header_field_start;
865
+ UPDATE_STATE(s_res_status_start);
866
+ REEXECUTE();
856
867
  break;
857
868
  default:
858
869
  SET_ERRNO(HPE_INVALID_STATUS);
@@ -864,7 +875,7 @@ size_t http_parser_execute (http_parser *parser,
864
875
  parser->status_code *= 10;
865
876
  parser->status_code += ch - '0';
866
877
 
867
- if (parser->status_code > 999) {
878
+ if (UNLIKELY(parser->status_code > 999)) {
868
879
  SET_ERRNO(HPE_INVALID_STATUS);
869
880
  goto error;
870
881
  }
@@ -872,23 +883,36 @@ size_t http_parser_execute (http_parser *parser,
872
883
  break;
873
884
  }
874
885
 
886
+ case s_res_status_start:
887
+ {
888
+ MARK(status);
889
+ UPDATE_STATE(s_res_status);
890
+ parser->index = 0;
891
+
892
+ if (ch == CR || ch == LF)
893
+ REEXECUTE();
894
+
895
+ break;
896
+ }
897
+
875
898
  case s_res_status:
876
- /* the human readable status. e.g. "NOT FOUND"
877
- * we are not humans so just ignore this */
878
899
  if (ch == CR) {
879
- parser->state = s_res_line_almost_done;
900
+ UPDATE_STATE(s_res_line_almost_done);
901
+ CALLBACK_DATA(status);
880
902
  break;
881
903
  }
882
904
 
883
905
  if (ch == LF) {
884
- parser->state = s_header_field_start;
906
+ UPDATE_STATE(s_header_field_start);
907
+ CALLBACK_DATA(status);
885
908
  break;
886
909
  }
910
+
887
911
  break;
888
912
 
889
913
  case s_res_line_almost_done:
890
914
  STRICT_CHECK(ch != LF);
891
- parser->state = s_header_field_start;
915
+ UPDATE_STATE(s_header_field_start);
892
916
  break;
893
917
 
894
918
  case s_start_req:
@@ -898,7 +922,7 @@ size_t http_parser_execute (http_parser *parser,
898
922
  parser->flags = 0;
899
923
  parser->content_length = ULLONG_MAX;
900
924
 
901
- if (!IS_ALPHA(ch)) {
925
+ if (UNLIKELY(!IS_ALPHA(ch))) {
902
926
  SET_ERRNO(HPE_INVALID_METHOD);
903
927
  goto error;
904
928
  }
@@ -906,26 +930,28 @@ size_t http_parser_execute (http_parser *parser,
906
930
  parser->method = (enum http_method) 0;
907
931
  parser->index = 1;
908
932
  switch (ch) {
933
+ case 'A': parser->method = HTTP_ACL; break;
934
+ case 'B': parser->method = HTTP_BIND; break;
909
935
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
910
936
  case 'D': parser->method = HTTP_DELETE; break;
911
937
  case 'G': parser->method = HTTP_GET; break;
912
938
  case 'H': parser->method = HTTP_HEAD; break;
913
- case 'L': parser->method = HTTP_LOCK; break;
914
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
939
+ case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
940
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
915
941
  case 'N': parser->method = HTTP_NOTIFY; break;
916
942
  case 'O': parser->method = HTTP_OPTIONS; break;
917
943
  case 'P': parser->method = HTTP_POST;
918
944
  /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
919
945
  break;
920
- case 'R': parser->method = HTTP_REPORT; break;
921
- case 'S': parser->method = HTTP_SUBSCRIBE; break;
946
+ case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
947
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
922
948
  case 'T': parser->method = HTTP_TRACE; break;
923
- case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
949
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
924
950
  default:
925
951
  SET_ERRNO(HPE_INVALID_METHOD);
926
952
  goto error;
927
953
  }
928
- parser->state = s_req_method;
954
+ UPDATE_STATE(s_req_method);
929
955
 
930
956
  CALLBACK_NOTIFY(message_begin);
931
957
 
@@ -935,54 +961,47 @@ size_t http_parser_execute (http_parser *parser,
935
961
  case s_req_method:
936
962
  {
937
963
  const char *matcher;
938
- if (ch == '\0') {
964
+ if (UNLIKELY(ch == '\0')) {
939
965
  SET_ERRNO(HPE_INVALID_METHOD);
940
966
  goto error;
941
967
  }
942
968
 
943
969
  matcher = method_strings[parser->method];
944
970
  if (ch == ' ' && matcher[parser->index] == '\0') {
945
- parser->state = s_req_spaces_before_url;
971
+ UPDATE_STATE(s_req_spaces_before_url);
946
972
  } else if (ch == matcher[parser->index]) {
947
973
  ; /* nada */
948
- } else if (parser->method == HTTP_CONNECT) {
949
- if (parser->index == 1 && ch == 'H') {
950
- parser->method = HTTP_CHECKOUT;
951
- } else if (parser->index == 2 && ch == 'P') {
952
- parser->method = HTTP_COPY;
953
- } else {
954
- goto error;
955
- }
956
- } else if (parser->method == HTTP_MKCOL) {
957
- if (parser->index == 1 && ch == 'O') {
958
- parser->method = HTTP_MOVE;
959
- } else if (parser->index == 1 && ch == 'E') {
960
- parser->method = HTTP_MERGE;
961
- } else if (parser->index == 1 && ch == '-') {
962
- parser->method = HTTP_MSEARCH;
963
- } else if (parser->index == 2 && ch == 'A') {
964
- parser->method = HTTP_MKACTIVITY;
965
- } else {
966
- goto error;
967
- }
968
- } else if (parser->index == 1 && parser->method == HTTP_POST) {
969
- if (ch == 'R') {
970
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
971
- } else if (ch == 'U') {
972
- parser->method = HTTP_PUT; /* or HTTP_PURGE */
973
- } else if (ch == 'A') {
974
- parser->method = HTTP_PATCH;
975
- } else {
976
- goto error;
977
- }
978
- } else if (parser->index == 2) {
979
- if (parser->method == HTTP_PUT) {
980
- if (ch == 'R') parser->method = HTTP_PURGE;
981
- } else if (parser->method == HTTP_UNLOCK) {
982
- if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
974
+ } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
975
+
976
+ switch (parser->method << 16 | parser->index << 8 | ch) {
977
+ #define XX(meth, pos, ch, new_meth) \
978
+ case (HTTP_##meth << 16 | pos << 8 | ch): \
979
+ parser->method = HTTP_##new_meth; break;
980
+
981
+ XX(POST, 1, 'U', PUT)
982
+ XX(POST, 1, 'A', PATCH)
983
+ XX(POST, 1, 'R', PROPFIND)
984
+ XX(PUT, 2, 'R', PURGE)
985
+ XX(CONNECT, 1, 'H', CHECKOUT)
986
+ XX(CONNECT, 2, 'P', COPY)
987
+ XX(MKCOL, 1, 'O', MOVE)
988
+ XX(MKCOL, 1, 'E', MERGE)
989
+ XX(MKCOL, 1, '-', MSEARCH)
990
+ XX(MKCOL, 2, 'A', MKACTIVITY)
991
+ XX(MKCOL, 3, 'A', MKCALENDAR)
992
+ XX(SUBSCRIBE, 1, 'E', SEARCH)
993
+ XX(SUBSCRIBE, 1, 'O', SOURCE)
994
+ XX(REPORT, 2, 'B', REBIND)
995
+ XX(PROPFIND, 4, 'P', PROPPATCH)
996
+ XX(LOCK, 1, 'I', LINK)
997
+ XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
998
+ XX(UNLOCK, 2, 'B', UNBIND)
999
+ XX(UNLOCK, 3, 'I', UNLINK)
1000
+ #undef XX
1001
+ default:
1002
+ SET_ERRNO(HPE_INVALID_METHOD);
1003
+ goto error;
983
1004
  }
984
- } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
985
- parser->method = HTTP_PROPPATCH;
986
1005
  } else {
987
1006
  SET_ERRNO(HPE_INVALID_METHOD);
988
1007
  goto error;
@@ -998,11 +1017,11 @@ size_t http_parser_execute (http_parser *parser,
998
1017
 
999
1018
  MARK(url);
1000
1019
  if (parser->method == HTTP_CONNECT) {
1001
- parser->state = s_req_host_start;
1020
+ UPDATE_STATE(s_req_server_start);
1002
1021
  }
1003
1022
 
1004
- parser->state = parse_url_char((enum state)parser->state, ch);
1005
- if (parser->state == s_dead) {
1023
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1024
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1006
1025
  SET_ERRNO(HPE_INVALID_URL);
1007
1026
  goto error;
1008
1027
  }
@@ -1013,10 +1032,7 @@ size_t http_parser_execute (http_parser *parser,
1013
1032
  case s_req_schema:
1014
1033
  case s_req_schema_slash:
1015
1034
  case s_req_schema_slash_slash:
1016
- case s_req_host_start:
1017
- case s_req_host_v6_start:
1018
- case s_req_host_v6:
1019
- case s_req_port_start:
1035
+ case s_req_server_start:
1020
1036
  {
1021
1037
  switch (ch) {
1022
1038
  /* No whitespace allowed here */
@@ -1026,8 +1042,8 @@ size_t http_parser_execute (http_parser *parser,
1026
1042
  SET_ERRNO(HPE_INVALID_URL);
1027
1043
  goto error;
1028
1044
  default:
1029
- parser->state = parse_url_char((enum state)parser->state, ch);
1030
- if (parser->state == s_dead) {
1045
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1046
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1031
1047
  SET_ERRNO(HPE_INVALID_URL);
1032
1048
  goto error;
1033
1049
  }
@@ -1036,9 +1052,8 @@ size_t http_parser_execute (http_parser *parser,
1036
1052
  break;
1037
1053
  }
1038
1054
 
1039
- case s_req_host:
1040
- case s_req_host_v6_end:
1041
- case s_req_port:
1055
+ case s_req_server:
1056
+ case s_req_server_with_at:
1042
1057
  case s_req_path:
1043
1058
  case s_req_query_string_start:
1044
1059
  case s_req_query_string:
@@ -1047,21 +1062,21 @@ size_t http_parser_execute (http_parser *parser,
1047
1062
  {
1048
1063
  switch (ch) {
1049
1064
  case ' ':
1050
- parser->state = s_req_http_start;
1065
+ UPDATE_STATE(s_req_http_start);
1051
1066
  CALLBACK_DATA(url);
1052
1067
  break;
1053
1068
  case CR:
1054
1069
  case LF:
1055
1070
  parser->http_major = 0;
1056
1071
  parser->http_minor = 9;
1057
- parser->state = (ch == CR) ?
1072
+ UPDATE_STATE((ch == CR) ?
1058
1073
  s_req_line_almost_done :
1059
- s_header_field_start;
1074
+ s_header_field_start);
1060
1075
  CALLBACK_DATA(url);
1061
1076
  break;
1062
1077
  default:
1063
- parser->state = parse_url_char((enum state)parser->state, ch);
1064
- if (parser->state == s_dead) {
1078
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1079
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1065
1080
  SET_ERRNO(HPE_INVALID_URL);
1066
1081
  goto error;
1067
1082
  }
@@ -1072,7 +1087,7 @@ size_t http_parser_execute (http_parser *parser,
1072
1087
  case s_req_http_start:
1073
1088
  switch (ch) {
1074
1089
  case 'H':
1075
- parser->state = s_req_http_H;
1090
+ UPDATE_STATE(s_req_http_H);
1076
1091
  break;
1077
1092
  case ' ':
1078
1093
  break;
@@ -1084,130 +1099,101 @@ size_t http_parser_execute (http_parser *parser,
1084
1099
 
1085
1100
  case s_req_http_H:
1086
1101
  STRICT_CHECK(ch != 'T');
1087
- parser->state = s_req_http_HT;
1102
+ UPDATE_STATE(s_req_http_HT);
1088
1103
  break;
1089
1104
 
1090
1105
  case s_req_http_HT:
1091
1106
  STRICT_CHECK(ch != 'T');
1092
- parser->state = s_req_http_HTT;
1107
+ UPDATE_STATE(s_req_http_HTT);
1093
1108
  break;
1094
1109
 
1095
1110
  case s_req_http_HTT:
1096
1111
  STRICT_CHECK(ch != 'P');
1097
- parser->state = s_req_http_HTTP;
1112
+ UPDATE_STATE(s_req_http_HTTP);
1098
1113
  break;
1099
1114
 
1100
1115
  case s_req_http_HTTP:
1101
1116
  STRICT_CHECK(ch != '/');
1102
- parser->state = s_req_first_http_major;
1117
+ UPDATE_STATE(s_req_http_major);
1103
1118
  break;
1104
1119
 
1105
- /* first digit of major HTTP version */
1106
- case s_req_first_http_major:
1107
- if (ch < '1' || ch > '9') {
1120
+ case s_req_http_major:
1121
+ if (UNLIKELY(!IS_NUM(ch))) {
1108
1122
  SET_ERRNO(HPE_INVALID_VERSION);
1109
1123
  goto error;
1110
1124
  }
1111
1125
 
1112
1126
  parser->http_major = ch - '0';
1113
- parser->state = s_req_http_major;
1127
+ UPDATE_STATE(s_req_http_dot);
1114
1128
  break;
1115
1129
 
1116
- /* major HTTP version or dot */
1117
- case s_req_http_major:
1130
+ case s_req_http_dot:
1118
1131
  {
1119
- if (ch == '.') {
1120
- parser->state = s_req_first_http_minor;
1121
- break;
1122
- }
1123
-
1124
- if (!IS_NUM(ch)) {
1125
- SET_ERRNO(HPE_INVALID_VERSION);
1126
- goto error;
1127
- }
1128
-
1129
- parser->http_major *= 10;
1130
- parser->http_major += ch - '0';
1131
-
1132
- if (parser->http_major > 999) {
1132
+ if (UNLIKELY(ch != '.')) {
1133
1133
  SET_ERRNO(HPE_INVALID_VERSION);
1134
1134
  goto error;
1135
1135
  }
1136
1136
 
1137
+ UPDATE_STATE(s_req_http_minor);
1137
1138
  break;
1138
1139
  }
1139
1140
 
1140
- /* first digit of minor HTTP version */
1141
- case s_req_first_http_minor:
1142
- if (!IS_NUM(ch)) {
1141
+ case s_req_http_minor:
1142
+ if (UNLIKELY(!IS_NUM(ch))) {
1143
1143
  SET_ERRNO(HPE_INVALID_VERSION);
1144
1144
  goto error;
1145
1145
  }
1146
1146
 
1147
1147
  parser->http_minor = ch - '0';
1148
- parser->state = s_req_http_minor;
1148
+ UPDATE_STATE(s_req_http_end);
1149
1149
  break;
1150
1150
 
1151
- /* minor HTTP version or end of request line */
1152
- case s_req_http_minor:
1151
+ case s_req_http_end:
1153
1152
  {
1154
1153
  if (ch == CR) {
1155
- parser->state = s_req_line_almost_done;
1154
+ UPDATE_STATE(s_req_line_almost_done);
1156
1155
  break;
1157
1156
  }
1158
1157
 
1159
1158
  if (ch == LF) {
1160
- parser->state = s_header_field_start;
1159
+ UPDATE_STATE(s_header_field_start);
1161
1160
  break;
1162
1161
  }
1163
1162
 
1164
- /* XXX allow spaces after digit? */
1165
-
1166
- if (!IS_NUM(ch)) {
1167
- SET_ERRNO(HPE_INVALID_VERSION);
1168
- goto error;
1169
- }
1170
-
1171
- parser->http_minor *= 10;
1172
- parser->http_minor += ch - '0';
1173
-
1174
- if (parser->http_minor > 999) {
1175
- SET_ERRNO(HPE_INVALID_VERSION);
1176
- goto error;
1177
- }
1178
-
1163
+ SET_ERRNO(HPE_INVALID_VERSION);
1164
+ goto error;
1179
1165
  break;
1180
1166
  }
1181
1167
 
1182
1168
  /* end of request line */
1183
1169
  case s_req_line_almost_done:
1184
1170
  {
1185
- if (ch != LF) {
1171
+ if (UNLIKELY(ch != LF)) {
1186
1172
  SET_ERRNO(HPE_LF_EXPECTED);
1187
1173
  goto error;
1188
1174
  }
1189
1175
 
1190
- parser->state = s_header_field_start;
1176
+ UPDATE_STATE(s_header_field_start);
1191
1177
  break;
1192
1178
  }
1193
1179
 
1194
1180
  case s_header_field_start:
1195
1181
  {
1196
1182
  if (ch == CR) {
1197
- parser->state = s_headers_almost_done;
1183
+ UPDATE_STATE(s_headers_almost_done);
1198
1184
  break;
1199
1185
  }
1200
1186
 
1201
1187
  if (ch == LF) {
1202
1188
  /* they might be just sending \n instead of \r\n so this would be
1203
1189
  * the second \n to denote the end of headers*/
1204
- parser->state = s_headers_almost_done;
1205
- goto reexecute_byte;
1190
+ UPDATE_STATE(s_headers_almost_done);
1191
+ REEXECUTE();
1206
1192
  }
1207
1193
 
1208
1194
  c = TOKEN(ch);
1209
1195
 
1210
- if (!c) {
1196
+ if (UNLIKELY(!c)) {
1211
1197
  SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1212
1198
  goto error;
1213
1199
  }
@@ -1215,7 +1201,7 @@ size_t http_parser_execute (http_parser *parser,
1215
1201
  MARK(header_field);
1216
1202
 
1217
1203
  parser->index = 0;
1218
- parser->state = s_header_field;
1204
+ UPDATE_STATE(s_header_field);
1219
1205
 
1220
1206
  switch (c) {
1221
1207
  case 'c':
@@ -1243,9 +1229,14 @@ size_t http_parser_execute (http_parser *parser,
1243
1229
 
1244
1230
  case s_header_field:
1245
1231
  {
1246
- c = TOKEN(ch);
1232
+ const char* start = p;
1233
+ for (; p != data + len; p++) {
1234
+ ch = *p;
1235
+ c = TOKEN(ch);
1236
+
1237
+ if (!c)
1238
+ break;
1247
1239
 
1248
- if (c) {
1249
1240
  switch (parser->header_state) {
1250
1241
  case h_general:
1251
1242
  break;
@@ -1346,23 +1337,17 @@ size_t http_parser_execute (http_parser *parser,
1346
1337
  assert(0 && "Unknown header_state");
1347
1338
  break;
1348
1339
  }
1349
- break;
1350
1340
  }
1351
1341
 
1352
- if (ch == ':') {
1353
- parser->state = s_header_value_start;
1354
- CALLBACK_DATA(header_field);
1355
- break;
1356
- }
1342
+ COUNT_HEADER_SIZE(p - start);
1357
1343
 
1358
- if (ch == CR) {
1359
- parser->state = s_header_almost_done;
1360
- CALLBACK_DATA(header_field);
1344
+ if (p == data + len) {
1345
+ --p;
1361
1346
  break;
1362
1347
  }
1363
1348
 
1364
- if (ch == LF) {
1365
- parser->state = s_header_field_start;
1349
+ if (ch == ':') {
1350
+ UPDATE_STATE(s_header_value_discard_ws);
1366
1351
  CALLBACK_DATA(header_field);
1367
1352
  break;
1368
1353
  }
@@ -1371,28 +1356,28 @@ size_t http_parser_execute (http_parser *parser,
1371
1356
  goto error;
1372
1357
  }
1373
1358
 
1374
- case s_header_value_start:
1375
- {
1359
+ case s_header_value_discard_ws:
1376
1360
  if (ch == ' ' || ch == '\t') break;
1377
1361
 
1378
- MARK(header_value);
1379
-
1380
- parser->state = s_header_value;
1381
- parser->index = 0;
1382
-
1383
1362
  if (ch == CR) {
1384
- parser->header_state = h_general;
1385
- parser->state = s_header_almost_done;
1386
- CALLBACK_DATA(header_value);
1363
+ UPDATE_STATE(s_header_value_discard_ws_almost_done);
1387
1364
  break;
1388
1365
  }
1389
1366
 
1390
1367
  if (ch == LF) {
1391
- parser->state = s_header_field_start;
1392
- CALLBACK_DATA(header_value);
1368
+ UPDATE_STATE(s_header_value_discard_lws);
1393
1369
  break;
1394
1370
  }
1395
1371
 
1372
+ /* FALLTHROUGH */
1373
+
1374
+ case s_header_value_start:
1375
+ {
1376
+ MARK(header_value);
1377
+
1378
+ UPDATE_STATE(s_header_value);
1379
+ parser->index = 0;
1380
+
1396
1381
  c = LOWER(ch);
1397
1382
 
1398
1383
  switch (parser->header_state) {
@@ -1411,12 +1396,19 @@ size_t http_parser_execute (http_parser *parser,
1411
1396
  break;
1412
1397
 
1413
1398
  case h_content_length:
1414
- if (!IS_NUM(ch)) {
1399
+ if (UNLIKELY(!IS_NUM(ch))) {
1415
1400
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1416
1401
  goto error;
1417
1402
  }
1418
1403
 
1404
+ if (parser->flags & F_CONTENTLENGTH) {
1405
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1406
+ goto error;
1407
+ }
1408
+
1409
+ parser->flags |= F_CONTENTLENGTH;
1419
1410
  parser->content_length = ch - '0';
1411
+ parser->header_state = h_content_length_num;
1420
1412
  break;
1421
1413
 
1422
1414
  case h_connection:
@@ -1426,11 +1418,17 @@ size_t http_parser_execute (http_parser *parser,
1426
1418
  /* looking for 'Connection: close' */
1427
1419
  } else if (c == 'c') {
1428
1420
  parser->header_state = h_matching_connection_close;
1421
+ } else if (c == 'u') {
1422
+ parser->header_state = h_matching_connection_upgrade;
1429
1423
  } else {
1430
- parser->header_state = h_general;
1424
+ parser->header_state = h_matching_connection_token;
1431
1425
  }
1432
1426
  break;
1433
1427
 
1428
+ /* Multi-value `Connection` header */
1429
+ case h_matching_connection_token_start:
1430
+ break;
1431
+
1434
1432
  default:
1435
1433
  parser->header_state = h_general;
1436
1434
  break;
@@ -1440,107 +1438,228 @@ size_t http_parser_execute (http_parser *parser,
1440
1438
 
1441
1439
  case s_header_value:
1442
1440
  {
1441
+ const char* start = p;
1442
+ enum header_states h_state = (enum header_states) parser->header_state;
1443
+ for (; p != data + len; p++) {
1444
+ ch = *p;
1445
+ if (ch == CR) {
1446
+ UPDATE_STATE(s_header_almost_done);
1447
+ parser->header_state = h_state;
1448
+ CALLBACK_DATA(header_value);
1449
+ break;
1450
+ }
1443
1451
 
1444
- if (ch == CR) {
1445
- parser->state = s_header_almost_done;
1446
- CALLBACK_DATA(header_value);
1447
- break;
1448
- }
1452
+ if (ch == LF) {
1453
+ UPDATE_STATE(s_header_almost_done);
1454
+ COUNT_HEADER_SIZE(p - start);
1455
+ parser->header_state = h_state;
1456
+ CALLBACK_DATA_NOADVANCE(header_value);
1457
+ REEXECUTE();
1458
+ }
1449
1459
 
1450
- if (ch == LF) {
1451
- parser->state = s_header_almost_done;
1452
- CALLBACK_DATA_NOADVANCE(header_value);
1453
- goto reexecute_byte;
1454
- }
1460
+ if (!lenient && !IS_HEADER_CHAR(ch)) {
1461
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1462
+ goto error;
1463
+ }
1455
1464
 
1456
- c = LOWER(ch);
1465
+ c = LOWER(ch);
1457
1466
 
1458
- switch (parser->header_state) {
1459
- case h_general:
1460
- break;
1467
+ switch (h_state) {
1468
+ case h_general:
1469
+ {
1470
+ const char* p_cr;
1471
+ const char* p_lf;
1472
+ size_t limit = data + len - p;
1473
+
1474
+ limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1475
+
1476
+ p_cr = (const char*) memchr(p, CR, limit);
1477
+ p_lf = (const char*) memchr(p, LF, limit);
1478
+ if (p_cr != NULL) {
1479
+ if (p_lf != NULL && p_cr >= p_lf)
1480
+ p = p_lf;
1481
+ else
1482
+ p = p_cr;
1483
+ } else if (UNLIKELY(p_lf != NULL)) {
1484
+ p = p_lf;
1485
+ } else {
1486
+ p = data + len;
1487
+ }
1488
+ --p;
1461
1489
 
1462
- case h_connection:
1463
- case h_transfer_encoding:
1464
- assert(0 && "Shouldn't get here.");
1465
- break;
1490
+ break;
1491
+ }
1466
1492
 
1467
- case h_content_length:
1468
- {
1469
- uint64_t t;
1493
+ case h_connection:
1494
+ case h_transfer_encoding:
1495
+ assert(0 && "Shouldn't get here.");
1496
+ break;
1470
1497
 
1471
- if (ch == ' ') break;
1498
+ case h_content_length:
1499
+ if (ch == ' ') break;
1500
+ h_state = h_content_length_num;
1501
+ /* FALLTHROUGH */
1472
1502
 
1473
- if (!IS_NUM(ch)) {
1474
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1475
- goto error;
1476
- }
1503
+ case h_content_length_num:
1504
+ {
1505
+ uint64_t t;
1506
+
1507
+ if (ch == ' ') {
1508
+ h_state = h_content_length_ws;
1509
+ break;
1510
+ }
1511
+
1512
+ if (UNLIKELY(!IS_NUM(ch))) {
1513
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1514
+ parser->header_state = h_state;
1515
+ goto error;
1516
+ }
1517
+
1518
+ t = parser->content_length;
1519
+ t *= 10;
1520
+ t += ch - '0';
1477
1521
 
1478
- t = parser->content_length;
1479
- t *= 10;
1480
- t += ch - '0';
1522
+ /* Overflow? Test against a conservative limit for simplicity. */
1523
+ if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1524
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1525
+ parser->header_state = h_state;
1526
+ goto error;
1527
+ }
1528
+
1529
+ parser->content_length = t;
1530
+ break;
1531
+ }
1481
1532
 
1482
- /* Overflow? */
1483
- if (t < parser->content_length || t == ULLONG_MAX) {
1533
+ case h_content_length_ws:
1534
+ if (ch == ' ') break;
1484
1535
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1536
+ parser->header_state = h_state;
1485
1537
  goto error;
1486
- }
1487
1538
 
1488
- parser->content_length = t;
1489
- break;
1490
- }
1539
+ /* Transfer-Encoding: chunked */
1540
+ case h_matching_transfer_encoding_chunked:
1541
+ parser->index++;
1542
+ if (parser->index > sizeof(CHUNKED)-1
1543
+ || c != CHUNKED[parser->index]) {
1544
+ h_state = h_general;
1545
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1546
+ h_state = h_transfer_encoding_chunked;
1547
+ }
1548
+ break;
1491
1549
 
1492
- /* Transfer-Encoding: chunked */
1493
- case h_matching_transfer_encoding_chunked:
1494
- parser->index++;
1495
- if (parser->index > sizeof(CHUNKED)-1
1496
- || c != CHUNKED[parser->index]) {
1497
- parser->header_state = h_general;
1498
- } else if (parser->index == sizeof(CHUNKED)-2) {
1499
- parser->header_state = h_transfer_encoding_chunked;
1500
- }
1501
- break;
1550
+ case h_matching_connection_token_start:
1551
+ /* looking for 'Connection: keep-alive' */
1552
+ if (c == 'k') {
1553
+ h_state = h_matching_connection_keep_alive;
1554
+ /* looking for 'Connection: close' */
1555
+ } else if (c == 'c') {
1556
+ h_state = h_matching_connection_close;
1557
+ } else if (c == 'u') {
1558
+ h_state = h_matching_connection_upgrade;
1559
+ } else if (STRICT_TOKEN(c)) {
1560
+ h_state = h_matching_connection_token;
1561
+ } else if (c == ' ' || c == '\t') {
1562
+ /* Skip lws */
1563
+ } else {
1564
+ h_state = h_general;
1565
+ }
1566
+ break;
1502
1567
 
1503
- /* looking for 'Connection: keep-alive' */
1504
- case h_matching_connection_keep_alive:
1505
- parser->index++;
1506
- if (parser->index > sizeof(KEEP_ALIVE)-1
1507
- || c != KEEP_ALIVE[parser->index]) {
1508
- parser->header_state = h_general;
1509
- } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1510
- parser->header_state = h_connection_keep_alive;
1511
- }
1512
- break;
1568
+ /* looking for 'Connection: keep-alive' */
1569
+ case h_matching_connection_keep_alive:
1570
+ parser->index++;
1571
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1572
+ || c != KEEP_ALIVE[parser->index]) {
1573
+ h_state = h_matching_connection_token;
1574
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1575
+ h_state = h_connection_keep_alive;
1576
+ }
1577
+ break;
1513
1578
 
1514
- /* looking for 'Connection: close' */
1515
- case h_matching_connection_close:
1516
- parser->index++;
1517
- if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1518
- parser->header_state = h_general;
1519
- } else if (parser->index == sizeof(CLOSE)-2) {
1520
- parser->header_state = h_connection_close;
1521
- }
1522
- break;
1579
+ /* looking for 'Connection: close' */
1580
+ case h_matching_connection_close:
1581
+ parser->index++;
1582
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1583
+ h_state = h_matching_connection_token;
1584
+ } else if (parser->index == sizeof(CLOSE)-2) {
1585
+ h_state = h_connection_close;
1586
+ }
1587
+ break;
1523
1588
 
1524
- case h_transfer_encoding_chunked:
1525
- case h_connection_keep_alive:
1526
- case h_connection_close:
1527
- if (ch != ' ') parser->header_state = h_general;
1528
- break;
1589
+ /* looking for 'Connection: upgrade' */
1590
+ case h_matching_connection_upgrade:
1591
+ parser->index++;
1592
+ if (parser->index > sizeof(UPGRADE) - 1 ||
1593
+ c != UPGRADE[parser->index]) {
1594
+ h_state = h_matching_connection_token;
1595
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1596
+ h_state = h_connection_upgrade;
1597
+ }
1598
+ break;
1529
1599
 
1530
- default:
1531
- parser->state = s_header_value;
1532
- parser->header_state = h_general;
1533
- break;
1600
+ case h_matching_connection_token:
1601
+ if (ch == ',') {
1602
+ h_state = h_matching_connection_token_start;
1603
+ parser->index = 0;
1604
+ }
1605
+ break;
1606
+
1607
+ case h_transfer_encoding_chunked:
1608
+ if (ch != ' ') h_state = h_general;
1609
+ break;
1610
+
1611
+ case h_connection_keep_alive:
1612
+ case h_connection_close:
1613
+ case h_connection_upgrade:
1614
+ if (ch == ',') {
1615
+ if (h_state == h_connection_keep_alive) {
1616
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1617
+ } else if (h_state == h_connection_close) {
1618
+ parser->flags |= F_CONNECTION_CLOSE;
1619
+ } else if (h_state == h_connection_upgrade) {
1620
+ parser->flags |= F_CONNECTION_UPGRADE;
1621
+ }
1622
+ h_state = h_matching_connection_token_start;
1623
+ parser->index = 0;
1624
+ } else if (ch != ' ') {
1625
+ h_state = h_matching_connection_token;
1626
+ }
1627
+ break;
1628
+
1629
+ default:
1630
+ UPDATE_STATE(s_header_value);
1631
+ h_state = h_general;
1632
+ break;
1633
+ }
1534
1634
  }
1635
+ parser->header_state = h_state;
1636
+
1637
+ COUNT_HEADER_SIZE(p - start);
1638
+
1639
+ if (p == data + len)
1640
+ --p;
1535
1641
  break;
1536
1642
  }
1537
1643
 
1538
1644
  case s_header_almost_done:
1539
1645
  {
1540
- STRICT_CHECK(ch != LF);
1646
+ if (UNLIKELY(ch != LF)) {
1647
+ SET_ERRNO(HPE_LF_EXPECTED);
1648
+ goto error;
1649
+ }
1650
+
1651
+ UPDATE_STATE(s_header_value_lws);
1652
+ break;
1653
+ }
1541
1654
 
1542
- parser->state = s_header_value_lws;
1655
+ case s_header_value_lws:
1656
+ {
1657
+ if (ch == ' ' || ch == '\t') {
1658
+ UPDATE_STATE(s_header_value_start);
1659
+ REEXECUTE();
1660
+ }
1543
1661
 
1662
+ /* finished the header */
1544
1663
  switch (parser->header_state) {
1545
1664
  case h_connection_keep_alive:
1546
1665
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
@@ -1551,23 +1670,53 @@ size_t http_parser_execute (http_parser *parser,
1551
1670
  case h_transfer_encoding_chunked:
1552
1671
  parser->flags |= F_CHUNKED;
1553
1672
  break;
1673
+ case h_connection_upgrade:
1674
+ parser->flags |= F_CONNECTION_UPGRADE;
1675
+ break;
1554
1676
  default:
1555
1677
  break;
1556
1678
  }
1557
1679
 
1680
+ UPDATE_STATE(s_header_field_start);
1681
+ REEXECUTE();
1682
+ }
1683
+
1684
+ case s_header_value_discard_ws_almost_done:
1685
+ {
1686
+ STRICT_CHECK(ch != LF);
1687
+ UPDATE_STATE(s_header_value_discard_lws);
1558
1688
  break;
1559
1689
  }
1560
1690
 
1561
- case s_header_value_lws:
1691
+ case s_header_value_discard_lws:
1562
1692
  {
1563
- if (ch == ' ' || ch == '\t')
1564
- parser->state = s_header_value_start;
1565
- else
1566
- {
1567
- parser->state = s_header_field_start;
1568
- goto reexecute_byte;
1693
+ if (ch == ' ' || ch == '\t') {
1694
+ UPDATE_STATE(s_header_value_discard_ws);
1695
+ break;
1696
+ } else {
1697
+ switch (parser->header_state) {
1698
+ case h_connection_keep_alive:
1699
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1700
+ break;
1701
+ case h_connection_close:
1702
+ parser->flags |= F_CONNECTION_CLOSE;
1703
+ break;
1704
+ case h_connection_upgrade:
1705
+ parser->flags |= F_CONNECTION_UPGRADE;
1706
+ break;
1707
+ case h_transfer_encoding_chunked:
1708
+ parser->flags |= F_CHUNKED;
1709
+ break;
1710
+ default:
1711
+ break;
1712
+ }
1713
+
1714
+ /* header value was empty */
1715
+ MARK(header_value);
1716
+ UPDATE_STATE(s_header_field_start);
1717
+ CALLBACK_DATA_NOADVANCE(header_value);
1718
+ REEXECUTE();
1569
1719
  }
1570
- break;
1571
1720
  }
1572
1721
 
1573
1722
  case s_headers_almost_done:
@@ -1576,16 +1725,33 @@ size_t http_parser_execute (http_parser *parser,
1576
1725
 
1577
1726
  if (parser->flags & F_TRAILING) {
1578
1727
  /* End of a chunked request */
1579
- parser->state = NEW_MESSAGE();
1580
- CALLBACK_NOTIFY(message_complete);
1581
- break;
1728
+ UPDATE_STATE(s_message_done);
1729
+ CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1730
+ REEXECUTE();
1731
+ }
1732
+
1733
+ /* Cannot use chunked encoding and a content-length header together
1734
+ per the HTTP specification. */
1735
+ if ((parser->flags & F_CHUNKED) &&
1736
+ (parser->flags & F_CONTENTLENGTH)) {
1737
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1738
+ goto error;
1582
1739
  }
1583
1740
 
1584
- parser->state = s_headers_done;
1741
+ UPDATE_STATE(s_headers_done);
1585
1742
 
1586
1743
  /* Set this here so that on_headers_complete() callbacks can see it */
1587
- parser->upgrade =
1588
- (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1744
+ if ((parser->flags & F_UPGRADE) &&
1745
+ (parser->flags & F_CONNECTION_UPGRADE)) {
1746
+ /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1747
+ * mandatory only when it is a 101 Switching Protocols response,
1748
+ * otherwise it is purely informational, to announce support.
1749
+ */
1750
+ parser->upgrade =
1751
+ (parser->type == HTTP_REQUEST || parser->status_code == 101);
1752
+ } else {
1753
+ parser->upgrade = (parser->method == HTTP_CONNECT);
1754
+ }
1589
1755
 
1590
1756
  /* Here we call the headers_complete callback. This is somewhat
1591
1757
  * different than other callbacks because if the user returns 1, we
@@ -1601,59 +1767,66 @@ size_t http_parser_execute (http_parser *parser,
1601
1767
  case 0:
1602
1768
  break;
1603
1769
 
1770
+ case 2:
1771
+ parser->upgrade = 1;
1772
+
1773
+ /* FALLTHROUGH */
1604
1774
  case 1:
1605
1775
  parser->flags |= F_SKIPBODY;
1606
1776
  break;
1607
1777
 
1608
1778
  default:
1609
1779
  SET_ERRNO(HPE_CB_headers_complete);
1610
- return p - data; /* Error */
1780
+ RETURN(p - data); /* Error */
1611
1781
  }
1612
1782
  }
1613
1783
 
1614
1784
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1615
- return p - data;
1785
+ RETURN(p - data);
1616
1786
  }
1617
1787
 
1618
- goto reexecute_byte;
1788
+ REEXECUTE();
1619
1789
  }
1620
1790
 
1621
1791
  case s_headers_done:
1622
1792
  {
1793
+ int hasBody;
1623
1794
  STRICT_CHECK(ch != LF);
1624
1795
 
1625
1796
  parser->nread = 0;
1626
1797
 
1627
- /* Exit, the rest of the connect is in a different protocol. */
1628
- if (parser->upgrade) {
1629
- parser->state = NEW_MESSAGE();
1798
+ hasBody = parser->flags & F_CHUNKED ||
1799
+ (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1800
+ if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1801
+ (parser->flags & F_SKIPBODY) || !hasBody)) {
1802
+ /* Exit, the rest of the message is in a different protocol. */
1803
+ UPDATE_STATE(NEW_MESSAGE());
1630
1804
  CALLBACK_NOTIFY(message_complete);
1631
- return (p - data) + 1;
1805
+ RETURN((p - data) + 1);
1632
1806
  }
1633
1807
 
1634
1808
  if (parser->flags & F_SKIPBODY) {
1635
- parser->state = NEW_MESSAGE();
1809
+ UPDATE_STATE(NEW_MESSAGE());
1636
1810
  CALLBACK_NOTIFY(message_complete);
1637
1811
  } else if (parser->flags & F_CHUNKED) {
1638
1812
  /* chunked encoding - ignore Content-Length header */
1639
- parser->state = s_chunk_size_start;
1813
+ UPDATE_STATE(s_chunk_size_start);
1640
1814
  } else {
1641
1815
  if (parser->content_length == 0) {
1642
1816
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1643
- parser->state = NEW_MESSAGE();
1817
+ UPDATE_STATE(NEW_MESSAGE());
1644
1818
  CALLBACK_NOTIFY(message_complete);
1645
1819
  } else if (parser->content_length != ULLONG_MAX) {
1646
1820
  /* Content-Length header given and non-zero */
1647
- parser->state = s_body_identity;
1821
+ UPDATE_STATE(s_body_identity);
1648
1822
  } else {
1649
- if (parser->type == HTTP_REQUEST ||
1650
- !http_message_needs_eof(parser)) {
1823
+ if (!http_message_needs_eof(parser)) {
1651
1824
  /* Assume content-length 0 - read the next */
1652
- parser->state = NEW_MESSAGE();
1825
+ UPDATE_STATE(NEW_MESSAGE());
1653
1826
  CALLBACK_NOTIFY(message_complete);
1654
1827
  } else {
1655
1828
  /* Read body until EOF */
1656
- parser->state = s_body_identity_eof;
1829
+ UPDATE_STATE(s_body_identity_eof);
1657
1830
  }
1658
1831
  }
1659
1832
  }
@@ -1679,7 +1852,7 @@ size_t http_parser_execute (http_parser *parser,
1679
1852
  p += to_read - 1;
1680
1853
 
1681
1854
  if (parser->content_length == 0) {
1682
- parser->state = s_message_done;
1855
+ UPDATE_STATE(s_message_done);
1683
1856
 
1684
1857
  /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1685
1858
  *
@@ -1691,7 +1864,7 @@ size_t http_parser_execute (http_parser *parser,
1691
1864
  * important for applications, but let's keep it for now.
1692
1865
  */
1693
1866
  CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1694
- goto reexecute_byte;
1867
+ REEXECUTE();
1695
1868
  }
1696
1869
 
1697
1870
  break;
@@ -1705,8 +1878,12 @@ size_t http_parser_execute (http_parser *parser,
1705
1878
  break;
1706
1879
 
1707
1880
  case s_message_done:
1708
- parser->state = NEW_MESSAGE();
1881
+ UPDATE_STATE(NEW_MESSAGE());
1709
1882
  CALLBACK_NOTIFY(message_complete);
1883
+ if (parser->upgrade) {
1884
+ /* Exit, the rest of the message is in a different protocol. */
1885
+ RETURN((p - data) + 1);
1886
+ }
1710
1887
  break;
1711
1888
 
1712
1889
  case s_chunk_size_start:
@@ -1715,13 +1892,13 @@ size_t http_parser_execute (http_parser *parser,
1715
1892
  assert(parser->flags & F_CHUNKED);
1716
1893
 
1717
1894
  unhex_val = unhex[(unsigned char)ch];
1718
- if (unhex_val == -1) {
1895
+ if (UNLIKELY(unhex_val == -1)) {
1719
1896
  SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1720
1897
  goto error;
1721
1898
  }
1722
1899
 
1723
1900
  parser->content_length = unhex_val;
1724
- parser->state = s_chunk_size;
1901
+ UPDATE_STATE(s_chunk_size);
1725
1902
  break;
1726
1903
  }
1727
1904
 
@@ -1732,7 +1909,7 @@ size_t http_parser_execute (http_parser *parser,
1732
1909
  assert(parser->flags & F_CHUNKED);
1733
1910
 
1734
1911
  if (ch == CR) {
1735
- parser->state = s_chunk_size_almost_done;
1912
+ UPDATE_STATE(s_chunk_size_almost_done);
1736
1913
  break;
1737
1914
  }
1738
1915
 
@@ -1740,7 +1917,7 @@ size_t http_parser_execute (http_parser *parser,
1740
1917
 
1741
1918
  if (unhex_val == -1) {
1742
1919
  if (ch == ';' || ch == ' ') {
1743
- parser->state = s_chunk_parameters;
1920
+ UPDATE_STATE(s_chunk_parameters);
1744
1921
  break;
1745
1922
  }
1746
1923
 
@@ -1752,8 +1929,8 @@ size_t http_parser_execute (http_parser *parser,
1752
1929
  t *= 16;
1753
1930
  t += unhex_val;
1754
1931
 
1755
- /* Overflow? */
1756
- if (t < parser->content_length || t == ULLONG_MAX) {
1932
+ /* Overflow? Test against a conservative limit for simplicity. */
1933
+ if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1757
1934
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1758
1935
  goto error;
1759
1936
  }
@@ -1767,7 +1944,7 @@ size_t http_parser_execute (http_parser *parser,
1767
1944
  assert(parser->flags & F_CHUNKED);
1768
1945
  /* just ignore this shit. TODO check for overflow */
1769
1946
  if (ch == CR) {
1770
- parser->state = s_chunk_size_almost_done;
1947
+ UPDATE_STATE(s_chunk_size_almost_done);
1771
1948
  break;
1772
1949
  }
1773
1950
  break;
@@ -1782,10 +1959,11 @@ size_t http_parser_execute (http_parser *parser,
1782
1959
 
1783
1960
  if (parser->content_length == 0) {
1784
1961
  parser->flags |= F_TRAILING;
1785
- parser->state = s_header_field_start;
1962
+ UPDATE_STATE(s_header_field_start);
1786
1963
  } else {
1787
- parser->state = s_chunk_data;
1964
+ UPDATE_STATE(s_chunk_data);
1788
1965
  }
1966
+ CALLBACK_NOTIFY(chunk_header);
1789
1967
  break;
1790
1968
  }
1791
1969
 
@@ -1806,7 +1984,7 @@ size_t http_parser_execute (http_parser *parser,
1806
1984
  p += to_read - 1;
1807
1985
 
1808
1986
  if (parser->content_length == 0) {
1809
- parser->state = s_chunk_data_almost_done;
1987
+ UPDATE_STATE(s_chunk_data_almost_done);
1810
1988
  }
1811
1989
 
1812
1990
  break;
@@ -1816,7 +1994,7 @@ size_t http_parser_execute (http_parser *parser,
1816
1994
  assert(parser->flags & F_CHUNKED);
1817
1995
  assert(parser->content_length == 0);
1818
1996
  STRICT_CHECK(ch != CR);
1819
- parser->state = s_chunk_data_done;
1997
+ UPDATE_STATE(s_chunk_data_done);
1820
1998
  CALLBACK_DATA(body);
1821
1999
  break;
1822
2000
 
@@ -1824,7 +2002,8 @@ size_t http_parser_execute (http_parser *parser,
1824
2002
  assert(parser->flags & F_CHUNKED);
1825
2003
  STRICT_CHECK(ch != LF);
1826
2004
  parser->nread = 0;
1827
- parser->state = s_chunk_size_start;
2005
+ UPDATE_STATE(s_chunk_size_start);
2006
+ CALLBACK_NOTIFY(chunk_complete);
1828
2007
  break;
1829
2008
 
1830
2009
  default:
@@ -1847,27 +2026,29 @@ size_t http_parser_execute (http_parser *parser,
1847
2026
  assert(((header_field_mark ? 1 : 0) +
1848
2027
  (header_value_mark ? 1 : 0) +
1849
2028
  (url_mark ? 1 : 0) +
1850
- (body_mark ? 1 : 0)) <= 1);
2029
+ (body_mark ? 1 : 0) +
2030
+ (status_mark ? 1 : 0)) <= 1);
1851
2031
 
1852
2032
  CALLBACK_DATA_NOADVANCE(header_field);
1853
2033
  CALLBACK_DATA_NOADVANCE(header_value);
1854
2034
  CALLBACK_DATA_NOADVANCE(url);
1855
2035
  CALLBACK_DATA_NOADVANCE(body);
2036
+ CALLBACK_DATA_NOADVANCE(status);
1856
2037
 
1857
- return len;
2038
+ RETURN(len);
1858
2039
 
1859
2040
  error:
1860
2041
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1861
2042
  SET_ERRNO(HPE_UNKNOWN);
1862
2043
  }
1863
2044
 
1864
- return (p - data);
2045
+ RETURN(p - data);
1865
2046
  }
1866
2047
 
1867
2048
 
1868
2049
  /* Does the parser need to see an EOF to find the end of the message? */
1869
2050
  int
1870
- http_message_needs_eof (http_parser *parser)
2051
+ http_message_needs_eof (const http_parser *parser)
1871
2052
  {
1872
2053
  if (parser->type == HTTP_REQUEST) {
1873
2054
  return 0;
@@ -1890,7 +2071,7 @@ http_message_needs_eof (http_parser *parser)
1890
2071
 
1891
2072
 
1892
2073
  int
1893
- http_should_keep_alive (http_parser *parser)
2074
+ http_should_keep_alive (const http_parser *parser)
1894
2075
  {
1895
2076
  if (parser->http_major > 0 && parser->http_minor > 0) {
1896
2077
  /* HTTP/1.1 */
@@ -1908,9 +2089,10 @@ http_should_keep_alive (http_parser *parser)
1908
2089
  }
1909
2090
 
1910
2091
 
1911
- const char * http_method_str (enum http_method m)
2092
+ const char *
2093
+ http_method_str (enum http_method m)
1912
2094
  {
1913
- return method_strings[m];
2095
+ return ELEM_AT(method_strings, m, "<unknown>");
1914
2096
  }
1915
2097
 
1916
2098
 
@@ -1925,18 +2107,193 @@ http_parser_init (http_parser *parser, enum http_parser_type t)
1925
2107
  parser->http_errno = HPE_OK;
1926
2108
  }
1927
2109
 
2110
+ void
2111
+ http_parser_settings_init(http_parser_settings *settings)
2112
+ {
2113
+ memset(settings, 0, sizeof(*settings));
2114
+ }
2115
+
1928
2116
  const char *
1929
2117
  http_errno_name(enum http_errno err) {
1930
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2118
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
1931
2119
  return http_strerror_tab[err].name;
1932
2120
  }
1933
2121
 
1934
2122
  const char *
1935
2123
  http_errno_description(enum http_errno err) {
1936
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2124
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
1937
2125
  return http_strerror_tab[err].description;
1938
2126
  }
1939
2127
 
2128
+ static enum http_host_state
2129
+ http_parse_host_char(enum http_host_state s, const char ch) {
2130
+ switch(s) {
2131
+ case s_http_userinfo:
2132
+ case s_http_userinfo_start:
2133
+ if (ch == '@') {
2134
+ return s_http_host_start;
2135
+ }
2136
+
2137
+ if (IS_USERINFO_CHAR(ch)) {
2138
+ return s_http_userinfo;
2139
+ }
2140
+ break;
2141
+
2142
+ case s_http_host_start:
2143
+ if (ch == '[') {
2144
+ return s_http_host_v6_start;
2145
+ }
2146
+
2147
+ if (IS_HOST_CHAR(ch)) {
2148
+ return s_http_host;
2149
+ }
2150
+
2151
+ break;
2152
+
2153
+ case s_http_host:
2154
+ if (IS_HOST_CHAR(ch)) {
2155
+ return s_http_host;
2156
+ }
2157
+
2158
+ /* FALLTHROUGH */
2159
+ case s_http_host_v6_end:
2160
+ if (ch == ':') {
2161
+ return s_http_host_port_start;
2162
+ }
2163
+
2164
+ break;
2165
+
2166
+ case s_http_host_v6:
2167
+ if (ch == ']') {
2168
+ return s_http_host_v6_end;
2169
+ }
2170
+
2171
+ /* FALLTHROUGH */
2172
+ case s_http_host_v6_start:
2173
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
2174
+ return s_http_host_v6;
2175
+ }
2176
+
2177
+ if (s == s_http_host_v6 && ch == '%') {
2178
+ return s_http_host_v6_zone_start;
2179
+ }
2180
+ break;
2181
+
2182
+ case s_http_host_v6_zone:
2183
+ if (ch == ']') {
2184
+ return s_http_host_v6_end;
2185
+ }
2186
+
2187
+ /* FALLTHROUGH */
2188
+ case s_http_host_v6_zone_start:
2189
+ /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2190
+ if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2191
+ ch == '~') {
2192
+ return s_http_host_v6_zone;
2193
+ }
2194
+ break;
2195
+
2196
+ case s_http_host_port:
2197
+ case s_http_host_port_start:
2198
+ if (IS_NUM(ch)) {
2199
+ return s_http_host_port;
2200
+ }
2201
+
2202
+ break;
2203
+
2204
+ default:
2205
+ break;
2206
+ }
2207
+ return s_http_host_dead;
2208
+ }
2209
+
2210
+ static int
2211
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2212
+ enum http_host_state s;
2213
+
2214
+ const char *p;
2215
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2216
+
2217
+ assert(u->field_set & (1 << UF_HOST));
2218
+
2219
+ u->field_data[UF_HOST].len = 0;
2220
+
2221
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
2222
+
2223
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2224
+ enum http_host_state new_s = http_parse_host_char(s, *p);
2225
+
2226
+ if (new_s == s_http_host_dead) {
2227
+ return 1;
2228
+ }
2229
+
2230
+ switch(new_s) {
2231
+ case s_http_host:
2232
+ if (s != s_http_host) {
2233
+ u->field_data[UF_HOST].off = p - buf;
2234
+ }
2235
+ u->field_data[UF_HOST].len++;
2236
+ break;
2237
+
2238
+ case s_http_host_v6:
2239
+ if (s != s_http_host_v6) {
2240
+ u->field_data[UF_HOST].off = p - buf;
2241
+ }
2242
+ u->field_data[UF_HOST].len++;
2243
+ break;
2244
+
2245
+ case s_http_host_v6_zone_start:
2246
+ case s_http_host_v6_zone:
2247
+ u->field_data[UF_HOST].len++;
2248
+ break;
2249
+
2250
+ case s_http_host_port:
2251
+ if (s != s_http_host_port) {
2252
+ u->field_data[UF_PORT].off = p - buf;
2253
+ u->field_data[UF_PORT].len = 0;
2254
+ u->field_set |= (1 << UF_PORT);
2255
+ }
2256
+ u->field_data[UF_PORT].len++;
2257
+ break;
2258
+
2259
+ case s_http_userinfo:
2260
+ if (s != s_http_userinfo) {
2261
+ u->field_data[UF_USERINFO].off = p - buf ;
2262
+ u->field_data[UF_USERINFO].len = 0;
2263
+ u->field_set |= (1 << UF_USERINFO);
2264
+ }
2265
+ u->field_data[UF_USERINFO].len++;
2266
+ break;
2267
+
2268
+ default:
2269
+ break;
2270
+ }
2271
+ s = new_s;
2272
+ }
2273
+
2274
+ /* Make sure we don't end somewhere unexpected */
2275
+ switch (s) {
2276
+ case s_http_host_start:
2277
+ case s_http_host_v6_start:
2278
+ case s_http_host_v6:
2279
+ case s_http_host_v6_zone_start:
2280
+ case s_http_host_v6_zone:
2281
+ case s_http_host_port_start:
2282
+ case s_http_userinfo:
2283
+ case s_http_userinfo_start:
2284
+ return 1;
2285
+ default:
2286
+ break;
2287
+ }
2288
+
2289
+ return 0;
2290
+ }
2291
+
2292
+ void
2293
+ http_parser_url_init(struct http_parser_url *u) {
2294
+ memset(u, 0, sizeof(*u));
2295
+ }
2296
+
1940
2297
  int
1941
2298
  http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1942
2299
  struct http_parser_url *u)
@@ -1944,10 +2301,11 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1944
2301
  enum state s;
1945
2302
  const char *p;
1946
2303
  enum http_parser_url_fields uf, old_uf;
2304
+ int found_at = 0;
1947
2305
 
1948
2306
  u->port = u->field_set = 0;
1949
- s = is_connect ? s_req_host_start : s_req_spaces_before_url;
1950
- uf = old_uf = UF_MAX;
2307
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2308
+ old_uf = UF_MAX;
1951
2309
 
1952
2310
  for (p = buf; p < buf + buflen; p++) {
1953
2311
  s = parse_url_char(s, *p);
@@ -1960,10 +2318,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1960
2318
  /* Skip delimiters */
1961
2319
  case s_req_schema_slash:
1962
2320
  case s_req_schema_slash_slash:
1963
- case s_req_host_start:
1964
- case s_req_host_v6_start:
1965
- case s_req_host_v6_end:
1966
- case s_req_port_start:
2321
+ case s_req_server_start:
1967
2322
  case s_req_query_string_start:
1968
2323
  case s_req_fragment_start:
1969
2324
  continue;
@@ -1972,13 +2327,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1972
2327
  uf = UF_SCHEMA;
1973
2328
  break;
1974
2329
 
1975
- case s_req_host:
1976
- case s_req_host_v6:
1977
- uf = UF_HOST;
1978
- break;
2330
+ case s_req_server_with_at:
2331
+ found_at = 1;
1979
2332
 
1980
- case s_req_port:
1981
- uf = UF_PORT;
2333
+ /* FALLTHROUGH */
2334
+ case s_req_server:
2335
+ uf = UF_HOST;
1982
2336
  break;
1983
2337
 
1984
2338
  case s_req_path:
@@ -2011,30 +2365,46 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2011
2365
  old_uf = uf;
2012
2366
  }
2013
2367
 
2014
- /* CONNECT requests can only contain "hostname:port" */
2015
- if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2368
+ /* host must be present if there is a schema */
2369
+ /* parsing http:///toto will fail */
2370
+ if ((u->field_set & (1 << UF_SCHEMA)) &&
2371
+ (u->field_set & (1 << UF_HOST)) == 0) {
2016
2372
  return 1;
2017
2373
  }
2018
2374
 
2019
- /* Make sure we don't end somewhere unexpected */
2020
- switch (s) {
2021
- case s_req_host_v6_start:
2022
- case s_req_host_v6:
2023
- case s_req_host_v6_end:
2024
- case s_req_host:
2025
- case s_req_port_start:
2375
+ if (u->field_set & (1 << UF_HOST)) {
2376
+ if (http_parse_host(buf, u, found_at) != 0) {
2377
+ return 1;
2378
+ }
2379
+ }
2380
+
2381
+ /* CONNECT requests can only contain "hostname:port" */
2382
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2026
2383
  return 1;
2027
- default:
2028
- break;
2029
2384
  }
2030
2385
 
2031
2386
  if (u->field_set & (1 << UF_PORT)) {
2032
- /* Don't bother with endp; we've already validated the string */
2033
- unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2034
-
2035
- /* Ports have a max value of 2^16 */
2036
- if (v > 0xffff) {
2037
- return 1;
2387
+ uint16_t off;
2388
+ uint16_t len;
2389
+ const char* p;
2390
+ const char* end;
2391
+ unsigned long v;
2392
+
2393
+ off = u->field_data[UF_PORT].off;
2394
+ len = u->field_data[UF_PORT].len;
2395
+ end = buf + off + len;
2396
+
2397
+ /* NOTE: The characters are already validated and are in the [0-9] range */
2398
+ assert(off + len <= buflen && "Port number overflow");
2399
+ v = 0;
2400
+ for (p = buf + off; p < end; p++) {
2401
+ v *= 10;
2402
+ v += *p - '0';
2403
+
2404
+ /* Ports have a max value of 2^16 */
2405
+ if (v > 0xffff) {
2406
+ return 1;
2407
+ }
2038
2408
  }
2039
2409
 
2040
2410
  u->port = (uint16_t) v;
@@ -2056,3 +2426,15 @@ http_parser_pause(http_parser *parser, int paused) {
2056
2426
  assert(0 && "Attempting to pause parser in error state");
2057
2427
  }
2058
2428
  }
2429
+
2430
+ int
2431
+ http_body_is_final(const struct http_parser *parser) {
2432
+ return parser->state == s_message_done;
2433
+ }
2434
+
2435
+ unsigned long
2436
+ http_parser_version(void) {
2437
+ return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2438
+ HTTP_PARSER_VERSION_MINOR * 0x00100 |
2439
+ HTTP_PARSER_VERSION_PATCH * 0x00001;
2440
+ }