http_parser.rb 0.6.0.beta.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/linux.yml +23 -0
  3. data/.github/workflows/windows.yml +23 -0
  4. data/.gitignore +5 -4
  5. data/.gitmodules +2 -2
  6. data/README.md +2 -2
  7. data/Rakefile +4 -2
  8. data/ext/ruby_http_parser/extconf.rb +1 -1
  9. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +86 -52
  10. data/ext/ruby_http_parser/ruby_http_parser.c +53 -7
  11. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +37 -1
  12. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +1 -5
  13. data/ext/ruby_http_parser/vendor/http-parser/Makefile +110 -8
  14. data/ext/ruby_http_parser/vendor/http-parser/README.md +105 -37
  15. data/ext/ruby_http_parser/vendor/http-parser/bench.c +128 -0
  16. data/ext/ruby_http_parser/vendor/http-parser/contrib/parsertrace.c +157 -0
  17. data/ext/ruby_http_parser/vendor/http-parser/contrib/url_parser.c +47 -0
  18. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +892 -510
  19. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +34 -2
  20. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +198 -77
  21. data/ext/ruby_http_parser/vendor/http-parser/test.c +1781 -201
  22. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +271 -154
  23. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +48 -61
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +5 -3
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +37 -104
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +116 -101
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +9 -5
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +1 -1
  29. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +579 -153
  30. data/http_parser.rb.gemspec +14 -9
  31. data/spec/parser_spec.rb +177 -99
  32. data/spec/support/requests.json +2 -2
  33. data/spec/support/responses.json +20 -0
  34. data/tasks/spec.rake +1 -1
  35. metadata +131 -162
  36. data/Gemfile.lock +0 -39
  37. data/ext/ruby_http_parser/vendor/http-parser/CONTRIBUTIONS +0 -4
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPHeadersCompleteCallback.java +0 -13
  39. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPHeadersCompleteCallback.java +0 -12
@@ -0,0 +1,157 @@
1
+ /* Copyright Joyent, Inc. and other Node contributors.
2
+ *
3
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ * of this software and associated documentation files (the "Software"), to
5
+ * deal in the Software without restriction, including without limitation the
6
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
+ * sell copies of the Software, and to permit persons to whom the Software is
8
+ * furnished to do so, subject to the following conditions:
9
+ *
10
+ * The above copyright notice and this permission notice shall be included in
11
+ * all copies or substantial portions of the Software.
12
+ *
13
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
+ * IN THE SOFTWARE.
20
+ */
21
+
22
+ /* Dump what the parser finds to stdout as it happens */
23
+
24
+ #include "http_parser.h"
25
+ #include <stdio.h>
26
+ #include <stdlib.h>
27
+ #include <string.h>
28
+
29
+ int on_message_begin(http_parser* _) {
30
+ (void)_;
31
+ printf("\n***MESSAGE BEGIN***\n\n");
32
+ return 0;
33
+ }
34
+
35
+ int on_headers_complete(http_parser* _) {
36
+ (void)_;
37
+ printf("\n***HEADERS COMPLETE***\n\n");
38
+ return 0;
39
+ }
40
+
41
+ int on_message_complete(http_parser* _) {
42
+ (void)_;
43
+ printf("\n***MESSAGE COMPLETE***\n\n");
44
+ return 0;
45
+ }
46
+
47
+ int on_url(http_parser* _, const char* at, size_t length) {
48
+ (void)_;
49
+ printf("Url: %.*s\n", (int)length, at);
50
+ return 0;
51
+ }
52
+
53
+ int on_header_field(http_parser* _, const char* at, size_t length) {
54
+ (void)_;
55
+ printf("Header field: %.*s\n", (int)length, at);
56
+ return 0;
57
+ }
58
+
59
+ int on_header_value(http_parser* _, const char* at, size_t length) {
60
+ (void)_;
61
+ printf("Header value: %.*s\n", (int)length, at);
62
+ return 0;
63
+ }
64
+
65
+ int on_body(http_parser* _, const char* at, size_t length) {
66
+ (void)_;
67
+ printf("Body: %.*s\n", (int)length, at);
68
+ return 0;
69
+ }
70
+
71
/* Writes the command-line synopsis to stderr, with `name` substituted as the
 * program name, then terminates the process with EXIT_FAILURE.
 * Does not return. */
void usage(const char* name) {
  fprintf(stderr, "Usage: %s $type $filename\n", name);
  fputs(" type: -x, where x is one of {r,b,q}\n", stderr);
  fputs(" parses file as a Response, reQuest, or Both\n", stderr);
  exit(EXIT_FAILURE);
}
79
+
80
+ int main(int argc, char* argv[]) {
81
+ enum http_parser_type file_type;
82
+
83
+ if (argc != 3) {
84
+ usage(argv[0]);
85
+ }
86
+
87
+ char* type = argv[1];
88
+ if (type[0] != '-') {
89
+ usage(argv[0]);
90
+ }
91
+
92
+ switch (type[1]) {
93
+ /* in the case of "-", type[1] will be NUL */
94
+ case 'r':
95
+ file_type = HTTP_RESPONSE;
96
+ break;
97
+ case 'q':
98
+ file_type = HTTP_REQUEST;
99
+ break;
100
+ case 'b':
101
+ file_type = HTTP_BOTH;
102
+ break;
103
+ default:
104
+ usage(argv[0]);
105
+ }
106
+
107
+ char* filename = argv[2];
108
+ FILE* file = fopen(filename, "r");
109
+ if (file == NULL) {
110
+ perror("fopen");
111
+ goto fail;
112
+ }
113
+
114
+ fseek(file, 0, SEEK_END);
115
+ long file_length = ftell(file);
116
+ if (file_length == -1) {
117
+ perror("ftell");
118
+ goto fail;
119
+ }
120
+ fseek(file, 0, SEEK_SET);
121
+
122
+ char* data = malloc(file_length);
123
+ if (fread(data, 1, file_length, file) != (size_t)file_length) {
124
+ fprintf(stderr, "couldn't read entire file\n");
125
+ free(data);
126
+ goto fail;
127
+ }
128
+
129
+ http_parser_settings settings;
130
+ memset(&settings, 0, sizeof(settings));
131
+ settings.on_message_begin = on_message_begin;
132
+ settings.on_url = on_url;
133
+ settings.on_header_field = on_header_field;
134
+ settings.on_header_value = on_header_value;
135
+ settings.on_headers_complete = on_headers_complete;
136
+ settings.on_body = on_body;
137
+ settings.on_message_complete = on_message_complete;
138
+
139
+ http_parser parser;
140
+ http_parser_init(&parser, file_type);
141
+ size_t nparsed = http_parser_execute(&parser, &settings, data, file_length);
142
+ free(data);
143
+
144
+ if (nparsed != (size_t)file_length) {
145
+ fprintf(stderr,
146
+ "Error: %s (%s)\n",
147
+ http_errno_description(HTTP_PARSER_ERRNO(&parser)),
148
+ http_errno_name(HTTP_PARSER_ERRNO(&parser)));
149
+ goto fail;
150
+ }
151
+
152
+ return EXIT_SUCCESS;
153
+
154
+ fail:
155
+ fclose(file);
156
+ return EXIT_FAILURE;
157
+ }
@@ -0,0 +1,47 @@
1
+ #include "http_parser.h"
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
+ void
6
+ dump_url (const char *url, const struct http_parser_url *u)
7
+ {
8
+ unsigned int i;
9
+
10
+ printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port);
11
+ for (i = 0; i < UF_MAX; i++) {
12
+ if ((u->field_set & (1 << i)) == 0) {
13
+ printf("\tfield_data[%u]: unset\n", i);
14
+ continue;
15
+ }
16
+
17
+ printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n",
18
+ i,
19
+ u->field_data[i].off,
20
+ u->field_data[i].len,
21
+ u->field_data[i].len,
22
+ url + u->field_data[i].off);
23
+ }
24
+ }
25
+
26
+ int main(int argc, char ** argv) {
27
+ struct http_parser_url u;
28
+ int len, connect, result;
29
+
30
+ if (argc != 3) {
31
+ printf("Syntax : %s connect|get url\n", argv[0]);
32
+ return 1;
33
+ }
34
+ len = strlen(argv[2]);
35
+ connect = strcmp("connect", argv[1]) == 0 ? 1 : 0;
36
+ printf("Parsing %s, connect %d\n", argv[2], connect);
37
+
38
+ http_parser_url_init(&u);
39
+ result = http_parser_parse_url(argv[2], len, connect, &u);
40
+ if (result != 0) {
41
+ printf("Parse error : %d\n", result);
42
+ return result;
43
+ }
44
+ printf("Parse ok, result : \n");
45
+ dump_url(argv[2], &u);
46
+ return 0;
47
+ }
@@ -1,7 +1,4 @@
1
- /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
- *
3
- * Additional changes are licensed under the same terms as NGINX and
4
- * copyright Joyent, Inc. and other Node contributors. All rights reserved.
1
+ /* Copyright Joyent, Inc. and other Node contributors.
5
2
  *
6
3
  * Permission is hereby granted, free of charge, to any person obtaining a copy
7
4
  * of this software and associated documentation files (the "Software"), to
@@ -25,7 +22,6 @@
25
22
  #include <assert.h>
26
23
  #include <stddef.h>
27
24
  #include <ctype.h>
28
- #include <stdlib.h>
29
25
  #include <string.h>
30
26
  #include <limits.h>
31
27
 
@@ -37,18 +33,42 @@
37
33
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
34
  #endif
39
35
 
36
/* Element count of `a`, computed as sizeof(a) / sizeof(a[0]). Only defined
 * here if nothing defined ARRAY_SIZE earlier. The result is only an element
 * count when `a` is a real array, not a pointer. */
+ #ifndef ARRAY_SIZE
37
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
38
+ #endif
39
+
40
/* Tests bit `i` of the bitmap `a`: element i >> 3 is selected and masked with
 * 1 << (i & 7); `!!` collapses the result to 0 or 1. Each element is therefore
 * treated as holding 8 bits. No bounds check is performed on `i`. */
+ #ifndef BIT_AT
41
+ # define BIT_AT(a, i) \
42
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
43
+ (1 << ((unsigned int) (i) & 7))))
44
+ #endif
45
+
46
/* Bounds-checked read: yields a[i] when `i` (converted to unsigned int) is
 * below ARRAY_SIZE(a), otherwise the fallback value `v`. `i` is evaluated
 * twice when it is in range. */
+ #ifndef ELEM_AT
47
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
48
+ #endif
40
49
 
41
- #if HTTP_PARSER_DEBUG
42
- #define SET_ERRNO(e) \
43
- do { \
44
- parser->http_errno = (e); \
45
- parser->error_lineno = __LINE__; \
46
- } while (0)
47
- #else
48
50
  #define SET_ERRNO(e) \
49
51
  do { \
50
52
  parser->http_errno = (e); \
51
53
  } while(0)
54
+
55
/* State accessors built around a local variable named `p_state`; they only
 * work inside a function that declares it. */
/* Reads the local state copy. */
+ #define CURRENT_STATE() p_state
56
/* Assigns V (cast to enum state) to the local state copy.
 * NOTE(review): the expansion already ends in ';'. */
+ #define UPDATE_STATE(V) p_state = (enum state) (V);
57
/* Writes the local state copy back to parser->state, then returns V.
 * NOTE(review): the trailing ';' after `while (0)` means an unbraced
 * `if (...) RETURN(x); else ...` would not compile. */
+ #define RETURN(V) \
58
+ do { \
59
+ parser->state = CURRENT_STATE(); \
60
+ return (V); \
61
+ } while (0);
62
/* Jumps to the `reexecute` label. */
+ #define REEXECUTE() \
63
+ goto reexecute; \
64
+
65
+
66
/* Branch-prediction hints. When __GNUC__ is defined, X is normalised to 0/1
 * with `!!` and passed to __builtin_expect with the expected value (1 for
 * LIKELY, 0 for UNLIKELY). Otherwise both macros expand to plain (X). */
+ #ifdef __GNUC__
67
+ # define LIKELY(X) __builtin_expect(!!(X), 1)
68
+ # define UNLIKELY(X) __builtin_expect(!!(X), 0)
69
+ #else
70
+ # define LIKELY(X) (X)
71
+ # define UNLIKELY(X) (X)
52
72
  #endif
53
73
 
54
74
 
@@ -57,13 +77,15 @@ do { \
57
77
  do { \
58
78
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
59
79
  \
60
- if (settings->on_##FOR) { \
61
- if (0 != settings->on_##FOR(parser)) { \
80
+ if (LIKELY(settings->on_##FOR)) { \
81
+ parser->state = CURRENT_STATE(); \
82
+ if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
62
83
  SET_ERRNO(HPE_CB_##FOR); \
63
84
  } \
85
+ UPDATE_STATE(parser->state); \
64
86
  \
65
87
  /* We either errored above or got paused; get out */ \
66
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
88
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
67
89
  return (ER); \
68
90
  } \
69
91
  } \
@@ -81,20 +103,23 @@ do { \
81
103
  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
82
104
  \
83
105
  if (FOR##_mark) { \
84
- if (settings->on_##FOR) { \
85
- if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
106
+ if (LIKELY(settings->on_##FOR)) { \
107
+ parser->state = CURRENT_STATE(); \
108
+ if (UNLIKELY(0 != \
109
+ settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
86
110
  SET_ERRNO(HPE_CB_##FOR); \
87
111
  } \
112
+ UPDATE_STATE(parser->state); \
88
113
  \
89
114
  /* We either errored above or got paused; get out */ \
90
- if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
115
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
91
116
  return (ER); \
92
117
  } \
93
118
  } \
94
119
  FOR##_mark = NULL; \
95
120
  } \
96
121
  } while (0)
97
-
122
+
98
123
  /* Run the data callback FOR and consume the current byte */
99
124
  #define CALLBACK_DATA(FOR) \
100
125
  CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
@@ -111,6 +136,26 @@ do { \
111
136
  } \
112
137
  } while (0)
113
138
 
139
+ /* Don't allow the total size of the HTTP headers (including the status
140
+ * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
141
+ * embedders against denial-of-service attacks where the attacker feeds
142
+ * us a never-ending header that the embedder keeps buffering.
143
+ *
144
+ * This check is arguably the responsibility of embedders but we're doing
145
+ * it on the embedder's behalf because most won't bother and this way we
146
+ * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
147
+ * than any reasonable request or response so this should never affect
148
+ * day-to-day operation.
+ *
+ * Mechanics: adds V to parser->nread; once the total is greater than
+ * HTTP_MAX_HEADER_SIZE it sets HPE_HEADER_OVERFLOW and jumps to the
+ * `error` label. The expansion therefore needs `parser` in scope and an
+ * `error` label in the enclosing function.
149
+ */
150
+ #define COUNT_HEADER_SIZE(V) \
151
+ do { \
152
+ parser->nread += (V); \
153
+ if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \
154
+ SET_ERRNO(HPE_HEADER_OVERFLOW); \
155
+ goto error; \
156
+ } \
157
+ } while (0)
158
+
114
159
 
115
160
  #define PROXY_CONNECTION "proxy-connection"
116
161
  #define CONNECTION "connection"
@@ -123,31 +168,10 @@ do { \
123
168
 
124
169
 
125
170
  static const char *method_strings[] =
126
- { "DELETE"
127
- , "GET"
128
- , "HEAD"
129
- , "POST"
130
- , "PUT"
131
- , "CONNECT"
132
- , "OPTIONS"
133
- , "TRACE"
134
- , "COPY"
135
- , "LOCK"
136
- , "MKCOL"
137
- , "MOVE"
138
- , "PROPFIND"
139
- , "PROPPATCH"
140
- , "UNLOCK"
141
- , "REPORT"
142
- , "MKACTIVITY"
143
- , "CHECKOUT"
144
- , "MERGE"
145
- , "M-SEARCH"
146
- , "NOTIFY"
147
- , "SUBSCRIBE"
148
- , "UNSUBSCRIBE"
149
- , "PATCH"
150
- , "PURGE"
171
+ {
172
+ #define XX(num, name, string) #string,
173
+ HTTP_METHOD_MAP(XX)
174
+ #undef XX
151
175
  };
152
176
 
153
177
 
@@ -205,40 +229,48 @@ static const int8_t unhex[256] =
205
229
  };
206
230
 
207
231
 
208
- static const uint8_t normal_url_char[256] = {
232
/* Helper for building the normal_url_char bitmap that follows: a bit value
 * wrapped in T() is forced to 0 when HTTP_PARSER_STRICT is non-zero and kept
 * as written otherwise. The table wraps only the bits for tab (9) and form
 * feed (12) this way. */
+ #if HTTP_PARSER_STRICT
233
+ # define T(v) 0
234
+ #else
235
+ # define T(v) v
236
+ #endif
237
+
238
+
239
+ static const uint8_t normal_url_char[32] = {
209
240
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
210
- 0, 0, 0, 0, 0, 0, 0, 0,
241
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
211
242
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
212
- 0, 0, 0, 0, 0, 0, 0, 0,
243
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
213
244
  /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
214
- 0, 0, 0, 0, 0, 0, 0, 0,
245
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
215
246
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
216
- 0, 0, 0, 0, 0, 0, 0, 0,
247
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
217
248
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
218
- 0, 1, 1, 0, 1, 1, 1, 1,
249
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
219
250
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
220
- 1, 1, 1, 1, 1, 1, 1, 1,
251
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
221
252
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
222
- 1, 1, 1, 1, 1, 1, 1, 1,
253
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
223
254
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
224
- 1, 1, 1, 1, 1, 1, 1, 0,
255
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
225
256
  /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
226
- 1, 1, 1, 1, 1, 1, 1, 1,
257
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
227
258
  /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
228
- 1, 1, 1, 1, 1, 1, 1, 1,
259
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
229
260
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
230
- 1, 1, 1, 1, 1, 1, 1, 1,
261
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
231
262
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
232
- 1, 1, 1, 1, 1, 1, 1, 1,
263
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
233
264
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
234
- 1, 1, 1, 1, 1, 1, 1, 1,
265
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
235
266
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
236
- 1, 1, 1, 1, 1, 1, 1, 1,
267
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
237
268
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
238
- 1, 1, 1, 1, 1, 1, 1, 1,
269
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
239
270
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
240
- 1, 1, 1, 1, 1, 1, 1, 0, };
271
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
241
272
 
273
+ #undef T
242
274
 
243
275
  enum state
244
276
  { s_dead = 1 /* important that this is > 0 */
@@ -250,12 +282,13 @@ enum state
250
282
  , s_res_HT
251
283
  , s_res_HTT
252
284
  , s_res_HTTP
253
- , s_res_first_http_major
254
285
  , s_res_http_major
255
- , s_res_first_http_minor
286
+ , s_res_http_dot
256
287
  , s_res_http_minor
288
+ , s_res_http_end
257
289
  , s_res_first_status_code
258
290
  , s_res_status_code
291
+ , s_res_status_start
259
292
  , s_res_status
260
293
  , s_res_line_almost_done
261
294
 
@@ -266,13 +299,9 @@ enum state
266
299
  , s_req_schema
267
300
  , s_req_schema_slash
268
301
  , s_req_schema_slash_slash
269
- , s_req_host_start
270
- , s_req_host_v6_start
271
- , s_req_host_v6
272
- , s_req_host_v6_end
273
- , s_req_host
274
- , s_req_port_start
275
- , s_req_port
302
+ , s_req_server_start
303
+ , s_req_server
304
+ , s_req_server_with_at
276
305
  , s_req_path
277
306
  , s_req_query_string_start
278
307
  , s_req_query_string
@@ -283,14 +312,17 @@ enum state
283
312
  , s_req_http_HT
284
313
  , s_req_http_HTT
285
314
  , s_req_http_HTTP
286
- , s_req_first_http_major
287
315
  , s_req_http_major
288
- , s_req_first_http_minor
316
+ , s_req_http_dot
289
317
  , s_req_http_minor
318
+ , s_req_http_end
290
319
  , s_req_line_almost_done
291
320
 
292
321
  , s_header_field_start
293
322
  , s_header_field
323
+ , s_header_value_discard_ws
324
+ , s_header_value_discard_ws_almost_done
325
+ , s_header_value_discard_lws
294
326
  , s_header_value_start
295
327
  , s_header_value
296
328
  , s_header_value_lws
@@ -338,18 +370,39 @@ enum header_states
338
370
 
339
371
  , h_connection
340
372
  , h_content_length
373
+ , h_content_length_num
374
+ , h_content_length_ws
341
375
  , h_transfer_encoding
342
376
  , h_upgrade
343
377
 
344
378
  , h_matching_transfer_encoding_chunked
379
+ , h_matching_connection_token_start
345
380
  , h_matching_connection_keep_alive
346
381
  , h_matching_connection_close
382
+ , h_matching_connection_upgrade
383
+ , h_matching_connection_token
347
384
 
348
385
  , h_transfer_encoding_chunked
349
386
  , h_connection_keep_alive
350
387
  , h_connection_close
388
+ , h_connection_upgrade
351
389
  };
352
390
 
391
/*
 * States for parsing the host part of a URL. Numbering starts at 1 with
 * s_http_host_dead and increases in declaration order. The code that steps
 * through these states is not part of this excerpt.
 * NOTE(review): going by the names, the states cover userinfo, a plain host,
 * a bracketed IPv6 literal with an optional zone, and a port - confirm against
 * the function that consumes them.
 */
+ enum http_host_state
392
+ {
393
+ s_http_host_dead = 1
394
+ , s_http_userinfo_start
395
+ , s_http_userinfo
396
+ , s_http_host_start
397
+ , s_http_host_v6_start
398
+ , s_http_host
399
+ , s_http_host_v6
400
+ , s_http_host_v6_end
401
+ , s_http_host_v6_zone_start
402
+ , s_http_host_v6_zone
403
+ , s_http_host_port_start
404
+ , s_http_host_port
405
+ };
353
406
 
354
407
  /* Macros for character classes; depends on strict-mode */
355
408
  #define CR '\r'
@@ -359,19 +412,33 @@ enum header_states
359
412
  #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
360
413
  #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
361
414
  #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
415
/* True when c is one of: - _ . ! ~ * ' ( )
 * c is evaluated several times, so pass an expression without side effects. */
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
416
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
417
+ (c) == ')')
418
/* True when c is alphanumeric, an IS_MARK character, or one of: % ; : & = + $ ,
 * parse_url_char() uses this to decide which bytes may stay in the server part
 * of a URL. c is evaluated several times. */
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
419
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
420
+ (c) == '$' || (c) == ',')
421
+
422
/* Looks c up in the `tokens` table (not shown in this excerpt), indexing by
 * the value of c converted to unsigned char. The argument is parenthesised so
 * the cast applies to the whole expression: with `v + 1` the old form indexed
 * `(unsigned char)v + 1`, which can reach 256 and leave the table. */
#define STRICT_TOKEN(c) (tokens[(unsigned char)(c)])
362
423
 
363
424
  #if HTTP_PARSER_STRICT
364
425
  #define TOKEN(c) (tokens[(unsigned char)c])
365
- #define IS_URL_CHAR(c) (normal_url_char[(unsigned char) (c)])
426
+ #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
366
427
  #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
367
428
  #else
368
429
  #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
369
430
  #define IS_URL_CHAR(c) \
370
- (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
431
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
371
432
  #define IS_HOST_CHAR(c) \
372
433
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
373
434
  #endif
374
435
 
436
/**
 * Verify that a char is acceptable inside a header: CR, LF, horizontal
 * tab (9), any visible (printable) US-ASCII character, or %x80-FF.
 * Everything else at or below 31, and DEL (127), is rejected.
 *
 * The argument is fully parenthesized so that expression arguments such as
 * `v & 0xff` are classified as a whole. It is still evaluated several times,
 * so it must not have side effects.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
375
442
 
376
443
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
377
444
 
@@ -401,7 +468,7 @@ static struct {
401
468
  };
402
469
  #undef HTTP_STRERROR_GEN
403
470
 
404
- int http_message_needs_eof(http_parser *parser);
471
+ int http_message_needs_eof(const http_parser *parser);
405
472
 
406
473
  /* Our URL parser.
407
474
  *
@@ -417,7 +484,15 @@ int http_message_needs_eof(http_parser *parser);
417
484
  static enum state
418
485
  parse_url_char(enum state s, const char ch)
419
486
  {
420
- assert(!isspace(ch));
487
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
488
+ return s_dead;
489
+ }
490
+
491
+ #if HTTP_PARSER_STRICT
492
+ if (ch == '\t' || ch == '\f') {
493
+ return s_dead;
494
+ }
495
+ #endif
421
496
 
422
497
  switch (s) {
423
498
  case s_req_spaces_before_url:
@@ -455,67 +530,33 @@ parse_url_char(enum state s, const char ch)
455
530
 
456
531
  case s_req_schema_slash_slash:
457
532
  if (ch == '/') {
458
- return s_req_host_start;
533
+ return s_req_server_start;
459
534
  }
460
535
 
461
536
  break;
462
537
 
463
- case s_req_host_start:
464
- if (ch == '[') {
465
- return s_req_host_v6_start;
466
- }
467
-
468
- if (IS_HOST_CHAR(ch)) {
469
- return s_req_host;
470
- }
471
-
472
- break;
473
-
474
- case s_req_host:
475
- if (IS_HOST_CHAR(ch)) {
476
- return s_req_host;
477
- }
478
-
479
- /* FALLTHROUGH */
480
- case s_req_host_v6_end:
481
- switch (ch) {
482
- case ':':
483
- return s_req_port_start;
484
-
485
- case '/':
486
- return s_req_path;
487
-
488
- case '?':
489
- return s_req_query_string_start;
538
+ case s_req_server_with_at:
539
+ if (ch == '@') {
540
+ return s_dead;
490
541
  }
491
542
 
492
- break;
493
-
494
- case s_req_host_v6:
495
- if (ch == ']') {
496
- return s_req_host_v6_end;
543
+ /* FALLTHROUGH */
544
+ case s_req_server_start:
545
+ case s_req_server:
546
+ if (ch == '/') {
547
+ return s_req_path;
497
548
  }
498
549
 
499
- /* FALLTHROUGH */
500
- case s_req_host_v6_start:
501
- if (IS_HEX(ch) || ch == ':') {
502
- return s_req_host_v6;
550
+ if (ch == '?') {
551
+ return s_req_query_string_start;
503
552
  }
504
- break;
505
553
 
506
- case s_req_port:
507
- switch (ch) {
508
- case '/':
509
- return s_req_path;
510
-
511
- case '?':
512
- return s_req_query_string_start;
554
+ if (ch == '@') {
555
+ return s_req_server_with_at;
513
556
  }
514
557
 
515
- /* FALLTHROUGH */
516
- case s_req_port_start:
517
- if (IS_NUM(ch)) {
518
- return s_req_port;
558
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
559
+ return s_req_server;
519
560
  }
520
561
 
521
562
  break;
@@ -600,6 +641,9 @@ size_t http_parser_execute (http_parser *parser,
600
641
  const char *header_value_mark = 0;
601
642
  const char *url_mark = 0;
602
643
  const char *body_mark = 0;
644
+ const char *status_mark = 0;
645
+ enum state p_state = (enum state) parser->state;
646
+ const unsigned int lenient = parser->lenient_http_headers;
603
647
 
604
648
  /* We're in an error state. Don't bother doing anything. */
605
649
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
@@ -607,7 +651,7 @@ size_t http_parser_execute (http_parser *parser,
607
651
  }
608
652
 
609
653
  if (len == 0) {
610
- switch (parser->state) {
654
+ switch (CURRENT_STATE()) {
611
655
  case s_body_identity_eof:
612
656
  /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
613
657
  * we got paused.
@@ -628,50 +672,45 @@ size_t http_parser_execute (http_parser *parser,
628
672
  }
629
673
 
630
674
 
631
- if (parser->state == s_header_field)
675
+ if (CURRENT_STATE() == s_header_field)
632
676
  header_field_mark = data;
633
- if (parser->state == s_header_value)
677
+ if (CURRENT_STATE() == s_header_value)
634
678
  header_value_mark = data;
635
- switch (parser->state) {
679
+ switch (CURRENT_STATE()) {
636
680
  case s_req_path:
637
681
  case s_req_schema:
638
682
  case s_req_schema_slash:
639
683
  case s_req_schema_slash_slash:
640
- case s_req_host_start:
641
- case s_req_host_v6_start:
642
- case s_req_host_v6:
643
- case s_req_host_v6_end:
644
- case s_req_host:
645
- case s_req_port_start:
646
- case s_req_port:
684
+ case s_req_server_start:
685
+ case s_req_server:
686
+ case s_req_server_with_at:
647
687
  case s_req_query_string_start:
648
688
  case s_req_query_string:
649
689
  case s_req_fragment_start:
650
690
  case s_req_fragment:
651
691
  url_mark = data;
652
692
  break;
693
+ case s_res_status:
694
+ status_mark = data;
695
+ break;
696
+ default:
697
+ break;
653
698
  }
654
699
 
655
700
  for (p=data; p != data + len; p++) {
656
701
  ch = *p;
657
702
 
658
- if (PARSING_HEADER(parser->state)) {
659
- ++parser->nread;
660
- /* Buffer overflow attack */
661
- if (parser->nread > HTTP_MAX_HEADER_SIZE) {
662
- SET_ERRNO(HPE_HEADER_OVERFLOW);
663
- goto error;
664
- }
665
- }
703
+ if (PARSING_HEADER(CURRENT_STATE()))
704
+ COUNT_HEADER_SIZE(1);
666
705
 
667
- reexecute_byte:
668
- switch (parser->state) {
706
+ reexecute:
707
+ switch (CURRENT_STATE()) {
669
708
 
670
709
  case s_dead:
671
710
  /* this state is used after a 'Connection: close' message
672
711
  * the parser will error out if it reads another message
673
712
  */
674
- if (ch == CR || ch == LF)
713
+ if (LIKELY(ch == CR || ch == LF))
675
714
  break;
676
715
 
677
716
  SET_ERRNO(HPE_CLOSED_CONNECTION);
@@ -685,13 +724,13 @@ size_t http_parser_execute (http_parser *parser,
685
724
  parser->content_length = ULLONG_MAX;
686
725
 
687
726
  if (ch == 'H') {
688
- parser->state = s_res_or_resp_H;
727
+ UPDATE_STATE(s_res_or_resp_H);
689
728
 
690
729
  CALLBACK_NOTIFY(message_begin);
691
730
  } else {
692
731
  parser->type = HTTP_REQUEST;
693
- parser->state = s_start_req;
694
- goto reexecute_byte;
732
+ UPDATE_STATE(s_start_req);
733
+ REEXECUTE();
695
734
  }
696
735
 
697
736
  break;
@@ -700,9 +739,9 @@ size_t http_parser_execute (http_parser *parser,
700
739
  case s_res_or_resp_H:
701
740
  if (ch == 'T') {
702
741
  parser->type = HTTP_RESPONSE;
703
- parser->state = s_res_HT;
742
+ UPDATE_STATE(s_res_HT);
704
743
  } else {
705
- if (ch != 'E') {
744
+ if (UNLIKELY(ch != 'E')) {
706
745
  SET_ERRNO(HPE_INVALID_CONSTANT);
707
746
  goto error;
708
747
  }
@@ -710,7 +749,7 @@ size_t http_parser_execute (http_parser *parser,
710
749
  parser->type = HTTP_REQUEST;
711
750
  parser->method = HTTP_HEAD;
712
751
  parser->index = 2;
713
- parser->state = s_req_method;
752
+ UPDATE_STATE(s_req_method);
714
753
  }
715
754
  break;
716
755
 
@@ -721,7 +760,7 @@ size_t http_parser_execute (http_parser *parser,
721
760
 
722
761
  switch (ch) {
723
762
  case 'H':
724
- parser->state = s_res_H;
763
+ UPDATE_STATE(s_res_H);
725
764
  break;
726
765
 
727
766
  case CR:
@@ -739,90 +778,63 @@ size_t http_parser_execute (http_parser *parser,
739
778
 
740
779
  case s_res_H:
741
780
  STRICT_CHECK(ch != 'T');
742
- parser->state = s_res_HT;
781
+ UPDATE_STATE(s_res_HT);
743
782
  break;
744
783
 
745
784
  case s_res_HT:
746
785
  STRICT_CHECK(ch != 'T');
747
- parser->state = s_res_HTT;
786
+ UPDATE_STATE(s_res_HTT);
748
787
  break;
749
788
 
750
789
  case s_res_HTT:
751
790
  STRICT_CHECK(ch != 'P');
752
- parser->state = s_res_HTTP;
791
+ UPDATE_STATE(s_res_HTTP);
753
792
  break;
754
793
 
755
794
  case s_res_HTTP:
756
795
  STRICT_CHECK(ch != '/');
757
- parser->state = s_res_first_http_major;
796
+ UPDATE_STATE(s_res_http_major);
758
797
  break;
759
798
 
760
- case s_res_first_http_major:
761
- if (ch < '0' || ch > '9') {
799
+ case s_res_http_major:
800
+ if (UNLIKELY(!IS_NUM(ch))) {
762
801
  SET_ERRNO(HPE_INVALID_VERSION);
763
802
  goto error;
764
803
  }
765
804
 
766
805
  parser->http_major = ch - '0';
767
- parser->state = s_res_http_major;
806
+ UPDATE_STATE(s_res_http_dot);
768
807
  break;
769
808
 
770
- /* major HTTP version or dot */
771
- case s_res_http_major:
809
+ case s_res_http_dot:
772
810
  {
773
- if (ch == '.') {
774
- parser->state = s_res_first_http_minor;
775
- break;
776
- }
777
-
778
- if (!IS_NUM(ch)) {
779
- SET_ERRNO(HPE_INVALID_VERSION);
780
- goto error;
781
- }
782
-
783
- parser->http_major *= 10;
784
- parser->http_major += ch - '0';
785
-
786
- if (parser->http_major > 999) {
811
+ if (UNLIKELY(ch != '.')) {
787
812
  SET_ERRNO(HPE_INVALID_VERSION);
788
813
  goto error;
789
814
  }
790
815
 
816
+ UPDATE_STATE(s_res_http_minor);
791
817
  break;
792
818
  }
793
819
 
794
- /* first digit of minor HTTP version */
795
- case s_res_first_http_minor:
796
- if (!IS_NUM(ch)) {
820
+ case s_res_http_minor:
821
+ if (UNLIKELY(!IS_NUM(ch))) {
797
822
  SET_ERRNO(HPE_INVALID_VERSION);
798
823
  goto error;
799
824
  }
800
825
 
801
826
  parser->http_minor = ch - '0';
802
- parser->state = s_res_http_minor;
827
+ UPDATE_STATE(s_res_http_end);
803
828
  break;
804
829
 
805
- /* minor HTTP version or end of request line */
806
- case s_res_http_minor:
830
+ case s_res_http_end:
807
831
  {
808
- if (ch == ' ') {
809
- parser->state = s_res_first_status_code;
810
- break;
811
- }
812
-
813
- if (!IS_NUM(ch)) {
814
- SET_ERRNO(HPE_INVALID_VERSION);
815
- goto error;
816
- }
817
-
818
- parser->http_minor *= 10;
819
- parser->http_minor += ch - '0';
820
-
821
- if (parser->http_minor > 999) {
832
+ if (UNLIKELY(ch != ' ')) {
822
833
  SET_ERRNO(HPE_INVALID_VERSION);
823
834
  goto error;
824
835
  }
825
836
 
837
+ UPDATE_STATE(s_res_first_status_code);
826
838
  break;
827
839
  }
828
840
 
@@ -837,7 +849,7 @@ size_t http_parser_execute (http_parser *parser,
837
849
  goto error;
838
850
  }
839
851
  parser->status_code = ch - '0';
840
- parser->state = s_res_status_code;
852
+ UPDATE_STATE(s_res_status_code);
841
853
  break;
842
854
  }
843
855
 
@@ -846,13 +858,12 @@ size_t http_parser_execute (http_parser *parser,
846
858
  if (!IS_NUM(ch)) {
847
859
  switch (ch) {
848
860
  case ' ':
849
- parser->state = s_res_status;
861
+ UPDATE_STATE(s_res_status_start);
850
862
  break;
851
863
  case CR:
852
- parser->state = s_res_line_almost_done;
853
- break;
854
864
  case LF:
855
- parser->state = s_header_field_start;
865
+ UPDATE_STATE(s_res_status_start);
866
+ REEXECUTE();
856
867
  break;
857
868
  default:
858
869
  SET_ERRNO(HPE_INVALID_STATUS);
@@ -864,7 +875,7 @@ size_t http_parser_execute (http_parser *parser,
864
875
  parser->status_code *= 10;
865
876
  parser->status_code += ch - '0';
866
877
 
867
- if (parser->status_code > 999) {
878
+ if (UNLIKELY(parser->status_code > 999)) {
868
879
  SET_ERRNO(HPE_INVALID_STATUS);
869
880
  goto error;
870
881
  }
@@ -872,23 +883,36 @@ size_t http_parser_execute (http_parser *parser,
872
883
  break;
873
884
  }
874
885
 
886
+ case s_res_status_start:
887
+ {
888
+ MARK(status);
889
+ UPDATE_STATE(s_res_status);
890
+ parser->index = 0;
891
+
892
+ if (ch == CR || ch == LF)
893
+ REEXECUTE();
894
+
895
+ break;
896
+ }
897
+
875
898
  case s_res_status:
876
- /* the human readable status. e.g. "NOT FOUND"
877
- * we are not humans so just ignore this */
878
899
  if (ch == CR) {
879
- parser->state = s_res_line_almost_done;
900
+ UPDATE_STATE(s_res_line_almost_done);
901
+ CALLBACK_DATA(status);
880
902
  break;
881
903
  }
882
904
 
883
905
  if (ch == LF) {
884
- parser->state = s_header_field_start;
906
+ UPDATE_STATE(s_header_field_start);
907
+ CALLBACK_DATA(status);
885
908
  break;
886
909
  }
910
+
887
911
  break;
888
912
 
889
913
  case s_res_line_almost_done:
890
914
  STRICT_CHECK(ch != LF);
891
- parser->state = s_header_field_start;
915
+ UPDATE_STATE(s_header_field_start);
892
916
  break;
893
917
 
894
918
  case s_start_req:
@@ -898,7 +922,7 @@ size_t http_parser_execute (http_parser *parser,
898
922
  parser->flags = 0;
899
923
  parser->content_length = ULLONG_MAX;
900
924
 
901
- if (!IS_ALPHA(ch)) {
925
+ if (UNLIKELY(!IS_ALPHA(ch))) {
902
926
  SET_ERRNO(HPE_INVALID_METHOD);
903
927
  goto error;
904
928
  }
@@ -906,26 +930,28 @@ size_t http_parser_execute (http_parser *parser,
906
930
  parser->method = (enum http_method) 0;
907
931
  parser->index = 1;
908
932
  switch (ch) {
933
+ case 'A': parser->method = HTTP_ACL; break;
934
+ case 'B': parser->method = HTTP_BIND; break;
909
935
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
910
936
  case 'D': parser->method = HTTP_DELETE; break;
911
937
  case 'G': parser->method = HTTP_GET; break;
912
938
  case 'H': parser->method = HTTP_HEAD; break;
913
- case 'L': parser->method = HTTP_LOCK; break;
914
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
939
+ case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
940
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
915
941
  case 'N': parser->method = HTTP_NOTIFY; break;
916
942
  case 'O': parser->method = HTTP_OPTIONS; break;
917
943
  case 'P': parser->method = HTTP_POST;
918
944
  /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
919
945
  break;
920
- case 'R': parser->method = HTTP_REPORT; break;
921
- case 'S': parser->method = HTTP_SUBSCRIBE; break;
946
+ case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
947
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
922
948
  case 'T': parser->method = HTTP_TRACE; break;
923
- case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
949
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
924
950
  default:
925
951
  SET_ERRNO(HPE_INVALID_METHOD);
926
952
  goto error;
927
953
  }
928
- parser->state = s_req_method;
954
+ UPDATE_STATE(s_req_method);
929
955
 
930
956
  CALLBACK_NOTIFY(message_begin);
931
957
 
@@ -935,54 +961,47 @@ size_t http_parser_execute (http_parser *parser,
935
961
  case s_req_method:
936
962
  {
937
963
  const char *matcher;
938
- if (ch == '\0') {
964
+ if (UNLIKELY(ch == '\0')) {
939
965
  SET_ERRNO(HPE_INVALID_METHOD);
940
966
  goto error;
941
967
  }
942
968
 
943
969
  matcher = method_strings[parser->method];
944
970
  if (ch == ' ' && matcher[parser->index] == '\0') {
945
- parser->state = s_req_spaces_before_url;
971
+ UPDATE_STATE(s_req_spaces_before_url);
946
972
  } else if (ch == matcher[parser->index]) {
947
973
  ; /* nada */
948
- } else if (parser->method == HTTP_CONNECT) {
949
- if (parser->index == 1 && ch == 'H') {
950
- parser->method = HTTP_CHECKOUT;
951
- } else if (parser->index == 2 && ch == 'P') {
952
- parser->method = HTTP_COPY;
953
- } else {
954
- goto error;
955
- }
956
- } else if (parser->method == HTTP_MKCOL) {
957
- if (parser->index == 1 && ch == 'O') {
958
- parser->method = HTTP_MOVE;
959
- } else if (parser->index == 1 && ch == 'E') {
960
- parser->method = HTTP_MERGE;
961
- } else if (parser->index == 1 && ch == '-') {
962
- parser->method = HTTP_MSEARCH;
963
- } else if (parser->index == 2 && ch == 'A') {
964
- parser->method = HTTP_MKACTIVITY;
965
- } else {
966
- goto error;
967
- }
968
- } else if (parser->index == 1 && parser->method == HTTP_POST) {
969
- if (ch == 'R') {
970
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
971
- } else if (ch == 'U') {
972
- parser->method = HTTP_PUT; /* or HTTP_PURGE */
973
- } else if (ch == 'A') {
974
- parser->method = HTTP_PATCH;
975
- } else {
976
- goto error;
977
- }
978
- } else if (parser->index == 2) {
979
- if (parser->method == HTTP_PUT) {
980
- if (ch == 'R') parser->method = HTTP_PURGE;
981
- } else if (parser->method == HTTP_UNLOCK) {
982
- if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
974
+ } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
975
+
976
+ switch (parser->method << 16 | parser->index << 8 | ch) {
977
+ #define XX(meth, pos, ch, new_meth) \
978
+ case (HTTP_##meth << 16 | pos << 8 | ch): \
979
+ parser->method = HTTP_##new_meth; break;
980
+
981
+ XX(POST, 1, 'U', PUT)
982
+ XX(POST, 1, 'A', PATCH)
983
+ XX(POST, 1, 'R', PROPFIND)
984
+ XX(PUT, 2, 'R', PURGE)
985
+ XX(CONNECT, 1, 'H', CHECKOUT)
986
+ XX(CONNECT, 2, 'P', COPY)
987
+ XX(MKCOL, 1, 'O', MOVE)
988
+ XX(MKCOL, 1, 'E', MERGE)
989
+ XX(MKCOL, 1, '-', MSEARCH)
990
+ XX(MKCOL, 2, 'A', MKACTIVITY)
991
+ XX(MKCOL, 3, 'A', MKCALENDAR)
992
+ XX(SUBSCRIBE, 1, 'E', SEARCH)
993
+ XX(SUBSCRIBE, 1, 'O', SOURCE)
994
+ XX(REPORT, 2, 'B', REBIND)
995
+ XX(PROPFIND, 4, 'P', PROPPATCH)
996
+ XX(LOCK, 1, 'I', LINK)
997
+ XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
998
+ XX(UNLOCK, 2, 'B', UNBIND)
999
+ XX(UNLOCK, 3, 'I', UNLINK)
1000
+ #undef XX
1001
+ default:
1002
+ SET_ERRNO(HPE_INVALID_METHOD);
1003
+ goto error;
983
1004
  }
984
- } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
985
- parser->method = HTTP_PROPPATCH;
986
1005
  } else {
987
1006
  SET_ERRNO(HPE_INVALID_METHOD);
988
1007
  goto error;
@@ -998,11 +1017,11 @@ size_t http_parser_execute (http_parser *parser,
998
1017
 
999
1018
  MARK(url);
1000
1019
  if (parser->method == HTTP_CONNECT) {
1001
- parser->state = s_req_host_start;
1020
+ UPDATE_STATE(s_req_server_start);
1002
1021
  }
1003
1022
 
1004
- parser->state = parse_url_char((enum state)parser->state, ch);
1005
- if (parser->state == s_dead) {
1023
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1024
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1006
1025
  SET_ERRNO(HPE_INVALID_URL);
1007
1026
  goto error;
1008
1027
  }
@@ -1013,10 +1032,7 @@ size_t http_parser_execute (http_parser *parser,
1013
1032
  case s_req_schema:
1014
1033
  case s_req_schema_slash:
1015
1034
  case s_req_schema_slash_slash:
1016
- case s_req_host_start:
1017
- case s_req_host_v6_start:
1018
- case s_req_host_v6:
1019
- case s_req_port_start:
1035
+ case s_req_server_start:
1020
1036
  {
1021
1037
  switch (ch) {
1022
1038
  /* No whitespace allowed here */
@@ -1026,8 +1042,8 @@ size_t http_parser_execute (http_parser *parser,
1026
1042
  SET_ERRNO(HPE_INVALID_URL);
1027
1043
  goto error;
1028
1044
  default:
1029
- parser->state = parse_url_char((enum state)parser->state, ch);
1030
- if (parser->state == s_dead) {
1045
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1046
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1031
1047
  SET_ERRNO(HPE_INVALID_URL);
1032
1048
  goto error;
1033
1049
  }
@@ -1036,9 +1052,8 @@ size_t http_parser_execute (http_parser *parser,
1036
1052
  break;
1037
1053
  }
1038
1054
 
1039
- case s_req_host:
1040
- case s_req_host_v6_end:
1041
- case s_req_port:
1055
+ case s_req_server:
1056
+ case s_req_server_with_at:
1042
1057
  case s_req_path:
1043
1058
  case s_req_query_string_start:
1044
1059
  case s_req_query_string:
@@ -1047,21 +1062,21 @@ size_t http_parser_execute (http_parser *parser,
1047
1062
  {
1048
1063
  switch (ch) {
1049
1064
  case ' ':
1050
- parser->state = s_req_http_start;
1065
+ UPDATE_STATE(s_req_http_start);
1051
1066
  CALLBACK_DATA(url);
1052
1067
  break;
1053
1068
  case CR:
1054
1069
  case LF:
1055
1070
  parser->http_major = 0;
1056
1071
  parser->http_minor = 9;
1057
- parser->state = (ch == CR) ?
1072
+ UPDATE_STATE((ch == CR) ?
1058
1073
  s_req_line_almost_done :
1059
- s_header_field_start;
1074
+ s_header_field_start);
1060
1075
  CALLBACK_DATA(url);
1061
1076
  break;
1062
1077
  default:
1063
- parser->state = parse_url_char((enum state)parser->state, ch);
1064
- if (parser->state == s_dead) {
1078
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1079
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1065
1080
  SET_ERRNO(HPE_INVALID_URL);
1066
1081
  goto error;
1067
1082
  }
@@ -1072,7 +1087,7 @@ size_t http_parser_execute (http_parser *parser,
1072
1087
  case s_req_http_start:
1073
1088
  switch (ch) {
1074
1089
  case 'H':
1075
- parser->state = s_req_http_H;
1090
+ UPDATE_STATE(s_req_http_H);
1076
1091
  break;
1077
1092
  case ' ':
1078
1093
  break;
@@ -1084,130 +1099,101 @@ size_t http_parser_execute (http_parser *parser,
1084
1099
 
1085
1100
  case s_req_http_H:
1086
1101
  STRICT_CHECK(ch != 'T');
1087
- parser->state = s_req_http_HT;
1102
+ UPDATE_STATE(s_req_http_HT);
1088
1103
  break;
1089
1104
 
1090
1105
  case s_req_http_HT:
1091
1106
  STRICT_CHECK(ch != 'T');
1092
- parser->state = s_req_http_HTT;
1107
+ UPDATE_STATE(s_req_http_HTT);
1093
1108
  break;
1094
1109
 
1095
1110
  case s_req_http_HTT:
1096
1111
  STRICT_CHECK(ch != 'P');
1097
- parser->state = s_req_http_HTTP;
1112
+ UPDATE_STATE(s_req_http_HTTP);
1098
1113
  break;
1099
1114
 
1100
1115
  case s_req_http_HTTP:
1101
1116
  STRICT_CHECK(ch != '/');
1102
- parser->state = s_req_first_http_major;
1117
+ UPDATE_STATE(s_req_http_major);
1103
1118
  break;
1104
1119
 
1105
- /* first digit of major HTTP version */
1106
- case s_req_first_http_major:
1107
- if (ch < '1' || ch > '9') {
1120
+ case s_req_http_major:
1121
+ if (UNLIKELY(!IS_NUM(ch))) {
1108
1122
  SET_ERRNO(HPE_INVALID_VERSION);
1109
1123
  goto error;
1110
1124
  }
1111
1125
 
1112
1126
  parser->http_major = ch - '0';
1113
- parser->state = s_req_http_major;
1127
+ UPDATE_STATE(s_req_http_dot);
1114
1128
  break;
1115
1129
 
1116
- /* major HTTP version or dot */
1117
- case s_req_http_major:
1130
+ case s_req_http_dot:
1118
1131
  {
1119
- if (ch == '.') {
1120
- parser->state = s_req_first_http_minor;
1121
- break;
1122
- }
1123
-
1124
- if (!IS_NUM(ch)) {
1125
- SET_ERRNO(HPE_INVALID_VERSION);
1126
- goto error;
1127
- }
1128
-
1129
- parser->http_major *= 10;
1130
- parser->http_major += ch - '0';
1131
-
1132
- if (parser->http_major > 999) {
1132
+ if (UNLIKELY(ch != '.')) {
1133
1133
  SET_ERRNO(HPE_INVALID_VERSION);
1134
1134
  goto error;
1135
1135
  }
1136
1136
 
1137
+ UPDATE_STATE(s_req_http_minor);
1137
1138
  break;
1138
1139
  }
1139
1140
 
1140
- /* first digit of minor HTTP version */
1141
- case s_req_first_http_minor:
1142
- if (!IS_NUM(ch)) {
1141
+ case s_req_http_minor:
1142
+ if (UNLIKELY(!IS_NUM(ch))) {
1143
1143
  SET_ERRNO(HPE_INVALID_VERSION);
1144
1144
  goto error;
1145
1145
  }
1146
1146
 
1147
1147
  parser->http_minor = ch - '0';
1148
- parser->state = s_req_http_minor;
1148
+ UPDATE_STATE(s_req_http_end);
1149
1149
  break;
1150
1150
 
1151
- /* minor HTTP version or end of request line */
1152
- case s_req_http_minor:
1151
+ case s_req_http_end:
1153
1152
  {
1154
1153
  if (ch == CR) {
1155
- parser->state = s_req_line_almost_done;
1154
+ UPDATE_STATE(s_req_line_almost_done);
1156
1155
  break;
1157
1156
  }
1158
1157
 
1159
1158
  if (ch == LF) {
1160
- parser->state = s_header_field_start;
1159
+ UPDATE_STATE(s_header_field_start);
1161
1160
  break;
1162
1161
  }
1163
1162
 
1164
- /* XXX allow spaces after digit? */
1165
-
1166
- if (!IS_NUM(ch)) {
1167
- SET_ERRNO(HPE_INVALID_VERSION);
1168
- goto error;
1169
- }
1170
-
1171
- parser->http_minor *= 10;
1172
- parser->http_minor += ch - '0';
1173
-
1174
- if (parser->http_minor > 999) {
1175
- SET_ERRNO(HPE_INVALID_VERSION);
1176
- goto error;
1177
- }
1178
-
1163
+ SET_ERRNO(HPE_INVALID_VERSION);
1164
+ goto error;
1179
1165
  break;
1180
1166
  }
1181
1167
 
1182
1168
  /* end of request line */
1183
1169
  case s_req_line_almost_done:
1184
1170
  {
1185
- if (ch != LF) {
1171
+ if (UNLIKELY(ch != LF)) {
1186
1172
  SET_ERRNO(HPE_LF_EXPECTED);
1187
1173
  goto error;
1188
1174
  }
1189
1175
 
1190
- parser->state = s_header_field_start;
1176
+ UPDATE_STATE(s_header_field_start);
1191
1177
  break;
1192
1178
  }
1193
1179
 
1194
1180
  case s_header_field_start:
1195
1181
  {
1196
1182
  if (ch == CR) {
1197
- parser->state = s_headers_almost_done;
1183
+ UPDATE_STATE(s_headers_almost_done);
1198
1184
  break;
1199
1185
  }
1200
1186
 
1201
1187
  if (ch == LF) {
1202
1188
  /* they might be just sending \n instead of \r\n so this would be
1203
1189
  * the second \n to denote the end of headers*/
1204
- parser->state = s_headers_almost_done;
1205
- goto reexecute_byte;
1190
+ UPDATE_STATE(s_headers_almost_done);
1191
+ REEXECUTE();
1206
1192
  }
1207
1193
 
1208
1194
  c = TOKEN(ch);
1209
1195
 
1210
- if (!c) {
1196
+ if (UNLIKELY(!c)) {
1211
1197
  SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1212
1198
  goto error;
1213
1199
  }
@@ -1215,7 +1201,7 @@ size_t http_parser_execute (http_parser *parser,
1215
1201
  MARK(header_field);
1216
1202
 
1217
1203
  parser->index = 0;
1218
- parser->state = s_header_field;
1204
+ UPDATE_STATE(s_header_field);
1219
1205
 
1220
1206
  switch (c) {
1221
1207
  case 'c':
@@ -1243,9 +1229,14 @@ size_t http_parser_execute (http_parser *parser,
1243
1229
 
1244
1230
  case s_header_field:
1245
1231
  {
1246
- c = TOKEN(ch);
1232
+ const char* start = p;
1233
+ for (; p != data + len; p++) {
1234
+ ch = *p;
1235
+ c = TOKEN(ch);
1236
+
1237
+ if (!c)
1238
+ break;
1247
1239
 
1248
- if (c) {
1249
1240
  switch (parser->header_state) {
1250
1241
  case h_general:
1251
1242
  break;
@@ -1346,23 +1337,17 @@ size_t http_parser_execute (http_parser *parser,
1346
1337
  assert(0 && "Unknown header_state");
1347
1338
  break;
1348
1339
  }
1349
- break;
1350
1340
  }
1351
1341
 
1352
- if (ch == ':') {
1353
- parser->state = s_header_value_start;
1354
- CALLBACK_DATA(header_field);
1355
- break;
1356
- }
1342
+ COUNT_HEADER_SIZE(p - start);
1357
1343
 
1358
- if (ch == CR) {
1359
- parser->state = s_header_almost_done;
1360
- CALLBACK_DATA(header_field);
1344
+ if (p == data + len) {
1345
+ --p;
1361
1346
  break;
1362
1347
  }
1363
1348
 
1364
- if (ch == LF) {
1365
- parser->state = s_header_field_start;
1349
+ if (ch == ':') {
1350
+ UPDATE_STATE(s_header_value_discard_ws);
1366
1351
  CALLBACK_DATA(header_field);
1367
1352
  break;
1368
1353
  }
@@ -1371,28 +1356,28 @@ size_t http_parser_execute (http_parser *parser,
1371
1356
  goto error;
1372
1357
  }
1373
1358
 
1374
- case s_header_value_start:
1375
- {
1359
+ case s_header_value_discard_ws:
1376
1360
  if (ch == ' ' || ch == '\t') break;
1377
1361
 
1378
- MARK(header_value);
1379
-
1380
- parser->state = s_header_value;
1381
- parser->index = 0;
1382
-
1383
1362
  if (ch == CR) {
1384
- parser->header_state = h_general;
1385
- parser->state = s_header_almost_done;
1386
- CALLBACK_DATA(header_value);
1363
+ UPDATE_STATE(s_header_value_discard_ws_almost_done);
1387
1364
  break;
1388
1365
  }
1389
1366
 
1390
1367
  if (ch == LF) {
1391
- parser->state = s_header_field_start;
1392
- CALLBACK_DATA(header_value);
1368
+ UPDATE_STATE(s_header_value_discard_lws);
1393
1369
  break;
1394
1370
  }
1395
1371
 
1372
+ /* FALLTHROUGH */
1373
+
1374
+ case s_header_value_start:
1375
+ {
1376
+ MARK(header_value);
1377
+
1378
+ UPDATE_STATE(s_header_value);
1379
+ parser->index = 0;
1380
+
1396
1381
  c = LOWER(ch);
1397
1382
 
1398
1383
  switch (parser->header_state) {
@@ -1411,12 +1396,19 @@ size_t http_parser_execute (http_parser *parser,
1411
1396
  break;
1412
1397
 
1413
1398
  case h_content_length:
1414
- if (!IS_NUM(ch)) {
1399
+ if (UNLIKELY(!IS_NUM(ch))) {
1415
1400
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1416
1401
  goto error;
1417
1402
  }
1418
1403
 
1404
+ if (parser->flags & F_CONTENTLENGTH) {
1405
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1406
+ goto error;
1407
+ }
1408
+
1409
+ parser->flags |= F_CONTENTLENGTH;
1419
1410
  parser->content_length = ch - '0';
1411
+ parser->header_state = h_content_length_num;
1420
1412
  break;
1421
1413
 
1422
1414
  case h_connection:
@@ -1426,11 +1418,17 @@ size_t http_parser_execute (http_parser *parser,
1426
1418
  /* looking for 'Connection: close' */
1427
1419
  } else if (c == 'c') {
1428
1420
  parser->header_state = h_matching_connection_close;
1421
+ } else if (c == 'u') {
1422
+ parser->header_state = h_matching_connection_upgrade;
1429
1423
  } else {
1430
- parser->header_state = h_general;
1424
+ parser->header_state = h_matching_connection_token;
1431
1425
  }
1432
1426
  break;
1433
1427
 
1428
+ /* Multi-value `Connection` header */
1429
+ case h_matching_connection_token_start:
1430
+ break;
1431
+
1434
1432
  default:
1435
1433
  parser->header_state = h_general;
1436
1434
  break;
@@ -1440,107 +1438,228 @@ size_t http_parser_execute (http_parser *parser,
1440
1438
 
1441
1439
  case s_header_value:
1442
1440
  {
1441
+ const char* start = p;
1442
+ enum header_states h_state = (enum header_states) parser->header_state;
1443
+ for (; p != data + len; p++) {
1444
+ ch = *p;
1445
+ if (ch == CR) {
1446
+ UPDATE_STATE(s_header_almost_done);
1447
+ parser->header_state = h_state;
1448
+ CALLBACK_DATA(header_value);
1449
+ break;
1450
+ }
1443
1451
 
1444
- if (ch == CR) {
1445
- parser->state = s_header_almost_done;
1446
- CALLBACK_DATA(header_value);
1447
- break;
1448
- }
1452
+ if (ch == LF) {
1453
+ UPDATE_STATE(s_header_almost_done);
1454
+ COUNT_HEADER_SIZE(p - start);
1455
+ parser->header_state = h_state;
1456
+ CALLBACK_DATA_NOADVANCE(header_value);
1457
+ REEXECUTE();
1458
+ }
1449
1459
 
1450
- if (ch == LF) {
1451
- parser->state = s_header_almost_done;
1452
- CALLBACK_DATA_NOADVANCE(header_value);
1453
- goto reexecute_byte;
1454
- }
1460
+ if (!lenient && !IS_HEADER_CHAR(ch)) {
1461
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1462
+ goto error;
1463
+ }
1455
1464
 
1456
- c = LOWER(ch);
1465
+ c = LOWER(ch);
1457
1466
 
1458
- switch (parser->header_state) {
1459
- case h_general:
1460
- break;
1467
+ switch (h_state) {
1468
+ case h_general:
1469
+ {
1470
+ const char* p_cr;
1471
+ const char* p_lf;
1472
+ size_t limit = data + len - p;
1473
+
1474
+ limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1475
+
1476
+ p_cr = (const char*) memchr(p, CR, limit);
1477
+ p_lf = (const char*) memchr(p, LF, limit);
1478
+ if (p_cr != NULL) {
1479
+ if (p_lf != NULL && p_cr >= p_lf)
1480
+ p = p_lf;
1481
+ else
1482
+ p = p_cr;
1483
+ } else if (UNLIKELY(p_lf != NULL)) {
1484
+ p = p_lf;
1485
+ } else {
1486
+ p = data + len;
1487
+ }
1488
+ --p;
1461
1489
 
1462
- case h_connection:
1463
- case h_transfer_encoding:
1464
- assert(0 && "Shouldn't get here.");
1465
- break;
1490
+ break;
1491
+ }
1466
1492
 
1467
- case h_content_length:
1468
- {
1469
- uint64_t t;
1493
+ case h_connection:
1494
+ case h_transfer_encoding:
1495
+ assert(0 && "Shouldn't get here.");
1496
+ break;
1470
1497
 
1471
- if (ch == ' ') break;
1498
+ case h_content_length:
1499
+ if (ch == ' ') break;
1500
+ h_state = h_content_length_num;
1501
+ /* FALLTHROUGH */
1472
1502
 
1473
- if (!IS_NUM(ch)) {
1474
- SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1475
- goto error;
1476
- }
1503
+ case h_content_length_num:
1504
+ {
1505
+ uint64_t t;
1506
+
1507
+ if (ch == ' ') {
1508
+ h_state = h_content_length_ws;
1509
+ break;
1510
+ }
1511
+
1512
+ if (UNLIKELY(!IS_NUM(ch))) {
1513
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1514
+ parser->header_state = h_state;
1515
+ goto error;
1516
+ }
1517
+
1518
+ t = parser->content_length;
1519
+ t *= 10;
1520
+ t += ch - '0';
1477
1521
 
1478
- t = parser->content_length;
1479
- t *= 10;
1480
- t += ch - '0';
1522
+ /* Overflow? Test against a conservative limit for simplicity. */
1523
+ if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1524
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1525
+ parser->header_state = h_state;
1526
+ goto error;
1527
+ }
1528
+
1529
+ parser->content_length = t;
1530
+ break;
1531
+ }
1481
1532
 
1482
- /* Overflow? */
1483
- if (t < parser->content_length || t == ULLONG_MAX) {
1533
+ case h_content_length_ws:
1534
+ if (ch == ' ') break;
1484
1535
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1536
+ parser->header_state = h_state;
1485
1537
  goto error;
1486
- }
1487
1538
 
1488
- parser->content_length = t;
1489
- break;
1490
- }
1539
+ /* Transfer-Encoding: chunked */
1540
+ case h_matching_transfer_encoding_chunked:
1541
+ parser->index++;
1542
+ if (parser->index > sizeof(CHUNKED)-1
1543
+ || c != CHUNKED[parser->index]) {
1544
+ h_state = h_general;
1545
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1546
+ h_state = h_transfer_encoding_chunked;
1547
+ }
1548
+ break;
1491
1549
 
1492
- /* Transfer-Encoding: chunked */
1493
- case h_matching_transfer_encoding_chunked:
1494
- parser->index++;
1495
- if (parser->index > sizeof(CHUNKED)-1
1496
- || c != CHUNKED[parser->index]) {
1497
- parser->header_state = h_general;
1498
- } else if (parser->index == sizeof(CHUNKED)-2) {
1499
- parser->header_state = h_transfer_encoding_chunked;
1500
- }
1501
- break;
1550
+ case h_matching_connection_token_start:
1551
+ /* looking for 'Connection: keep-alive' */
1552
+ if (c == 'k') {
1553
+ h_state = h_matching_connection_keep_alive;
1554
+ /* looking for 'Connection: close' */
1555
+ } else if (c == 'c') {
1556
+ h_state = h_matching_connection_close;
1557
+ } else if (c == 'u') {
1558
+ h_state = h_matching_connection_upgrade;
1559
+ } else if (STRICT_TOKEN(c)) {
1560
+ h_state = h_matching_connection_token;
1561
+ } else if (c == ' ' || c == '\t') {
1562
+ /* Skip lws */
1563
+ } else {
1564
+ h_state = h_general;
1565
+ }
1566
+ break;
1502
1567
 
1503
- /* looking for 'Connection: keep-alive' */
1504
- case h_matching_connection_keep_alive:
1505
- parser->index++;
1506
- if (parser->index > sizeof(KEEP_ALIVE)-1
1507
- || c != KEEP_ALIVE[parser->index]) {
1508
- parser->header_state = h_general;
1509
- } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1510
- parser->header_state = h_connection_keep_alive;
1511
- }
1512
- break;
1568
+ /* looking for 'Connection: keep-alive' */
1569
+ case h_matching_connection_keep_alive:
1570
+ parser->index++;
1571
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1572
+ || c != KEEP_ALIVE[parser->index]) {
1573
+ h_state = h_matching_connection_token;
1574
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1575
+ h_state = h_connection_keep_alive;
1576
+ }
1577
+ break;
1513
1578
 
1514
- /* looking for 'Connection: close' */
1515
- case h_matching_connection_close:
1516
- parser->index++;
1517
- if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1518
- parser->header_state = h_general;
1519
- } else if (parser->index == sizeof(CLOSE)-2) {
1520
- parser->header_state = h_connection_close;
1521
- }
1522
- break;
1579
+ /* looking for 'Connection: close' */
1580
+ case h_matching_connection_close:
1581
+ parser->index++;
1582
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1583
+ h_state = h_matching_connection_token;
1584
+ } else if (parser->index == sizeof(CLOSE)-2) {
1585
+ h_state = h_connection_close;
1586
+ }
1587
+ break;
1523
1588
 
1524
- case h_transfer_encoding_chunked:
1525
- case h_connection_keep_alive:
1526
- case h_connection_close:
1527
- if (ch != ' ') parser->header_state = h_general;
1528
- break;
1589
+ /* looking for 'Connection: upgrade' */
1590
+ case h_matching_connection_upgrade:
1591
+ parser->index++;
1592
+ if (parser->index > sizeof(UPGRADE) - 1 ||
1593
+ c != UPGRADE[parser->index]) {
1594
+ h_state = h_matching_connection_token;
1595
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1596
+ h_state = h_connection_upgrade;
1597
+ }
1598
+ break;
1529
1599
 
1530
- default:
1531
- parser->state = s_header_value;
1532
- parser->header_state = h_general;
1533
- break;
1600
+ case h_matching_connection_token:
1601
+ if (ch == ',') {
1602
+ h_state = h_matching_connection_token_start;
1603
+ parser->index = 0;
1604
+ }
1605
+ break;
1606
+
1607
+ case h_transfer_encoding_chunked:
1608
+ if (ch != ' ') h_state = h_general;
1609
+ break;
1610
+
1611
+ case h_connection_keep_alive:
1612
+ case h_connection_close:
1613
+ case h_connection_upgrade:
1614
+ if (ch == ',') {
1615
+ if (h_state == h_connection_keep_alive) {
1616
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1617
+ } else if (h_state == h_connection_close) {
1618
+ parser->flags |= F_CONNECTION_CLOSE;
1619
+ } else if (h_state == h_connection_upgrade) {
1620
+ parser->flags |= F_CONNECTION_UPGRADE;
1621
+ }
1622
+ h_state = h_matching_connection_token_start;
1623
+ parser->index = 0;
1624
+ } else if (ch != ' ') {
1625
+ h_state = h_matching_connection_token;
1626
+ }
1627
+ break;
1628
+
1629
+ default:
1630
+ UPDATE_STATE(s_header_value);
1631
+ h_state = h_general;
1632
+ break;
1633
+ }
1534
1634
  }
1635
+ parser->header_state = h_state;
1636
+
1637
+ COUNT_HEADER_SIZE(p - start);
1638
+
1639
+ if (p == data + len)
1640
+ --p;
1535
1641
  break;
1536
1642
  }
1537
1643
 
1538
1644
  case s_header_almost_done:
1539
1645
  {
1540
- STRICT_CHECK(ch != LF);
1646
+ if (UNLIKELY(ch != LF)) {
1647
+ SET_ERRNO(HPE_LF_EXPECTED);
1648
+ goto error;
1649
+ }
1650
+
1651
+ UPDATE_STATE(s_header_value_lws);
1652
+ break;
1653
+ }
1541
1654
 
1542
- parser->state = s_header_value_lws;
1655
+ case s_header_value_lws:
1656
+ {
1657
+ if (ch == ' ' || ch == '\t') {
1658
+ UPDATE_STATE(s_header_value_start);
1659
+ REEXECUTE();
1660
+ }
1543
1661
 
1662
+ /* finished the header */
1544
1663
  switch (parser->header_state) {
1545
1664
  case h_connection_keep_alive:
1546
1665
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
@@ -1551,23 +1670,53 @@ size_t http_parser_execute (http_parser *parser,
1551
1670
  case h_transfer_encoding_chunked:
1552
1671
  parser->flags |= F_CHUNKED;
1553
1672
  break;
1673
+ case h_connection_upgrade:
1674
+ parser->flags |= F_CONNECTION_UPGRADE;
1675
+ break;
1554
1676
  default:
1555
1677
  break;
1556
1678
  }
1557
1679
 
1680
+ UPDATE_STATE(s_header_field_start);
1681
+ REEXECUTE();
1682
+ }
1683
+
1684
+ case s_header_value_discard_ws_almost_done:
1685
+ {
1686
+ STRICT_CHECK(ch != LF);
1687
+ UPDATE_STATE(s_header_value_discard_lws);
1558
1688
  break;
1559
1689
  }
1560
1690
 
1561
- case s_header_value_lws:
1691
+ case s_header_value_discard_lws:
1562
1692
  {
1563
- if (ch == ' ' || ch == '\t')
1564
- parser->state = s_header_value_start;
1565
- else
1566
- {
1567
- parser->state = s_header_field_start;
1568
- goto reexecute_byte;
1693
+ if (ch == ' ' || ch == '\t') {
1694
+ UPDATE_STATE(s_header_value_discard_ws);
1695
+ break;
1696
+ } else {
1697
+ switch (parser->header_state) {
1698
+ case h_connection_keep_alive:
1699
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1700
+ break;
1701
+ case h_connection_close:
1702
+ parser->flags |= F_CONNECTION_CLOSE;
1703
+ break;
1704
+ case h_connection_upgrade:
1705
+ parser->flags |= F_CONNECTION_UPGRADE;
1706
+ break;
1707
+ case h_transfer_encoding_chunked:
1708
+ parser->flags |= F_CHUNKED;
1709
+ break;
1710
+ default:
1711
+ break;
1712
+ }
1713
+
1714
+ /* header value was empty */
1715
+ MARK(header_value);
1716
+ UPDATE_STATE(s_header_field_start);
1717
+ CALLBACK_DATA_NOADVANCE(header_value);
1718
+ REEXECUTE();
1569
1719
  }
1570
- break;
1571
1720
  }
1572
1721
 
1573
1722
  case s_headers_almost_done:
@@ -1576,16 +1725,33 @@ size_t http_parser_execute (http_parser *parser,
1576
1725
 
1577
1726
  if (parser->flags & F_TRAILING) {
1578
1727
  /* End of a chunked request */
1579
- parser->state = NEW_MESSAGE();
1580
- CALLBACK_NOTIFY(message_complete);
1581
- break;
1728
+ UPDATE_STATE(s_message_done);
1729
+ CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1730
+ REEXECUTE();
1731
+ }
1732
+
1733
+ /* Cannot use chunked encoding and a content-length header together
1734
+ per the HTTP specification. */
1735
+ if ((parser->flags & F_CHUNKED) &&
1736
+ (parser->flags & F_CONTENTLENGTH)) {
1737
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1738
+ goto error;
1582
1739
  }
1583
1740
 
1584
- parser->state = s_headers_done;
1741
+ UPDATE_STATE(s_headers_done);
1585
1742
 
1586
1743
  /* Set this here so that on_headers_complete() callbacks can see it */
1587
- parser->upgrade =
1588
- (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1744
+ if ((parser->flags & F_UPGRADE) &&
1745
+ (parser->flags & F_CONNECTION_UPGRADE)) {
1746
+ /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1747
+ * mandatory only when it is a 101 Switching Protocols response,
1748
+ * otherwise it is purely informational, to announce support.
1749
+ */
1750
+ parser->upgrade =
1751
+ (parser->type == HTTP_REQUEST || parser->status_code == 101);
1752
+ } else {
1753
+ parser->upgrade = (parser->method == HTTP_CONNECT);
1754
+ }
1589
1755
 
1590
1756
  /* Here we call the headers_complete callback. This is somewhat
1591
1757
  * different than other callbacks because if the user returns 1, we
@@ -1601,59 +1767,66 @@ size_t http_parser_execute (http_parser *parser,
1601
1767
  case 0:
1602
1768
  break;
1603
1769
 
1770
+ case 2:
1771
+ parser->upgrade = 1;
1772
+
1773
+ /* FALLTHROUGH */
1604
1774
  case 1:
1605
1775
  parser->flags |= F_SKIPBODY;
1606
1776
  break;
1607
1777
 
1608
1778
  default:
1609
1779
  SET_ERRNO(HPE_CB_headers_complete);
1610
- return p - data; /* Error */
1780
+ RETURN(p - data); /* Error */
1611
1781
  }
1612
1782
  }
1613
1783
 
1614
1784
  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1615
- return p - data;
1785
+ RETURN(p - data);
1616
1786
  }
1617
1787
 
1618
- goto reexecute_byte;
1788
+ REEXECUTE();
1619
1789
  }
1620
1790
 
1621
1791
  case s_headers_done:
1622
1792
  {
1793
+ int hasBody;
1623
1794
  STRICT_CHECK(ch != LF);
1624
1795
 
1625
1796
  parser->nread = 0;
1626
1797
 
1627
- /* Exit, the rest of the connect is in a different protocol. */
1628
- if (parser->upgrade) {
1629
- parser->state = NEW_MESSAGE();
1798
+ hasBody = parser->flags & F_CHUNKED ||
1799
+ (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1800
+ if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1801
+ (parser->flags & F_SKIPBODY) || !hasBody)) {
1802
+ /* Exit, the rest of the message is in a different protocol. */
1803
+ UPDATE_STATE(NEW_MESSAGE());
1630
1804
  CALLBACK_NOTIFY(message_complete);
1631
- return (p - data) + 1;
1805
+ RETURN((p - data) + 1);
1632
1806
  }
1633
1807
 
1634
1808
  if (parser->flags & F_SKIPBODY) {
1635
- parser->state = NEW_MESSAGE();
1809
+ UPDATE_STATE(NEW_MESSAGE());
1636
1810
  CALLBACK_NOTIFY(message_complete);
1637
1811
  } else if (parser->flags & F_CHUNKED) {
1638
1812
  /* chunked encoding - ignore Content-Length header */
1639
- parser->state = s_chunk_size_start;
1813
+ UPDATE_STATE(s_chunk_size_start);
1640
1814
  } else {
1641
1815
  if (parser->content_length == 0) {
1642
1816
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1643
- parser->state = NEW_MESSAGE();
1817
+ UPDATE_STATE(NEW_MESSAGE());
1644
1818
  CALLBACK_NOTIFY(message_complete);
1645
1819
  } else if (parser->content_length != ULLONG_MAX) {
1646
1820
  /* Content-Length header given and non-zero */
1647
- parser->state = s_body_identity;
1821
+ UPDATE_STATE(s_body_identity);
1648
1822
  } else {
1649
- if (parser->type == HTTP_REQUEST ||
1650
- !http_message_needs_eof(parser)) {
1823
+ if (!http_message_needs_eof(parser)) {
1651
1824
  /* Assume content-length 0 - read the next */
1652
- parser->state = NEW_MESSAGE();
1825
+ UPDATE_STATE(NEW_MESSAGE());
1653
1826
  CALLBACK_NOTIFY(message_complete);
1654
1827
  } else {
1655
1828
  /* Read body until EOF */
1656
- parser->state = s_body_identity_eof;
1829
+ UPDATE_STATE(s_body_identity_eof);
1657
1830
  }
1658
1831
  }
1659
1832
  }
@@ -1679,7 +1852,7 @@ size_t http_parser_execute (http_parser *parser,
1679
1852
  p += to_read - 1;
1680
1853
 
1681
1854
  if (parser->content_length == 0) {
1682
- parser->state = s_message_done;
1855
+ UPDATE_STATE(s_message_done);
1683
1856
 
1684
1857
  /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1685
1858
  *
@@ -1691,7 +1864,7 @@ size_t http_parser_execute (http_parser *parser,
1691
1864
  * important for applications, but let's keep it for now.
1692
1865
  */
1693
1866
  CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1694
- goto reexecute_byte;
1867
+ REEXECUTE();
1695
1868
  }
1696
1869
 
1697
1870
  break;
@@ -1705,8 +1878,12 @@ size_t http_parser_execute (http_parser *parser,
1705
1878
  break;
1706
1879
 
1707
1880
  case s_message_done:
1708
- parser->state = NEW_MESSAGE();
1881
+ UPDATE_STATE(NEW_MESSAGE());
1709
1882
  CALLBACK_NOTIFY(message_complete);
1883
+ if (parser->upgrade) {
1884
+ /* Exit, the rest of the message is in a different protocol. */
1885
+ RETURN((p - data) + 1);
1886
+ }
1710
1887
  break;
1711
1888
 
1712
1889
  case s_chunk_size_start:
@@ -1715,13 +1892,13 @@ size_t http_parser_execute (http_parser *parser,
1715
1892
  assert(parser->flags & F_CHUNKED);
1716
1893
 
1717
1894
  unhex_val = unhex[(unsigned char)ch];
1718
- if (unhex_val == -1) {
1895
+ if (UNLIKELY(unhex_val == -1)) {
1719
1896
  SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1720
1897
  goto error;
1721
1898
  }
1722
1899
 
1723
1900
  parser->content_length = unhex_val;
1724
- parser->state = s_chunk_size;
1901
+ UPDATE_STATE(s_chunk_size);
1725
1902
  break;
1726
1903
  }
1727
1904
 
@@ -1732,7 +1909,7 @@ size_t http_parser_execute (http_parser *parser,
1732
1909
  assert(parser->flags & F_CHUNKED);
1733
1910
 
1734
1911
  if (ch == CR) {
1735
- parser->state = s_chunk_size_almost_done;
1912
+ UPDATE_STATE(s_chunk_size_almost_done);
1736
1913
  break;
1737
1914
  }
1738
1915
 
@@ -1740,7 +1917,7 @@ size_t http_parser_execute (http_parser *parser,
1740
1917
 
1741
1918
  if (unhex_val == -1) {
1742
1919
  if (ch == ';' || ch == ' ') {
1743
- parser->state = s_chunk_parameters;
1920
+ UPDATE_STATE(s_chunk_parameters);
1744
1921
  break;
1745
1922
  }
1746
1923
 
@@ -1752,8 +1929,8 @@ size_t http_parser_execute (http_parser *parser,
1752
1929
  t *= 16;
1753
1930
  t += unhex_val;
1754
1931
 
1755
- /* Overflow? */
1756
- if (t < parser->content_length || t == ULLONG_MAX) {
1932
+ /* Overflow? Test against a conservative limit for simplicity. */
1933
+ if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1757
1934
  SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1758
1935
  goto error;
1759
1936
  }
@@ -1767,7 +1944,7 @@ size_t http_parser_execute (http_parser *parser,
1767
1944
  assert(parser->flags & F_CHUNKED);
1768
1945
  /* just ignore this shit. TODO check for overflow */
1769
1946
  if (ch == CR) {
1770
- parser->state = s_chunk_size_almost_done;
1947
+ UPDATE_STATE(s_chunk_size_almost_done);
1771
1948
  break;
1772
1949
  }
1773
1950
  break;
@@ -1782,10 +1959,11 @@ size_t http_parser_execute (http_parser *parser,
1782
1959
 
1783
1960
  if (parser->content_length == 0) {
1784
1961
  parser->flags |= F_TRAILING;
1785
- parser->state = s_header_field_start;
1962
+ UPDATE_STATE(s_header_field_start);
1786
1963
  } else {
1787
- parser->state = s_chunk_data;
1964
+ UPDATE_STATE(s_chunk_data);
1788
1965
  }
1966
+ CALLBACK_NOTIFY(chunk_header);
1789
1967
  break;
1790
1968
  }
1791
1969
 
@@ -1806,7 +1984,7 @@ size_t http_parser_execute (http_parser *parser,
1806
1984
  p += to_read - 1;
1807
1985
 
1808
1986
  if (parser->content_length == 0) {
1809
- parser->state = s_chunk_data_almost_done;
1987
+ UPDATE_STATE(s_chunk_data_almost_done);
1810
1988
  }
1811
1989
 
1812
1990
  break;
@@ -1816,7 +1994,7 @@ size_t http_parser_execute (http_parser *parser,
1816
1994
  assert(parser->flags & F_CHUNKED);
1817
1995
  assert(parser->content_length == 0);
1818
1996
  STRICT_CHECK(ch != CR);
1819
- parser->state = s_chunk_data_done;
1997
+ UPDATE_STATE(s_chunk_data_done);
1820
1998
  CALLBACK_DATA(body);
1821
1999
  break;
1822
2000
 
@@ -1824,7 +2002,8 @@ size_t http_parser_execute (http_parser *parser,
1824
2002
  assert(parser->flags & F_CHUNKED);
1825
2003
  STRICT_CHECK(ch != LF);
1826
2004
  parser->nread = 0;
1827
- parser->state = s_chunk_size_start;
2005
+ UPDATE_STATE(s_chunk_size_start);
2006
+ CALLBACK_NOTIFY(chunk_complete);
1828
2007
  break;
1829
2008
 
1830
2009
  default:
@@ -1847,27 +2026,29 @@ size_t http_parser_execute (http_parser *parser,
1847
2026
  assert(((header_field_mark ? 1 : 0) +
1848
2027
  (header_value_mark ? 1 : 0) +
1849
2028
  (url_mark ? 1 : 0) +
1850
- (body_mark ? 1 : 0)) <= 1);
2029
+ (body_mark ? 1 : 0) +
2030
+ (status_mark ? 1 : 0)) <= 1);
1851
2031
 
1852
2032
  CALLBACK_DATA_NOADVANCE(header_field);
1853
2033
  CALLBACK_DATA_NOADVANCE(header_value);
1854
2034
  CALLBACK_DATA_NOADVANCE(url);
1855
2035
  CALLBACK_DATA_NOADVANCE(body);
2036
+ CALLBACK_DATA_NOADVANCE(status);
1856
2037
 
1857
- return len;
2038
+ RETURN(len);
1858
2039
 
1859
2040
  error:
1860
2041
  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1861
2042
  SET_ERRNO(HPE_UNKNOWN);
1862
2043
  }
1863
2044
 
1864
- return (p - data);
2045
+ RETURN(p - data);
1865
2046
  }
1866
2047
 
1867
2048
 
1868
2049
  /* Does the parser need to see an EOF to find the end of the message? */
1869
2050
  int
1870
- http_message_needs_eof (http_parser *parser)
2051
+ http_message_needs_eof (const http_parser *parser)
1871
2052
  {
1872
2053
  if (parser->type == HTTP_REQUEST) {
1873
2054
  return 0;
@@ -1890,7 +2071,7 @@ http_message_needs_eof (http_parser *parser)
1890
2071
 
1891
2072
 
1892
2073
  int
1893
- http_should_keep_alive (http_parser *parser)
2074
+ http_should_keep_alive (const http_parser *parser)
1894
2075
  {
1895
2076
  if (parser->http_major > 0 && parser->http_minor > 0) {
1896
2077
  /* HTTP/1.1 */
@@ -1908,9 +2089,10 @@ http_should_keep_alive (http_parser *parser)
1908
2089
  }
1909
2090
 
1910
2091
 
1911
- const char * http_method_str (enum http_method m)
2092
+ const char *
2093
+ http_method_str (enum http_method m)
1912
2094
  {
1913
- return method_strings[m];
2095
+ return ELEM_AT(method_strings, m, "<unknown>");
1914
2096
  }
1915
2097
 
1916
2098
 
@@ -1925,18 +2107,193 @@ http_parser_init (http_parser *parser, enum http_parser_type t)
1925
2107
  parser->http_errno = HPE_OK;
1926
2108
  }
1927
2109
 
2110
+ void
2111
+ http_parser_settings_init(http_parser_settings *settings)
2112
+ {
2113
+ memset(settings, 0, sizeof(*settings));
2114
+ }
2115
+
1928
2116
  const char *
1929
2117
  http_errno_name(enum http_errno err) {
1930
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2118
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
1931
2119
  return http_strerror_tab[err].name;
1932
2120
  }
1933
2121
 
1934
2122
  const char *
1935
2123
  http_errno_description(enum http_errno err) {
1936
- assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
2124
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
1937
2125
  return http_strerror_tab[err].description;
1938
2126
  }
1939
2127
 
2128
+ static enum http_host_state
2129
+ http_parse_host_char(enum http_host_state s, const char ch) {
2130
+ switch(s) {
2131
+ case s_http_userinfo:
2132
+ case s_http_userinfo_start:
2133
+ if (ch == '@') {
2134
+ return s_http_host_start;
2135
+ }
2136
+
2137
+ if (IS_USERINFO_CHAR(ch)) {
2138
+ return s_http_userinfo;
2139
+ }
2140
+ break;
2141
+
2142
+ case s_http_host_start:
2143
+ if (ch == '[') {
2144
+ return s_http_host_v6_start;
2145
+ }
2146
+
2147
+ if (IS_HOST_CHAR(ch)) {
2148
+ return s_http_host;
2149
+ }
2150
+
2151
+ break;
2152
+
2153
+ case s_http_host:
2154
+ if (IS_HOST_CHAR(ch)) {
2155
+ return s_http_host;
2156
+ }
2157
+
2158
+ /* FALLTHROUGH */
2159
+ case s_http_host_v6_end:
2160
+ if (ch == ':') {
2161
+ return s_http_host_port_start;
2162
+ }
2163
+
2164
+ break;
2165
+
2166
+ case s_http_host_v6:
2167
+ if (ch == ']') {
2168
+ return s_http_host_v6_end;
2169
+ }
2170
+
2171
+ /* FALLTHROUGH */
2172
+ case s_http_host_v6_start:
2173
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
2174
+ return s_http_host_v6;
2175
+ }
2176
+
2177
+ if (s == s_http_host_v6 && ch == '%') {
2178
+ return s_http_host_v6_zone_start;
2179
+ }
2180
+ break;
2181
+
2182
+ case s_http_host_v6_zone:
2183
+ if (ch == ']') {
2184
+ return s_http_host_v6_end;
2185
+ }
2186
+
2187
+ /* FALLTHROUGH */
2188
+ case s_http_host_v6_zone_start:
2189
+ /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2190
+ if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2191
+ ch == '~') {
2192
+ return s_http_host_v6_zone;
2193
+ }
2194
+ break;
2195
+
2196
+ case s_http_host_port:
2197
+ case s_http_host_port_start:
2198
+ if (IS_NUM(ch)) {
2199
+ return s_http_host_port;
2200
+ }
2201
+
2202
+ break;
2203
+
2204
+ default:
2205
+ break;
2206
+ }
2207
+ return s_http_host_dead;
2208
+ }
2209
+
2210
+ static int
2211
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2212
+ enum http_host_state s;
2213
+
2214
+ const char *p;
2215
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2216
+
2217
+ assert(u->field_set & (1 << UF_HOST));
2218
+
2219
+ u->field_data[UF_HOST].len = 0;
2220
+
2221
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
2222
+
2223
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2224
+ enum http_host_state new_s = http_parse_host_char(s, *p);
2225
+
2226
+ if (new_s == s_http_host_dead) {
2227
+ return 1;
2228
+ }
2229
+
2230
+ switch(new_s) {
2231
+ case s_http_host:
2232
+ if (s != s_http_host) {
2233
+ u->field_data[UF_HOST].off = p - buf;
2234
+ }
2235
+ u->field_data[UF_HOST].len++;
2236
+ break;
2237
+
2238
+ case s_http_host_v6:
2239
+ if (s != s_http_host_v6) {
2240
+ u->field_data[UF_HOST].off = p - buf;
2241
+ }
2242
+ u->field_data[UF_HOST].len++;
2243
+ break;
2244
+
2245
+ case s_http_host_v6_zone_start:
2246
+ case s_http_host_v6_zone:
2247
+ u->field_data[UF_HOST].len++;
2248
+ break;
2249
+
2250
+ case s_http_host_port:
2251
+ if (s != s_http_host_port) {
2252
+ u->field_data[UF_PORT].off = p - buf;
2253
+ u->field_data[UF_PORT].len = 0;
2254
+ u->field_set |= (1 << UF_PORT);
2255
+ }
2256
+ u->field_data[UF_PORT].len++;
2257
+ break;
2258
+
2259
+ case s_http_userinfo:
2260
+ if (s != s_http_userinfo) {
2261
+ u->field_data[UF_USERINFO].off = p - buf ;
2262
+ u->field_data[UF_USERINFO].len = 0;
2263
+ u->field_set |= (1 << UF_USERINFO);
2264
+ }
2265
+ u->field_data[UF_USERINFO].len++;
2266
+ break;
2267
+
2268
+ default:
2269
+ break;
2270
+ }
2271
+ s = new_s;
2272
+ }
2273
+
2274
+ /* Make sure we don't end somewhere unexpected */
2275
+ switch (s) {
2276
+ case s_http_host_start:
2277
+ case s_http_host_v6_start:
2278
+ case s_http_host_v6:
2279
+ case s_http_host_v6_zone_start:
2280
+ case s_http_host_v6_zone:
2281
+ case s_http_host_port_start:
2282
+ case s_http_userinfo:
2283
+ case s_http_userinfo_start:
2284
+ return 1;
2285
+ default:
2286
+ break;
2287
+ }
2288
+
2289
+ return 0;
2290
+ }
2291
+
2292
+ void
2293
+ http_parser_url_init(struct http_parser_url *u) {
2294
+ memset(u, 0, sizeof(*u));
2295
+ }
2296
+
1940
2297
  int
1941
2298
  http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1942
2299
  struct http_parser_url *u)
@@ -1944,10 +2301,11 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1944
2301
  enum state s;
1945
2302
  const char *p;
1946
2303
  enum http_parser_url_fields uf, old_uf;
2304
+ int found_at = 0;
1947
2305
 
1948
2306
  u->port = u->field_set = 0;
1949
- s = is_connect ? s_req_host_start : s_req_spaces_before_url;
1950
- uf = old_uf = UF_MAX;
2307
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2308
+ old_uf = UF_MAX;
1951
2309
 
1952
2310
  for (p = buf; p < buf + buflen; p++) {
1953
2311
  s = parse_url_char(s, *p);
@@ -1960,10 +2318,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1960
2318
  /* Skip delimeters */
1961
2319
  case s_req_schema_slash:
1962
2320
  case s_req_schema_slash_slash:
1963
- case s_req_host_start:
1964
- case s_req_host_v6_start:
1965
- case s_req_host_v6_end:
1966
- case s_req_port_start:
2321
+ case s_req_server_start:
1967
2322
  case s_req_query_string_start:
1968
2323
  case s_req_fragment_start:
1969
2324
  continue;
@@ -1972,13 +2327,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1972
2327
  uf = UF_SCHEMA;
1973
2328
  break;
1974
2329
 
1975
- case s_req_host:
1976
- case s_req_host_v6:
1977
- uf = UF_HOST;
1978
- break;
2330
+ case s_req_server_with_at:
2331
+ found_at = 1;
1979
2332
 
1980
- case s_req_port:
1981
- uf = UF_PORT;
2333
+ /* FALLTHROUGH */
2334
+ case s_req_server:
2335
+ uf = UF_HOST;
1982
2336
  break;
1983
2337
 
1984
2338
  case s_req_path:
@@ -2011,30 +2365,46 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2011
2365
  old_uf = uf;
2012
2366
  }
2013
2367
 
2014
- /* CONNECT requests can only contain "hostname:port" */
2015
- if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2368
+ /* host must be present if there is a schema */
2369
+ /* parsing http:///toto will fail */
2370
+ if ((u->field_set & (1 << UF_SCHEMA)) &&
2371
+ (u->field_set & (1 << UF_HOST)) == 0) {
2016
2372
  return 1;
2017
2373
  }
2018
2374
 
2019
- /* Make sure we don't end somewhere unexpected */
2020
- switch (s) {
2021
- case s_req_host_v6_start:
2022
- case s_req_host_v6:
2023
- case s_req_host_v6_end:
2024
- case s_req_host:
2025
- case s_req_port_start:
2375
+ if (u->field_set & (1 << UF_HOST)) {
2376
+ if (http_parse_host(buf, u, found_at) != 0) {
2377
+ return 1;
2378
+ }
2379
+ }
2380
+
2381
+ /* CONNECT requests can only contain "hostname:port" */
2382
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2026
2383
  return 1;
2027
- default:
2028
- break;
2029
2384
  }
2030
2385
 
2031
2386
  if (u->field_set & (1 << UF_PORT)) {
2032
- /* Don't bother with endp; we've already validated the string */
2033
- unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2034
-
2035
- /* Ports have a max value of 2^16 */
2036
- if (v > 0xffff) {
2037
- return 1;
2387
+ uint16_t off;
2388
+ uint16_t len;
2389
+ const char* p;
2390
+ const char* end;
2391
+ unsigned long v;
2392
+
2393
+ off = u->field_data[UF_PORT].off;
2394
+ len = u->field_data[UF_PORT].len;
2395
+ end = buf + off + len;
2396
+
2397
+ /* NOTE: The characters are already validated and are in the [0-9] range */
2398
+ assert(off + len <= buflen && "Port number overflow");
2399
+ v = 0;
2400
+ for (p = buf + off; p < end; p++) {
2401
+ v *= 10;
2402
+ v += *p - '0';
2403
+
2404
+ /* Ports have a max value of 2^16 */
2405
+ if (v > 0xffff) {
2406
+ return 1;
2407
+ }
2038
2408
  }
2039
2409
 
2040
2410
  u->port = (uint16_t) v;
@@ -2056,3 +2426,15 @@ http_parser_pause(http_parser *parser, int paused) {
2056
2426
  assert(0 && "Attempting to pause parser in error state");
2057
2427
  }
2058
2428
  }
2429
+
2430
+ int
2431
+ http_body_is_final(const struct http_parser *parser) {
2432
+ return parser->state == s_message_done;
2433
+ }
2434
+
2435
+ unsigned long
2436
+ http_parser_version(void) {
2437
+ return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2438
+ HTTP_PARSER_VERSION_MINOR * 0x00100 |
2439
+ HTTP_PARSER_VERSION_PATCH * 0x00001;
2440
+ }