http_parser.rb 0.5.2 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/linux.yml +23 -0
  3. data/.github/workflows/windows.yml +23 -0
  4. data/.gitignore +5 -4
  5. data/.gitmodules +4 -4
  6. data/Gemfile +1 -1
  7. data/README.md +52 -47
  8. data/Rakefile +1 -0
  9. data/bench/standalone.rb +23 -0
  10. data/bench/thin.rb +1 -0
  11. data/ext/ruby_http_parser/extconf.rb +1 -1
  12. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +139 -83
  13. data/ext/ruby_http_parser/ruby_http_parser.c +40 -41
  14. data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
  15. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +5 -1
  16. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +133 -1
  17. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +6 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +1202 -671
  19. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
  20. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +172 -51
  21. data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
  22. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
  23. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +8 -3
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +35 -102
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +6 -6
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +775 -682
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +8 -4
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +70 -20
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +1 -1
  32. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +1 -0
  33. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +2 -1
  34. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +1 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +6 -17
  36. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +1 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +1 -0
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +1 -0
  39. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
  40. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +80 -9
  41. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +2 -1
  42. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +1637 -280
  43. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +230 -71
  44. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +68 -0
  45. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +1 -1
  46. data/ext/ruby_http_parser/vendor/http-parser/README.md +113 -38
  47. data/ext/ruby_http_parser/vendor/http-parser/bench.c +128 -0
  48. data/ext/ruby_http_parser/vendor/http-parser/contrib/parsertrace.c +157 -0
  49. data/ext/ruby_http_parser/vendor/http-parser/contrib/url_parser.c +47 -0
  50. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +1576 -780
  51. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +111 -0
  52. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +308 -58
  53. data/ext/ruby_http_parser/vendor/http-parser/test.c +2964 -460
  54. data/http_parser.rb.gemspec +14 -7
  55. data/spec/parser_spec.rb +196 -102
  56. data/spec/support/requests.json +236 -24
  57. data/spec/support/responses.json +202 -36
  58. data/tasks/compile.rake +2 -2
  59. data/tasks/fixtures.rake +8 -2
  60. data/tasks/spec.rake +1 -1
  61. metadata +141 -134
  62. data/Gemfile.lock +0 -32
  63. data/ext/ruby_http_parser/vendor/http-parser-java/compile +0 -1
  64. data/ext/ruby_http_parser/vendor/http-parser-java/test_permutations +0 -1
  65. data/ext/ruby_http_parser/vendor/http-parser-java/test_unit +0 -1
  66. data/ext/ruby_http_parser/vendor/http-parser-java/test_utf8 +0 -1
  67. data/ext/ruby_http_parser/vendor/http-parser/CONTRIBUTIONS +0 -4
@@ -0,0 +1,47 @@
1
+ #include "http_parser.h"
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+ /* Print u's field_set bitmask and port, then one line per field slot 0..UF_MAX-1; url is the string the offsets index into. */
5
+ void
6
+ dump_url (const char *url, const struct http_parser_url *u)
7
+ {
8
+ unsigned int i;
9
+ /* Summary line first: which slots are populated (bitmask) and the port value. */
10
+ printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port);
11
+ for (i = 0; i < UF_MAX; i++) {
12
+ if ((u->field_set & (1 << i)) == 0) { /* bit i clear: slot i is reported as unset */
13
+ printf("\tfield_data[%u]: unset\n", i);
14
+ continue;
15
+ }
16
+ /* len is passed twice: once for "len: %u" and once as the %.*s precision, so at most len bytes of url + off are printed. */
17
+ printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n",
18
+ i,
19
+ u->field_data[i].off,
20
+ u->field_data[i].len,
21
+ u->field_data[i].len,
22
+ url + u->field_data[i].off);
23
+ }
24
+ }
25
+ /* Command-line driver: parses argv[2] with http_parser_parse_url() and dumps the result. Returns 1 on bad usage, the parser's non-zero result on failure, 0 on success. */
26
+ int main(int argc, char ** argv) {
27
+ struct http_parser_url u;
28
+ int len, connect, result;
29
+ /* Exactly two arguments are required: the mode and the URL. */
30
+ if (argc != 3) {
31
+ printf("Syntax : %s connect|get url\n", argv[0]);
32
+ return 1;
33
+ }
34
+ len = strlen(argv[2]);
35
+ connect = strcmp("connect", argv[1]) == 0 ? 1 : 0; /* any argv[1] other than "connect" yields 0 */
36
+ printf("Parsing %s, connect %d\n", argv[2], connect);
37
+ /* Initialise u via http_parser_url_init(), then parse; a non-zero result is printed and used as the exit status. */
38
+ http_parser_url_init(&u);
39
+ result = http_parser_parse_url(argv[2], len, connect, &u);
40
+ if (result != 0) {
41
+ printf("Parse error : %d\n", result);
42
+ return result;
43
+ }
44
+ printf("Parse ok, result : \n");
45
+ dump_url(argv[2], &u);
46
+ return 0;
47
+ }
@@ -1,4 +1,4 @@
1
- /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
1
+ /* Copyright Joyent, Inc. and other Node contributors.
2
2
  *
3
3
  * Permission is hereby granted, free of charge, to any person obtaining a copy
4
4
  * of this software and associated documentation files (the "Software"), to
@@ -18,48 +18,142 @@
18
18
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
19
  * IN THE SOFTWARE.
20
20
  */
21
- #include <http_parser.h>
21
+ #include "http_parser.h"
22
22
  #include <assert.h>
23
23
  #include <stddef.h>
24
+ #include <ctype.h>
25
+ #include <string.h>
26
+ #include <limits.h>
24
27
 
28
+ #ifndef ULLONG_MAX
29
+ # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
30
+ #endif
25
31
 
26
32
  #ifndef MIN
27
33
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
28
34
  #endif
29
35
 
36
+ #ifndef ARRAY_SIZE
37
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
38
+ #endif
39
+
40
+ #ifndef BIT_AT
41
+ # define BIT_AT(a, i) \
42
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
43
+ (1 << ((unsigned int) (i) & 7))))
44
+ #endif
45
+
46
+ #ifndef ELEM_AT
47
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
48
+ #endif
30
49
 
31
- #define CALLBACK2(FOR) \
50
+ #define SET_ERRNO(e) \
32
51
  do { \
33
- if (settings->on_##FOR) { \
34
- if (0 != settings->on_##FOR(parser)) return (p - data); \
35
- } \
36
- } while (0)
52
+ parser->http_errno = (e); \
53
+ } while(0)
37
54
 
55
+ #define CURRENT_STATE() p_state
56
+ #define UPDATE_STATE(V) p_state = (enum state) (V);
57
+ #define RETURN(V) \
58
+ do { \
59
+ parser->state = CURRENT_STATE(); \
60
+ return (V); \
61
+ } while (0);
62
+ #define REEXECUTE() \
63
+ goto reexecute; \
38
64
 
39
- #define MARK(FOR) \
65
+
66
+ #ifdef __GNUC__
67
+ # define LIKELY(X) __builtin_expect(!!(X), 1)
68
+ # define UNLIKELY(X) __builtin_expect(!!(X), 0)
69
+ #else
70
+ # define LIKELY(X) (X)
71
+ # define UNLIKELY(X) (X)
72
+ #endif
73
+
74
+
75
+ /* Run the notify callback FOR, returning ER if it fails */
76
+ #define CALLBACK_NOTIFY_(FOR, ER) \
40
77
  do { \
41
- FOR##_mark = p; \
78
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
79
+ \
80
+ if (LIKELY(settings->on_##FOR)) { \
81
+ parser->state = CURRENT_STATE(); \
82
+ if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
83
+ SET_ERRNO(HPE_CB_##FOR); \
84
+ } \
85
+ UPDATE_STATE(parser->state); \
86
+ \
87
+ /* We either errored above or got paused; get out */ \
88
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
89
+ return (ER); \
90
+ } \
91
+ } \
42
92
  } while (0)
43
93
 
44
- #define CALLBACK_NOCLEAR(FOR) \
94
+ /* Run the notify callback FOR and consume the current byte */
95
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
96
+
97
+ /* Run the notify callback FOR and don't consume the current byte */
98
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
99
+
100
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
101
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
45
102
  do { \
103
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
104
+ \
46
105
  if (FOR##_mark) { \
47
- if (settings->on_##FOR) { \
48
- if (0 != settings->on_##FOR(parser, \
49
- FOR##_mark, \
50
- p - FOR##_mark)) \
51
- { \
52
- return (p - data); \
106
+ if (LIKELY(settings->on_##FOR)) { \
107
+ parser->state = CURRENT_STATE(); \
108
+ if (UNLIKELY(0 != \
109
+ settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
110
+ SET_ERRNO(HPE_CB_##FOR); \
111
+ } \
112
+ UPDATE_STATE(parser->state); \
113
+ \
114
+ /* We either errored above or got paused; get out */ \
115
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
116
+ return (ER); \
53
117
  } \
54
118
  } \
119
+ FOR##_mark = NULL; \
55
120
  } \
56
121
  } while (0)
57
122
 
123
+ /* Run the data callback FOR and consume the current byte */
124
+ #define CALLBACK_DATA(FOR) \
125
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
126
+
127
+ /* Run the data callback FOR and don't consume the current byte */
128
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
129
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
130
+
131
+ /* Set the mark FOR; non-destructive if mark is already set */
132
+ #define MARK(FOR) \
133
+ do { \
134
+ if (!FOR##_mark) { \
135
+ FOR##_mark = p; \
136
+ } \
137
+ } while (0)
58
138
 
59
- #define CALLBACK(FOR) \
139
+ /* Don't allow the total size of the HTTP headers (including the status
140
+ * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
141
+ * embedders against denial-of-service attacks where the attacker feeds
142
+ * us a never-ending header that the embedder keeps buffering.
143
+ *
144
+ * This check is arguably the responsibility of embedders but we're doing
145
+ * it on the embedder's behalf because most won't bother and this way we
146
+ * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
147
+ * than any reasonable request or response so this should never affect
148
+ * day-to-day operation.
149
+ */
150
+ #define COUNT_HEADER_SIZE(V) \
60
151
  do { \
61
- CALLBACK_NOCLEAR(FOR); \
62
- FOR##_mark = NULL; \
152
+ parser->nread += (V); \
153
+ if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \
154
+ SET_ERRNO(HPE_HEADER_OVERFLOW); \
155
+ goto error; \
156
+ } \
63
157
  } while (0)
64
158
 
65
159
 
@@ -74,29 +168,10 @@ do { \
74
168
 
75
169
 
76
170
  static const char *method_strings[] =
77
- { "DELETE"
78
- , "GET"
79
- , "HEAD"
80
- , "POST"
81
- , "PUT"
82
- , "CONNECT"
83
- , "OPTIONS"
84
- , "TRACE"
85
- , "COPY"
86
- , "LOCK"
87
- , "MKCOL"
88
- , "MOVE"
89
- , "PROPFIND"
90
- , "PROPPATCH"
91
- , "UNLOCK"
92
- , "REPORT"
93
- , "MKACTIVITY"
94
- , "CHECKOUT"
95
- , "MERGE"
96
- , "M-SEARCH"
97
- , "NOTIFY"
98
- , "SUBSCRIBE"
99
- , "UNSUBSCRIBE"
171
+ {
172
+ #define XX(num, name, string) #string,
173
+ HTTP_METHOD_MAP(XX)
174
+ #undef XX
100
175
  };
101
176
 
102
177
 
@@ -117,9 +192,9 @@ static const char tokens[256] = {
117
192
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
118
193
  0, 0, 0, 0, 0, 0, 0, 0,
119
194
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
120
- ' ', '!', '"', '#', '$', '%', '&', '\'',
195
+ 0, '!', 0, '#', '$', '%', '&', '\'',
121
196
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
122
- 0, 0, '*', '+', 0, '-', '.', '/',
197
+ 0, 0, '*', '+', 0, '-', '.', 0,
123
198
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
124
199
  '0', '1', '2', '3', '4', '5', '6', '7',
125
200
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -139,7 +214,7 @@ static const char tokens[256] = {
139
214
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
140
215
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
141
216
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
142
- 'x', 'y', 'z', 0, '|', '}', '~', 0 };
217
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
143
218
 
144
219
 
145
220
  static const int8_t unhex[256] =
@@ -154,61 +229,48 @@ static const int8_t unhex[256] =
154
229
  };
155
230
 
156
231
 
157
- static const uint8_t normal_url_char[256] = {
232
+ #if HTTP_PARSER_STRICT
233
+ # define T(v) 0
234
+ #else
235
+ # define T(v) v
236
+ #endif
237
+
238
+
239
+ static const uint8_t normal_url_char[32] = {
158
240
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
159
- 0, 0, 0, 0, 0, 0, 0, 0,
241
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
160
242
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
161
- 0, 0, 0, 0, 0, 0, 0, 0,
243
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
162
244
  /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
163
- 0, 0, 0, 0, 0, 0, 0, 0,
245
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
164
246
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
165
- 0, 0, 0, 0, 0, 0, 0, 0,
247
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
166
248
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
167
- 0, 1, 1, 0, 1, 1, 1, 1,
249
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
168
250
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
169
- 1, 1, 1, 1, 1, 1, 1, 1,
251
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
170
252
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
171
- 1, 1, 1, 1, 1, 1, 1, 1,
253
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
172
254
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
173
- 1, 1, 1, 1, 1, 1, 1, 0,
255
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
174
256
  /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
175
- 1, 1, 1, 1, 1, 1, 1, 1,
257
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
176
258
  /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
177
- 1, 1, 1, 1, 1, 1, 1, 1,
259
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
178
260
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
179
- 1, 1, 1, 1, 1, 1, 1, 1,
261
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
180
262
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
181
- 1, 1, 1, 1, 1, 1, 1, 1,
263
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
182
264
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
183
- 1, 1, 1, 1, 1, 1, 1, 1,
265
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
184
266
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
185
- 1, 1, 1, 1, 1, 1, 1, 1,
267
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
186
268
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
187
- 1, 1, 1, 1, 1, 1, 1, 1,
269
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
188
270
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
189
- 1, 1, 1, 1, 1, 1, 1, 0,
190
-
191
- /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
192
- encoded paths. This is out of spec, but clients generate this and most other
193
- HTTP servers support it. We should, too. */
194
-
195
- 1, 1, 1, 1, 1, 1, 1, 1,
196
- 1, 1, 1, 1, 1, 1, 1, 1,
197
- 1, 1, 1, 1, 1, 1, 1, 1,
198
- 1, 1, 1, 1, 1, 1, 1, 1,
199
- 1, 1, 1, 1, 1, 1, 1, 1,
200
- 1, 1, 1, 1, 1, 1, 1, 1,
201
- 1, 1, 1, 1, 1, 1, 1, 1,
202
- 1, 1, 1, 1, 1, 1, 1, 1,
203
- 1, 1, 1, 1, 1, 1, 1, 1,
204
- 1, 1, 1, 1, 1, 1, 1, 1,
205
- 1, 1, 1, 1, 1, 1, 1, 1,
206
- 1, 1, 1, 1, 1, 1, 1, 1,
207
- 1, 1, 1, 1, 1, 1, 1, 1,
208
- 1, 1, 1, 1, 1, 1, 1, 1,
209
- 1, 1, 1, 1, 1, 1, 1, 1,
210
- 1, 1, 1, 1, 1, 1, 1, 1 };
271
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
211
272
 
273
+ #undef T
212
274
 
213
275
  enum state
214
276
  { s_dead = 1 /* important that this is > 0 */
@@ -220,12 +282,13 @@ enum state
220
282
  , s_res_HT
221
283
  , s_res_HTT
222
284
  , s_res_HTTP
223
- , s_res_first_http_major
224
285
  , s_res_http_major
225
- , s_res_first_http_minor
286
+ , s_res_http_dot
226
287
  , s_res_http_minor
288
+ , s_res_http_end
227
289
  , s_res_first_status_code
228
290
  , s_res_status_code
291
+ , s_res_status_start
229
292
  , s_res_status
230
293
  , s_res_line_almost_done
231
294
 
@@ -236,8 +299,9 @@ enum state
236
299
  , s_req_schema
237
300
  , s_req_schema_slash
238
301
  , s_req_schema_slash_slash
239
- , s_req_host
240
- , s_req_port
302
+ , s_req_server_start
303
+ , s_req_server
304
+ , s_req_server_with_at
241
305
  , s_req_path
242
306
  , s_req_query_string_start
243
307
  , s_req_query_string
@@ -248,16 +312,20 @@ enum state
248
312
  , s_req_http_HT
249
313
  , s_req_http_HTT
250
314
  , s_req_http_HTTP
251
- , s_req_first_http_major
252
315
  , s_req_http_major
253
- , s_req_first_http_minor
316
+ , s_req_http_dot
254
317
  , s_req_http_minor
318
+ , s_req_http_end
255
319
  , s_req_line_almost_done
256
320
 
257
321
  , s_header_field_start
258
322
  , s_header_field
323
+ , s_header_value_discard_ws
324
+ , s_header_value_discard_ws_almost_done
325
+ , s_header_value_discard_lws
259
326
  , s_header_value_start
260
327
  , s_header_value
328
+ , s_header_value_lws
261
329
 
262
330
  , s_header_almost_done
263
331
 
@@ -265,9 +333,11 @@ enum state
265
333
  , s_chunk_size
266
334
  , s_chunk_parameters
267
335
  , s_chunk_size_almost_done
268
-
336
+
269
337
  , s_headers_almost_done
270
- /* Important: 's_headers_almost_done' must be the last 'header' state. All
338
+ , s_headers_done
339
+
340
+ /* Important: 's_headers_done' must be the last 'header' state. All
271
341
  * states beyond this must be 'body' states. It is used for overflow
272
342
  * checking. See the PARSING_HEADER() macro.
273
343
  */
@@ -278,10 +348,12 @@ enum state
278
348
 
279
349
  , s_body_identity
280
350
  , s_body_identity_eof
351
+
352
+ , s_message_done
281
353
  };
282
354
 
283
355
 
284
- #define PARSING_HEADER(state) (state <= s_headers_almost_done)
356
+ #define PARSING_HEADER(state) (state <= s_headers_done)
285
357
 
286
358
 
287
359
  enum header_states
@@ -298,40 +370,87 @@ enum header_states
298
370
 
299
371
  , h_connection
300
372
  , h_content_length
373
+ , h_content_length_num
374
+ , h_content_length_ws
301
375
  , h_transfer_encoding
302
376
  , h_upgrade
303
377
 
304
378
  , h_matching_transfer_encoding_chunked
379
+ , h_matching_connection_token_start
305
380
  , h_matching_connection_keep_alive
306
381
  , h_matching_connection_close
382
+ , h_matching_connection_upgrade
383
+ , h_matching_connection_token
307
384
 
308
385
  , h_transfer_encoding_chunked
309
386
  , h_connection_keep_alive
310
387
  , h_connection_close
388
+ , h_connection_upgrade
311
389
  };
312
390
 
391
+ enum http_host_state
392
+ {
393
+ s_http_host_dead = 1
394
+ , s_http_userinfo_start
395
+ , s_http_userinfo
396
+ , s_http_host_start
397
+ , s_http_host_v6_start
398
+ , s_http_host
399
+ , s_http_host_v6
400
+ , s_http_host_v6_end
401
+ , s_http_host_v6_zone_start
402
+ , s_http_host_v6_zone
403
+ , s_http_host_port_start
404
+ , s_http_host_port
405
+ };
406
+
407
+ /* Macros for character classes; they depend on strict-mode */
408
+ #define CR '\r'
409
+ #define LF '\n'
410
+ #define LOWER(c) (unsigned char)(c | 0x20)
411
+ #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
412
+ #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
413
+ #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
414
+ #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
415
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
416
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
417
+ (c) == ')')
418
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
419
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
420
+ (c) == '$' || (c) == ',')
421
+
422
+ #define STRICT_TOKEN(c) (tokens[(unsigned char)c])
313
423
 
314
- enum flags
315
- { F_CHUNKED = 1 << 0
316
- , F_CONNECTION_KEEP_ALIVE = 1 << 1
317
- , F_CONNECTION_CLOSE = 1 << 2
318
- , F_TRAILING = 1 << 3
319
- , F_UPGRADE = 1 << 4
320
- , F_SKIPBODY = 1 << 5
321
- };
322
-
323
-
324
- #define CR '\r'
325
- #define LF '\n'
326
- #define LOWER(c) (unsigned char)(c | 0x20)
327
- #define TOKEN(c) tokens[(unsigned char)c]
424
+ #if HTTP_PARSER_STRICT
425
+ #define TOKEN(c) (tokens[(unsigned char)c])
426
+ #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
427
+ #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
428
+ #else
429
+ #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
430
+ #define IS_URL_CHAR(c) \
431
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
432
+ #define IS_HOST_CHAR(c) \
433
+ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
434
+ #endif
328
435
 
436
+ /**
437
+ * Verify that a char is a valid visible (printable) US-ASCII
438
+ * character or %x80-FF
439
+ **/
440
+ #define IS_HEADER_CHAR(ch) \
441
+ (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
329
442
 
330
443
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
331
444
 
332
445
 
333
446
  #if HTTP_PARSER_STRICT
334
- # define STRICT_CHECK(cond) if (cond) goto error
447
+ # define STRICT_CHECK(cond) \
448
+ do { \
449
+ if (cond) { \
450
+ SET_ERRNO(HPE_STRICT); \
451
+ goto error; \
452
+ } \
453
+ } while (0)
335
454
  # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
336
455
  #else
337
456
  # define STRICT_CHECK(cond)
@@ -339,24 +458,205 @@ enum flags
339
458
  #endif
340
459
 
341
460
 
461
+ /* Map errno values to strings for human-readable output */
462
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
463
+ static struct {
464
+ const char *name;
465
+ const char *description;
466
+ } http_strerror_tab[] = {
467
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
468
+ };
469
+ #undef HTTP_STRERROR_GEN
470
+
471
+ int http_message_needs_eof(const http_parser *parser);
472
+
473
+ /* Our URL parser.
474
+ *
475
+ * This is designed to be shared by http_parser_execute() for URL validation,
476
+ * hence it has a state transition + byte-for-byte interface. In addition, it
477
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
478
+ * work of turning state transitions URL components for its API.
479
+ *
480
+ * This function should only be invoked with non-space characters. It is
481
+ * assumed that the caller cares about (and can detect) the transition between
482
+ * URL and non-URL states by looking for these.
483
+ */
484
+ static enum state /* returns the next URL state for ch, or s_dead when ch is not accepted in state s */
485
+ parse_url_char(enum state s, const char ch)
486
+ {
487
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
488
+ return s_dead;
489
+ }
490
+ /* Builds with HTTP_PARSER_STRICT set additionally reject tab and form feed in any state. */
491
+ #if HTTP_PARSER_STRICT
492
+ if (ch == '\t' || ch == '\f') {
493
+ return s_dead;
494
+ }
495
+ #endif
496
+ /* Per-state transitions; any case that breaks out of the switch ends at the final return s_dead. */
497
+ switch (s) {
498
+ case s_req_spaces_before_url:
499
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
500
+ * All methods except CONNECT are followed by '/' or '*'.
501
+ */
502
+
503
+ if (ch == '/' || ch == '*') {
504
+ return s_req_path;
505
+ }
506
+
507
+ if (IS_ALPHA(ch)) {
508
+ return s_req_schema;
509
+ }
510
+
511
+ break;
512
+
513
+ case s_req_schema:
514
+ if (IS_ALPHA(ch)) {
515
+ return s;
516
+ }
517
+
518
+ if (ch == ':') {
519
+ return s_req_schema_slash;
520
+ }
521
+
522
+ break;
523
+ /* After the ':' that ends the schema, two consecutive '/' are required to reach s_req_server_start. */
524
+ case s_req_schema_slash:
525
+ if (ch == '/') {
526
+ return s_req_schema_slash_slash;
527
+ }
528
+
529
+ break;
530
+
531
+ case s_req_schema_slash_slash:
532
+ if (ch == '/') {
533
+ return s_req_server_start;
534
+ }
535
+
536
+ break;
537
+ /* Once an '@' has been seen, another '@' is rejected; every other character is handled by the shared server cases below. */
538
+ case s_req_server_with_at:
539
+ if (ch == '@') {
540
+ return s_dead;
541
+ }
542
+
543
+ /* FALLTHROUGH */
544
+ case s_req_server_start:
545
+ case s_req_server:
546
+ if (ch == '/') {
547
+ return s_req_path;
548
+ }
549
+
550
+ if (ch == '?') {
551
+ return s_req_query_string_start;
552
+ }
553
+
554
+ if (ch == '@') {
555
+ return s_req_server_with_at;
556
+ }
557
+
558
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
559
+ return s_req_server;
560
+ }
561
+
562
+ break;
563
+ /* In the path, URL characters keep the state; '?' moves to the query string and '#' to the fragment. */
564
+ case s_req_path:
565
+ if (IS_URL_CHAR(ch)) {
566
+ return s;
567
+ }
568
+
569
+ switch (ch) {
570
+ case '?':
571
+ return s_req_query_string_start;
572
+
573
+ case '#':
574
+ return s_req_fragment_start;
575
+ }
576
+
577
+ break;
578
+
579
+ case s_req_query_string_start:
580
+ case s_req_query_string:
581
+ if (IS_URL_CHAR(ch)) {
582
+ return s_req_query_string;
583
+ }
584
+
585
+ switch (ch) {
586
+ case '?':
587
+ /* allow extra '?' in query string */
588
+ return s_req_query_string;
589
+
590
+ case '#':
591
+ return s_req_fragment_start;
592
+ }
593
+
594
+ break;
595
+ /* In the fragment states, '?' and further '#' are accepted in addition to URL characters. */
596
+ case s_req_fragment_start:
597
+ if (IS_URL_CHAR(ch)) {
598
+ return s_req_fragment;
599
+ }
600
+
601
+ switch (ch) {
602
+ case '?':
603
+ return s_req_fragment;
604
+
605
+ case '#':
606
+ return s;
607
+ }
608
+
609
+ break;
610
+
611
+ case s_req_fragment:
612
+ if (IS_URL_CHAR(ch)) {
613
+ return s;
614
+ }
615
+
616
+ switch (ch) {
617
+ case '?':
618
+ case '#':
619
+ return s;
620
+ }
621
+
622
+ break;
623
+
624
+ default:
625
+ break;
626
+ }
627
+
628
+ /* We should never fall out of the switch above unless there's an error */
629
+ return s_dead;
630
+ }
631
+
342
632
  size_t http_parser_execute (http_parser *parser,
343
633
  const http_parser_settings *settings,
344
634
  const char *data,
345
635
  size_t len)
346
636
  {
347
637
  char c, ch;
348
- const char *p = data, *pe;
349
- int64_t to_read;
350
-
351
- enum state state = (enum state) parser->state;
352
- enum header_states header_state = (enum header_states) parser->header_state;
353
- uint64_t index = parser->index;
354
- uint64_t nread = parser->nread;
638
+ int8_t unhex_val;
639
+ const char *p = data;
640
+ const char *header_field_mark = 0;
641
+ const char *header_value_mark = 0;
642
+ const char *url_mark = 0;
643
+ const char *body_mark = 0;
644
+ const char *status_mark = 0;
645
+ enum state p_state = (enum state) parser->state;
646
+ const unsigned int lenient = parser->lenient_http_headers;
647
+
648
+ /* We're in an error state. Don't bother doing anything. */
649
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
650
+ return 0;
651
+ }
355
652
 
356
653
  if (len == 0) {
357
- switch (state) {
654
+ switch (CURRENT_STATE()) {
358
655
  case s_body_identity_eof:
359
- CALLBACK2(message_complete);
656
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
657
+ * we got paused.
658
+ */
659
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
360
660
  return 0;
361
661
 
362
662
  case s_dead:
@@ -366,52 +666,54 @@ size_t http_parser_execute (http_parser *parser,
366
666
  return 0;
367
667
 
368
668
  default:
369
- return 1; // error
669
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
670
+ return 1;
370
671
  }
371
672
  }
372
673
 
373
- /* technically we could combine all of these (except for url_mark) into one
374
- variable, saving stack space, but it seems more clear to have them
375
- separated. */
376
- const char *header_field_mark = 0;
377
- const char *header_value_mark = 0;
378
- const char *fragment_mark = 0;
379
- const char *query_string_mark = 0;
380
- const char *path_mark = 0;
381
- const char *url_mark = 0;
382
674
 
383
- if (state == s_header_field)
675
+ if (CURRENT_STATE() == s_header_field)
384
676
  header_field_mark = data;
385
- if (state == s_header_value)
677
+ if (CURRENT_STATE() == s_header_value)
386
678
  header_value_mark = data;
387
- if (state == s_req_fragment)
388
- fragment_mark = data;
389
- if (state == s_req_query_string)
390
- query_string_mark = data;
391
- if (state == s_req_path)
392
- path_mark = data;
393
- if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
394
- || state == s_req_schema_slash_slash || state == s_req_port
395
- || state == s_req_query_string_start || state == s_req_query_string
396
- || state == s_req_host
397
- || state == s_req_fragment_start || state == s_req_fragment)
679
+ switch (CURRENT_STATE()) {
680
+ case s_req_path:
681
+ case s_req_schema:
682
+ case s_req_schema_slash:
683
+ case s_req_schema_slash_slash:
684
+ case s_req_server_start:
685
+ case s_req_server:
686
+ case s_req_server_with_at:
687
+ case s_req_query_string_start:
688
+ case s_req_query_string:
689
+ case s_req_fragment_start:
690
+ case s_req_fragment:
398
691
  url_mark = data;
692
+ break;
693
+ case s_res_status:
694
+ status_mark = data;
695
+ break;
696
+ default:
697
+ break;
698
+ }
399
699
 
400
- for (p=data, pe=data+len; p != pe; p++) {
700
+ for (p=data; p != data + len; p++) {
401
701
  ch = *p;
402
702
 
403
- if (PARSING_HEADER(state)) {
404
- ++nread;
405
- /* Buffer overflow attack */
406
- if (nread > HTTP_MAX_HEADER_SIZE) goto error;
407
- }
703
+ if (PARSING_HEADER(CURRENT_STATE()))
704
+ COUNT_HEADER_SIZE(1);
408
705
 
409
- switch (state) {
706
+ reexecute:
707
+ switch (CURRENT_STATE()) {
410
708
 
411
709
  case s_dead:
412
710
  /* this state is used after a 'Connection: close' message
413
711
  * the parser will error out if it reads another message
414
712
  */
713
+ if (LIKELY(ch == CR || ch == LF))
714
+ break;
715
+
716
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
415
717
  goto error;
416
718
 
417
719
  case s_start_req_or_res:
@@ -419,42 +721,46 @@ size_t http_parser_execute (http_parser *parser,
419
721
  if (ch == CR || ch == LF)
420
722
  break;
421
723
  parser->flags = 0;
422
- parser->content_length = -1;
724
+ parser->content_length = ULLONG_MAX;
423
725
 
424
- CALLBACK2(message_begin);
726
+ if (ch == 'H') {
727
+ UPDATE_STATE(s_res_or_resp_H);
425
728
 
426
- if (ch == 'H')
427
- state = s_res_or_resp_H;
428
- else {
729
+ CALLBACK_NOTIFY(message_begin);
730
+ } else {
429
731
  parser->type = HTTP_REQUEST;
430
- goto start_req_method_assign;
732
+ UPDATE_STATE(s_start_req);
733
+ REEXECUTE();
431
734
  }
735
+
432
736
  break;
433
737
  }
434
738
 
435
739
  case s_res_or_resp_H:
436
740
  if (ch == 'T') {
437
741
  parser->type = HTTP_RESPONSE;
438
- state = s_res_HT;
742
+ UPDATE_STATE(s_res_HT);
439
743
  } else {
440
- if (ch != 'E') goto error;
744
+ if (UNLIKELY(ch != 'E')) {
745
+ SET_ERRNO(HPE_INVALID_CONSTANT);
746
+ goto error;
747
+ }
748
+
441
749
  parser->type = HTTP_REQUEST;
442
750
  parser->method = HTTP_HEAD;
443
- index = 2;
444
- state = s_req_method;
751
+ parser->index = 2;
752
+ UPDATE_STATE(s_req_method);
445
753
  }
446
754
  break;
447
755
 
448
756
  case s_start_res:
449
757
  {
450
758
  parser->flags = 0;
451
- parser->content_length = -1;
452
-
453
- CALLBACK2(message_begin);
759
+ parser->content_length = ULLONG_MAX;
454
760
 
455
761
  switch (ch) {
456
762
  case 'H':
457
- state = s_res_H;
763
+ UPDATE_STATE(s_res_H);
458
764
  break;
459
765
 
460
766
  case CR:
@@ -462,105 +768,105 @@ size_t http_parser_execute (http_parser *parser,
462
768
  break;
463
769
 
464
770
  default:
771
+ SET_ERRNO(HPE_INVALID_CONSTANT);
465
772
  goto error;
466
773
  }
774
+
775
+ CALLBACK_NOTIFY(message_begin);
467
776
  break;
468
777
  }
469
778
 
470
779
  case s_res_H:
471
780
  STRICT_CHECK(ch != 'T');
472
- state = s_res_HT;
781
+ UPDATE_STATE(s_res_HT);
473
782
  break;
474
783
 
475
784
  case s_res_HT:
476
785
  STRICT_CHECK(ch != 'T');
477
- state = s_res_HTT;
786
+ UPDATE_STATE(s_res_HTT);
478
787
  break;
479
788
 
480
789
  case s_res_HTT:
481
790
  STRICT_CHECK(ch != 'P');
482
- state = s_res_HTTP;
791
+ UPDATE_STATE(s_res_HTTP);
483
792
  break;
484
793
 
485
794
  case s_res_HTTP:
486
795
  STRICT_CHECK(ch != '/');
487
- state = s_res_first_http_major;
796
+ UPDATE_STATE(s_res_http_major);
488
797
  break;
489
798
 
490
- case s_res_first_http_major:
491
- if (ch < '1' || ch > '9') goto error;
799
+ case s_res_http_major:
800
+ if (UNLIKELY(!IS_NUM(ch))) {
801
+ SET_ERRNO(HPE_INVALID_VERSION);
802
+ goto error;
803
+ }
804
+
492
805
  parser->http_major = ch - '0';
493
- state = s_res_http_major;
806
+ UPDATE_STATE(s_res_http_dot);
494
807
  break;
495
808
 
496
- /* major HTTP version or dot */
497
- case s_res_http_major:
809
+ case s_res_http_dot:
498
810
  {
499
- if (ch == '.') {
500
- state = s_res_first_http_minor;
501
- break;
811
+ if (UNLIKELY(ch != '.')) {
812
+ SET_ERRNO(HPE_INVALID_VERSION);
813
+ goto error;
502
814
  }
503
815
 
504
- if (ch < '0' || ch > '9') goto error;
505
-
506
- parser->http_major *= 10;
507
- parser->http_major += ch - '0';
508
-
509
- if (parser->http_major > 999) goto error;
816
+ UPDATE_STATE(s_res_http_minor);
510
817
  break;
511
818
  }
512
819
 
513
- /* first digit of minor HTTP version */
514
- case s_res_first_http_minor:
515
- if (ch < '0' || ch > '9') goto error;
820
+ case s_res_http_minor:
821
+ if (UNLIKELY(!IS_NUM(ch))) {
822
+ SET_ERRNO(HPE_INVALID_VERSION);
823
+ goto error;
824
+ }
825
+
516
826
  parser->http_minor = ch - '0';
517
- state = s_res_http_minor;
827
+ UPDATE_STATE(s_res_http_end);
518
828
  break;
519
829
 
520
- /* minor HTTP version or end of request line */
521
- case s_res_http_minor:
830
+ case s_res_http_end:
522
831
  {
523
- if (ch == ' ') {
524
- state = s_res_first_status_code;
525
- break;
832
+ if (UNLIKELY(ch != ' ')) {
833
+ SET_ERRNO(HPE_INVALID_VERSION);
834
+ goto error;
526
835
  }
527
836
 
528
- if (ch < '0' || ch > '9') goto error;
529
-
530
- parser->http_minor *= 10;
531
- parser->http_minor += ch - '0';
532
-
533
- if (parser->http_minor > 999) goto error;
837
+ UPDATE_STATE(s_res_first_status_code);
534
838
  break;
535
839
  }
536
840
 
537
841
  case s_res_first_status_code:
538
842
  {
539
- if (ch < '0' || ch > '9') {
843
+ if (!IS_NUM(ch)) {
540
844
  if (ch == ' ') {
541
845
  break;
542
846
  }
847
+
848
+ SET_ERRNO(HPE_INVALID_STATUS);
543
849
  goto error;
544
850
  }
545
851
  parser->status_code = ch - '0';
546
- state = s_res_status_code;
852
+ UPDATE_STATE(s_res_status_code);
547
853
  break;
548
854
  }
549
855
 
550
856
  case s_res_status_code:
551
857
  {
552
- if (ch < '0' || ch > '9') {
858
+ if (!IS_NUM(ch)) {
553
859
  switch (ch) {
554
860
  case ' ':
555
- state = s_res_status;
861
+ UPDATE_STATE(s_res_status_start);
556
862
  break;
557
863
  case CR:
558
- state = s_res_line_almost_done;
559
- break;
560
864
  case LF:
561
- state = s_header_field_start;
865
+ UPDATE_STATE(s_res_status_start);
866
+ REEXECUTE();
562
867
  break;
563
868
  default:
869
+ SET_ERRNO(HPE_INVALID_STATUS);
564
870
  goto error;
565
871
  }
566
872
  break;
@@ -569,27 +875,44 @@ size_t http_parser_execute (http_parser *parser,
569
875
  parser->status_code *= 10;
570
876
  parser->status_code += ch - '0';
571
877
 
572
- if (parser->status_code > 999) goto error;
878
+ if (UNLIKELY(parser->status_code > 999)) {
879
+ SET_ERRNO(HPE_INVALID_STATUS);
880
+ goto error;
881
+ }
882
+
883
+ break;
884
+ }
885
+
886
+ case s_res_status_start:
887
+ {
888
+ MARK(status);
889
+ UPDATE_STATE(s_res_status);
890
+ parser->index = 0;
891
+
892
+ if (ch == CR || ch == LF)
893
+ REEXECUTE();
894
+
573
895
  break;
574
896
  }
575
897
 
576
898
  case s_res_status:
577
- /* the human readable status. e.g. "NOT FOUND"
578
- * we are not humans so just ignore this */
579
899
  if (ch == CR) {
580
- state = s_res_line_almost_done;
900
+ UPDATE_STATE(s_res_line_almost_done);
901
+ CALLBACK_DATA(status);
581
902
  break;
582
903
  }
583
904
 
584
905
  if (ch == LF) {
585
- state = s_header_field_start;
906
+ UPDATE_STATE(s_header_field_start);
907
+ CALLBACK_DATA(status);
586
908
  break;
587
909
  }
910
+
588
911
  break;
589
912
 
590
913
  case s_res_line_almost_done:
591
914
  STRICT_CHECK(ch != LF);
592
- state = s_header_field_start;
915
+ UPDATE_STATE(s_header_field_start);
593
916
  break;
594
917
 
595
918
  case s_start_req:
@@ -597,501 +920,308 @@ size_t http_parser_execute (http_parser *parser,
597
920
  if (ch == CR || ch == LF)
598
921
  break;
599
922
  parser->flags = 0;
600
- parser->content_length = -1;
601
-
602
- CALLBACK2(message_begin);
923
+ parser->content_length = ULLONG_MAX;
603
924
 
604
- if (ch < 'A' || 'Z' < ch) goto error;
925
+ if (UNLIKELY(!IS_ALPHA(ch))) {
926
+ SET_ERRNO(HPE_INVALID_METHOD);
927
+ goto error;
928
+ }
605
929
 
606
- start_req_method_assign:
607
930
  parser->method = (enum http_method) 0;
608
- index = 1;
931
+ parser->index = 1;
609
932
  switch (ch) {
933
+ case 'A': parser->method = HTTP_ACL; break;
934
+ case 'B': parser->method = HTTP_BIND; break;
610
935
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
611
936
  case 'D': parser->method = HTTP_DELETE; break;
612
937
  case 'G': parser->method = HTTP_GET; break;
613
938
  case 'H': parser->method = HTTP_HEAD; break;
614
- case 'L': parser->method = HTTP_LOCK; break;
615
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
939
+ case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
940
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
616
941
  case 'N': parser->method = HTTP_NOTIFY; break;
617
942
  case 'O': parser->method = HTTP_OPTIONS; break;
618
- case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
619
- case 'R': parser->method = HTTP_REPORT; break;
620
- case 'S': parser->method = HTTP_SUBSCRIBE; break;
943
+ case 'P': parser->method = HTTP_POST;
944
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
945
+ break;
946
+ case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
947
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
621
948
  case 'T': parser->method = HTTP_TRACE; break;
622
- case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
623
- default: goto error;
949
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
950
+ default:
951
+ SET_ERRNO(HPE_INVALID_METHOD);
952
+ goto error;
624
953
  }
625
- state = s_req_method;
954
+ UPDATE_STATE(s_req_method);
955
+
956
+ CALLBACK_NOTIFY(message_begin);
957
+
626
958
  break;
627
959
  }
628
960
 
629
961
  case s_req_method:
630
962
  {
631
- if (ch == '\0')
963
+ const char *matcher;
964
+ if (UNLIKELY(ch == '\0')) {
965
+ SET_ERRNO(HPE_INVALID_METHOD);
632
966
  goto error;
967
+ }
633
968
 
634
- const char *matcher = method_strings[parser->method];
635
- if (ch == ' ' && matcher[index] == '\0') {
636
- state = s_req_spaces_before_url;
637
- } else if (ch == matcher[index]) {
969
+ matcher = method_strings[parser->method];
970
+ if (ch == ' ' && matcher[parser->index] == '\0') {
971
+ UPDATE_STATE(s_req_spaces_before_url);
972
+ } else if (ch == matcher[parser->index]) {
638
973
  ; /* nada */
639
- } else if (parser->method == HTTP_CONNECT) {
640
- if (index == 1 && ch == 'H') {
641
- parser->method = HTTP_CHECKOUT;
642
- } else if (index == 2 && ch == 'P') {
643
- parser->method = HTTP_COPY;
644
- }
645
- } else if (parser->method == HTTP_MKCOL) {
646
- if (index == 1 && ch == 'O') {
647
- parser->method = HTTP_MOVE;
648
- } else if (index == 1 && ch == 'E') {
649
- parser->method = HTTP_MERGE;
650
- } else if (index == 1 && ch == '-') {
651
- parser->method = HTTP_MSEARCH;
652
- } else if (index == 2 && ch == 'A') {
653
- parser->method = HTTP_MKACTIVITY;
974
+ } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
975
+
976
+ switch (parser->method << 16 | parser->index << 8 | ch) {
977
+ #define XX(meth, pos, ch, new_meth) \
978
+ case (HTTP_##meth << 16 | pos << 8 | ch): \
979
+ parser->method = HTTP_##new_meth; break;
980
+
981
+ XX(POST, 1, 'U', PUT)
982
+ XX(POST, 1, 'A', PATCH)
983
+ XX(POST, 1, 'R', PROPFIND)
984
+ XX(PUT, 2, 'R', PURGE)
985
+ XX(CONNECT, 1, 'H', CHECKOUT)
986
+ XX(CONNECT, 2, 'P', COPY)
987
+ XX(MKCOL, 1, 'O', MOVE)
988
+ XX(MKCOL, 1, 'E', MERGE)
989
+ XX(MKCOL, 1, '-', MSEARCH)
990
+ XX(MKCOL, 2, 'A', MKACTIVITY)
991
+ XX(MKCOL, 3, 'A', MKCALENDAR)
992
+ XX(SUBSCRIBE, 1, 'E', SEARCH)
993
+ XX(SUBSCRIBE, 1, 'O', SOURCE)
994
+ XX(REPORT, 2, 'B', REBIND)
995
+ XX(PROPFIND, 4, 'P', PROPPATCH)
996
+ XX(LOCK, 1, 'I', LINK)
997
+ XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
998
+ XX(UNLOCK, 2, 'B', UNBIND)
999
+ XX(UNLOCK, 3, 'I', UNLINK)
1000
+ #undef XX
1001
+ default:
1002
+ SET_ERRNO(HPE_INVALID_METHOD);
1003
+ goto error;
654
1004
  }
655
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
656
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
657
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
658
- parser->method = HTTP_PUT;
659
- } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
660
- parser->method = HTTP_UNSUBSCRIBE;
661
- } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
662
- parser->method = HTTP_PROPPATCH;
663
1005
  } else {
1006
+ SET_ERRNO(HPE_INVALID_METHOD);
664
1007
  goto error;
665
1008
  }
666
1009
 
667
- ++index;
1010
+ ++parser->index;
668
1011
  break;
669
1012
  }
1013
+
670
1014
  case s_req_spaces_before_url:
671
1015
  {
672
1016
  if (ch == ' ') break;
673
1017
 
674
- if (ch == '/' || ch == '*') {
675
- MARK(url);
676
- MARK(path);
677
- state = s_req_path;
678
- break;
1018
+ MARK(url);
1019
+ if (parser->method == HTTP_CONNECT) {
1020
+ UPDATE_STATE(s_req_server_start);
679
1021
  }
680
1022
 
681
- c = LOWER(ch);
682
-
683
- if (c >= 'a' && c <= 'z') {
684
- MARK(url);
685
- state = s_req_schema;
686
- break;
1023
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1024
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1025
+ SET_ERRNO(HPE_INVALID_URL);
1026
+ goto error;
687
1027
  }
688
1028
 
689
- goto error;
1029
+ break;
690
1030
  }
691
1031
 
692
1032
  case s_req_schema:
693
- {
694
- c = LOWER(ch);
695
-
696
- if (c >= 'a' && c <= 'z') break;
697
-
698
- if (ch == ':') {
699
- state = s_req_schema_slash;
700
- break;
701
- } else if (ch == '.') {
702
- state = s_req_host;
703
- break;
704
- } else if ('0' <= ch && ch <= '9') {
705
- state = s_req_host;
706
- break;
707
- }
708
-
709
- goto error;
710
- }
711
-
712
1033
  case s_req_schema_slash:
713
- STRICT_CHECK(ch != '/');
714
- state = s_req_schema_slash_slash;
715
- break;
716
-
717
1034
  case s_req_schema_slash_slash:
718
- STRICT_CHECK(ch != '/');
719
- state = s_req_host;
720
- break;
721
-
722
- case s_req_host:
1035
+ case s_req_server_start:
723
1036
  {
724
- c = LOWER(ch);
725
- if (c >= 'a' && c <= 'z') break;
726
- if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
727
1037
  switch (ch) {
728
- case ':':
729
- state = s_req_port;
730
- break;
731
- case '/':
732
- MARK(path);
733
- state = s_req_path;
734
- break;
1038
+ /* No whitespace allowed here */
735
1039
  case ' ':
736
- /* The request line looks like:
737
- * "GET http://foo.bar.com HTTP/1.1"
738
- * That is, there is no path.
739
- */
740
- CALLBACK(url);
741
- state = s_req_http_start;
742
- break;
743
- case '?':
744
- state = s_req_query_string_start;
745
- break;
746
- default:
1040
+ case CR:
1041
+ case LF:
1042
+ SET_ERRNO(HPE_INVALID_URL);
747
1043
  goto error;
748
- }
749
- break;
750
- }
751
-
752
- case s_req_port:
753
- {
754
- if (ch >= '0' && ch <= '9') break;
755
- switch (ch) {
756
- case '/':
757
- MARK(path);
758
- state = s_req_path;
759
- break;
760
- case ' ':
761
- /* The request line looks like:
762
- * "GET http://foo.bar.com:1234 HTTP/1.1"
763
- * That is, there is no path.
764
- */
765
- CALLBACK(url);
766
- state = s_req_http_start;
767
- break;
768
- case '?':
769
- state = s_req_query_string_start;
770
- break;
771
1044
  default:
772
- goto error;
1045
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1046
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1047
+ SET_ERRNO(HPE_INVALID_URL);
1048
+ goto error;
1049
+ }
773
1050
  }
1051
+
774
1052
  break;
775
1053
  }
776
1054
 
1055
+ case s_req_server:
1056
+ case s_req_server_with_at:
777
1057
  case s_req_path:
1058
+ case s_req_query_string_start:
1059
+ case s_req_query_string:
1060
+ case s_req_fragment_start:
1061
+ case s_req_fragment:
778
1062
  {
779
- if (normal_url_char[(unsigned char)ch]) break;
780
-
781
1063
  switch (ch) {
782
1064
  case ' ':
783
- CALLBACK(url);
784
- CALLBACK(path);
785
- state = s_req_http_start;
1065
+ UPDATE_STATE(s_req_http_start);
1066
+ CALLBACK_DATA(url);
786
1067
  break;
787
1068
  case CR:
788
- CALLBACK(url);
789
- CALLBACK(path);
790
- parser->http_major = 0;
791
- parser->http_minor = 9;
792
- state = s_req_line_almost_done;
793
- break;
794
1069
  case LF:
795
- CALLBACK(url);
796
- CALLBACK(path);
797
1070
  parser->http_major = 0;
798
1071
  parser->http_minor = 9;
799
- state = s_header_field_start;
800
- break;
801
- case '?':
802
- CALLBACK(path);
803
- state = s_req_query_string_start;
804
- break;
805
- case '#':
806
- CALLBACK(path);
807
- state = s_req_fragment_start;
1072
+ UPDATE_STATE((ch == CR) ?
1073
+ s_req_line_almost_done :
1074
+ s_header_field_start);
1075
+ CALLBACK_DATA(url);
808
1076
  break;
809
1077
  default:
810
- goto error;
1078
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1079
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1080
+ SET_ERRNO(HPE_INVALID_URL);
1081
+ goto error;
1082
+ }
811
1083
  }
812
1084
  break;
813
1085
  }
814
1086
 
815
- case s_req_query_string_start:
816
- {
817
- if (normal_url_char[(unsigned char)ch]) {
818
- MARK(query_string);
819
- state = s_req_query_string;
820
- break;
821
- }
822
-
1087
+ case s_req_http_start:
823
1088
  switch (ch) {
824
- case '?':
825
- break; /* XXX ignore extra '?' ... is this right? */
826
- case ' ':
827
- CALLBACK(url);
828
- state = s_req_http_start;
1089
+ case 'H':
1090
+ UPDATE_STATE(s_req_http_H);
829
1091
  break;
830
- case CR:
831
- CALLBACK(url);
832
- parser->http_major = 0;
833
- parser->http_minor = 9;
834
- state = s_req_line_almost_done;
835
- break;
836
- case LF:
837
- CALLBACK(url);
838
- parser->http_major = 0;
839
- parser->http_minor = 9;
840
- state = s_header_field_start;
841
- break;
842
- case '#':
843
- state = s_req_fragment_start;
844
- break;
845
- default:
846
- goto error;
847
- }
848
- break;
849
- }
850
-
851
- case s_req_query_string:
852
- {
853
- if (normal_url_char[(unsigned char)ch]) break;
854
-
855
- switch (ch) {
856
- case '?':
857
- /* allow extra '?' in query string */
858
- break;
859
- case ' ':
860
- CALLBACK(url);
861
- CALLBACK(query_string);
862
- state = s_req_http_start;
863
- break;
864
- case CR:
865
- CALLBACK(url);
866
- CALLBACK(query_string);
867
- parser->http_major = 0;
868
- parser->http_minor = 9;
869
- state = s_req_line_almost_done;
870
- break;
871
- case LF:
872
- CALLBACK(url);
873
- CALLBACK(query_string);
874
- parser->http_major = 0;
875
- parser->http_minor = 9;
876
- state = s_header_field_start;
877
- break;
878
- case '#':
879
- CALLBACK(query_string);
880
- state = s_req_fragment_start;
881
- break;
882
- default:
883
- goto error;
884
- }
885
- break;
886
- }
887
-
888
- case s_req_fragment_start:
889
- {
890
- if (normal_url_char[(unsigned char)ch]) {
891
- MARK(fragment);
892
- state = s_req_fragment;
893
- break;
894
- }
895
-
896
- switch (ch) {
897
- case ' ':
898
- CALLBACK(url);
899
- state = s_req_http_start;
900
- break;
901
- case CR:
902
- CALLBACK(url);
903
- parser->http_major = 0;
904
- parser->http_minor = 9;
905
- state = s_req_line_almost_done;
906
- break;
907
- case LF:
908
- CALLBACK(url);
909
- parser->http_major = 0;
910
- parser->http_minor = 9;
911
- state = s_header_field_start;
912
- break;
913
- case '?':
914
- MARK(fragment);
915
- state = s_req_fragment;
916
- break;
917
- case '#':
918
- break;
919
- default:
920
- goto error;
921
- }
922
- break;
923
- }
924
-
925
- case s_req_fragment:
926
- {
927
- if (normal_url_char[(unsigned char)ch]) break;
928
-
929
- switch (ch) {
930
- case ' ':
931
- CALLBACK(url);
932
- CALLBACK(fragment);
933
- state = s_req_http_start;
934
- break;
935
- case CR:
936
- CALLBACK(url);
937
- CALLBACK(fragment);
938
- parser->http_major = 0;
939
- parser->http_minor = 9;
940
- state = s_req_line_almost_done;
941
- break;
942
- case LF:
943
- CALLBACK(url);
944
- CALLBACK(fragment);
945
- parser->http_major = 0;
946
- parser->http_minor = 9;
947
- state = s_header_field_start;
948
- break;
949
- case '?':
950
- case '#':
951
- break;
952
- default:
953
- goto error;
954
- }
955
- break;
956
- }
957
-
958
- case s_req_http_start:
959
- switch (ch) {
960
- case 'H':
961
- state = s_req_http_H;
962
- break;
963
- case ' ':
1092
+ case ' ':
964
1093
  break;
965
1094
  default:
1095
+ SET_ERRNO(HPE_INVALID_CONSTANT);
966
1096
  goto error;
967
1097
  }
968
1098
  break;
969
1099
 
970
1100
  case s_req_http_H:
971
1101
  STRICT_CHECK(ch != 'T');
972
- state = s_req_http_HT;
1102
+ UPDATE_STATE(s_req_http_HT);
973
1103
  break;
974
1104
 
975
1105
  case s_req_http_HT:
976
1106
  STRICT_CHECK(ch != 'T');
977
- state = s_req_http_HTT;
1107
+ UPDATE_STATE(s_req_http_HTT);
978
1108
  break;
979
1109
 
980
1110
  case s_req_http_HTT:
981
1111
  STRICT_CHECK(ch != 'P');
982
- state = s_req_http_HTTP;
1112
+ UPDATE_STATE(s_req_http_HTTP);
983
1113
  break;
984
1114
 
985
1115
  case s_req_http_HTTP:
986
1116
  STRICT_CHECK(ch != '/');
987
- state = s_req_first_http_major;
1117
+ UPDATE_STATE(s_req_http_major);
988
1118
  break;
989
1119
 
990
- /* first digit of major HTTP version */
991
- case s_req_first_http_major:
992
- if (ch < '1' || ch > '9') goto error;
1120
+ case s_req_http_major:
1121
+ if (UNLIKELY(!IS_NUM(ch))) {
1122
+ SET_ERRNO(HPE_INVALID_VERSION);
1123
+ goto error;
1124
+ }
1125
+
993
1126
  parser->http_major = ch - '0';
994
- state = s_req_http_major;
1127
+ UPDATE_STATE(s_req_http_dot);
995
1128
  break;
996
1129
 
997
- /* major HTTP version or dot */
998
- case s_req_http_major:
1130
+ case s_req_http_dot:
999
1131
  {
1000
- if (ch == '.') {
1001
- state = s_req_first_http_minor;
1002
- break;
1132
+ if (UNLIKELY(ch != '.')) {
1133
+ SET_ERRNO(HPE_INVALID_VERSION);
1134
+ goto error;
1003
1135
  }
1004
1136
 
1005
- if (ch < '0' || ch > '9') goto error;
1006
-
1007
- parser->http_major *= 10;
1008
- parser->http_major += ch - '0';
1009
-
1010
- if (parser->http_major > 999) goto error;
1137
+ UPDATE_STATE(s_req_http_minor);
1011
1138
  break;
1012
1139
  }
1013
1140
 
1014
- /* first digit of minor HTTP version */
1015
- case s_req_first_http_minor:
1016
- if (ch < '0' || ch > '9') goto error;
1141
+ case s_req_http_minor:
1142
+ if (UNLIKELY(!IS_NUM(ch))) {
1143
+ SET_ERRNO(HPE_INVALID_VERSION);
1144
+ goto error;
1145
+ }
1146
+
1017
1147
  parser->http_minor = ch - '0';
1018
- state = s_req_http_minor;
1148
+ UPDATE_STATE(s_req_http_end);
1019
1149
  break;
1020
1150
 
1021
- /* minor HTTP version or end of request line */
1022
- case s_req_http_minor:
1151
+ case s_req_http_end:
1023
1152
  {
1024
1153
  if (ch == CR) {
1025
- state = s_req_line_almost_done;
1154
+ UPDATE_STATE(s_req_line_almost_done);
1026
1155
  break;
1027
1156
  }
1028
1157
 
1029
1158
  if (ch == LF) {
1030
- state = s_header_field_start;
1159
+ UPDATE_STATE(s_header_field_start);
1031
1160
  break;
1032
1161
  }
1033
1162
 
1034
- /* XXX allow spaces after digit? */
1035
-
1036
- if (ch < '0' || ch > '9') goto error;
1037
-
1038
- parser->http_minor *= 10;
1039
- parser->http_minor += ch - '0';
1040
-
1041
- if (parser->http_minor > 999) goto error;
1163
+ SET_ERRNO(HPE_INVALID_VERSION);
1164
+ goto error;
1042
1165
  break;
1043
1166
  }
1044
1167
 
1045
1168
  /* end of request line */
1046
1169
  case s_req_line_almost_done:
1047
1170
  {
1048
- if (ch != LF) goto error;
1049
- state = s_header_field_start;
1171
+ if (UNLIKELY(ch != LF)) {
1172
+ SET_ERRNO(HPE_LF_EXPECTED);
1173
+ goto error;
1174
+ }
1175
+
1176
+ UPDATE_STATE(s_header_field_start);
1050
1177
  break;
1051
1178
  }
1052
1179
 
1053
1180
  case s_header_field_start:
1054
1181
  {
1055
1182
  if (ch == CR) {
1056
- state = s_headers_almost_done;
1183
+ UPDATE_STATE(s_headers_almost_done);
1057
1184
  break;
1058
1185
  }
1059
1186
 
1060
1187
  if (ch == LF) {
1061
1188
  /* they might be just sending \n instead of \r\n so this would be
1062
1189
  * the second \n to denote the end of headers*/
1063
- state = s_headers_almost_done;
1064
- goto headers_almost_done;
1190
+ UPDATE_STATE(s_headers_almost_done);
1191
+ REEXECUTE();
1065
1192
  }
1066
1193
 
1067
1194
  c = TOKEN(ch);
1068
1195
 
1069
- if (!c) goto error;
1196
+ if (UNLIKELY(!c)) {
1197
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1198
+ goto error;
1199
+ }
1070
1200
 
1071
1201
  MARK(header_field);
1072
1202
 
1073
- index = 0;
1074
- state = s_header_field;
1203
+ parser->index = 0;
1204
+ UPDATE_STATE(s_header_field);
1075
1205
 
1076
1206
  switch (c) {
1077
1207
  case 'c':
1078
- header_state = h_C;
1208
+ parser->header_state = h_C;
1079
1209
  break;
1080
1210
 
1081
1211
  case 'p':
1082
- header_state = h_matching_proxy_connection;
1212
+ parser->header_state = h_matching_proxy_connection;
1083
1213
  break;
1084
1214
 
1085
1215
  case 't':
1086
- header_state = h_matching_transfer_encoding;
1216
+ parser->header_state = h_matching_transfer_encoding;
1087
1217
  break;
1088
1218
 
1089
1219
  case 'u':
1090
- header_state = h_matching_upgrade;
1220
+ parser->header_state = h_matching_upgrade;
1091
1221
  break;
1092
1222
 
1093
1223
  default:
1094
- header_state = h_general;
1224
+ parser->header_state = h_general;
1095
1225
  break;
1096
1226
  }
1097
1227
  break;
@@ -1099,34 +1229,39 @@ size_t http_parser_execute (http_parser *parser,
1099
1229
 
1100
1230
  case s_header_field:
1101
1231
  {
1102
- c = TOKEN(ch);
1232
+ const char* start = p;
1233
+ for (; p != data + len; p++) {
1234
+ ch = *p;
1235
+ c = TOKEN(ch);
1103
1236
 
1104
- if (c) {
1105
- switch (header_state) {
1237
+ if (!c)
1238
+ break;
1239
+
1240
+ switch (parser->header_state) {
1106
1241
  case h_general:
1107
1242
  break;
1108
1243
 
1109
1244
  case h_C:
1110
- index++;
1111
- header_state = (c == 'o' ? h_CO : h_general);
1245
+ parser->index++;
1246
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1112
1247
  break;
1113
1248
 
1114
1249
  case h_CO:
1115
- index++;
1116
- header_state = (c == 'n' ? h_CON : h_general);
1250
+ parser->index++;
1251
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1117
1252
  break;
1118
1253
 
1119
1254
  case h_CON:
1120
- index++;
1255
+ parser->index++;
1121
1256
  switch (c) {
1122
1257
  case 'n':
1123
- header_state = h_matching_connection;
1258
+ parser->header_state = h_matching_connection;
1124
1259
  break;
1125
1260
  case 't':
1126
- header_state = h_matching_content_length;
1261
+ parser->header_state = h_matching_content_length;
1127
1262
  break;
1128
1263
  default:
1129
- header_state = h_general;
1264
+ parser->header_state = h_general;
1130
1265
  break;
1131
1266
  }
1132
1267
  break;
@@ -1134,60 +1269,60 @@ size_t http_parser_execute (http_parser *parser,
1134
1269
  /* connection */
1135
1270
 
1136
1271
  case h_matching_connection:
1137
- index++;
1138
- if (index > sizeof(CONNECTION)-1
1139
- || c != CONNECTION[index]) {
1140
- header_state = h_general;
1141
- } else if (index == sizeof(CONNECTION)-2) {
1142
- header_state = h_connection;
1272
+ parser->index++;
1273
+ if (parser->index > sizeof(CONNECTION)-1
1274
+ || c != CONNECTION[parser->index]) {
1275
+ parser->header_state = h_general;
1276
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1277
+ parser->header_state = h_connection;
1143
1278
  }
1144
1279
  break;
1145
1280
 
1146
1281
  /* proxy-connection */
1147
1282
 
1148
1283
  case h_matching_proxy_connection:
1149
- index++;
1150
- if (index > sizeof(PROXY_CONNECTION)-1
1151
- || c != PROXY_CONNECTION[index]) {
1152
- header_state = h_general;
1153
- } else if (index == sizeof(PROXY_CONNECTION)-2) {
1154
- header_state = h_connection;
1284
+ parser->index++;
1285
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1286
+ || c != PROXY_CONNECTION[parser->index]) {
1287
+ parser->header_state = h_general;
1288
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1289
+ parser->header_state = h_connection;
1155
1290
  }
1156
1291
  break;
1157
1292
 
1158
1293
  /* content-length */
1159
1294
 
1160
1295
  case h_matching_content_length:
1161
- index++;
1162
- if (index > sizeof(CONTENT_LENGTH)-1
1163
- || c != CONTENT_LENGTH[index]) {
1164
- header_state = h_general;
1165
- } else if (index == sizeof(CONTENT_LENGTH)-2) {
1166
- header_state = h_content_length;
1296
+ parser->index++;
1297
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1298
+ || c != CONTENT_LENGTH[parser->index]) {
1299
+ parser->header_state = h_general;
1300
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1301
+ parser->header_state = h_content_length;
1167
1302
  }
1168
1303
  break;
1169
1304
 
1170
1305
  /* transfer-encoding */
1171
1306
 
1172
1307
  case h_matching_transfer_encoding:
1173
- index++;
1174
- if (index > sizeof(TRANSFER_ENCODING)-1
1175
- || c != TRANSFER_ENCODING[index]) {
1176
- header_state = h_general;
1177
- } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1178
- header_state = h_transfer_encoding;
1308
+ parser->index++;
1309
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1310
+ || c != TRANSFER_ENCODING[parser->index]) {
1311
+ parser->header_state = h_general;
1312
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1313
+ parser->header_state = h_transfer_encoding;
1179
1314
  }
1180
1315
  break;
1181
1316
 
1182
1317
  /* upgrade */
1183
1318
 
1184
1319
  case h_matching_upgrade:
1185
- index++;
1186
- if (index > sizeof(UPGRADE)-1
1187
- || c != UPGRADE[index]) {
1188
- header_state = h_general;
1189
- } else if (index == sizeof(UPGRADE)-2) {
1190
- header_state = h_upgrade;
1320
+ parser->index++;
1321
+ if (parser->index > sizeof(UPGRADE)-1
1322
+ || c != UPGRADE[parser->index]) {
1323
+ parser->header_state = h_general;
1324
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1325
+ parser->header_state = h_upgrade;
1191
1326
  }
1192
1327
  break;
1193
1328
 
@@ -1195,95 +1330,107 @@ size_t http_parser_execute (http_parser *parser,
1195
1330
  case h_content_length:
1196
1331
  case h_transfer_encoding:
1197
1332
  case h_upgrade:
1198
- if (ch != ' ') header_state = h_general;
1333
+ if (ch != ' ') parser->header_state = h_general;
1199
1334
  break;
1200
1335
 
1201
1336
  default:
1202
1337
  assert(0 && "Unknown header_state");
1203
1338
  break;
1204
1339
  }
1340
+ }
1341
+
1342
+ COUNT_HEADER_SIZE(p - start);
1343
+
1344
+ if (p == data + len) {
1345
+ --p;
1205
1346
  break;
1206
1347
  }
1207
1348
 
1208
1349
  if (ch == ':') {
1209
- CALLBACK(header_field);
1210
- state = s_header_value_start;
1350
+ UPDATE_STATE(s_header_value_discard_ws);
1351
+ CALLBACK_DATA(header_field);
1211
1352
  break;
1212
1353
  }
1213
1354
 
1355
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1356
+ goto error;
1357
+ }
1358
+
1359
+ case s_header_value_discard_ws:
1360
+ if (ch == ' ' || ch == '\t') break;
1361
+
1214
1362
  if (ch == CR) {
1215
- state = s_header_almost_done;
1216
- CALLBACK(header_field);
1363
+ UPDATE_STATE(s_header_value_discard_ws_almost_done);
1217
1364
  break;
1218
1365
  }
1219
1366
 
1220
1367
  if (ch == LF) {
1221
- CALLBACK(header_field);
1222
- state = s_header_field_start;
1368
+ UPDATE_STATE(s_header_value_discard_lws);
1223
1369
  break;
1224
1370
  }
1225
1371
 
1226
- goto error;
1227
- }
1372
+ /* FALLTHROUGH */
1228
1373
 
1229
1374
  case s_header_value_start:
1230
1375
  {
1231
- if (ch == ' ') break;
1232
-
1233
1376
  MARK(header_value);
1234
1377
 
1235
- state = s_header_value;
1236
- index = 0;
1378
+ UPDATE_STATE(s_header_value);
1379
+ parser->index = 0;
1237
1380
 
1238
1381
  c = LOWER(ch);
1239
1382
 
1240
- if (ch == CR) {
1241
- CALLBACK(header_value);
1242
- header_state = h_general;
1243
- state = s_header_almost_done;
1244
- break;
1245
- }
1246
-
1247
- if (ch == LF) {
1248
- CALLBACK(header_value);
1249
- state = s_header_field_start;
1250
- break;
1251
- }
1252
-
1253
- switch (header_state) {
1383
+ switch (parser->header_state) {
1254
1384
  case h_upgrade:
1255
1385
  parser->flags |= F_UPGRADE;
1256
- header_state = h_general;
1386
+ parser->header_state = h_general;
1257
1387
  break;
1258
1388
 
1259
1389
  case h_transfer_encoding:
1260
1390
  /* looking for 'Transfer-Encoding: chunked' */
1261
1391
  if ('c' == c) {
1262
- header_state = h_matching_transfer_encoding_chunked;
1392
+ parser->header_state = h_matching_transfer_encoding_chunked;
1263
1393
  } else {
1264
- header_state = h_general;
1394
+ parser->header_state = h_general;
1265
1395
  }
1266
1396
  break;
1267
1397
 
1268
1398
  case h_content_length:
1269
- if (ch < '0' || ch > '9') goto error;
1399
+ if (UNLIKELY(!IS_NUM(ch))) {
1400
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1401
+ goto error;
1402
+ }
1403
+
1404
+ if (parser->flags & F_CONTENTLENGTH) {
1405
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1406
+ goto error;
1407
+ }
1408
+
1409
+ parser->flags |= F_CONTENTLENGTH;
1270
1410
  parser->content_length = ch - '0';
1411
+ parser->header_state = h_content_length_num;
1271
1412
  break;
1272
1413
 
1273
1414
  case h_connection:
1274
1415
  /* looking for 'Connection: keep-alive' */
1275
1416
  if (c == 'k') {
1276
- header_state = h_matching_connection_keep_alive;
1417
+ parser->header_state = h_matching_connection_keep_alive;
1277
1418
  /* looking for 'Connection: close' */
1278
1419
  } else if (c == 'c') {
1279
- header_state = h_matching_connection_close;
1420
+ parser->header_state = h_matching_connection_close;
1421
+ } else if (c == 'u') {
1422
+ parser->header_state = h_matching_connection_upgrade;
1280
1423
  } else {
1281
- header_state = h_general;
1424
+ parser->header_state = h_matching_connection_token;
1282
1425
  }
1283
1426
  break;
1284
1427
 
1428
+ /* Multi-value `Connection` header */
1429
+ case h_matching_connection_token_start:
1430
+ break;
1431
+
1285
1432
  default:
1286
- header_state = h_general;
1433
+ parser->header_state = h_general;
1287
1434
  break;
1288
1435
  }
1289
1436
  break;
@@ -1291,89 +1438,229 @@ size_t http_parser_execute (http_parser *parser,
1291
1438
 
1292
1439
  case s_header_value:
1293
1440
  {
1294
- c = LOWER(ch);
1295
-
1296
- if (ch == CR) {
1297
- CALLBACK(header_value);
1298
- state = s_header_almost_done;
1299
- break;
1300
- }
1441
+ const char* start = p;
1442
+ enum header_states h_state = (enum header_states) parser->header_state;
1443
+ for (; p != data + len; p++) {
1444
+ ch = *p;
1445
+ if (ch == CR) {
1446
+ UPDATE_STATE(s_header_almost_done);
1447
+ parser->header_state = h_state;
1448
+ CALLBACK_DATA(header_value);
1449
+ break;
1450
+ }
1301
1451
 
1302
- if (ch == LF) {
1303
- CALLBACK(header_value);
1304
- goto header_almost_done;
1305
- }
1452
+ if (ch == LF) {
1453
+ UPDATE_STATE(s_header_almost_done);
1454
+ COUNT_HEADER_SIZE(p - start);
1455
+ parser->header_state = h_state;
1456
+ CALLBACK_DATA_NOADVANCE(header_value);
1457
+ REEXECUTE();
1458
+ }
1306
1459
 
1307
- switch (header_state) {
1308
- case h_general:
1309
- break;
1460
+ if (!lenient && !IS_HEADER_CHAR(ch)) {
1461
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1462
+ goto error;
1463
+ }
1310
1464
 
1311
- case h_connection:
1312
- case h_transfer_encoding:
1313
- assert(0 && "Shouldn't get here.");
1314
- break;
1465
+ c = LOWER(ch);
1315
1466
 
1316
- case h_content_length:
1317
- if (ch == ' ') break;
1318
- if (ch < '0' || ch > '9') goto error;
1319
- parser->content_length *= 10;
1320
- parser->content_length += ch - '0';
1321
- break;
1467
+ switch (h_state) {
1468
+ case h_general:
1469
+ {
1470
+ const char* p_cr;
1471
+ const char* p_lf;
1472
+ size_t limit = data + len - p;
1473
+
1474
+ limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1475
+
1476
+ p_cr = (const char*) memchr(p, CR, limit);
1477
+ p_lf = (const char*) memchr(p, LF, limit);
1478
+ if (p_cr != NULL) {
1479
+ if (p_lf != NULL && p_cr >= p_lf)
1480
+ p = p_lf;
1481
+ else
1482
+ p = p_cr;
1483
+ } else if (UNLIKELY(p_lf != NULL)) {
1484
+ p = p_lf;
1485
+ } else {
1486
+ p = data + len;
1487
+ }
1488
+ --p;
1322
1489
 
1323
- /* Transfer-Encoding: chunked */
1324
- case h_matching_transfer_encoding_chunked:
1325
- index++;
1326
- if (index > sizeof(CHUNKED)-1
1327
- || c != CHUNKED[index]) {
1328
- header_state = h_general;
1329
- } else if (index == sizeof(CHUNKED)-2) {
1330
- header_state = h_transfer_encoding_chunked;
1490
+ break;
1331
1491
  }
1332
- break;
1333
1492
 
1334
- /* looking for 'Connection: keep-alive' */
1335
- case h_matching_connection_keep_alive:
1336
- index++;
1337
- if (index > sizeof(KEEP_ALIVE)-1
1338
- || c != KEEP_ALIVE[index]) {
1339
- header_state = h_general;
1340
- } else if (index == sizeof(KEEP_ALIVE)-2) {
1341
- header_state = h_connection_keep_alive;
1342
- }
1343
- break;
1493
+ case h_connection:
1494
+ case h_transfer_encoding:
1495
+ assert(0 && "Shouldn't get here.");
1496
+ break;
1497
+
1498
+ case h_content_length:
1499
+ if (ch == ' ') break;
1500
+ h_state = h_content_length_num;
1501
+ /* FALLTHROUGH */
1344
1502
 
1345
- /* looking for 'Connection: close' */
1346
- case h_matching_connection_close:
1347
- index++;
1348
- if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1349
- header_state = h_general;
1350
- } else if (index == sizeof(CLOSE)-2) {
1351
- header_state = h_connection_close;
1503
+ case h_content_length_num:
1504
+ {
1505
+ uint64_t t;
1506
+
1507
+ if (ch == ' ') {
1508
+ h_state = h_content_length_ws;
1509
+ break;
1510
+ }
1511
+
1512
+ if (UNLIKELY(!IS_NUM(ch))) {
1513
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1514
+ parser->header_state = h_state;
1515
+ goto error;
1516
+ }
1517
+
1518
+ t = parser->content_length;
1519
+ t *= 10;
1520
+ t += ch - '0';
1521
+
1522
+ /* Overflow? Test against a conservative limit for simplicity. */
1523
+ if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1524
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1525
+ parser->header_state = h_state;
1526
+ goto error;
1527
+ }
1528
+
1529
+ parser->content_length = t;
1530
+ break;
1352
1531
  }
1353
- break;
1354
1532
 
1355
- case h_transfer_encoding_chunked:
1356
- case h_connection_keep_alive:
1357
- case h_connection_close:
1358
- if (ch != ' ') header_state = h_general;
1359
- break;
1533
+ case h_content_length_ws:
1534
+ if (ch == ' ') break;
1535
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1536
+ parser->header_state = h_state;
1537
+ goto error;
1360
1538
 
1361
- default:
1362
- state = s_header_value;
1363
- header_state = h_general;
1364
- break;
1539
+ /* Transfer-Encoding: chunked */
1540
+ case h_matching_transfer_encoding_chunked:
1541
+ parser->index++;
1542
+ if (parser->index > sizeof(CHUNKED)-1
1543
+ || c != CHUNKED[parser->index]) {
1544
+ h_state = h_general;
1545
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1546
+ h_state = h_transfer_encoding_chunked;
1547
+ }
1548
+ break;
1549
+
1550
+ case h_matching_connection_token_start:
1551
+ /* looking for 'Connection: keep-alive' */
1552
+ if (c == 'k') {
1553
+ h_state = h_matching_connection_keep_alive;
1554
+ /* looking for 'Connection: close' */
1555
+ } else if (c == 'c') {
1556
+ h_state = h_matching_connection_close;
1557
+ } else if (c == 'u') {
1558
+ h_state = h_matching_connection_upgrade;
1559
+ } else if (STRICT_TOKEN(c)) {
1560
+ h_state = h_matching_connection_token;
1561
+ } else if (c == ' ' || c == '\t') {
1562
+ /* Skip lws */
1563
+ } else {
1564
+ h_state = h_general;
1565
+ }
1566
+ break;
1567
+
1568
+ /* looking for 'Connection: keep-alive' */
1569
+ case h_matching_connection_keep_alive:
1570
+ parser->index++;
1571
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1572
+ || c != KEEP_ALIVE[parser->index]) {
1573
+ h_state = h_matching_connection_token;
1574
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1575
+ h_state = h_connection_keep_alive;
1576
+ }
1577
+ break;
1578
+
1579
+ /* looking for 'Connection: close' */
1580
+ case h_matching_connection_close:
1581
+ parser->index++;
1582
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1583
+ h_state = h_matching_connection_token;
1584
+ } else if (parser->index == sizeof(CLOSE)-2) {
1585
+ h_state = h_connection_close;
1586
+ }
1587
+ break;
1588
+
1589
+ /* looking for 'Connection: upgrade' */
1590
+ case h_matching_connection_upgrade:
1591
+ parser->index++;
1592
+ if (parser->index > sizeof(UPGRADE) - 1 ||
1593
+ c != UPGRADE[parser->index]) {
1594
+ h_state = h_matching_connection_token;
1595
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1596
+ h_state = h_connection_upgrade;
1597
+ }
1598
+ break;
1599
+
1600
+ case h_matching_connection_token:
1601
+ if (ch == ',') {
1602
+ h_state = h_matching_connection_token_start;
1603
+ parser->index = 0;
1604
+ }
1605
+ break;
1606
+
1607
+ case h_transfer_encoding_chunked:
1608
+ if (ch != ' ') h_state = h_general;
1609
+ break;
1610
+
1611
+ case h_connection_keep_alive:
1612
+ case h_connection_close:
1613
+ case h_connection_upgrade:
1614
+ if (ch == ',') {
1615
+ if (h_state == h_connection_keep_alive) {
1616
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1617
+ } else if (h_state == h_connection_close) {
1618
+ parser->flags |= F_CONNECTION_CLOSE;
1619
+ } else if (h_state == h_connection_upgrade) {
1620
+ parser->flags |= F_CONNECTION_UPGRADE;
1621
+ }
1622
+ h_state = h_matching_connection_token_start;
1623
+ parser->index = 0;
1624
+ } else if (ch != ' ') {
1625
+ h_state = h_matching_connection_token;
1626
+ }
1627
+ break;
1628
+
1629
+ default:
1630
+ UPDATE_STATE(s_header_value);
1631
+ h_state = h_general;
1632
+ break;
1633
+ }
1365
1634
  }
1635
+ parser->header_state = h_state;
1636
+
1637
+ COUNT_HEADER_SIZE(p - start);
1638
+
1639
+ if (p == data + len)
1640
+ --p;
1366
1641
  break;
1367
1642
  }
1368
1643
 
1369
1644
  case s_header_almost_done:
1370
- header_almost_done:
1371
1645
  {
1372
- STRICT_CHECK(ch != LF);
1646
+ if (UNLIKELY(ch != LF)) {
1647
+ SET_ERRNO(HPE_LF_EXPECTED);
1648
+ goto error;
1649
+ }
1650
+
1651
+ UPDATE_STATE(s_header_value_lws);
1652
+ break;
1653
+ }
1373
1654
 
1374
- state = s_header_field_start;
1655
+ case s_header_value_lws:
1656
+ {
1657
+ if (ch == ' ' || ch == '\t') {
1658
+ UPDATE_STATE(s_header_value_start);
1659
+ REEXECUTE();
1660
+ }
1375
1661
 
1376
- switch (header_state) {
1662
+ /* finished the header */
1663
+ switch (parser->header_state) {
1377
1664
  case h_connection_keep_alive:
1378
1665
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
1379
1666
  break;
@@ -1383,28 +1670,87 @@ size_t http_parser_execute (http_parser *parser,
1383
1670
  case h_transfer_encoding_chunked:
1384
1671
  parser->flags |= F_CHUNKED;
1385
1672
  break;
1673
+ case h_connection_upgrade:
1674
+ parser->flags |= F_CONNECTION_UPGRADE;
1675
+ break;
1386
1676
  default:
1387
1677
  break;
1388
1678
  }
1679
+
1680
+ UPDATE_STATE(s_header_field_start);
1681
+ REEXECUTE();
1682
+ }
1683
+
1684
+ case s_header_value_discard_ws_almost_done:
1685
+ {
1686
+ STRICT_CHECK(ch != LF);
1687
+ UPDATE_STATE(s_header_value_discard_lws);
1389
1688
  break;
1390
1689
  }
1391
1690
 
1691
+ case s_header_value_discard_lws:
1692
+ {
1693
+ if (ch == ' ' || ch == '\t') {
1694
+ UPDATE_STATE(s_header_value_discard_ws);
1695
+ break;
1696
+ } else {
1697
+ switch (parser->header_state) {
1698
+ case h_connection_keep_alive:
1699
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1700
+ break;
1701
+ case h_connection_close:
1702
+ parser->flags |= F_CONNECTION_CLOSE;
1703
+ break;
1704
+ case h_connection_upgrade:
1705
+ parser->flags |= F_CONNECTION_UPGRADE;
1706
+ break;
1707
+ case h_transfer_encoding_chunked:
1708
+ parser->flags |= F_CHUNKED;
1709
+ break;
1710
+ default:
1711
+ break;
1712
+ }
1713
+
1714
+ /* header value was empty */
1715
+ MARK(header_value);
1716
+ UPDATE_STATE(s_header_field_start);
1717
+ CALLBACK_DATA_NOADVANCE(header_value);
1718
+ REEXECUTE();
1719
+ }
1720
+ }
1721
+
1392
1722
  case s_headers_almost_done:
1393
- headers_almost_done:
1394
1723
  {
1395
1724
  STRICT_CHECK(ch != LF);
1396
1725
 
1397
1726
  if (parser->flags & F_TRAILING) {
1398
1727
  /* End of a chunked request */
1399
- CALLBACK2(message_complete);
1400
- state = NEW_MESSAGE();
1401
- break;
1728
+ UPDATE_STATE(s_message_done);
1729
+ CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1730
+ REEXECUTE();
1402
1731
  }
1403
1732
 
1404
- nread = 0;
1733
+ /* Cannot use chunked encoding and a content-length header together
1734
+ per the HTTP specification. */
1735
+ if ((parser->flags & F_CHUNKED) &&
1736
+ (parser->flags & F_CONTENTLENGTH)) {
1737
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1738
+ goto error;
1739
+ }
1405
1740
 
1406
- if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1407
- parser->upgrade = 1;
1741
+ UPDATE_STATE(s_headers_done);
1742
+
1743
+ /* Set this here so that on_headers_complete() callbacks can see it */
1744
+ if ((parser->flags & F_UPGRADE) &&
1745
+ (parser->flags & F_CONNECTION_UPGRADE)) {
1746
+ /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1747
+ * mandatory only when it is a 101 Switching Protocols response,
1748
+ * otherwise it is purely informational, to announce support.
1749
+ */
1750
+ parser->upgrade =
1751
+ (parser->type == HTTP_REQUEST || parser->status_code == 101);
1752
+ } else {
1753
+ parser->upgrade = (parser->method == HTTP_CONNECT);
1408
1754
  }
1409
1755
 
1410
1756
  /* Here we call the headers_complete callback. This is somewhat
@@ -1412,50 +1758,75 @@ size_t http_parser_execute (http_parser *parser,
1412
1758
  * will interpret that as saying that this message has no body. This
1413
1759
  * is needed for the annoying case of receiving a response to a HEAD
1414
1760
  * request.
1761
+ *
1762
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1763
+ * we have to simulate it by handling a change in errno below.
1415
1764
  */
1416
1765
  if (settings->on_headers_complete) {
1417
1766
  switch (settings->on_headers_complete(parser)) {
1418
1767
  case 0:
1419
1768
  break;
1420
1769
 
1770
+ case 2:
1771
+ parser->upgrade = 1;
1772
+
1773
+ /* FALLTHROUGH */
1421
1774
  case 1:
1422
1775
  parser->flags |= F_SKIPBODY;
1423
1776
  break;
1424
1777
 
1425
1778
  default:
1426
- parser->state = state;
1427
- return p - data; /* Error */
1779
+ SET_ERRNO(HPE_CB_headers_complete);
1780
+ RETURN(p - data); /* Error */
1428
1781
  }
1429
1782
  }
1430
1783
 
1431
- /* Exit, the rest of the connect is in a different protocol. */
1432
- if (parser->upgrade) {
1433
- CALLBACK2(message_complete);
1434
- return (p - data);
1784
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1785
+ RETURN(p - data);
1786
+ }
1787
+
1788
+ REEXECUTE();
1789
+ }
1790
+
1791
+ case s_headers_done:
1792
+ {
1793
+ int hasBody;
1794
+ STRICT_CHECK(ch != LF);
1795
+
1796
+ parser->nread = 0;
1797
+
1798
+ hasBody = parser->flags & F_CHUNKED ||
1799
+ (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1800
+ if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1801
+ (parser->flags & F_SKIPBODY) || !hasBody)) {
1802
+ /* Exit, the rest of the message is in a different protocol. */
1803
+ UPDATE_STATE(NEW_MESSAGE());
1804
+ CALLBACK_NOTIFY(message_complete);
1805
+ RETURN((p - data) + 1);
1435
1806
  }
1436
1807
 
1437
1808
  if (parser->flags & F_SKIPBODY) {
1438
- CALLBACK2(message_complete);
1439
- state = NEW_MESSAGE();
1809
+ UPDATE_STATE(NEW_MESSAGE());
1810
+ CALLBACK_NOTIFY(message_complete);
1440
1811
  } else if (parser->flags & F_CHUNKED) {
1441
1812
  /* chunked encoding - ignore Content-Length header */
1442
- state = s_chunk_size_start;
1813
+ UPDATE_STATE(s_chunk_size_start);
1443
1814
  } else {
1444
1815
  if (parser->content_length == 0) {
1445
1816
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1446
- CALLBACK2(message_complete);
1447
- state = NEW_MESSAGE();
1448
- } else if (parser->content_length > 0) {
1817
+ UPDATE_STATE(NEW_MESSAGE());
1818
+ CALLBACK_NOTIFY(message_complete);
1819
+ } else if (parser->content_length != ULLONG_MAX) {
1449
1820
  /* Content-Length header given and non-zero */
1450
- state = s_body_identity;
1821
+ UPDATE_STATE(s_body_identity);
1451
1822
  } else {
1452
- if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1823
+ if (!http_message_needs_eof(parser)) {
1453
1824
  /* Assume content-length 0 - read the next */
1454
- CALLBACK2(message_complete);
1455
- state = NEW_MESSAGE();
1825
+ UPDATE_STATE(NEW_MESSAGE());
1826
+ CALLBACK_NOTIFY(message_complete);
1456
1827
  } else {
1457
1828
  /* Read body until EOF */
1458
- state = s_body_identity_eof;
1829
+ UPDATE_STATE(s_body_identity_eof);
1459
1830
  }
1460
1831
  }
1461
1832
  }
@@ -1464,60 +1835,107 @@ size_t http_parser_execute (http_parser *parser,
1464
1835
  }
1465
1836
 
1466
1837
  case s_body_identity:
1467
- to_read = MIN(pe - p, (int64_t)parser->content_length);
1468
- if (to_read > 0) {
1469
- if (settings->on_body) settings->on_body(parser, p, to_read);
1470
- p += to_read - 1;
1471
- parser->content_length -= to_read;
1472
- if (parser->content_length == 0) {
1473
- CALLBACK2(message_complete);
1474
- state = NEW_MESSAGE();
1475
- }
1838
+ {
1839
+ uint64_t to_read = MIN(parser->content_length,
1840
+ (uint64_t) ((data + len) - p));
1841
+
1842
+ assert(parser->content_length != 0
1843
+ && parser->content_length != ULLONG_MAX);
1844
+
1845
+ /* The difference between advancing content_length and p is because
1846
+ * the latter will automatically advance on the next loop iteration.
1847
+ * Further, if content_length ends up at 0, we want to see the last
1848
+ * byte again for our message complete callback.
1849
+ */
1850
+ MARK(body);
1851
+ parser->content_length -= to_read;
1852
+ p += to_read - 1;
1853
+
1854
+ if (parser->content_length == 0) {
1855
+ UPDATE_STATE(s_message_done);
1856
+
1857
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1858
+ *
1859
+ * The alternative to doing this is to wait for the next byte to
1860
+ * trigger the data callback, just as in every other case. The
1861
+ * problem with this is that this makes it difficult for the test
1862
+ * harness to distinguish between complete-on-EOF and
1863
+ * complete-on-length. It's not clear that this distinction is
1864
+ * important for applications, but let's keep it for now.
1865
+ */
1866
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1867
+ REEXECUTE();
1476
1868
  }
1869
+
1477
1870
  break;
1871
+ }
1478
1872
 
1479
1873
  /* read until EOF */
1480
1874
  case s_body_identity_eof:
1481
- to_read = pe - p;
1482
- if (to_read > 0) {
1483
- if (settings->on_body) settings->on_body(parser, p, to_read);
1484
- p += to_read - 1;
1875
+ MARK(body);
1876
+ p = data + len - 1;
1877
+
1878
+ break;
1879
+
1880
+ case s_message_done:
1881
+ UPDATE_STATE(NEW_MESSAGE());
1882
+ CALLBACK_NOTIFY(message_complete);
1883
+ if (parser->upgrade) {
1884
+ /* Exit, the rest of the message is in a different protocol. */
1885
+ RETURN((p - data) + 1);
1485
1886
  }
1486
1887
  break;
1487
1888
 
1488
1889
  case s_chunk_size_start:
1489
1890
  {
1490
- assert(nread == 1);
1891
+ assert(parser->nread == 1);
1491
1892
  assert(parser->flags & F_CHUNKED);
1492
1893
 
1493
- c = unhex[(unsigned char)ch];
1494
- if (c == -1) goto error;
1495
- parser->content_length = c;
1496
- state = s_chunk_size;
1894
+ unhex_val = unhex[(unsigned char)ch];
1895
+ if (UNLIKELY(unhex_val == -1)) {
1896
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1897
+ goto error;
1898
+ }
1899
+
1900
+ parser->content_length = unhex_val;
1901
+ UPDATE_STATE(s_chunk_size);
1497
1902
  break;
1498
1903
  }
1499
1904
 
1500
1905
  case s_chunk_size:
1501
1906
  {
1907
+ uint64_t t;
1908
+
1502
1909
  assert(parser->flags & F_CHUNKED);
1503
1910
 
1504
1911
  if (ch == CR) {
1505
- state = s_chunk_size_almost_done;
1912
+ UPDATE_STATE(s_chunk_size_almost_done);
1506
1913
  break;
1507
1914
  }
1508
1915
 
1509
- c = unhex[(unsigned char)ch];
1916
+ unhex_val = unhex[(unsigned char)ch];
1510
1917
 
1511
- if (c == -1) {
1918
+ if (unhex_val == -1) {
1512
1919
  if (ch == ';' || ch == ' ') {
1513
- state = s_chunk_parameters;
1920
+ UPDATE_STATE(s_chunk_parameters);
1514
1921
  break;
1515
1922
  }
1923
+
1924
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1516
1925
  goto error;
1517
1926
  }
1518
1927
 
1519
- parser->content_length *= 16;
1520
- parser->content_length += c;
1928
+ t = parser->content_length;
1929
+ t *= 16;
1930
+ t += unhex_val;
1931
+
1932
+ /* Overflow? Test against a conservative limit for simplicity. */
1933
+ if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1934
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1935
+ goto error;
1936
+ }
1937
+
1938
+ parser->content_length = t;
1521
1939
  break;
1522
1940
  }
1523
1941
 
@@ -1526,7 +1944,7 @@ size_t http_parser_execute (http_parser *parser,
1526
1944
  assert(parser->flags & F_CHUNKED);
1527
1945
  /* just ignore this shit. TODO check for overflow */
1528
1946
  if (ch == CR) {
1529
- state = s_chunk_size_almost_done;
1947
+ UPDATE_STATE(s_chunk_size_almost_done);
1530
1948
  break;
1531
1949
  }
1532
1950
  break;
@@ -1537,108 +1955,486 @@ size_t http_parser_execute (http_parser *parser,
1537
1955
  assert(parser->flags & F_CHUNKED);
1538
1956
  STRICT_CHECK(ch != LF);
1539
1957
 
1540
- nread = 0;
1958
+ parser->nread = 0;
1541
1959
 
1542
1960
  if (parser->content_length == 0) {
1543
1961
  parser->flags |= F_TRAILING;
1544
- state = s_header_field_start;
1962
+ UPDATE_STATE(s_header_field_start);
1545
1963
  } else {
1546
- state = s_chunk_data;
1964
+ UPDATE_STATE(s_chunk_data);
1547
1965
  }
1966
+ CALLBACK_NOTIFY(chunk_header);
1548
1967
  break;
1549
1968
  }
1550
1969
 
1551
1970
  case s_chunk_data:
1552
1971
  {
1553
- assert(parser->flags & F_CHUNKED);
1972
+ uint64_t to_read = MIN(parser->content_length,
1973
+ (uint64_t) ((data + len) - p));
1554
1974
 
1555
- to_read = MIN(pe - p, (int64_t)(parser->content_length));
1975
+ assert(parser->flags & F_CHUNKED);
1976
+ assert(parser->content_length != 0
1977
+ && parser->content_length != ULLONG_MAX);
1556
1978
 
1557
- if (to_read > 0) {
1558
- if (settings->on_body) settings->on_body(parser, p, to_read);
1559
- p += to_read - 1;
1560
- }
1979
+ /* See the explanation in s_body_identity for why the content
1980
+ * length and data pointers are managed this way.
1981
+ */
1982
+ MARK(body);
1983
+ parser->content_length -= to_read;
1984
+ p += to_read - 1;
1561
1985
 
1562
- if (to_read == parser->content_length) {
1563
- state = s_chunk_data_almost_done;
1986
+ if (parser->content_length == 0) {
1987
+ UPDATE_STATE(s_chunk_data_almost_done);
1564
1988
  }
1565
1989
 
1566
- parser->content_length -= to_read;
1567
1990
  break;
1568
1991
  }
1569
1992
 
1570
1993
  case s_chunk_data_almost_done:
1571
1994
  assert(parser->flags & F_CHUNKED);
1995
+ assert(parser->content_length == 0);
1572
1996
  STRICT_CHECK(ch != CR);
1573
- state = s_chunk_data_done;
1997
+ UPDATE_STATE(s_chunk_data_done);
1998
+ CALLBACK_DATA(body);
1574
1999
  break;
1575
2000
 
1576
2001
  case s_chunk_data_done:
1577
2002
  assert(parser->flags & F_CHUNKED);
1578
2003
  STRICT_CHECK(ch != LF);
1579
- state = s_chunk_size_start;
2004
+ parser->nread = 0;
2005
+ UPDATE_STATE(s_chunk_size_start);
2006
+ CALLBACK_NOTIFY(chunk_complete);
1580
2007
  break;
1581
2008
 
1582
2009
  default:
1583
2010
  assert(0 && "unhandled state");
2011
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1584
2012
  goto error;
1585
2013
  }
1586
2014
  }
1587
2015
 
1588
- CALLBACK_NOCLEAR(header_field);
1589
- CALLBACK_NOCLEAR(header_value);
1590
- CALLBACK_NOCLEAR(fragment);
1591
- CALLBACK_NOCLEAR(query_string);
1592
- CALLBACK_NOCLEAR(path);
1593
- CALLBACK_NOCLEAR(url);
2016
+ /* Run callbacks for any marks that we have leftover after we ran out of
2017
+ * bytes. There should be at most one of these set, so it's OK to invoke
2018
+ * them in series (unset marks will not result in callbacks).
2019
+ *
2020
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
2021
+ * overflowed 'data' and this allows us to correct for the off-by-one that
2022
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2023
+ * value that's in-bounds).
2024
+ */
2025
+
2026
+ assert(((header_field_mark ? 1 : 0) +
2027
+ (header_value_mark ? 1 : 0) +
2028
+ (url_mark ? 1 : 0) +
2029
+ (body_mark ? 1 : 0) +
2030
+ (status_mark ? 1 : 0)) <= 1);
1594
2031
 
1595
- parser->state = state;
1596
- parser->header_state = header_state;
1597
- parser->index = index;
1598
- parser->nread = nread;
2032
+ CALLBACK_DATA_NOADVANCE(header_field);
2033
+ CALLBACK_DATA_NOADVANCE(header_value);
2034
+ CALLBACK_DATA_NOADVANCE(url);
2035
+ CALLBACK_DATA_NOADVANCE(body);
2036
+ CALLBACK_DATA_NOADVANCE(status);
1599
2037
 
1600
- return len;
2038
+ RETURN(len);
1601
2039
 
1602
2040
  error:
1603
- parser->state = s_dead;
1604
- return (p - data);
2041
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2042
+ SET_ERRNO(HPE_UNKNOWN);
2043
+ }
2044
+
2045
+ RETURN(p - data);
2046
+ }
2047
+
2048
+
2049
+ /* Does the parser need to see an EOF to find the end of the message? */
2050
+ int
2051
+ http_message_needs_eof (const http_parser *parser)
2052
+ {
2053
+ if (parser->type == HTTP_REQUEST) {
2054
+ return 0;
2055
+ }
2056
+
2057
+ /* See RFC 2616 section 4.4 */
2058
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2059
+ parser->status_code == 204 || /* No Content */
2060
+ parser->status_code == 304 || /* Not Modified */
2061
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2062
+ return 0;
2063
+ }
2064
+
2065
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2066
+ return 0;
2067
+ }
2068
+
2069
+ return 1;
1605
2070
  }
1606
2071
 
1607
2072
 
1608
2073
  int
1609
- http_should_keep_alive (http_parser *parser)
2074
+ http_should_keep_alive (const http_parser *parser)
1610
2075
  {
1611
2076
  if (parser->http_major > 0 && parser->http_minor > 0) {
1612
2077
  /* HTTP/1.1 */
1613
2078
  if (parser->flags & F_CONNECTION_CLOSE) {
1614
2079
  return 0;
1615
- } else {
1616
- return 1;
1617
2080
  }
1618
2081
  } else {
1619
2082
  /* HTTP/1.0 or earlier */
1620
- if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1621
- return 1;
1622
- } else {
2083
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1623
2084
  return 0;
1624
2085
  }
1625
2086
  }
2087
+
2088
+ return !http_message_needs_eof(parser);
1626
2089
  }
1627
2090
 
1628
2091
 
1629
- const char * http_method_str (enum http_method m)
2092
+ const char *
2093
+ http_method_str (enum http_method m)
1630
2094
  {
1631
- return method_strings[m];
2095
+ return ELEM_AT(method_strings, m, "<unknown>");
1632
2096
  }
1633
2097
 
1634
2098
 
1635
2099
  void
1636
2100
  http_parser_init (http_parser *parser, enum http_parser_type t)
1637
2101
  {
2102
+ void *data = parser->data; /* preserve application data */
2103
+ memset(parser, 0, sizeof(*parser));
2104
+ parser->data = data;
1638
2105
  parser->type = t;
1639
2106
  parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1640
- parser->nread = 0;
1641
- parser->upgrade = 0;
1642
- parser->flags = 0;
1643
- parser->method = 0;
2107
+ parser->http_errno = HPE_OK;
2108
+ }
2109
+
2110
+ void
2111
+ http_parser_settings_init(http_parser_settings *settings)
2112
+ {
2113
+ memset(settings, 0, sizeof(*settings));
2114
+ }
2115
+
2116
+ const char *
2117
+ http_errno_name(enum http_errno err) {
2118
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2119
+ return http_strerror_tab[err].name;
2120
+ }
2121
+
2122
+ const char *
2123
+ http_errno_description(enum http_errno err) {
2124
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2125
+ return http_strerror_tab[err].description;
2126
+ }
2127
+
2128
+ static enum http_host_state
2129
+ http_parse_host_char(enum http_host_state s, const char ch) {
2130
+ switch(s) {
2131
+ case s_http_userinfo:
2132
+ case s_http_userinfo_start:
2133
+ if (ch == '@') {
2134
+ return s_http_host_start;
2135
+ }
2136
+
2137
+ if (IS_USERINFO_CHAR(ch)) {
2138
+ return s_http_userinfo;
2139
+ }
2140
+ break;
2141
+
2142
+ case s_http_host_start:
2143
+ if (ch == '[') {
2144
+ return s_http_host_v6_start;
2145
+ }
2146
+
2147
+ if (IS_HOST_CHAR(ch)) {
2148
+ return s_http_host;
2149
+ }
2150
+
2151
+ break;
2152
+
2153
+ case s_http_host:
2154
+ if (IS_HOST_CHAR(ch)) {
2155
+ return s_http_host;
2156
+ }
2157
+
2158
+ /* FALLTHROUGH */
2159
+ case s_http_host_v6_end:
2160
+ if (ch == ':') {
2161
+ return s_http_host_port_start;
2162
+ }
2163
+
2164
+ break;
2165
+
2166
+ case s_http_host_v6:
2167
+ if (ch == ']') {
2168
+ return s_http_host_v6_end;
2169
+ }
2170
+
2171
+ /* FALLTHROUGH */
2172
+ case s_http_host_v6_start:
2173
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
2174
+ return s_http_host_v6;
2175
+ }
2176
+
2177
+ if (s == s_http_host_v6 && ch == '%') {
2178
+ return s_http_host_v6_zone_start;
2179
+ }
2180
+ break;
2181
+
2182
+ case s_http_host_v6_zone:
2183
+ if (ch == ']') {
2184
+ return s_http_host_v6_end;
2185
+ }
2186
+
2187
+ /* FALLTHROUGH */
2188
+ case s_http_host_v6_zone_start:
2189
+ /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2190
+ if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2191
+ ch == '~') {
2192
+ return s_http_host_v6_zone;
2193
+ }
2194
+ break;
2195
+
2196
+ case s_http_host_port:
2197
+ case s_http_host_port_start:
2198
+ if (IS_NUM(ch)) {
2199
+ return s_http_host_port;
2200
+ }
2201
+
2202
+ break;
2203
+
2204
+ default:
2205
+ break;
2206
+ }
2207
+ return s_http_host_dead;
2208
+ }
2209
+
2210
+ static int
2211
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2212
+ enum http_host_state s;
2213
+
2214
+ const char *p;
2215
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2216
+
2217
+ assert(u->field_set & (1 << UF_HOST));
2218
+
2219
+ u->field_data[UF_HOST].len = 0;
2220
+
2221
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
2222
+
2223
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2224
+ enum http_host_state new_s = http_parse_host_char(s, *p);
2225
+
2226
+ if (new_s == s_http_host_dead) {
2227
+ return 1;
2228
+ }
2229
+
2230
+ switch(new_s) {
2231
+ case s_http_host:
2232
+ if (s != s_http_host) {
2233
+ u->field_data[UF_HOST].off = p - buf;
2234
+ }
2235
+ u->field_data[UF_HOST].len++;
2236
+ break;
2237
+
2238
+ case s_http_host_v6:
2239
+ if (s != s_http_host_v6) {
2240
+ u->field_data[UF_HOST].off = p - buf;
2241
+ }
2242
+ u->field_data[UF_HOST].len++;
2243
+ break;
2244
+
2245
+ case s_http_host_v6_zone_start:
2246
+ case s_http_host_v6_zone:
2247
+ u->field_data[UF_HOST].len++;
2248
+ break;
2249
+
2250
+ case s_http_host_port:
2251
+ if (s != s_http_host_port) {
2252
+ u->field_data[UF_PORT].off = p - buf;
2253
+ u->field_data[UF_PORT].len = 0;
2254
+ u->field_set |= (1 << UF_PORT);
2255
+ }
2256
+ u->field_data[UF_PORT].len++;
2257
+ break;
2258
+
2259
+ case s_http_userinfo:
2260
+ if (s != s_http_userinfo) {
2261
+ u->field_data[UF_USERINFO].off = p - buf ;
2262
+ u->field_data[UF_USERINFO].len = 0;
2263
+ u->field_set |= (1 << UF_USERINFO);
2264
+ }
2265
+ u->field_data[UF_USERINFO].len++;
2266
+ break;
2267
+
2268
+ default:
2269
+ break;
2270
+ }
2271
+ s = new_s;
2272
+ }
2273
+
2274
+ /* Make sure we don't end somewhere unexpected */
2275
+ switch (s) {
2276
+ case s_http_host_start:
2277
+ case s_http_host_v6_start:
2278
+ case s_http_host_v6:
2279
+ case s_http_host_v6_zone_start:
2280
+ case s_http_host_v6_zone:
2281
+ case s_http_host_port_start:
2282
+ case s_http_userinfo:
2283
+ case s_http_userinfo_start:
2284
+ return 1;
2285
+ default:
2286
+ break;
2287
+ }
2288
+
2289
+ return 0;
2290
+ }
2291
+
2292
+ void
2293
+ http_parser_url_init(struct http_parser_url *u) {
2294
+ memset(u, 0, sizeof(*u));
2295
+ }
2296
+
2297
+ int
2298
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2299
+ struct http_parser_url *u)
2300
+ {
2301
+ enum state s;
2302
+ const char *p;
2303
+ enum http_parser_url_fields uf, old_uf;
2304
+ int found_at = 0;
2305
+
2306
+ u->port = u->field_set = 0;
2307
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2308
+ old_uf = UF_MAX;
2309
+
2310
+ for (p = buf; p < buf + buflen; p++) {
2311
+ s = parse_url_char(s, *p);
2312
+
2313
+ /* Figure out the next field that we're operating on */
2314
+ switch (s) {
2315
+ case s_dead:
2316
+ return 1;
2317
+
2318
+ /* Skip delimiters */
2319
+ case s_req_schema_slash:
2320
+ case s_req_schema_slash_slash:
2321
+ case s_req_server_start:
2322
+ case s_req_query_string_start:
2323
+ case s_req_fragment_start:
2324
+ continue;
2325
+
2326
+ case s_req_schema:
2327
+ uf = UF_SCHEMA;
2328
+ break;
2329
+
2330
+ case s_req_server_with_at:
2331
+ found_at = 1;
2332
+
2333
+ /* FALLTHROUGH */
2334
+ case s_req_server:
2335
+ uf = UF_HOST;
2336
+ break;
2337
+
2338
+ case s_req_path:
2339
+ uf = UF_PATH;
2340
+ break;
2341
+
2342
+ case s_req_query_string:
2343
+ uf = UF_QUERY;
2344
+ break;
2345
+
2346
+ case s_req_fragment:
2347
+ uf = UF_FRAGMENT;
2348
+ break;
2349
+
2350
+ default:
2351
+ assert(!"Unexpected state");
2352
+ return 1;
2353
+ }
2354
+
2355
+ /* Nothing's changed; soldier on */
2356
+ if (uf == old_uf) {
2357
+ u->field_data[uf].len++;
2358
+ continue;
2359
+ }
2360
+
2361
+ u->field_data[uf].off = p - buf;
2362
+ u->field_data[uf].len = 1;
2363
+
2364
+ u->field_set |= (1 << uf);
2365
+ old_uf = uf;
2366
+ }
2367
+
2368
+ /* host must be present if there is a schema */
2369
+ /* parsing http:///toto will fail */
2370
+ if ((u->field_set & (1 << UF_SCHEMA)) &&
2371
+ (u->field_set & (1 << UF_HOST)) == 0) {
2372
+ return 1;
2373
+ }
2374
+
2375
+ if (u->field_set & (1 << UF_HOST)) {
2376
+ if (http_parse_host(buf, u, found_at) != 0) {
2377
+ return 1;
2378
+ }
2379
+ }
2380
+
2381
+ /* CONNECT requests can only contain "hostname:port" */
2382
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2383
+ return 1;
2384
+ }
2385
+
2386
+ if (u->field_set & (1 << UF_PORT)) {
2387
+ uint16_t off;
2388
+ uint16_t len;
2389
+ const char* p;
2390
+ const char* end;
2391
+ unsigned long v;
2392
+
2393
+ off = u->field_data[UF_PORT].off;
2394
+ len = u->field_data[UF_PORT].len;
2395
+ end = buf + off + len;
2396
+
2397
+ /* NOTE: The characters are already validated and are in the [0-9] range */
2398
+ assert(off + len <= buflen && "Port number overflow");
2399
+ v = 0;
2400
+ for (p = buf + off; p < end; p++) {
2401
+ v *= 10;
2402
+ v += *p - '0';
2403
+
2404
+ /* Ports have a max value of 2^16 - 1 (0xffff) */
2405
+ if (v > 0xffff) {
2406
+ return 1;
2407
+ }
2408
+ }
2409
+
2410
+ u->port = (uint16_t) v;
2411
+ }
2412
+
2413
+ return 0;
2414
+ }
2415
+
2416
+ void
2417
+ http_parser_pause(http_parser *parser, int paused) {
2418
+ /* Users should only be pausing/unpausing a parser that is not in an error
2419
+ * state. In non-debug builds, there's not much that we can do about this
2420
+ * other than ignore it.
2421
+ */
2422
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2423
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2424
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2425
+ } else {
2426
+ assert(0 && "Attempting to pause parser in error state");
2427
+ }
2428
+ }
2429
+
2430
+ int
2431
+ http_body_is_final(const struct http_parser *parser) {
2432
+ return parser->state == s_message_done;
2433
+ }
2434
+
2435
+ unsigned long
2436
+ http_parser_version(void) {
2437
+ return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2438
+ HTTP_PARSER_VERSION_MINOR * 0x00100 |
2439
+ HTTP_PARSER_VERSION_PATCH * 0x00001;
1644
2440
  }