http_parser.rb 0.5.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/linux.yml +23 -0
  3. data/.github/workflows/windows.yml +23 -0
  4. data/.gitignore +5 -4
  5. data/.gitmodules +4 -4
  6. data/Gemfile +1 -1
  7. data/README.md +52 -47
  8. data/Rakefile +1 -0
  9. data/bench/standalone.rb +23 -0
  10. data/bench/thin.rb +1 -0
  11. data/ext/ruby_http_parser/extconf.rb +1 -1
  12. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +139 -83
  13. data/ext/ruby_http_parser/ruby_http_parser.c +40 -41
  14. data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
  15. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +5 -1
  16. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +133 -1
  17. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +6 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +1202 -671
  19. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
  20. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +172 -51
  21. data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
  22. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
  23. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +8 -3
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +35 -102
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +6 -6
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +775 -682
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +8 -4
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +70 -20
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +1 -1
  32. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +1 -0
  33. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +2 -1
  34. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +1 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +6 -17
  36. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +1 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +1 -0
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +1 -0
  39. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
  40. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +80 -9
  41. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +2 -1
  42. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +1637 -280
  43. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +230 -71
  44. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +68 -0
  45. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +1 -1
  46. data/ext/ruby_http_parser/vendor/http-parser/README.md +113 -38
  47. data/ext/ruby_http_parser/vendor/http-parser/bench.c +128 -0
  48. data/ext/ruby_http_parser/vendor/http-parser/contrib/parsertrace.c +157 -0
  49. data/ext/ruby_http_parser/vendor/http-parser/contrib/url_parser.c +47 -0
  50. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +1576 -780
  51. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +111 -0
  52. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +308 -58
  53. data/ext/ruby_http_parser/vendor/http-parser/test.c +2964 -460
  54. data/http_parser.rb.gemspec +14 -7
  55. data/spec/parser_spec.rb +196 -102
  56. data/spec/support/requests.json +236 -24
  57. data/spec/support/responses.json +202 -36
  58. data/tasks/compile.rake +2 -2
  59. data/tasks/fixtures.rake +8 -2
  60. data/tasks/spec.rake +1 -1
  61. metadata +141 -134
  62. data/Gemfile.lock +0 -32
  63. data/ext/ruby_http_parser/vendor/http-parser-java/compile +0 -1
  64. data/ext/ruby_http_parser/vendor/http-parser-java/test_permutations +0 -1
  65. data/ext/ruby_http_parser/vendor/http-parser-java/test_unit +0 -1
  66. data/ext/ruby_http_parser/vendor/http-parser-java/test_utf8 +0 -1
  67. data/ext/ruby_http_parser/vendor/http-parser/CONTRIBUTIONS +0 -4
@@ -0,0 +1,47 @@
1
+ #include "http_parser.h"
2
+ #include <stdio.h>
3
+ #include <string.h>
4
+
5
/*
 * dump_url: print the parsed components of `url`, as recorded in `u`, to
 * stdout.
 *
 * Output is one summary line (field_set in hex, port in decimal) followed by
 * one line per field index in [0, UF_MAX): either "unset" when that index's
 * bit is clear in u->field_set, or the field's offset, length and the
 * substring of `url` it covers. Neither `url` nor `u` is modified.
 */
+ void
6
+ dump_url (const char *url, const struct http_parser_url *u)
7
+ {
8
+ unsigned int i;
9
+
10
+ printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port);
11
+ for (i = 0; i < UF_MAX; i++) {
12
/* Bit i of field_set says whether field_data[i] was filled in. */
+ if ((u->field_set & (1 << i)) == 0) {
13
+ printf("\tfield_data[%u]: unset\n", i);
14
+ continue;
15
+ }
16
+
17
/* "%.*s" takes the length as its precision argument, so exactly `len` bytes
 * starting at url + off are printed; the field need not be NUL-terminated.
 * That is why `len` is passed twice below. */
+ printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n",
18
+ i,
19
+ u->field_data[i].off,
20
+ u->field_data[i].len,
21
+ u->field_data[i].len,
22
+ url + u->field_data[i].off);
23
+ }
24
+ }
25
+
26
/*
 * Command-line driver: parse one URL and dump its components.
 *
 * Expects exactly two arguments: a mode and a URL. The URL is parsed in
 * CONNECT mode only when the mode argument is exactly the string "connect";
 * any other value (not just "get") selects the non-CONNECT path.
 *
 * Returns 1 on a usage error, the non-zero result of http_parser_parse_url()
 * on a parse failure, and 0 on success. All messages go to stdout.
 */
+ int main(int argc, char ** argv) {
27
+ struct http_parser_url u;
28
+ int len, connect, result;
29
+
30
+ if (argc != 3) {
31
+ printf("Syntax : %s connect|get url\n", argv[0]);
32
+ return 1;
33
+ }
34
/* NOTE(review): strlen() returns size_t but `len` is int, so this is a
 * narrowing conversion; presumably harmless for command-line arguments, but
 * confirm against the length parameter type of http_parser_parse_url(). */
+ len = strlen(argv[2]);
35
+ connect = strcmp("connect", argv[1]) == 0 ? 1 : 0;
36
+ printf("Parsing %s, connect %d\n", argv[2], connect);
37
+
38
/* Initialise `u` before it is handed to the parser. */
+ http_parser_url_init(&u);
39
+ result = http_parser_parse_url(argv[2], len, connect, &u);
40
+ if (result != 0) {
41
+ printf("Parse error : %d\n", result);
42
/* The parser's result code doubles as the process exit status. */
+ return result;
43
+ }
44
+ printf("Parse ok, result : \n");
45
+ dump_url(argv[2], &u);
46
+ return 0;
47
+ }
@@ -1,4 +1,4 @@
1
- /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
1
+ /* Copyright Joyent, Inc. and other Node contributors.
2
2
  *
3
3
  * Permission is hereby granted, free of charge, to any person obtaining a copy
4
4
  * of this software and associated documentation files (the "Software"), to
@@ -18,48 +18,142 @@
18
18
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
19
  * IN THE SOFTWARE.
20
20
  */
21
- #include <http_parser.h>
21
+ #include "http_parser.h"
22
22
  #include <assert.h>
23
23
  #include <stddef.h>
24
+ #include <ctype.h>
25
+ #include <string.h>
26
+ #include <limits.h>
24
27
 
28
+ #ifndef ULLONG_MAX
29
+ # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
30
+ #endif
25
31
 
26
32
  #ifndef MIN
27
33
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
28
34
  #endif
29
35
 
36
+ #ifndef ARRAY_SIZE
37
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
38
+ #endif
39
+
40
+ #ifndef BIT_AT
41
+ # define BIT_AT(a, i) \
42
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
43
+ (1 << ((unsigned int) (i) & 7))))
44
+ #endif
45
+
46
+ #ifndef ELEM_AT
47
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
48
+ #endif
30
49
 
31
- #define CALLBACK2(FOR) \
50
+ #define SET_ERRNO(e) \
32
51
  do { \
33
- if (settings->on_##FOR) { \
34
- if (0 != settings->on_##FOR(parser)) return (p - data); \
35
- } \
36
- } while (0)
52
+ parser->http_errno = (e); \
53
+ } while(0)
37
54
 
55
+ #define CURRENT_STATE() p_state
56
+ #define UPDATE_STATE(V) p_state = (enum state) (V);
57
+ #define RETURN(V) \
58
+ do { \
59
+ parser->state = CURRENT_STATE(); \
60
+ return (V); \
61
+ } while (0);
62
+ #define REEXECUTE() \
63
+ goto reexecute; \
38
64
 
39
- #define MARK(FOR) \
65
+
66
+ #ifdef __GNUC__
67
+ # define LIKELY(X) __builtin_expect(!!(X), 1)
68
+ # define UNLIKELY(X) __builtin_expect(!!(X), 0)
69
+ #else
70
+ # define LIKELY(X) (X)
71
+ # define UNLIKELY(X) (X)
72
+ #endif
73
+
74
+
75
+ /* Run the notify callback FOR, returning ER if it fails */
76
+ #define CALLBACK_NOTIFY_(FOR, ER) \
40
77
  do { \
41
- FOR##_mark = p; \
78
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
79
+ \
80
+ if (LIKELY(settings->on_##FOR)) { \
81
+ parser->state = CURRENT_STATE(); \
82
+ if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
83
+ SET_ERRNO(HPE_CB_##FOR); \
84
+ } \
85
+ UPDATE_STATE(parser->state); \
86
+ \
87
+ /* We either errored above or got paused; get out */ \
88
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
89
+ return (ER); \
90
+ } \
91
+ } \
42
92
  } while (0)
43
93
 
44
- #define CALLBACK_NOCLEAR(FOR) \
94
+ /* Run the notify callback FOR and consume the current byte */
95
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
96
+
97
+ /* Run the notify callback FOR and don't consume the current byte */
98
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
99
+
100
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
101
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
45
102
  do { \
103
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
104
+ \
46
105
  if (FOR##_mark) { \
47
- if (settings->on_##FOR) { \
48
- if (0 != settings->on_##FOR(parser, \
49
- FOR##_mark, \
50
- p - FOR##_mark)) \
51
- { \
52
- return (p - data); \
106
+ if (LIKELY(settings->on_##FOR)) { \
107
+ parser->state = CURRENT_STATE(); \
108
+ if (UNLIKELY(0 != \
109
+ settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
110
+ SET_ERRNO(HPE_CB_##FOR); \
111
+ } \
112
+ UPDATE_STATE(parser->state); \
113
+ \
114
+ /* We either errored above or got paused; get out */ \
115
+ if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
116
+ return (ER); \
53
117
  } \
54
118
  } \
119
+ FOR##_mark = NULL; \
55
120
  } \
56
121
  } while (0)
57
122
 
123
+ /* Run the data callback FOR and consume the current byte */
124
+ #define CALLBACK_DATA(FOR) \
125
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
126
+
127
+ /* Run the data callback FOR and don't consume the current byte */
128
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
129
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
130
+
131
+ /* Set the mark FOR; non-destructive if mark is already set */
132
+ #define MARK(FOR) \
133
+ do { \
134
+ if (!FOR##_mark) { \
135
+ FOR##_mark = p; \
136
+ } \
137
+ } while (0)
58
138
 
59
- #define CALLBACK(FOR) \
139
+ /* Don't allow the total size of the HTTP headers (including the status
140
+ * line) to exceed HTTP_MAX_HEADER_SIZE. This check is here to protect
141
+ * embedders against denial-of-service attacks where the attacker feeds
142
+ * us a never-ending header that the embedder keeps buffering.
143
+ *
144
+ * This check is arguably the responsibility of embedders but we're doing
145
+ * it on the embedder's behalf because most won't bother and this way we
146
+ * make the web a little safer. HTTP_MAX_HEADER_SIZE is still far bigger
147
+ * than any reasonable request or response so this should never affect
148
+ * day-to-day operation.
149
+ */
150
+ #define COUNT_HEADER_SIZE(V) \
60
151
  do { \
61
- CALLBACK_NOCLEAR(FOR); \
62
- FOR##_mark = NULL; \
152
+ parser->nread += (V); \
153
+ if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) { \
154
+ SET_ERRNO(HPE_HEADER_OVERFLOW); \
155
+ goto error; \
156
+ } \
63
157
  } while (0)
64
158
 
65
159
 
@@ -74,29 +168,10 @@ do { \
74
168
 
75
169
 
76
170
  static const char *method_strings[] =
77
- { "DELETE"
78
- , "GET"
79
- , "HEAD"
80
- , "POST"
81
- , "PUT"
82
- , "CONNECT"
83
- , "OPTIONS"
84
- , "TRACE"
85
- , "COPY"
86
- , "LOCK"
87
- , "MKCOL"
88
- , "MOVE"
89
- , "PROPFIND"
90
- , "PROPPATCH"
91
- , "UNLOCK"
92
- , "REPORT"
93
- , "MKACTIVITY"
94
- , "CHECKOUT"
95
- , "MERGE"
96
- , "M-SEARCH"
97
- , "NOTIFY"
98
- , "SUBSCRIBE"
99
- , "UNSUBSCRIBE"
171
+ {
172
+ #define XX(num, name, string) #string,
173
+ HTTP_METHOD_MAP(XX)
174
+ #undef XX
100
175
  };
101
176
 
102
177
 
@@ -117,9 +192,9 @@ static const char tokens[256] = {
117
192
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
118
193
  0, 0, 0, 0, 0, 0, 0, 0,
119
194
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
120
- ' ', '!', '"', '#', '$', '%', '&', '\'',
195
+ 0, '!', 0, '#', '$', '%', '&', '\'',
121
196
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
122
- 0, 0, '*', '+', 0, '-', '.', '/',
197
+ 0, 0, '*', '+', 0, '-', '.', 0,
123
198
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
124
199
  '0', '1', '2', '3', '4', '5', '6', '7',
125
200
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -139,7 +214,7 @@ static const char tokens[256] = {
139
214
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
140
215
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
141
216
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
142
- 'x', 'y', 'z', 0, '|', '}', '~', 0 };
217
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
143
218
 
144
219
 
145
220
  static const int8_t unhex[256] =
@@ -154,61 +229,48 @@ static const int8_t unhex[256] =
154
229
  };
155
230
 
156
231
 
157
- static const uint8_t normal_url_char[256] = {
232
+ #if HTTP_PARSER_STRICT
233
+ # define T(v) 0
234
+ #else
235
+ # define T(v) v
236
+ #endif
237
+
238
+
239
+ static const uint8_t normal_url_char[32] = {
158
240
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
159
- 0, 0, 0, 0, 0, 0, 0, 0,
241
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
160
242
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
161
- 0, 0, 0, 0, 0, 0, 0, 0,
243
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
162
244
  /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
163
- 0, 0, 0, 0, 0, 0, 0, 0,
245
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
164
246
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
165
- 0, 0, 0, 0, 0, 0, 0, 0,
247
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
166
248
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
167
- 0, 1, 1, 0, 1, 1, 1, 1,
249
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
168
250
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
169
- 1, 1, 1, 1, 1, 1, 1, 1,
251
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
170
252
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
171
- 1, 1, 1, 1, 1, 1, 1, 1,
253
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
172
254
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
173
- 1, 1, 1, 1, 1, 1, 1, 0,
255
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
174
256
  /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
175
- 1, 1, 1, 1, 1, 1, 1, 1,
257
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
176
258
  /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
177
- 1, 1, 1, 1, 1, 1, 1, 1,
259
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
178
260
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
179
- 1, 1, 1, 1, 1, 1, 1, 1,
261
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
180
262
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
181
- 1, 1, 1, 1, 1, 1, 1, 1,
263
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
182
264
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
183
- 1, 1, 1, 1, 1, 1, 1, 1,
265
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
184
266
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
185
- 1, 1, 1, 1, 1, 1, 1, 1,
267
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
186
268
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
187
- 1, 1, 1, 1, 1, 1, 1, 1,
269
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
188
270
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
189
- 1, 1, 1, 1, 1, 1, 1, 0,
190
-
191
- /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
192
- encoded paths. This is out of spec, but clients generate this and most other
193
- HTTP servers support it. We should, too. */
194
-
195
- 1, 1, 1, 1, 1, 1, 1, 1,
196
- 1, 1, 1, 1, 1, 1, 1, 1,
197
- 1, 1, 1, 1, 1, 1, 1, 1,
198
- 1, 1, 1, 1, 1, 1, 1, 1,
199
- 1, 1, 1, 1, 1, 1, 1, 1,
200
- 1, 1, 1, 1, 1, 1, 1, 1,
201
- 1, 1, 1, 1, 1, 1, 1, 1,
202
- 1, 1, 1, 1, 1, 1, 1, 1,
203
- 1, 1, 1, 1, 1, 1, 1, 1,
204
- 1, 1, 1, 1, 1, 1, 1, 1,
205
- 1, 1, 1, 1, 1, 1, 1, 1,
206
- 1, 1, 1, 1, 1, 1, 1, 1,
207
- 1, 1, 1, 1, 1, 1, 1, 1,
208
- 1, 1, 1, 1, 1, 1, 1, 1,
209
- 1, 1, 1, 1, 1, 1, 1, 1,
210
- 1, 1, 1, 1, 1, 1, 1, 1 };
271
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
211
272
 
273
+ #undef T
212
274
 
213
275
  enum state
214
276
  { s_dead = 1 /* important that this is > 0 */
@@ -220,12 +282,13 @@ enum state
220
282
  , s_res_HT
221
283
  , s_res_HTT
222
284
  , s_res_HTTP
223
- , s_res_first_http_major
224
285
  , s_res_http_major
225
- , s_res_first_http_minor
286
+ , s_res_http_dot
226
287
  , s_res_http_minor
288
+ , s_res_http_end
227
289
  , s_res_first_status_code
228
290
  , s_res_status_code
291
+ , s_res_status_start
229
292
  , s_res_status
230
293
  , s_res_line_almost_done
231
294
 
@@ -236,8 +299,9 @@ enum state
236
299
  , s_req_schema
237
300
  , s_req_schema_slash
238
301
  , s_req_schema_slash_slash
239
- , s_req_host
240
- , s_req_port
302
+ , s_req_server_start
303
+ , s_req_server
304
+ , s_req_server_with_at
241
305
  , s_req_path
242
306
  , s_req_query_string_start
243
307
  , s_req_query_string
@@ -248,16 +312,20 @@ enum state
248
312
  , s_req_http_HT
249
313
  , s_req_http_HTT
250
314
  , s_req_http_HTTP
251
- , s_req_first_http_major
252
315
  , s_req_http_major
253
- , s_req_first_http_minor
316
+ , s_req_http_dot
254
317
  , s_req_http_minor
318
+ , s_req_http_end
255
319
  , s_req_line_almost_done
256
320
 
257
321
  , s_header_field_start
258
322
  , s_header_field
323
+ , s_header_value_discard_ws
324
+ , s_header_value_discard_ws_almost_done
325
+ , s_header_value_discard_lws
259
326
  , s_header_value_start
260
327
  , s_header_value
328
+ , s_header_value_lws
261
329
 
262
330
  , s_header_almost_done
263
331
 
@@ -265,9 +333,11 @@ enum state
265
333
  , s_chunk_size
266
334
  , s_chunk_parameters
267
335
  , s_chunk_size_almost_done
268
-
336
+
269
337
  , s_headers_almost_done
270
- /* Important: 's_headers_almost_done' must be the last 'header' state. All
338
+ , s_headers_done
339
+
340
+ /* Important: 's_headers_done' must be the last 'header' state. All
271
341
  * states beyond this must be 'body' states. It is used for overflow
272
342
  * checking. See the PARSING_HEADER() macro.
273
343
  */
@@ -278,10 +348,12 @@ enum state
278
348
 
279
349
  , s_body_identity
280
350
  , s_body_identity_eof
351
+
352
+ , s_message_done
281
353
  };
282
354
 
283
355
 
284
- #define PARSING_HEADER(state) (state <= s_headers_almost_done)
356
+ #define PARSING_HEADER(state) (state <= s_headers_done)
285
357
 
286
358
 
287
359
  enum header_states
@@ -298,40 +370,87 @@ enum header_states
298
370
 
299
371
  , h_connection
300
372
  , h_content_length
373
+ , h_content_length_num
374
+ , h_content_length_ws
301
375
  , h_transfer_encoding
302
376
  , h_upgrade
303
377
 
304
378
  , h_matching_transfer_encoding_chunked
379
+ , h_matching_connection_token_start
305
380
  , h_matching_connection_keep_alive
306
381
  , h_matching_connection_close
382
+ , h_matching_connection_upgrade
383
+ , h_matching_connection_token
307
384
 
308
385
  , h_transfer_encoding_chunked
309
386
  , h_connection_keep_alive
310
387
  , h_connection_close
388
+ , h_connection_upgrade
311
389
  };
312
390
 
391
/*
 * State identifiers for a host-component state machine. The first enumerator
 * is pinned to 1 and the remaining ones take consecutive values after it, so
 * 0 is never a valid state.
 * NOTE(review): the code that consumes these states is not visible in this
 * part of the diff; presumably it is the host/authority parser used by the
 * URL-parsing API -- confirm before relying on the ordering.
 */
+ enum http_host_state
392
+ {
393
+ s_http_host_dead = 1
394
+ , s_http_userinfo_start
395
+ , s_http_userinfo
396
+ , s_http_host_start
397
+ , s_http_host_v6_start
398
+ , s_http_host
399
+ , s_http_host_v6
400
+ , s_http_host_v6_end
401
+ , s_http_host_v6_zone_start
402
+ , s_http_host_v6_zone
403
+ , s_http_host_port_start
404
+ , s_http_host_port
405
+ };
406
+
407
+ /* Macros for character classes; depends on strict-mode */
408
/* Line-terminator characters. */
+ #define CR '\r'
409
+ #define LF '\n'
410
/* Sets bit 0x20 and truncates to unsigned char, which maps ASCII 'A'..'Z'
 * onto 'a'..'z' (other characters are changed too, so only use the result
 * for letter comparisons).
 * NOTE(review): the argument is not parenthesised, so pass only a simple
 * expression such as a variable name. */
+ #define LOWER(c) (unsigned char)(c | 0x20)
411
/* ASCII letter test. Expands LOWER(c) twice, so `c` is evaluated twice;
 * do not pass an expression with side effects. */
+ #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
412
/* ASCII decimal digit test. */
+ #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
413
+ #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
414
/* Hexadecimal digit test: 0-9, a-f or A-F. */
+ #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
415
/* True for the punctuation set  - _ . ! ~ * ' ( ) */
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
416
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
417
+ (c) == ')')
418
/* True for alphanumerics, IS_MARK characters, and  % ; : & = + $ ,
 * Note that '@' is deliberately not in this set. */
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
419
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
420
+ (c) == '$' || (c) == ',')
421
+
422
/* Looks `c` up in the 256-entry tokens[] table; the cast to unsigned char
 * keeps the index non-negative when char is signed. A zero entry means the
 * character is not a token character.
 * NOTE(review): `c` is not parenthesised inside the cast. */
+ #define STRICT_TOKEN(c) (tokens[(unsigned char)c])
313
423
 
314
- enum flags
315
- { F_CHUNKED = 1 << 0
316
- , F_CONNECTION_KEEP_ALIVE = 1 << 1
317
- , F_CONNECTION_CLOSE = 1 << 2
318
- , F_TRAILING = 1 << 3
319
- , F_UPGRADE = 1 << 4
320
- , F_SKIPBODY = 1 << 5
321
- };
322
-
323
-
324
- #define CR '\r'
325
- #define LF '\n'
326
- #define LOWER(c) (unsigned char)(c | 0x20)
327
- #define TOKEN(c) tokens[(unsigned char)c]
424
+ #if HTTP_PARSER_STRICT
425
+ #define TOKEN(c) (tokens[(unsigned char)c])
426
+ #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
427
+ #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
428
+ #else
429
+ #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
430
+ #define IS_URL_CHAR(c) \
431
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
432
+ #define IS_HOST_CHAR(c) \
433
+ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
434
+ #endif
328
435
 
436
+ /**
437
+ * Verify that a char is a valid visible (printable) US-ASCII
438
+ * character or %x80-FF
439
+ **/
440
+ #define IS_HEADER_CHAR(ch) \
441
+ (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
329
442
 
330
443
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
331
444
 
332
445
 
333
446
  #if HTTP_PARSER_STRICT
334
- # define STRICT_CHECK(cond) if (cond) goto error
447
+ # define STRICT_CHECK(cond) \
448
+ do { \
449
+ if (cond) { \
450
+ SET_ERRNO(HPE_STRICT); \
451
+ goto error; \
452
+ } \
453
+ } while (0)
335
454
  # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
336
455
  #else
337
456
  # define STRICT_CHECK(cond)
@@ -339,24 +458,205 @@ enum flags
339
458
  #endif
340
459
 
341
460
 
461
+ /* Map errno values to strings for human-readable output */
462
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
463
+ static struct {
464
+ const char *name;
465
+ const char *description;
466
+ } http_strerror_tab[] = {
467
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
468
+ };
469
+ #undef HTTP_STRERROR_GEN
470
+
471
+ int http_message_needs_eof(const http_parser *parser);
472
+
473
+ /* Our URL parser.
474
+ *
475
+ * This is designed to be shared by http_parser_execute() for URL validation,
476
+ * hence it has a state transition + byte-for-byte interface. In addition, it
477
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
478
+ * work of turning state transitions into URL components for its API.
479
+ *
480
+ * This function should only be invoked with non-space characters. It is
481
+ * assumed that the caller cares about (and can detect) the transition between
482
+ * URL and non-URL states by looking for these.
483
+ */
484
/*
 * parse_url_char: advance the URL state machine by one input character.
 *
 * s   current URL-related state
 * ch  the next input character
 *
 * Returns the state to move to, or s_dead when `ch` is not acceptable in
 * state `s` (including when `s` is not one of the URL states handled below).
 * The function is pure: it reads no parser object and has no side effects.
 */
+ static enum state
485
+ parse_url_char(enum state s, const char ch)
486
+ {
487
/* Space, CR and LF are rejected in every state. */
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
488
+ return s_dead;
489
+ }
490
+
491
/* Strict builds additionally reject tab and form feed in every state. */
+ #if HTTP_PARSER_STRICT
492
+ if (ch == '\t' || ch == '\f') {
493
+ return s_dead;
494
+ }
495
+ #endif
496
+
497
+ switch (s) {
498
+ case s_req_spaces_before_url:
499
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
500
+ * All methods except CONNECT are followed by '/' or '*'.
501
+ */
502
+
503
+ if (ch == '/' || ch == '*') {
504
+ return s_req_path;
505
+ }
506
+
507
+ if (IS_ALPHA(ch)) {
508
+ return s_req_schema;
509
+ }
510
+
511
+ break;
512
+
513
/* Only letters are accepted in the scheme; ':' ends it. */
+ case s_req_schema:
514
+ if (IS_ALPHA(ch)) {
515
+ return s;
516
+ }
517
+
518
+ if (ch == ':') {
519
+ return s_req_schema_slash;
520
+ }
521
+
522
+ break;
523
+
524
/* The next two states require the literal "//" after "scheme:". */
+ case s_req_schema_slash:
525
+ if (ch == '/') {
526
+ return s_req_schema_slash_slash;
527
+ }
528
+
529
+ break;
530
+
531
+ case s_req_schema_slash_slash:
532
+ if (ch == '/') {
533
+ return s_req_server_start;
534
+ }
535
+
536
+ break;
537
+
538
/* An '@' has already been seen: a second '@' is an error; every other
 * character is handled exactly like the plain server states below. */
+ case s_req_server_with_at:
539
+ if (ch == '@') {
540
+ return s_dead;
541
+ }
542
+
543
+ /* FALLTHROUGH */
544
+ case s_req_server_start:
545
+ case s_req_server:
546
+ if (ch == '/') {
547
+ return s_req_path;
548
+ }
549
+
550
+ if (ch == '?') {
551
+ return s_req_query_string_start;
552
+ }
553
+
554
+ if (ch == '@') {
555
+ return s_req_server_with_at;
556
+ }
557
+
558
/* '[' and ']' are accepted here in addition to the userinfo set
 * (presumably for bracketed IPv6 literals -- confirm); no bracket
 * balancing is checked at this level. */
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
559
+ return s_req_server;
560
+ }
561
+
562
+ break;
563
+
564
+ case s_req_path:
565
+ if (IS_URL_CHAR(ch)) {
566
+ return s;
567
+ }
568
+
569
+ switch (ch) {
570
+ case '?':
571
+ return s_req_query_string_start;
572
+
573
+ case '#':
574
+ return s_req_fragment_start;
575
+ }
576
+
577
+ break;
578
+
579
+ case s_req_query_string_start:
580
+ case s_req_query_string:
581
+ if (IS_URL_CHAR(ch)) {
582
+ return s_req_query_string;
583
+ }
584
+
585
+ switch (ch) {
586
+ case '?':
587
+ /* allow extra '?' in query string */
588
+ return s_req_query_string;
589
+
590
+ case '#':
591
+ return s_req_fragment_start;
592
+ }
593
+
594
+ break;
595
+
596
/* Repeated '#' keeps the machine in the fragment-start state; a '?' or
 * any URL character moves on to the fragment body. */
+ case s_req_fragment_start:
597
+ if (IS_URL_CHAR(ch)) {
598
+ return s_req_fragment;
599
+ }
600
+
601
+ switch (ch) {
602
+ case '?':
603
+ return s_req_fragment;
604
+
605
+ case '#':
606
+ return s;
607
+ }
608
+
609
+ break;
610
+
611
/* Inside the fragment, '?' and '#' are accepted as ordinary characters. */
+ case s_req_fragment:
612
+ if (IS_URL_CHAR(ch)) {
613
+ return s;
614
+ }
615
+
616
+ switch (ch) {
617
+ case '?':
618
+ case '#':
619
+ return s;
620
+ }
621
+
622
+ break;
623
+
624
/* Any state not listed above is not a URL state: fall out to s_dead. */
+ default:
625
+ break;
626
+ }
627
+
628
+ /* We should never fall out of the switch above unless there's an error */
629
+ return s_dead;
630
+ }
631
+
342
632
  size_t http_parser_execute (http_parser *parser,
343
633
  const http_parser_settings *settings,
344
634
  const char *data,
345
635
  size_t len)
346
636
  {
347
637
  char c, ch;
348
- const char *p = data, *pe;
349
- int64_t to_read;
350
-
351
- enum state state = (enum state) parser->state;
352
- enum header_states header_state = (enum header_states) parser->header_state;
353
- uint64_t index = parser->index;
354
- uint64_t nread = parser->nread;
638
+ int8_t unhex_val;
639
+ const char *p = data;
640
+ const char *header_field_mark = 0;
641
+ const char *header_value_mark = 0;
642
+ const char *url_mark = 0;
643
+ const char *body_mark = 0;
644
+ const char *status_mark = 0;
645
+ enum state p_state = (enum state) parser->state;
646
+ const unsigned int lenient = parser->lenient_http_headers;
647
+
648
+ /* We're in an error state. Don't bother doing anything. */
649
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
650
+ return 0;
651
+ }
355
652
 
356
653
  if (len == 0) {
357
- switch (state) {
654
+ switch (CURRENT_STATE()) {
358
655
  case s_body_identity_eof:
359
- CALLBACK2(message_complete);
656
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
657
+ * we got paused.
658
+ */
659
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
360
660
  return 0;
361
661
 
362
662
  case s_dead:
@@ -366,52 +666,54 @@ size_t http_parser_execute (http_parser *parser,
366
666
  return 0;
367
667
 
368
668
  default:
369
- return 1; // error
669
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
670
+ return 1;
370
671
  }
371
672
  }
372
673
 
373
- /* technically we could combine all of these (except for url_mark) into one
374
- variable, saving stack space, but it seems more clear to have them
375
- separated. */
376
- const char *header_field_mark = 0;
377
- const char *header_value_mark = 0;
378
- const char *fragment_mark = 0;
379
- const char *query_string_mark = 0;
380
- const char *path_mark = 0;
381
- const char *url_mark = 0;
382
674
 
383
- if (state == s_header_field)
675
+ if (CURRENT_STATE() == s_header_field)
384
676
  header_field_mark = data;
385
- if (state == s_header_value)
677
+ if (CURRENT_STATE() == s_header_value)
386
678
  header_value_mark = data;
387
- if (state == s_req_fragment)
388
- fragment_mark = data;
389
- if (state == s_req_query_string)
390
- query_string_mark = data;
391
- if (state == s_req_path)
392
- path_mark = data;
393
- if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
394
- || state == s_req_schema_slash_slash || state == s_req_port
395
- || state == s_req_query_string_start || state == s_req_query_string
396
- || state == s_req_host
397
- || state == s_req_fragment_start || state == s_req_fragment)
679
+ switch (CURRENT_STATE()) {
680
+ case s_req_path:
681
+ case s_req_schema:
682
+ case s_req_schema_slash:
683
+ case s_req_schema_slash_slash:
684
+ case s_req_server_start:
685
+ case s_req_server:
686
+ case s_req_server_with_at:
687
+ case s_req_query_string_start:
688
+ case s_req_query_string:
689
+ case s_req_fragment_start:
690
+ case s_req_fragment:
398
691
  url_mark = data;
692
+ break;
693
+ case s_res_status:
694
+ status_mark = data;
695
+ break;
696
+ default:
697
+ break;
698
+ }
399
699
 
400
- for (p=data, pe=data+len; p != pe; p++) {
700
+ for (p=data; p != data + len; p++) {
401
701
  ch = *p;
402
702
 
403
- if (PARSING_HEADER(state)) {
404
- ++nread;
405
- /* Buffer overflow attack */
406
- if (nread > HTTP_MAX_HEADER_SIZE) goto error;
407
- }
703
+ if (PARSING_HEADER(CURRENT_STATE()))
704
+ COUNT_HEADER_SIZE(1);
408
705
 
409
- switch (state) {
706
+ reexecute:
707
+ switch (CURRENT_STATE()) {
410
708
 
411
709
  case s_dead:
412
710
  /* this state is used after a 'Connection: close' message
413
711
  * the parser will error out if it reads another message
414
712
  */
713
+ if (LIKELY(ch == CR || ch == LF))
714
+ break;
715
+
716
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
415
717
  goto error;
416
718
 
417
719
  case s_start_req_or_res:
@@ -419,42 +721,46 @@ size_t http_parser_execute (http_parser *parser,
419
721
  if (ch == CR || ch == LF)
420
722
  break;
421
723
  parser->flags = 0;
422
- parser->content_length = -1;
724
+ parser->content_length = ULLONG_MAX;
423
725
 
424
- CALLBACK2(message_begin);
726
+ if (ch == 'H') {
727
+ UPDATE_STATE(s_res_or_resp_H);
425
728
 
426
- if (ch == 'H')
427
- state = s_res_or_resp_H;
428
- else {
729
+ CALLBACK_NOTIFY(message_begin);
730
+ } else {
429
731
  parser->type = HTTP_REQUEST;
430
- goto start_req_method_assign;
732
+ UPDATE_STATE(s_start_req);
733
+ REEXECUTE();
431
734
  }
735
+
432
736
  break;
433
737
  }
434
738
 
435
739
  case s_res_or_resp_H:
436
740
  if (ch == 'T') {
437
741
  parser->type = HTTP_RESPONSE;
438
- state = s_res_HT;
742
+ UPDATE_STATE(s_res_HT);
439
743
  } else {
440
- if (ch != 'E') goto error;
744
+ if (UNLIKELY(ch != 'E')) {
745
+ SET_ERRNO(HPE_INVALID_CONSTANT);
746
+ goto error;
747
+ }
748
+
441
749
  parser->type = HTTP_REQUEST;
442
750
  parser->method = HTTP_HEAD;
443
- index = 2;
444
- state = s_req_method;
751
+ parser->index = 2;
752
+ UPDATE_STATE(s_req_method);
445
753
  }
446
754
  break;
447
755
 
448
756
  case s_start_res:
449
757
  {
450
758
  parser->flags = 0;
451
- parser->content_length = -1;
452
-
453
- CALLBACK2(message_begin);
759
+ parser->content_length = ULLONG_MAX;
454
760
 
455
761
  switch (ch) {
456
762
  case 'H':
457
- state = s_res_H;
763
+ UPDATE_STATE(s_res_H);
458
764
  break;
459
765
 
460
766
  case CR:
@@ -462,105 +768,105 @@ size_t http_parser_execute (http_parser *parser,
462
768
  break;
463
769
 
464
770
  default:
771
+ SET_ERRNO(HPE_INVALID_CONSTANT);
465
772
  goto error;
466
773
  }
774
+
775
+ CALLBACK_NOTIFY(message_begin);
467
776
  break;
468
777
  }
469
778
 
470
779
  case s_res_H:
471
780
  STRICT_CHECK(ch != 'T');
472
- state = s_res_HT;
781
+ UPDATE_STATE(s_res_HT);
473
782
  break;
474
783
 
475
784
  case s_res_HT:
476
785
  STRICT_CHECK(ch != 'T');
477
- state = s_res_HTT;
786
+ UPDATE_STATE(s_res_HTT);
478
787
  break;
479
788
 
480
789
  case s_res_HTT:
481
790
  STRICT_CHECK(ch != 'P');
482
- state = s_res_HTTP;
791
+ UPDATE_STATE(s_res_HTTP);
483
792
  break;
484
793
 
485
794
  case s_res_HTTP:
486
795
  STRICT_CHECK(ch != '/');
487
- state = s_res_first_http_major;
796
+ UPDATE_STATE(s_res_http_major);
488
797
  break;
489
798
 
490
- case s_res_first_http_major:
491
- if (ch < '1' || ch > '9') goto error;
799
+ case s_res_http_major:
800
+ if (UNLIKELY(!IS_NUM(ch))) {
801
+ SET_ERRNO(HPE_INVALID_VERSION);
802
+ goto error;
803
+ }
804
+
492
805
  parser->http_major = ch - '0';
493
- state = s_res_http_major;
806
+ UPDATE_STATE(s_res_http_dot);
494
807
  break;
495
808
 
496
- /* major HTTP version or dot */
497
- case s_res_http_major:
809
+ case s_res_http_dot:
498
810
  {
499
- if (ch == '.') {
500
- state = s_res_first_http_minor;
501
- break;
811
+ if (UNLIKELY(ch != '.')) {
812
+ SET_ERRNO(HPE_INVALID_VERSION);
813
+ goto error;
502
814
  }
503
815
 
504
- if (ch < '0' || ch > '9') goto error;
505
-
506
- parser->http_major *= 10;
507
- parser->http_major += ch - '0';
508
-
509
- if (parser->http_major > 999) goto error;
816
+ UPDATE_STATE(s_res_http_minor);
510
817
  break;
511
818
  }
512
819
 
513
- /* first digit of minor HTTP version */
514
- case s_res_first_http_minor:
515
- if (ch < '0' || ch > '9') goto error;
820
+ case s_res_http_minor:
821
+ if (UNLIKELY(!IS_NUM(ch))) {
822
+ SET_ERRNO(HPE_INVALID_VERSION);
823
+ goto error;
824
+ }
825
+
516
826
  parser->http_minor = ch - '0';
517
- state = s_res_http_minor;
827
+ UPDATE_STATE(s_res_http_end);
518
828
  break;
519
829
 
520
- /* minor HTTP version or end of request line */
521
- case s_res_http_minor:
830
+ case s_res_http_end:
522
831
  {
523
- if (ch == ' ') {
524
- state = s_res_first_status_code;
525
- break;
832
+ if (UNLIKELY(ch != ' ')) {
833
+ SET_ERRNO(HPE_INVALID_VERSION);
834
+ goto error;
526
835
  }
527
836
 
528
- if (ch < '0' || ch > '9') goto error;
529
-
530
- parser->http_minor *= 10;
531
- parser->http_minor += ch - '0';
532
-
533
- if (parser->http_minor > 999) goto error;
837
+ UPDATE_STATE(s_res_first_status_code);
534
838
  break;
535
839
  }
536
840
 
537
841
  case s_res_first_status_code:
538
842
  {
539
- if (ch < '0' || ch > '9') {
843
+ if (!IS_NUM(ch)) {
540
844
  if (ch == ' ') {
541
845
  break;
542
846
  }
847
+
848
+ SET_ERRNO(HPE_INVALID_STATUS);
543
849
  goto error;
544
850
  }
545
851
  parser->status_code = ch - '0';
546
- state = s_res_status_code;
852
+ UPDATE_STATE(s_res_status_code);
547
853
  break;
548
854
  }
549
855
 
550
856
  case s_res_status_code:
551
857
  {
552
- if (ch < '0' || ch > '9') {
858
+ if (!IS_NUM(ch)) {
553
859
  switch (ch) {
554
860
  case ' ':
555
- state = s_res_status;
861
+ UPDATE_STATE(s_res_status_start);
556
862
  break;
557
863
  case CR:
558
- state = s_res_line_almost_done;
559
- break;
560
864
  case LF:
561
- state = s_header_field_start;
865
+ UPDATE_STATE(s_res_status_start);
866
+ REEXECUTE();
562
867
  break;
563
868
  default:
869
+ SET_ERRNO(HPE_INVALID_STATUS);
564
870
  goto error;
565
871
  }
566
872
  break;
@@ -569,27 +875,44 @@ size_t http_parser_execute (http_parser *parser,
569
875
  parser->status_code *= 10;
570
876
  parser->status_code += ch - '0';
571
877
 
572
- if (parser->status_code > 999) goto error;
878
+ if (UNLIKELY(parser->status_code > 999)) {
879
+ SET_ERRNO(HPE_INVALID_STATUS);
880
+ goto error;
881
+ }
882
+
883
+ break;
884
+ }
885
+
886
+ case s_res_status_start:
887
+ {
888
+ MARK(status);
889
+ UPDATE_STATE(s_res_status);
890
+ parser->index = 0;
891
+
892
+ if (ch == CR || ch == LF)
893
+ REEXECUTE();
894
+
573
895
  break;
574
896
  }
575
897
 
576
898
  case s_res_status:
577
- /* the human readable status. e.g. "NOT FOUND"
578
- * we are not humans so just ignore this */
579
899
  if (ch == CR) {
580
- state = s_res_line_almost_done;
900
+ UPDATE_STATE(s_res_line_almost_done);
901
+ CALLBACK_DATA(status);
581
902
  break;
582
903
  }
583
904
 
584
905
  if (ch == LF) {
585
- state = s_header_field_start;
906
+ UPDATE_STATE(s_header_field_start);
907
+ CALLBACK_DATA(status);
586
908
  break;
587
909
  }
910
+
588
911
  break;
589
912
 
590
913
  case s_res_line_almost_done:
591
914
  STRICT_CHECK(ch != LF);
592
- state = s_header_field_start;
915
+ UPDATE_STATE(s_header_field_start);
593
916
  break;
594
917
 
595
918
  case s_start_req:
@@ -597,501 +920,308 @@ size_t http_parser_execute (http_parser *parser,
597
920
  if (ch == CR || ch == LF)
598
921
  break;
599
922
  parser->flags = 0;
600
- parser->content_length = -1;
601
-
602
- CALLBACK2(message_begin);
923
+ parser->content_length = ULLONG_MAX;
603
924
 
604
- if (ch < 'A' || 'Z' < ch) goto error;
925
+ if (UNLIKELY(!IS_ALPHA(ch))) {
926
+ SET_ERRNO(HPE_INVALID_METHOD);
927
+ goto error;
928
+ }
605
929
 
606
- start_req_method_assign:
607
930
  parser->method = (enum http_method) 0;
608
- index = 1;
931
+ parser->index = 1;
609
932
  switch (ch) {
933
+ case 'A': parser->method = HTTP_ACL; break;
934
+ case 'B': parser->method = HTTP_BIND; break;
610
935
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
611
936
  case 'D': parser->method = HTTP_DELETE; break;
612
937
  case 'G': parser->method = HTTP_GET; break;
613
938
  case 'H': parser->method = HTTP_HEAD; break;
614
- case 'L': parser->method = HTTP_LOCK; break;
615
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
939
+ case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
940
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
616
941
  case 'N': parser->method = HTTP_NOTIFY; break;
617
942
  case 'O': parser->method = HTTP_OPTIONS; break;
618
- case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
619
- case 'R': parser->method = HTTP_REPORT; break;
620
- case 'S': parser->method = HTTP_SUBSCRIBE; break;
943
+ case 'P': parser->method = HTTP_POST;
944
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
945
+ break;
946
+ case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
947
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
621
948
  case 'T': parser->method = HTTP_TRACE; break;
622
- case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
623
- default: goto error;
949
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
950
+ default:
951
+ SET_ERRNO(HPE_INVALID_METHOD);
952
+ goto error;
624
953
  }
625
- state = s_req_method;
954
+ UPDATE_STATE(s_req_method);
955
+
956
+ CALLBACK_NOTIFY(message_begin);
957
+
626
958
  break;
627
959
  }
628
960
 
629
961
  case s_req_method:
630
962
  {
631
- if (ch == '\0')
963
+ const char *matcher;
964
+ if (UNLIKELY(ch == '\0')) {
965
+ SET_ERRNO(HPE_INVALID_METHOD);
632
966
  goto error;
967
+ }
633
968
 
634
- const char *matcher = method_strings[parser->method];
635
- if (ch == ' ' && matcher[index] == '\0') {
636
- state = s_req_spaces_before_url;
637
- } else if (ch == matcher[index]) {
969
+ matcher = method_strings[parser->method];
970
+ if (ch == ' ' && matcher[parser->index] == '\0') {
971
+ UPDATE_STATE(s_req_spaces_before_url);
972
+ } else if (ch == matcher[parser->index]) {
638
973
  ; /* nada */
639
- } else if (parser->method == HTTP_CONNECT) {
640
- if (index == 1 && ch == 'H') {
641
- parser->method = HTTP_CHECKOUT;
642
- } else if (index == 2 && ch == 'P') {
643
- parser->method = HTTP_COPY;
644
- }
645
- } else if (parser->method == HTTP_MKCOL) {
646
- if (index == 1 && ch == 'O') {
647
- parser->method = HTTP_MOVE;
648
- } else if (index == 1 && ch == 'E') {
649
- parser->method = HTTP_MERGE;
650
- } else if (index == 1 && ch == '-') {
651
- parser->method = HTTP_MSEARCH;
652
- } else if (index == 2 && ch == 'A') {
653
- parser->method = HTTP_MKACTIVITY;
974
+ } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
975
+
976
+ switch (parser->method << 16 | parser->index << 8 | ch) {
977
+ #define XX(meth, pos, ch, new_meth) \
978
+ case (HTTP_##meth << 16 | pos << 8 | ch): \
979
+ parser->method = HTTP_##new_meth; break;
980
+
981
+ XX(POST, 1, 'U', PUT)
982
+ XX(POST, 1, 'A', PATCH)
983
+ XX(POST, 1, 'R', PROPFIND)
984
+ XX(PUT, 2, 'R', PURGE)
985
+ XX(CONNECT, 1, 'H', CHECKOUT)
986
+ XX(CONNECT, 2, 'P', COPY)
987
+ XX(MKCOL, 1, 'O', MOVE)
988
+ XX(MKCOL, 1, 'E', MERGE)
989
+ XX(MKCOL, 1, '-', MSEARCH)
990
+ XX(MKCOL, 2, 'A', MKACTIVITY)
991
+ XX(MKCOL, 3, 'A', MKCALENDAR)
992
+ XX(SUBSCRIBE, 1, 'E', SEARCH)
993
+ XX(SUBSCRIBE, 1, 'O', SOURCE)
994
+ XX(REPORT, 2, 'B', REBIND)
995
+ XX(PROPFIND, 4, 'P', PROPPATCH)
996
+ XX(LOCK, 1, 'I', LINK)
997
+ XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
998
+ XX(UNLOCK, 2, 'B', UNBIND)
999
+ XX(UNLOCK, 3, 'I', UNLINK)
1000
+ #undef XX
1001
+ default:
1002
+ SET_ERRNO(HPE_INVALID_METHOD);
1003
+ goto error;
654
1004
  }
655
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
656
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
657
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
658
- parser->method = HTTP_PUT;
659
- } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
660
- parser->method = HTTP_UNSUBSCRIBE;
661
- } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
662
- parser->method = HTTP_PROPPATCH;
663
1005
  } else {
1006
+ SET_ERRNO(HPE_INVALID_METHOD);
664
1007
  goto error;
665
1008
  }
666
1009
 
667
- ++index;
1010
+ ++parser->index;
668
1011
  break;
669
1012
  }
1013
+
670
1014
  case s_req_spaces_before_url:
671
1015
  {
672
1016
  if (ch == ' ') break;
673
1017
 
674
- if (ch == '/' || ch == '*') {
675
- MARK(url);
676
- MARK(path);
677
- state = s_req_path;
678
- break;
1018
+ MARK(url);
1019
+ if (parser->method == HTTP_CONNECT) {
1020
+ UPDATE_STATE(s_req_server_start);
679
1021
  }
680
1022
 
681
- c = LOWER(ch);
682
-
683
- if (c >= 'a' && c <= 'z') {
684
- MARK(url);
685
- state = s_req_schema;
686
- break;
1023
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1024
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1025
+ SET_ERRNO(HPE_INVALID_URL);
1026
+ goto error;
687
1027
  }
688
1028
 
689
- goto error;
1029
+ break;
690
1030
  }
691
1031
 
692
1032
  case s_req_schema:
693
- {
694
- c = LOWER(ch);
695
-
696
- if (c >= 'a' && c <= 'z') break;
697
-
698
- if (ch == ':') {
699
- state = s_req_schema_slash;
700
- break;
701
- } else if (ch == '.') {
702
- state = s_req_host;
703
- break;
704
- } else if ('0' <= ch && ch <= '9') {
705
- state = s_req_host;
706
- break;
707
- }
708
-
709
- goto error;
710
- }
711
-
712
1033
  case s_req_schema_slash:
713
- STRICT_CHECK(ch != '/');
714
- state = s_req_schema_slash_slash;
715
- break;
716
-
717
1034
  case s_req_schema_slash_slash:
718
- STRICT_CHECK(ch != '/');
719
- state = s_req_host;
720
- break;
721
-
722
- case s_req_host:
1035
+ case s_req_server_start:
723
1036
  {
724
- c = LOWER(ch);
725
- if (c >= 'a' && c <= 'z') break;
726
- if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
727
1037
  switch (ch) {
728
- case ':':
729
- state = s_req_port;
730
- break;
731
- case '/':
732
- MARK(path);
733
- state = s_req_path;
734
- break;
1038
+ /* No whitespace allowed here */
735
1039
  case ' ':
736
- /* The request line looks like:
737
- * "GET http://foo.bar.com HTTP/1.1"
738
- * That is, there is no path.
739
- */
740
- CALLBACK(url);
741
- state = s_req_http_start;
742
- break;
743
- case '?':
744
- state = s_req_query_string_start;
745
- break;
746
- default:
1040
+ case CR:
1041
+ case LF:
1042
+ SET_ERRNO(HPE_INVALID_URL);
747
1043
  goto error;
748
- }
749
- break;
750
- }
751
-
752
- case s_req_port:
753
- {
754
- if (ch >= '0' && ch <= '9') break;
755
- switch (ch) {
756
- case '/':
757
- MARK(path);
758
- state = s_req_path;
759
- break;
760
- case ' ':
761
- /* The request line looks like:
762
- * "GET http://foo.bar.com:1234 HTTP/1.1"
763
- * That is, there is no path.
764
- */
765
- CALLBACK(url);
766
- state = s_req_http_start;
767
- break;
768
- case '?':
769
- state = s_req_query_string_start;
770
- break;
771
1044
  default:
772
- goto error;
1045
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1046
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1047
+ SET_ERRNO(HPE_INVALID_URL);
1048
+ goto error;
1049
+ }
773
1050
  }
1051
+
774
1052
  break;
775
1053
  }
776
1054
 
1055
+ case s_req_server:
1056
+ case s_req_server_with_at:
777
1057
  case s_req_path:
1058
+ case s_req_query_string_start:
1059
+ case s_req_query_string:
1060
+ case s_req_fragment_start:
1061
+ case s_req_fragment:
778
1062
  {
779
- if (normal_url_char[(unsigned char)ch]) break;
780
-
781
1063
  switch (ch) {
782
1064
  case ' ':
783
- CALLBACK(url);
784
- CALLBACK(path);
785
- state = s_req_http_start;
1065
+ UPDATE_STATE(s_req_http_start);
1066
+ CALLBACK_DATA(url);
786
1067
  break;
787
1068
  case CR:
788
- CALLBACK(url);
789
- CALLBACK(path);
790
- parser->http_major = 0;
791
- parser->http_minor = 9;
792
- state = s_req_line_almost_done;
793
- break;
794
1069
  case LF:
795
- CALLBACK(url);
796
- CALLBACK(path);
797
1070
  parser->http_major = 0;
798
1071
  parser->http_minor = 9;
799
- state = s_header_field_start;
800
- break;
801
- case '?':
802
- CALLBACK(path);
803
- state = s_req_query_string_start;
804
- break;
805
- case '#':
806
- CALLBACK(path);
807
- state = s_req_fragment_start;
1072
+ UPDATE_STATE((ch == CR) ?
1073
+ s_req_line_almost_done :
1074
+ s_header_field_start);
1075
+ CALLBACK_DATA(url);
808
1076
  break;
809
1077
  default:
810
- goto error;
1078
+ UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
1079
+ if (UNLIKELY(CURRENT_STATE() == s_dead)) {
1080
+ SET_ERRNO(HPE_INVALID_URL);
1081
+ goto error;
1082
+ }
811
1083
  }
812
1084
  break;
813
1085
  }
814
1086
 
815
- case s_req_query_string_start:
816
- {
817
- if (normal_url_char[(unsigned char)ch]) {
818
- MARK(query_string);
819
- state = s_req_query_string;
820
- break;
821
- }
822
-
1087
+ case s_req_http_start:
823
1088
  switch (ch) {
824
- case '?':
825
- break; /* XXX ignore extra '?' ... is this right? */
826
- case ' ':
827
- CALLBACK(url);
828
- state = s_req_http_start;
1089
+ case 'H':
1090
+ UPDATE_STATE(s_req_http_H);
829
1091
  break;
830
- case CR:
831
- CALLBACK(url);
832
- parser->http_major = 0;
833
- parser->http_minor = 9;
834
- state = s_req_line_almost_done;
835
- break;
836
- case LF:
837
- CALLBACK(url);
838
- parser->http_major = 0;
839
- parser->http_minor = 9;
840
- state = s_header_field_start;
841
- break;
842
- case '#':
843
- state = s_req_fragment_start;
844
- break;
845
- default:
846
- goto error;
847
- }
848
- break;
849
- }
850
-
851
- case s_req_query_string:
852
- {
853
- if (normal_url_char[(unsigned char)ch]) break;
854
-
855
- switch (ch) {
856
- case '?':
857
- /* allow extra '?' in query string */
858
- break;
859
- case ' ':
860
- CALLBACK(url);
861
- CALLBACK(query_string);
862
- state = s_req_http_start;
863
- break;
864
- case CR:
865
- CALLBACK(url);
866
- CALLBACK(query_string);
867
- parser->http_major = 0;
868
- parser->http_minor = 9;
869
- state = s_req_line_almost_done;
870
- break;
871
- case LF:
872
- CALLBACK(url);
873
- CALLBACK(query_string);
874
- parser->http_major = 0;
875
- parser->http_minor = 9;
876
- state = s_header_field_start;
877
- break;
878
- case '#':
879
- CALLBACK(query_string);
880
- state = s_req_fragment_start;
881
- break;
882
- default:
883
- goto error;
884
- }
885
- break;
886
- }
887
-
888
- case s_req_fragment_start:
889
- {
890
- if (normal_url_char[(unsigned char)ch]) {
891
- MARK(fragment);
892
- state = s_req_fragment;
893
- break;
894
- }
895
-
896
- switch (ch) {
897
- case ' ':
898
- CALLBACK(url);
899
- state = s_req_http_start;
900
- break;
901
- case CR:
902
- CALLBACK(url);
903
- parser->http_major = 0;
904
- parser->http_minor = 9;
905
- state = s_req_line_almost_done;
906
- break;
907
- case LF:
908
- CALLBACK(url);
909
- parser->http_major = 0;
910
- parser->http_minor = 9;
911
- state = s_header_field_start;
912
- break;
913
- case '?':
914
- MARK(fragment);
915
- state = s_req_fragment;
916
- break;
917
- case '#':
918
- break;
919
- default:
920
- goto error;
921
- }
922
- break;
923
- }
924
-
925
- case s_req_fragment:
926
- {
927
- if (normal_url_char[(unsigned char)ch]) break;
928
-
929
- switch (ch) {
930
- case ' ':
931
- CALLBACK(url);
932
- CALLBACK(fragment);
933
- state = s_req_http_start;
934
- break;
935
- case CR:
936
- CALLBACK(url);
937
- CALLBACK(fragment);
938
- parser->http_major = 0;
939
- parser->http_minor = 9;
940
- state = s_req_line_almost_done;
941
- break;
942
- case LF:
943
- CALLBACK(url);
944
- CALLBACK(fragment);
945
- parser->http_major = 0;
946
- parser->http_minor = 9;
947
- state = s_header_field_start;
948
- break;
949
- case '?':
950
- case '#':
951
- break;
952
- default:
953
- goto error;
954
- }
955
- break;
956
- }
957
-
958
- case s_req_http_start:
959
- switch (ch) {
960
- case 'H':
961
- state = s_req_http_H;
962
- break;
963
- case ' ':
1092
+ case ' ':
964
1093
  break;
965
1094
  default:
1095
+ SET_ERRNO(HPE_INVALID_CONSTANT);
966
1096
  goto error;
967
1097
  }
968
1098
  break;
969
1099
 
970
1100
  case s_req_http_H:
971
1101
  STRICT_CHECK(ch != 'T');
972
- state = s_req_http_HT;
1102
+ UPDATE_STATE(s_req_http_HT);
973
1103
  break;
974
1104
 
975
1105
  case s_req_http_HT:
976
1106
  STRICT_CHECK(ch != 'T');
977
- state = s_req_http_HTT;
1107
+ UPDATE_STATE(s_req_http_HTT);
978
1108
  break;
979
1109
 
980
1110
  case s_req_http_HTT:
981
1111
  STRICT_CHECK(ch != 'P');
982
- state = s_req_http_HTTP;
1112
+ UPDATE_STATE(s_req_http_HTTP);
983
1113
  break;
984
1114
 
985
1115
  case s_req_http_HTTP:
986
1116
  STRICT_CHECK(ch != '/');
987
- state = s_req_first_http_major;
1117
+ UPDATE_STATE(s_req_http_major);
988
1118
  break;
989
1119
 
990
- /* first digit of major HTTP version */
991
- case s_req_first_http_major:
992
- if (ch < '1' || ch > '9') goto error;
1120
+ case s_req_http_major:
1121
+ if (UNLIKELY(!IS_NUM(ch))) {
1122
+ SET_ERRNO(HPE_INVALID_VERSION);
1123
+ goto error;
1124
+ }
1125
+
993
1126
  parser->http_major = ch - '0';
994
- state = s_req_http_major;
1127
+ UPDATE_STATE(s_req_http_dot);
995
1128
  break;
996
1129
 
997
- /* major HTTP version or dot */
998
- case s_req_http_major:
1130
+ case s_req_http_dot:
999
1131
  {
1000
- if (ch == '.') {
1001
- state = s_req_first_http_minor;
1002
- break;
1132
+ if (UNLIKELY(ch != '.')) {
1133
+ SET_ERRNO(HPE_INVALID_VERSION);
1134
+ goto error;
1003
1135
  }
1004
1136
 
1005
- if (ch < '0' || ch > '9') goto error;
1006
-
1007
- parser->http_major *= 10;
1008
- parser->http_major += ch - '0';
1009
-
1010
- if (parser->http_major > 999) goto error;
1137
+ UPDATE_STATE(s_req_http_minor);
1011
1138
  break;
1012
1139
  }
1013
1140
 
1014
- /* first digit of minor HTTP version */
1015
- case s_req_first_http_minor:
1016
- if (ch < '0' || ch > '9') goto error;
1141
+ case s_req_http_minor:
1142
+ if (UNLIKELY(!IS_NUM(ch))) {
1143
+ SET_ERRNO(HPE_INVALID_VERSION);
1144
+ goto error;
1145
+ }
1146
+
1017
1147
  parser->http_minor = ch - '0';
1018
- state = s_req_http_minor;
1148
+ UPDATE_STATE(s_req_http_end);
1019
1149
  break;
1020
1150
 
1021
- /* minor HTTP version or end of request line */
1022
- case s_req_http_minor:
1151
+ case s_req_http_end:
1023
1152
  {
1024
1153
  if (ch == CR) {
1025
- state = s_req_line_almost_done;
1154
+ UPDATE_STATE(s_req_line_almost_done);
1026
1155
  break;
1027
1156
  }
1028
1157
 
1029
1158
  if (ch == LF) {
1030
- state = s_header_field_start;
1159
+ UPDATE_STATE(s_header_field_start);
1031
1160
  break;
1032
1161
  }
1033
1162
 
1034
- /* XXX allow spaces after digit? */
1035
-
1036
- if (ch < '0' || ch > '9') goto error;
1037
-
1038
- parser->http_minor *= 10;
1039
- parser->http_minor += ch - '0';
1040
-
1041
- if (parser->http_minor > 999) goto error;
1163
+ SET_ERRNO(HPE_INVALID_VERSION);
1164
+ goto error;
1042
1165
  break;
1043
1166
  }
1044
1167
 
1045
1168
  /* end of request line */
1046
1169
  case s_req_line_almost_done:
1047
1170
  {
1048
- if (ch != LF) goto error;
1049
- state = s_header_field_start;
1171
+ if (UNLIKELY(ch != LF)) {
1172
+ SET_ERRNO(HPE_LF_EXPECTED);
1173
+ goto error;
1174
+ }
1175
+
1176
+ UPDATE_STATE(s_header_field_start);
1050
1177
  break;
1051
1178
  }
1052
1179
 
1053
1180
  case s_header_field_start:
1054
1181
  {
1055
1182
  if (ch == CR) {
1056
- state = s_headers_almost_done;
1183
+ UPDATE_STATE(s_headers_almost_done);
1057
1184
  break;
1058
1185
  }
1059
1186
 
1060
1187
  if (ch == LF) {
1061
1188
  /* they might be just sending \n instead of \r\n so this would be
1062
1189
  * the second \n to denote the end of headers*/
1063
- state = s_headers_almost_done;
1064
- goto headers_almost_done;
1190
+ UPDATE_STATE(s_headers_almost_done);
1191
+ REEXECUTE();
1065
1192
  }
1066
1193
 
1067
1194
  c = TOKEN(ch);
1068
1195
 
1069
- if (!c) goto error;
1196
+ if (UNLIKELY(!c)) {
1197
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1198
+ goto error;
1199
+ }
1070
1200
 
1071
1201
  MARK(header_field);
1072
1202
 
1073
- index = 0;
1074
- state = s_header_field;
1203
+ parser->index = 0;
1204
+ UPDATE_STATE(s_header_field);
1075
1205
 
1076
1206
  switch (c) {
1077
1207
  case 'c':
1078
- header_state = h_C;
1208
+ parser->header_state = h_C;
1079
1209
  break;
1080
1210
 
1081
1211
  case 'p':
1082
- header_state = h_matching_proxy_connection;
1212
+ parser->header_state = h_matching_proxy_connection;
1083
1213
  break;
1084
1214
 
1085
1215
  case 't':
1086
- header_state = h_matching_transfer_encoding;
1216
+ parser->header_state = h_matching_transfer_encoding;
1087
1217
  break;
1088
1218
 
1089
1219
  case 'u':
1090
- header_state = h_matching_upgrade;
1220
+ parser->header_state = h_matching_upgrade;
1091
1221
  break;
1092
1222
 
1093
1223
  default:
1094
- header_state = h_general;
1224
+ parser->header_state = h_general;
1095
1225
  break;
1096
1226
  }
1097
1227
  break;
@@ -1099,34 +1229,39 @@ size_t http_parser_execute (http_parser *parser,
1099
1229
 
1100
1230
  case s_header_field:
1101
1231
  {
1102
- c = TOKEN(ch);
1232
+ const char* start = p;
1233
+ for (; p != data + len; p++) {
1234
+ ch = *p;
1235
+ c = TOKEN(ch);
1103
1236
 
1104
- if (c) {
1105
- switch (header_state) {
1237
+ if (!c)
1238
+ break;
1239
+
1240
+ switch (parser->header_state) {
1106
1241
  case h_general:
1107
1242
  break;
1108
1243
 
1109
1244
  case h_C:
1110
- index++;
1111
- header_state = (c == 'o' ? h_CO : h_general);
1245
+ parser->index++;
1246
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1112
1247
  break;
1113
1248
 
1114
1249
  case h_CO:
1115
- index++;
1116
- header_state = (c == 'n' ? h_CON : h_general);
1250
+ parser->index++;
1251
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1117
1252
  break;
1118
1253
 
1119
1254
  case h_CON:
1120
- index++;
1255
+ parser->index++;
1121
1256
  switch (c) {
1122
1257
  case 'n':
1123
- header_state = h_matching_connection;
1258
+ parser->header_state = h_matching_connection;
1124
1259
  break;
1125
1260
  case 't':
1126
- header_state = h_matching_content_length;
1261
+ parser->header_state = h_matching_content_length;
1127
1262
  break;
1128
1263
  default:
1129
- header_state = h_general;
1264
+ parser->header_state = h_general;
1130
1265
  break;
1131
1266
  }
1132
1267
  break;
@@ -1134,60 +1269,60 @@ size_t http_parser_execute (http_parser *parser,
1134
1269
  /* connection */
1135
1270
 
1136
1271
  case h_matching_connection:
1137
- index++;
1138
- if (index > sizeof(CONNECTION)-1
1139
- || c != CONNECTION[index]) {
1140
- header_state = h_general;
1141
- } else if (index == sizeof(CONNECTION)-2) {
1142
- header_state = h_connection;
1272
+ parser->index++;
1273
+ if (parser->index > sizeof(CONNECTION)-1
1274
+ || c != CONNECTION[parser->index]) {
1275
+ parser->header_state = h_general;
1276
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1277
+ parser->header_state = h_connection;
1143
1278
  }
1144
1279
  break;
1145
1280
 
1146
1281
  /* proxy-connection */
1147
1282
 
1148
1283
  case h_matching_proxy_connection:
1149
- index++;
1150
- if (index > sizeof(PROXY_CONNECTION)-1
1151
- || c != PROXY_CONNECTION[index]) {
1152
- header_state = h_general;
1153
- } else if (index == sizeof(PROXY_CONNECTION)-2) {
1154
- header_state = h_connection;
1284
+ parser->index++;
1285
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1286
+ || c != PROXY_CONNECTION[parser->index]) {
1287
+ parser->header_state = h_general;
1288
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1289
+ parser->header_state = h_connection;
1155
1290
  }
1156
1291
  break;
1157
1292
 
1158
1293
  /* content-length */
1159
1294
 
1160
1295
  case h_matching_content_length:
1161
- index++;
1162
- if (index > sizeof(CONTENT_LENGTH)-1
1163
- || c != CONTENT_LENGTH[index]) {
1164
- header_state = h_general;
1165
- } else if (index == sizeof(CONTENT_LENGTH)-2) {
1166
- header_state = h_content_length;
1296
+ parser->index++;
1297
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1298
+ || c != CONTENT_LENGTH[parser->index]) {
1299
+ parser->header_state = h_general;
1300
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1301
+ parser->header_state = h_content_length;
1167
1302
  }
1168
1303
  break;
1169
1304
 
1170
1305
  /* transfer-encoding */
1171
1306
 
1172
1307
  case h_matching_transfer_encoding:
1173
- index++;
1174
- if (index > sizeof(TRANSFER_ENCODING)-1
1175
- || c != TRANSFER_ENCODING[index]) {
1176
- header_state = h_general;
1177
- } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1178
- header_state = h_transfer_encoding;
1308
+ parser->index++;
1309
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1310
+ || c != TRANSFER_ENCODING[parser->index]) {
1311
+ parser->header_state = h_general;
1312
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1313
+ parser->header_state = h_transfer_encoding;
1179
1314
  }
1180
1315
  break;
1181
1316
 
1182
1317
  /* upgrade */
1183
1318
 
1184
1319
  case h_matching_upgrade:
1185
- index++;
1186
- if (index > sizeof(UPGRADE)-1
1187
- || c != UPGRADE[index]) {
1188
- header_state = h_general;
1189
- } else if (index == sizeof(UPGRADE)-2) {
1190
- header_state = h_upgrade;
1320
+ parser->index++;
1321
+ if (parser->index > sizeof(UPGRADE)-1
1322
+ || c != UPGRADE[parser->index]) {
1323
+ parser->header_state = h_general;
1324
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1325
+ parser->header_state = h_upgrade;
1191
1326
  }
1192
1327
  break;
1193
1328
 
@@ -1195,95 +1330,107 @@ size_t http_parser_execute (http_parser *parser,
1195
1330
  case h_content_length:
1196
1331
  case h_transfer_encoding:
1197
1332
  case h_upgrade:
1198
- if (ch != ' ') header_state = h_general;
1333
+ if (ch != ' ') parser->header_state = h_general;
1199
1334
  break;
1200
1335
 
1201
1336
  default:
1202
1337
  assert(0 && "Unknown header_state");
1203
1338
  break;
1204
1339
  }
1340
+ }
1341
+
1342
+ COUNT_HEADER_SIZE(p - start);
1343
+
1344
+ if (p == data + len) {
1345
+ --p;
1205
1346
  break;
1206
1347
  }
1207
1348
 
1208
1349
  if (ch == ':') {
1209
- CALLBACK(header_field);
1210
- state = s_header_value_start;
1350
+ UPDATE_STATE(s_header_value_discard_ws);
1351
+ CALLBACK_DATA(header_field);
1211
1352
  break;
1212
1353
  }
1213
1354
 
1355
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1356
+ goto error;
1357
+ }
1358
+
1359
+ case s_header_value_discard_ws:
1360
+ if (ch == ' ' || ch == '\t') break;
1361
+
1214
1362
  if (ch == CR) {
1215
- state = s_header_almost_done;
1216
- CALLBACK(header_field);
1363
+ UPDATE_STATE(s_header_value_discard_ws_almost_done);
1217
1364
  break;
1218
1365
  }
1219
1366
 
1220
1367
  if (ch == LF) {
1221
- CALLBACK(header_field);
1222
- state = s_header_field_start;
1368
+ UPDATE_STATE(s_header_value_discard_lws);
1223
1369
  break;
1224
1370
  }
1225
1371
 
1226
- goto error;
1227
- }
1372
+ /* FALLTHROUGH */
1228
1373
 
1229
1374
  case s_header_value_start:
1230
1375
  {
1231
- if (ch == ' ') break;
1232
-
1233
1376
  MARK(header_value);
1234
1377
 
1235
- state = s_header_value;
1236
- index = 0;
1378
+ UPDATE_STATE(s_header_value);
1379
+ parser->index = 0;
1237
1380
 
1238
1381
  c = LOWER(ch);
1239
1382
 
1240
- if (ch == CR) {
1241
- CALLBACK(header_value);
1242
- header_state = h_general;
1243
- state = s_header_almost_done;
1244
- break;
1245
- }
1246
-
1247
- if (ch == LF) {
1248
- CALLBACK(header_value);
1249
- state = s_header_field_start;
1250
- break;
1251
- }
1252
-
1253
- switch (header_state) {
1383
+ switch (parser->header_state) {
1254
1384
  case h_upgrade:
1255
1385
  parser->flags |= F_UPGRADE;
1256
- header_state = h_general;
1386
+ parser->header_state = h_general;
1257
1387
  break;
1258
1388
 
1259
1389
  case h_transfer_encoding:
1260
1390
  /* looking for 'Transfer-Encoding: chunked' */
1261
1391
  if ('c' == c) {
1262
- header_state = h_matching_transfer_encoding_chunked;
1392
+ parser->header_state = h_matching_transfer_encoding_chunked;
1263
1393
  } else {
1264
- header_state = h_general;
1394
+ parser->header_state = h_general;
1265
1395
  }
1266
1396
  break;
1267
1397
 
1268
1398
  case h_content_length:
1269
- if (ch < '0' || ch > '9') goto error;
1399
+ if (UNLIKELY(!IS_NUM(ch))) {
1400
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1401
+ goto error;
1402
+ }
1403
+
1404
+ if (parser->flags & F_CONTENTLENGTH) {
1405
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1406
+ goto error;
1407
+ }
1408
+
1409
+ parser->flags |= F_CONTENTLENGTH;
1270
1410
  parser->content_length = ch - '0';
1411
+ parser->header_state = h_content_length_num;
1271
1412
  break;
1272
1413
 
1273
1414
  case h_connection:
1274
1415
  /* looking for 'Connection: keep-alive' */
1275
1416
  if (c == 'k') {
1276
- header_state = h_matching_connection_keep_alive;
1417
+ parser->header_state = h_matching_connection_keep_alive;
1277
1418
  /* looking for 'Connection: close' */
1278
1419
  } else if (c == 'c') {
1279
- header_state = h_matching_connection_close;
1420
+ parser->header_state = h_matching_connection_close;
1421
+ } else if (c == 'u') {
1422
+ parser->header_state = h_matching_connection_upgrade;
1280
1423
  } else {
1281
- header_state = h_general;
1424
+ parser->header_state = h_matching_connection_token;
1282
1425
  }
1283
1426
  break;
1284
1427
 
1428
+ /* Multi-value `Connection` header */
1429
+ case h_matching_connection_token_start:
1430
+ break;
1431
+
1285
1432
  default:
1286
- header_state = h_general;
1433
+ parser->header_state = h_general;
1287
1434
  break;
1288
1435
  }
1289
1436
  break;
@@ -1291,89 +1438,229 @@ size_t http_parser_execute (http_parser *parser,
1291
1438
 
1292
1439
  case s_header_value:
1293
1440
  {
1294
- c = LOWER(ch);
1295
-
1296
- if (ch == CR) {
1297
- CALLBACK(header_value);
1298
- state = s_header_almost_done;
1299
- break;
1300
- }
1441
+ const char* start = p;
1442
+ enum header_states h_state = (enum header_states) parser->header_state;
1443
+ for (; p != data + len; p++) {
1444
+ ch = *p;
1445
+ if (ch == CR) {
1446
+ UPDATE_STATE(s_header_almost_done);
1447
+ parser->header_state = h_state;
1448
+ CALLBACK_DATA(header_value);
1449
+ break;
1450
+ }
1301
1451
 
1302
- if (ch == LF) {
1303
- CALLBACK(header_value);
1304
- goto header_almost_done;
1305
- }
1452
+ if (ch == LF) {
1453
+ UPDATE_STATE(s_header_almost_done);
1454
+ COUNT_HEADER_SIZE(p - start);
1455
+ parser->header_state = h_state;
1456
+ CALLBACK_DATA_NOADVANCE(header_value);
1457
+ REEXECUTE();
1458
+ }
1306
1459
 
1307
- switch (header_state) {
1308
- case h_general:
1309
- break;
1460
+ if (!lenient && !IS_HEADER_CHAR(ch)) {
1461
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1462
+ goto error;
1463
+ }
1310
1464
 
1311
- case h_connection:
1312
- case h_transfer_encoding:
1313
- assert(0 && "Shouldn't get here.");
1314
- break;
1465
+ c = LOWER(ch);
1315
1466
 
1316
- case h_content_length:
1317
- if (ch == ' ') break;
1318
- if (ch < '0' || ch > '9') goto error;
1319
- parser->content_length *= 10;
1320
- parser->content_length += ch - '0';
1321
- break;
1467
+ switch (h_state) {
1468
+ case h_general:
1469
+ {
1470
+ const char* p_cr;
1471
+ const char* p_lf;
1472
+ size_t limit = data + len - p;
1473
+
1474
+ limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
1475
+
1476
+ p_cr = (const char*) memchr(p, CR, limit);
1477
+ p_lf = (const char*) memchr(p, LF, limit);
1478
+ if (p_cr != NULL) {
1479
+ if (p_lf != NULL && p_cr >= p_lf)
1480
+ p = p_lf;
1481
+ else
1482
+ p = p_cr;
1483
+ } else if (UNLIKELY(p_lf != NULL)) {
1484
+ p = p_lf;
1485
+ } else {
1486
+ p = data + len;
1487
+ }
1488
+ --p;
1322
1489
 
1323
- /* Transfer-Encoding: chunked */
1324
- case h_matching_transfer_encoding_chunked:
1325
- index++;
1326
- if (index > sizeof(CHUNKED)-1
1327
- || c != CHUNKED[index]) {
1328
- header_state = h_general;
1329
- } else if (index == sizeof(CHUNKED)-2) {
1330
- header_state = h_transfer_encoding_chunked;
1490
+ break;
1331
1491
  }
1332
- break;
1333
1492
 
1334
- /* looking for 'Connection: keep-alive' */
1335
- case h_matching_connection_keep_alive:
1336
- index++;
1337
- if (index > sizeof(KEEP_ALIVE)-1
1338
- || c != KEEP_ALIVE[index]) {
1339
- header_state = h_general;
1340
- } else if (index == sizeof(KEEP_ALIVE)-2) {
1341
- header_state = h_connection_keep_alive;
1342
- }
1343
- break;
1493
+ case h_connection:
1494
+ case h_transfer_encoding:
1495
+ assert(0 && "Shouldn't get here.");
1496
+ break;
1497
+
1498
+ case h_content_length:
1499
+ if (ch == ' ') break;
1500
+ h_state = h_content_length_num;
1501
+ /* FALLTHROUGH */
1344
1502
 
1345
- /* looking for 'Connection: close' */
1346
- case h_matching_connection_close:
1347
- index++;
1348
- if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1349
- header_state = h_general;
1350
- } else if (index == sizeof(CLOSE)-2) {
1351
- header_state = h_connection_close;
1503
+ case h_content_length_num:
1504
+ {
1505
+ uint64_t t;
1506
+
1507
+ if (ch == ' ') {
1508
+ h_state = h_content_length_ws;
1509
+ break;
1510
+ }
1511
+
1512
+ if (UNLIKELY(!IS_NUM(ch))) {
1513
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1514
+ parser->header_state = h_state;
1515
+ goto error;
1516
+ }
1517
+
1518
+ t = parser->content_length;
1519
+ t *= 10;
1520
+ t += ch - '0';
1521
+
1522
+ /* Overflow? Test against a conservative limit for simplicity. */
1523
+ if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
1524
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1525
+ parser->header_state = h_state;
1526
+ goto error;
1527
+ }
1528
+
1529
+ parser->content_length = t;
1530
+ break;
1352
1531
  }
1353
- break;
1354
1532
 
1355
- case h_transfer_encoding_chunked:
1356
- case h_connection_keep_alive:
1357
- case h_connection_close:
1358
- if (ch != ' ') header_state = h_general;
1359
- break;
1533
+ case h_content_length_ws:
1534
+ if (ch == ' ') break;
1535
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1536
+ parser->header_state = h_state;
1537
+ goto error;
1360
1538
 
1361
- default:
1362
- state = s_header_value;
1363
- header_state = h_general;
1364
- break;
1539
+ /* Transfer-Encoding: chunked */
1540
+ case h_matching_transfer_encoding_chunked:
1541
+ parser->index++;
1542
+ if (parser->index > sizeof(CHUNKED)-1
1543
+ || c != CHUNKED[parser->index]) {
1544
+ h_state = h_general;
1545
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1546
+ h_state = h_transfer_encoding_chunked;
1547
+ }
1548
+ break;
1549
+
1550
+ case h_matching_connection_token_start:
1551
+ /* looking for 'Connection: keep-alive' */
1552
+ if (c == 'k') {
1553
+ h_state = h_matching_connection_keep_alive;
1554
+ /* looking for 'Connection: close' */
1555
+ } else if (c == 'c') {
1556
+ h_state = h_matching_connection_close;
1557
+ } else if (c == 'u') {
1558
+ h_state = h_matching_connection_upgrade;
1559
+ } else if (STRICT_TOKEN(c)) {
1560
+ h_state = h_matching_connection_token;
1561
+ } else if (c == ' ' || c == '\t') {
1562
+ /* Skip lws */
1563
+ } else {
1564
+ h_state = h_general;
1565
+ }
1566
+ break;
1567
+
1568
+ /* looking for 'Connection: keep-alive' */
1569
+ case h_matching_connection_keep_alive:
1570
+ parser->index++;
1571
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1572
+ || c != KEEP_ALIVE[parser->index]) {
1573
+ h_state = h_matching_connection_token;
1574
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1575
+ h_state = h_connection_keep_alive;
1576
+ }
1577
+ break;
1578
+
1579
+ /* looking for 'Connection: close' */
1580
+ case h_matching_connection_close:
1581
+ parser->index++;
1582
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1583
+ h_state = h_matching_connection_token;
1584
+ } else if (parser->index == sizeof(CLOSE)-2) {
1585
+ h_state = h_connection_close;
1586
+ }
1587
+ break;
1588
+
1589
+ /* looking for 'Connection: upgrade' */
1590
+ case h_matching_connection_upgrade:
1591
+ parser->index++;
1592
+ if (parser->index > sizeof(UPGRADE) - 1 ||
1593
+ c != UPGRADE[parser->index]) {
1594
+ h_state = h_matching_connection_token;
1595
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1596
+ h_state = h_connection_upgrade;
1597
+ }
1598
+ break;
1599
+
1600
+ case h_matching_connection_token:
1601
+ if (ch == ',') {
1602
+ h_state = h_matching_connection_token_start;
1603
+ parser->index = 0;
1604
+ }
1605
+ break;
1606
+
1607
+ case h_transfer_encoding_chunked:
1608
+ if (ch != ' ') h_state = h_general;
1609
+ break;
1610
+
1611
+ case h_connection_keep_alive:
1612
+ case h_connection_close:
1613
+ case h_connection_upgrade:
1614
+ if (ch == ',') {
1615
+ if (h_state == h_connection_keep_alive) {
1616
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1617
+ } else if (h_state == h_connection_close) {
1618
+ parser->flags |= F_CONNECTION_CLOSE;
1619
+ } else if (h_state == h_connection_upgrade) {
1620
+ parser->flags |= F_CONNECTION_UPGRADE;
1621
+ }
1622
+ h_state = h_matching_connection_token_start;
1623
+ parser->index = 0;
1624
+ } else if (ch != ' ') {
1625
+ h_state = h_matching_connection_token;
1626
+ }
1627
+ break;
1628
+
1629
+ default:
1630
+ UPDATE_STATE(s_header_value);
1631
+ h_state = h_general;
1632
+ break;
1633
+ }
1365
1634
  }
1635
+ parser->header_state = h_state;
1636
+
1637
+ COUNT_HEADER_SIZE(p - start);
1638
+
1639
+ if (p == data + len)
1640
+ --p;
1366
1641
  break;
1367
1642
  }
1368
1643
 
1369
1644
  case s_header_almost_done:
1370
- header_almost_done:
1371
1645
  {
1372
- STRICT_CHECK(ch != LF);
1646
+ if (UNLIKELY(ch != LF)) {
1647
+ SET_ERRNO(HPE_LF_EXPECTED);
1648
+ goto error;
1649
+ }
1650
+
1651
+ UPDATE_STATE(s_header_value_lws);
1652
+ break;
1653
+ }
1373
1654
 
1374
- state = s_header_field_start;
1655
+ case s_header_value_lws:
1656
+ {
1657
+ if (ch == ' ' || ch == '\t') {
1658
+ UPDATE_STATE(s_header_value_start);
1659
+ REEXECUTE();
1660
+ }
1375
1661
 
1376
- switch (header_state) {
1662
+ /* finished the header */
1663
+ switch (parser->header_state) {
1377
1664
  case h_connection_keep_alive:
1378
1665
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
1379
1666
  break;
@@ -1383,28 +1670,87 @@ size_t http_parser_execute (http_parser *parser,
1383
1670
  case h_transfer_encoding_chunked:
1384
1671
  parser->flags |= F_CHUNKED;
1385
1672
  break;
1673
+ case h_connection_upgrade:
1674
+ parser->flags |= F_CONNECTION_UPGRADE;
1675
+ break;
1386
1676
  default:
1387
1677
  break;
1388
1678
  }
1679
+
1680
+ UPDATE_STATE(s_header_field_start);
1681
+ REEXECUTE();
1682
+ }
1683
+
1684
+ case s_header_value_discard_ws_almost_done:
1685
+ {
1686
+ STRICT_CHECK(ch != LF);
1687
+ UPDATE_STATE(s_header_value_discard_lws);
1389
1688
  break;
1390
1689
  }
1391
1690
 
1691
+ case s_header_value_discard_lws:
1692
+ {
1693
+ if (ch == ' ' || ch == '\t') {
1694
+ UPDATE_STATE(s_header_value_discard_ws);
1695
+ break;
1696
+ } else {
1697
+ switch (parser->header_state) {
1698
+ case h_connection_keep_alive:
1699
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1700
+ break;
1701
+ case h_connection_close:
1702
+ parser->flags |= F_CONNECTION_CLOSE;
1703
+ break;
1704
+ case h_connection_upgrade:
1705
+ parser->flags |= F_CONNECTION_UPGRADE;
1706
+ break;
1707
+ case h_transfer_encoding_chunked:
1708
+ parser->flags |= F_CHUNKED;
1709
+ break;
1710
+ default:
1711
+ break;
1712
+ }
1713
+
1714
+ /* header value was empty */
1715
+ MARK(header_value);
1716
+ UPDATE_STATE(s_header_field_start);
1717
+ CALLBACK_DATA_NOADVANCE(header_value);
1718
+ REEXECUTE();
1719
+ }
1720
+ }
1721
+
1392
1722
  case s_headers_almost_done:
1393
- headers_almost_done:
1394
1723
  {
1395
1724
  STRICT_CHECK(ch != LF);
1396
1725
 
1397
1726
  if (parser->flags & F_TRAILING) {
1398
1727
  /* End of a chunked request */
1399
- CALLBACK2(message_complete);
1400
- state = NEW_MESSAGE();
1401
- break;
1728
+ UPDATE_STATE(s_message_done);
1729
+ CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
1730
+ REEXECUTE();
1402
1731
  }
1403
1732
 
1404
- nread = 0;
1733
+ /* Cannot use chunked encoding and a content-length header together
1734
+ per the HTTP specification. */
1735
+ if ((parser->flags & F_CHUNKED) &&
1736
+ (parser->flags & F_CONTENTLENGTH)) {
1737
+ SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
1738
+ goto error;
1739
+ }
1405
1740
 
1406
- if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1407
- parser->upgrade = 1;
1741
+ UPDATE_STATE(s_headers_done);
1742
+
1743
+ /* Set this here so that on_headers_complete() callbacks can see it */
1744
+ if ((parser->flags & F_UPGRADE) &&
1745
+ (parser->flags & F_CONNECTION_UPGRADE)) {
1746
+ /* For responses, "Upgrade: foo" and "Connection: upgrade" are
1747
+ * mandatory only when it is a 101 Switching Protocols response,
1748
+ * otherwise it is purely informational, to announce support.
1749
+ */
1750
+ parser->upgrade =
1751
+ (parser->type == HTTP_REQUEST || parser->status_code == 101);
1752
+ } else {
1753
+ parser->upgrade = (parser->method == HTTP_CONNECT);
1408
1754
  }
1409
1755
 
1410
1756
  /* Here we call the headers_complete callback. This is somewhat
@@ -1412,50 +1758,75 @@ size_t http_parser_execute (http_parser *parser,
1412
1758
  * will interpret that as saying that this message has no body. This
1413
1759
  * is needed for the annoying case of recieving a response to a HEAD
1414
1760
  * request.
1761
+ *
1762
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1763
+ * we have to simulate it by handling a change in errno below.
1415
1764
  */
1416
1765
  if (settings->on_headers_complete) {
1417
1766
  switch (settings->on_headers_complete(parser)) {
1418
1767
  case 0:
1419
1768
  break;
1420
1769
 
1770
+ case 2:
1771
+ parser->upgrade = 1;
1772
+
1773
+ /* FALLTHROUGH */
1421
1774
  case 1:
1422
1775
  parser->flags |= F_SKIPBODY;
1423
1776
  break;
1424
1777
 
1425
1778
  default:
1426
- parser->state = state;
1427
- return p - data; /* Error */
1779
+ SET_ERRNO(HPE_CB_headers_complete);
1780
+ RETURN(p - data); /* Error */
1428
1781
  }
1429
1782
  }
1430
1783
 
1431
- /* Exit, the rest of the connect is in a different protocol. */
1432
- if (parser->upgrade) {
1433
- CALLBACK2(message_complete);
1434
- return (p - data);
1784
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1785
+ RETURN(p - data);
1786
+ }
1787
+
1788
+ REEXECUTE();
1789
+ }
1790
+
1791
+ case s_headers_done:
1792
+ {
1793
+ int hasBody;
1794
+ STRICT_CHECK(ch != LF);
1795
+
1796
+ parser->nread = 0;
1797
+
1798
+ hasBody = parser->flags & F_CHUNKED ||
1799
+ (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
1800
+ if (parser->upgrade && (parser->method == HTTP_CONNECT ||
1801
+ (parser->flags & F_SKIPBODY) || !hasBody)) {
1802
+ /* Exit, the rest of the message is in a different protocol. */
1803
+ UPDATE_STATE(NEW_MESSAGE());
1804
+ CALLBACK_NOTIFY(message_complete);
1805
+ RETURN((p - data) + 1);
1435
1806
  }
1436
1807
 
1437
1808
  if (parser->flags & F_SKIPBODY) {
1438
- CALLBACK2(message_complete);
1439
- state = NEW_MESSAGE();
1809
+ UPDATE_STATE(NEW_MESSAGE());
1810
+ CALLBACK_NOTIFY(message_complete);
1440
1811
  } else if (parser->flags & F_CHUNKED) {
1441
1812
  /* chunked encoding - ignore Content-Length header */
1442
- state = s_chunk_size_start;
1813
+ UPDATE_STATE(s_chunk_size_start);
1443
1814
  } else {
1444
1815
  if (parser->content_length == 0) {
1445
1816
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1446
- CALLBACK2(message_complete);
1447
- state = NEW_MESSAGE();
1448
- } else if (parser->content_length > 0) {
1817
+ UPDATE_STATE(NEW_MESSAGE());
1818
+ CALLBACK_NOTIFY(message_complete);
1819
+ } else if (parser->content_length != ULLONG_MAX) {
1449
1820
  /* Content-Length header given and non-zero */
1450
- state = s_body_identity;
1821
+ UPDATE_STATE(s_body_identity);
1451
1822
  } else {
1452
- if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1823
+ if (!http_message_needs_eof(parser)) {
1453
1824
  /* Assume content-length 0 - read the next */
1454
- CALLBACK2(message_complete);
1455
- state = NEW_MESSAGE();
1825
+ UPDATE_STATE(NEW_MESSAGE());
1826
+ CALLBACK_NOTIFY(message_complete);
1456
1827
  } else {
1457
1828
  /* Read body until EOF */
1458
- state = s_body_identity_eof;
1829
+ UPDATE_STATE(s_body_identity_eof);
1459
1830
  }
1460
1831
  }
1461
1832
  }
@@ -1464,60 +1835,107 @@ size_t http_parser_execute (http_parser *parser,
1464
1835
  }
1465
1836
 
1466
1837
  case s_body_identity:
1467
- to_read = MIN(pe - p, (int64_t)parser->content_length);
1468
- if (to_read > 0) {
1469
- if (settings->on_body) settings->on_body(parser, p, to_read);
1470
- p += to_read - 1;
1471
- parser->content_length -= to_read;
1472
- if (parser->content_length == 0) {
1473
- CALLBACK2(message_complete);
1474
- state = NEW_MESSAGE();
1475
- }
1838
+ {
1839
+ uint64_t to_read = MIN(parser->content_length,
1840
+ (uint64_t) ((data + len) - p));
1841
+
1842
+ assert(parser->content_length != 0
1843
+ && parser->content_length != ULLONG_MAX);
1844
+
1845
+ /* The difference between advancing content_length and p is because
1846
+ * the latter will automaticaly advance on the next loop iteration.
1847
+ * Further, if content_length ends up at 0, we want to see the last
1848
+ * byte again for our message complete callback.
1849
+ */
1850
+ MARK(body);
1851
+ parser->content_length -= to_read;
1852
+ p += to_read - 1;
1853
+
1854
+ if (parser->content_length == 0) {
1855
+ UPDATE_STATE(s_message_done);
1856
+
1857
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1858
+ *
1859
+ * The alternative to doing this is to wait for the next byte to
1860
+ * trigger the data callback, just as in every other case. The
1861
+ * problem with this is that this makes it difficult for the test
1862
+ * harness to distinguish between complete-on-EOF and
1863
+ * complete-on-length. It's not clear that this distinction is
1864
+ * important for applications, but let's keep it for now.
1865
+ */
1866
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1867
+ REEXECUTE();
1476
1868
  }
1869
+
1477
1870
  break;
1871
+ }
1478
1872
 
1479
1873
  /* read until EOF */
1480
1874
  case s_body_identity_eof:
1481
- to_read = pe - p;
1482
- if (to_read > 0) {
1483
- if (settings->on_body) settings->on_body(parser, p, to_read);
1484
- p += to_read - 1;
1875
+ MARK(body);
1876
+ p = data + len - 1;
1877
+
1878
+ break;
1879
+
1880
+ case s_message_done:
1881
+ UPDATE_STATE(NEW_MESSAGE());
1882
+ CALLBACK_NOTIFY(message_complete);
1883
+ if (parser->upgrade) {
1884
+ /* Exit, the rest of the message is in a different protocol. */
1885
+ RETURN((p - data) + 1);
1485
1886
  }
1486
1887
  break;
1487
1888
 
1488
1889
  case s_chunk_size_start:
1489
1890
  {
1490
- assert(nread == 1);
1891
+ assert(parser->nread == 1);
1491
1892
  assert(parser->flags & F_CHUNKED);
1492
1893
 
1493
- c = unhex[(unsigned char)ch];
1494
- if (c == -1) goto error;
1495
- parser->content_length = c;
1496
- state = s_chunk_size;
1894
+ unhex_val = unhex[(unsigned char)ch];
1895
+ if (UNLIKELY(unhex_val == -1)) {
1896
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1897
+ goto error;
1898
+ }
1899
+
1900
+ parser->content_length = unhex_val;
1901
+ UPDATE_STATE(s_chunk_size);
1497
1902
  break;
1498
1903
  }
1499
1904
 
1500
1905
  case s_chunk_size:
1501
1906
  {
1907
+ uint64_t t;
1908
+
1502
1909
  assert(parser->flags & F_CHUNKED);
1503
1910
 
1504
1911
  if (ch == CR) {
1505
- state = s_chunk_size_almost_done;
1912
+ UPDATE_STATE(s_chunk_size_almost_done);
1506
1913
  break;
1507
1914
  }
1508
1915
 
1509
- c = unhex[(unsigned char)ch];
1916
+ unhex_val = unhex[(unsigned char)ch];
1510
1917
 
1511
- if (c == -1) {
1918
+ if (unhex_val == -1) {
1512
1919
  if (ch == ';' || ch == ' ') {
1513
- state = s_chunk_parameters;
1920
+ UPDATE_STATE(s_chunk_parameters);
1514
1921
  break;
1515
1922
  }
1923
+
1924
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1516
1925
  goto error;
1517
1926
  }
1518
1927
 
1519
- parser->content_length *= 16;
1520
- parser->content_length += c;
1928
+ t = parser->content_length;
1929
+ t *= 16;
1930
+ t += unhex_val;
1931
+
1932
+ /* Overflow? Test against a conservative limit for simplicity. */
1933
+ if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
1934
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1935
+ goto error;
1936
+ }
1937
+
1938
+ parser->content_length = t;
1521
1939
  break;
1522
1940
  }
1523
1941
 
@@ -1526,7 +1944,7 @@ size_t http_parser_execute (http_parser *parser,
1526
1944
  assert(parser->flags & F_CHUNKED);
1527
1945
  /* just ignore this shit. TODO check for overflow */
1528
1946
  if (ch == CR) {
1529
- state = s_chunk_size_almost_done;
1947
+ UPDATE_STATE(s_chunk_size_almost_done);
1530
1948
  break;
1531
1949
  }
1532
1950
  break;
@@ -1537,108 +1955,486 @@ size_t http_parser_execute (http_parser *parser,
1537
1955
  assert(parser->flags & F_CHUNKED);
1538
1956
  STRICT_CHECK(ch != LF);
1539
1957
 
1540
- nread = 0;
1958
+ parser->nread = 0;
1541
1959
 
1542
1960
  if (parser->content_length == 0) {
1543
1961
  parser->flags |= F_TRAILING;
1544
- state = s_header_field_start;
1962
+ UPDATE_STATE(s_header_field_start);
1545
1963
  } else {
1546
- state = s_chunk_data;
1964
+ UPDATE_STATE(s_chunk_data);
1547
1965
  }
1966
+ CALLBACK_NOTIFY(chunk_header);
1548
1967
  break;
1549
1968
  }
1550
1969
 
1551
1970
  case s_chunk_data:
1552
1971
  {
1553
- assert(parser->flags & F_CHUNKED);
1972
+ uint64_t to_read = MIN(parser->content_length,
1973
+ (uint64_t) ((data + len) - p));
1554
1974
 
1555
- to_read = MIN(pe - p, (int64_t)(parser->content_length));
1975
+ assert(parser->flags & F_CHUNKED);
1976
+ assert(parser->content_length != 0
1977
+ && parser->content_length != ULLONG_MAX);
1556
1978
 
1557
- if (to_read > 0) {
1558
- if (settings->on_body) settings->on_body(parser, p, to_read);
1559
- p += to_read - 1;
1560
- }
1979
+ /* See the explanation in s_body_identity for why the content
1980
+ * length and data pointers are managed this way.
1981
+ */
1982
+ MARK(body);
1983
+ parser->content_length -= to_read;
1984
+ p += to_read - 1;
1561
1985
 
1562
- if (to_read == parser->content_length) {
1563
- state = s_chunk_data_almost_done;
1986
+ if (parser->content_length == 0) {
1987
+ UPDATE_STATE(s_chunk_data_almost_done);
1564
1988
  }
1565
1989
 
1566
- parser->content_length -= to_read;
1567
1990
  break;
1568
1991
  }
1569
1992
 
1570
1993
  case s_chunk_data_almost_done:
1571
1994
  assert(parser->flags & F_CHUNKED);
1995
+ assert(parser->content_length == 0);
1572
1996
  STRICT_CHECK(ch != CR);
1573
- state = s_chunk_data_done;
1997
+ UPDATE_STATE(s_chunk_data_done);
1998
+ CALLBACK_DATA(body);
1574
1999
  break;
1575
2000
 
1576
2001
  case s_chunk_data_done:
1577
2002
  assert(parser->flags & F_CHUNKED);
1578
2003
  STRICT_CHECK(ch != LF);
1579
- state = s_chunk_size_start;
2004
+ parser->nread = 0;
2005
+ UPDATE_STATE(s_chunk_size_start);
2006
+ CALLBACK_NOTIFY(chunk_complete);
1580
2007
  break;
1581
2008
 
1582
2009
  default:
1583
2010
  assert(0 && "unhandled state");
2011
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1584
2012
  goto error;
1585
2013
  }
1586
2014
  }
1587
2015
 
1588
- CALLBACK_NOCLEAR(header_field);
1589
- CALLBACK_NOCLEAR(header_value);
1590
- CALLBACK_NOCLEAR(fragment);
1591
- CALLBACK_NOCLEAR(query_string);
1592
- CALLBACK_NOCLEAR(path);
1593
- CALLBACK_NOCLEAR(url);
2016
+ /* Run callbacks for any marks that we have leftover after we ran our of
2017
+ * bytes. There should be at most one of these set, so it's OK to invoke
2018
+ * them in series (unset marks will not result in callbacks).
2019
+ *
2020
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
2021
+ * overflowed 'data' and this allows us to correct for the off-by-one that
2022
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
2023
+ * value that's in-bounds).
2024
+ */
2025
+
2026
+ assert(((header_field_mark ? 1 : 0) +
2027
+ (header_value_mark ? 1 : 0) +
2028
+ (url_mark ? 1 : 0) +
2029
+ (body_mark ? 1 : 0) +
2030
+ (status_mark ? 1 : 0)) <= 1);
1594
2031
 
1595
- parser->state = state;
1596
- parser->header_state = header_state;
1597
- parser->index = index;
1598
- parser->nread = nread;
2032
+ CALLBACK_DATA_NOADVANCE(header_field);
2033
+ CALLBACK_DATA_NOADVANCE(header_value);
2034
+ CALLBACK_DATA_NOADVANCE(url);
2035
+ CALLBACK_DATA_NOADVANCE(body);
2036
+ CALLBACK_DATA_NOADVANCE(status);
1599
2037
 
1600
- return len;
2038
+ RETURN(len);
1601
2039
 
1602
2040
  error:
1603
- parser->state = s_dead;
1604
- return (p - data);
2041
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
2042
+ SET_ERRNO(HPE_UNKNOWN);
2043
+ }
2044
+
2045
+ RETURN(p - data);
2046
+ }
2047
+
2048
+
2049
+ /* Does the parser need to see an EOF to find the end of the message? */
2050
+ int
2051
+ http_message_needs_eof (const http_parser *parser)
2052
+ {
2053
+ if (parser->type == HTTP_REQUEST) {
2054
+ return 0;
2055
+ }
2056
+
2057
+ /* See RFC 2616 section 4.4 */
2058
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
2059
+ parser->status_code == 204 || /* No Content */
2060
+ parser->status_code == 304 || /* Not Modified */
2061
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
2062
+ return 0;
2063
+ }
2064
+
2065
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
2066
+ return 0;
2067
+ }
2068
+
2069
+ return 1;
1605
2070
  }
1606
2071
 
1607
2072
 
1608
2073
  int
1609
- http_should_keep_alive (http_parser *parser)
2074
+ http_should_keep_alive (const http_parser *parser)
1610
2075
  {
1611
2076
  if (parser->http_major > 0 && parser->http_minor > 0) {
1612
2077
  /* HTTP/1.1 */
1613
2078
  if (parser->flags & F_CONNECTION_CLOSE) {
1614
2079
  return 0;
1615
- } else {
1616
- return 1;
1617
2080
  }
1618
2081
  } else {
1619
2082
  /* HTTP/1.0 or earlier */
1620
- if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1621
- return 1;
1622
- } else {
2083
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1623
2084
  return 0;
1624
2085
  }
1625
2086
  }
2087
+
2088
+ return !http_message_needs_eof(parser);
1626
2089
  }
1627
2090
 
1628
2091
 
1629
- const char * http_method_str (enum http_method m)
2092
+ const char *
2093
+ http_method_str (enum http_method m)
1630
2094
  {
1631
- return method_strings[m];
2095
+ return ELEM_AT(method_strings, m, "<unknown>");
1632
2096
  }
1633
2097
 
1634
2098
 
1635
2099
  void
1636
2100
  http_parser_init (http_parser *parser, enum http_parser_type t)
1637
2101
  {
2102
+ void *data = parser->data; /* preserve application data */
2103
+ memset(parser, 0, sizeof(*parser));
2104
+ parser->data = data;
1638
2105
  parser->type = t;
1639
2106
  parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1640
- parser->nread = 0;
1641
- parser->upgrade = 0;
1642
- parser->flags = 0;
1643
- parser->method = 0;
2107
+ parser->http_errno = HPE_OK;
2108
+ }
2109
+
2110
+ void
2111
+ http_parser_settings_init(http_parser_settings *settings)
2112
+ {
2113
+ memset(settings, 0, sizeof(*settings));
2114
+ }
2115
+
2116
+ const char *
2117
+ http_errno_name(enum http_errno err) {
2118
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2119
+ return http_strerror_tab[err].name;
2120
+ }
2121
+
2122
+ const char *
2123
+ http_errno_description(enum http_errno err) {
2124
+ assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
2125
+ return http_strerror_tab[err].description;
2126
+ }
2127
+
2128
+ static enum http_host_state
2129
+ http_parse_host_char(enum http_host_state s, const char ch) {
2130
+ switch(s) {
2131
+ case s_http_userinfo:
2132
+ case s_http_userinfo_start:
2133
+ if (ch == '@') {
2134
+ return s_http_host_start;
2135
+ }
2136
+
2137
+ if (IS_USERINFO_CHAR(ch)) {
2138
+ return s_http_userinfo;
2139
+ }
2140
+ break;
2141
+
2142
+ case s_http_host_start:
2143
+ if (ch == '[') {
2144
+ return s_http_host_v6_start;
2145
+ }
2146
+
2147
+ if (IS_HOST_CHAR(ch)) {
2148
+ return s_http_host;
2149
+ }
2150
+
2151
+ break;
2152
+
2153
+ case s_http_host:
2154
+ if (IS_HOST_CHAR(ch)) {
2155
+ return s_http_host;
2156
+ }
2157
+
2158
+ /* FALLTHROUGH */
2159
+ case s_http_host_v6_end:
2160
+ if (ch == ':') {
2161
+ return s_http_host_port_start;
2162
+ }
2163
+
2164
+ break;
2165
+
2166
+ case s_http_host_v6:
2167
+ if (ch == ']') {
2168
+ return s_http_host_v6_end;
2169
+ }
2170
+
2171
+ /* FALLTHROUGH */
2172
+ case s_http_host_v6_start:
2173
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
2174
+ return s_http_host_v6;
2175
+ }
2176
+
2177
+ if (s == s_http_host_v6 && ch == '%') {
2178
+ return s_http_host_v6_zone_start;
2179
+ }
2180
+ break;
2181
+
2182
+ case s_http_host_v6_zone:
2183
+ if (ch == ']') {
2184
+ return s_http_host_v6_end;
2185
+ }
2186
+
2187
+ /* FALLTHROUGH */
2188
+ case s_http_host_v6_zone_start:
2189
+ /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
2190
+ if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
2191
+ ch == '~') {
2192
+ return s_http_host_v6_zone;
2193
+ }
2194
+ break;
2195
+
2196
+ case s_http_host_port:
2197
+ case s_http_host_port_start:
2198
+ if (IS_NUM(ch)) {
2199
+ return s_http_host_port;
2200
+ }
2201
+
2202
+ break;
2203
+
2204
+ default:
2205
+ break;
2206
+ }
2207
+ return s_http_host_dead;
2208
+ }
2209
+
2210
+ static int
2211
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2212
+ enum http_host_state s;
2213
+
2214
+ const char *p;
2215
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2216
+
2217
+ assert(u->field_set & (1 << UF_HOST));
2218
+
2219
+ u->field_data[UF_HOST].len = 0;
2220
+
2221
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
2222
+
2223
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2224
+ enum http_host_state new_s = http_parse_host_char(s, *p);
2225
+
2226
+ if (new_s == s_http_host_dead) {
2227
+ return 1;
2228
+ }
2229
+
2230
+ switch(new_s) {
2231
+ case s_http_host:
2232
+ if (s != s_http_host) {
2233
+ u->field_data[UF_HOST].off = p - buf;
2234
+ }
2235
+ u->field_data[UF_HOST].len++;
2236
+ break;
2237
+
2238
+ case s_http_host_v6:
2239
+ if (s != s_http_host_v6) {
2240
+ u->field_data[UF_HOST].off = p - buf;
2241
+ }
2242
+ u->field_data[UF_HOST].len++;
2243
+ break;
2244
+
2245
+ case s_http_host_v6_zone_start:
2246
+ case s_http_host_v6_zone:
2247
+ u->field_data[UF_HOST].len++;
2248
+ break;
2249
+
2250
+ case s_http_host_port:
2251
+ if (s != s_http_host_port) {
2252
+ u->field_data[UF_PORT].off = p - buf;
2253
+ u->field_data[UF_PORT].len = 0;
2254
+ u->field_set |= (1 << UF_PORT);
2255
+ }
2256
+ u->field_data[UF_PORT].len++;
2257
+ break;
2258
+
2259
+ case s_http_userinfo:
2260
+ if (s != s_http_userinfo) {
2261
+ u->field_data[UF_USERINFO].off = p - buf ;
2262
+ u->field_data[UF_USERINFO].len = 0;
2263
+ u->field_set |= (1 << UF_USERINFO);
2264
+ }
2265
+ u->field_data[UF_USERINFO].len++;
2266
+ break;
2267
+
2268
+ default:
2269
+ break;
2270
+ }
2271
+ s = new_s;
2272
+ }
2273
+
2274
+ /* Make sure we don't end somewhere unexpected */
2275
+ switch (s) {
2276
+ case s_http_host_start:
2277
+ case s_http_host_v6_start:
2278
+ case s_http_host_v6:
2279
+ case s_http_host_v6_zone_start:
2280
+ case s_http_host_v6_zone:
2281
+ case s_http_host_port_start:
2282
+ case s_http_userinfo:
2283
+ case s_http_userinfo_start:
2284
+ return 1;
2285
+ default:
2286
+ break;
2287
+ }
2288
+
2289
+ return 0;
2290
+ }
2291
+
2292
+ void
2293
+ http_parser_url_init(struct http_parser_url *u) {
2294
+ memset(u, 0, sizeof(*u));
2295
+ }
2296
+
2297
+ int
2298
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2299
+ struct http_parser_url *u)
2300
+ {
2301
+ enum state s;
2302
+ const char *p;
2303
+ enum http_parser_url_fields uf, old_uf;
2304
+ int found_at = 0;
2305
+
2306
+ u->port = u->field_set = 0;
2307
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2308
+ old_uf = UF_MAX;
2309
+
2310
+ for (p = buf; p < buf + buflen; p++) {
2311
+ s = parse_url_char(s, *p);
2312
+
2313
+ /* Figure out the next field that we're operating on */
2314
+ switch (s) {
2315
+ case s_dead:
2316
+ return 1;
2317
+
2318
+ /* Skip delimeters */
2319
+ case s_req_schema_slash:
2320
+ case s_req_schema_slash_slash:
2321
+ case s_req_server_start:
2322
+ case s_req_query_string_start:
2323
+ case s_req_fragment_start:
2324
+ continue;
2325
+
2326
+ case s_req_schema:
2327
+ uf = UF_SCHEMA;
2328
+ break;
2329
+
2330
+ case s_req_server_with_at:
2331
+ found_at = 1;
2332
+
2333
+ /* FALLTHROUGH */
2334
+ case s_req_server:
2335
+ uf = UF_HOST;
2336
+ break;
2337
+
2338
+ case s_req_path:
2339
+ uf = UF_PATH;
2340
+ break;
2341
+
2342
+ case s_req_query_string:
2343
+ uf = UF_QUERY;
2344
+ break;
2345
+
2346
+ case s_req_fragment:
2347
+ uf = UF_FRAGMENT;
2348
+ break;
2349
+
2350
+ default:
2351
+ assert(!"Unexpected state");
2352
+ return 1;
2353
+ }
2354
+
2355
+ /* Nothing's changed; soldier on */
2356
+ if (uf == old_uf) {
2357
+ u->field_data[uf].len++;
2358
+ continue;
2359
+ }
2360
+
2361
+ u->field_data[uf].off = p - buf;
2362
+ u->field_data[uf].len = 1;
2363
+
2364
+ u->field_set |= (1 << uf);
2365
+ old_uf = uf;
2366
+ }
2367
+
2368
+ /* host must be present if there is a schema */
2369
+ /* parsing http:///toto will fail */
2370
+ if ((u->field_set & (1 << UF_SCHEMA)) &&
2371
+ (u->field_set & (1 << UF_HOST)) == 0) {
2372
+ return 1;
2373
+ }
2374
+
2375
+ if (u->field_set & (1 << UF_HOST)) {
2376
+ if (http_parse_host(buf, u, found_at) != 0) {
2377
+ return 1;
2378
+ }
2379
+ }
2380
+
2381
+ /* CONNECT requests can only contain "hostname:port" */
2382
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2383
+ return 1;
2384
+ }
2385
+
2386
+ if (u->field_set & (1 << UF_PORT)) {
2387
+ uint16_t off;
2388
+ uint16_t len;
2389
+ const char* p;
2390
+ const char* end;
2391
+ unsigned long v;
2392
+
2393
+ off = u->field_data[UF_PORT].off;
2394
+ len = u->field_data[UF_PORT].len;
2395
+ end = buf + off + len;
2396
+
2397
+ /* NOTE: The characters are already validated and are in the [0-9] range */
2398
+ assert(off + len <= buflen && "Port number overflow");
2399
+ v = 0;
2400
+ for (p = buf + off; p < end; p++) {
2401
+ v *= 10;
2402
+ v += *p - '0';
2403
+
2404
+ /* Ports have a max value of 2^16 */
2405
+ if (v > 0xffff) {
2406
+ return 1;
2407
+ }
2408
+ }
2409
+
2410
+ u->port = (uint16_t) v;
2411
+ }
2412
+
2413
+ return 0;
2414
+ }
2415
+
2416
+ void
2417
+ http_parser_pause(http_parser *parser, int paused) {
2418
+ /* Users should only be pausing/unpausing a parser that is not in an error
2419
+ * state. In non-debug builds, there's not much that we can do about this
2420
+ * other than ignore it.
2421
+ */
2422
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2423
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2424
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2425
+ } else {
2426
+ assert(0 && "Attempting to pause parser in error state");
2427
+ }
2428
+ }
2429
+
2430
+ int
2431
+ http_body_is_final(const struct http_parser *parser) {
2432
+ return parser->state == s_message_done;
2433
+ }
2434
+
2435
+ unsigned long
2436
+ http_parser_version(void) {
2437
+ return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
2438
+ HTTP_PARSER_VERSION_MINOR * 0x00100 |
2439
+ HTTP_PARSER_VERSION_PATCH * 0x00001;
1644
2440
  }