bossan 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,12 +2,12 @@
2
2
 
3
3
  #define LIMIT_MAX 1024 * 1024 * 1024
4
4
 
5
- buffer *
5
+ buffer_t *
6
6
  new_buffer(size_t buf_size, size_t limit)
7
7
  {
8
- buffer *buf;
9
- buf = ruby_xmalloc(sizeof(buffer));
10
- memset(buf, 0, sizeof(buffer));
8
+ buffer_t *buf;
9
+ buf = ruby_xmalloc(sizeof(buffer_t));
10
+ memset(buf, 0, sizeof(buffer_t));
11
11
  buf->buf = ruby_xmalloc(sizeof(char) * buf_size);
12
12
  buf->buf_size = buf_size;
13
13
  if(limit){
@@ -20,7 +20,7 @@ new_buffer(size_t buf_size, size_t limit)
20
20
 
21
21
 
22
22
  buffer_result
23
- write2buf(buffer *buf, const char *c, size_t l)
23
+ write2buf(buffer_t *buf, const char *c, size_t l)
24
24
  {
25
25
  size_t newl;
26
26
  char *newbuf;
@@ -49,7 +49,7 @@ write2buf(buffer *buf, const char *c, size_t l)
49
49
 
50
50
 
51
51
  void
52
- free_buffer(buffer *buf)
52
+ free_buffer(buffer_t *buf)
53
53
  {
54
54
  ruby_xfree(buf->buf);
55
55
  ruby_xfree(buf);
@@ -57,7 +57,7 @@ free_buffer(buffer *buf)
57
57
 
58
58
 
59
59
  VALUE
60
- getRbString(buffer *buf)
60
+ getRbString(buffer_t *buf)
61
61
  {
62
62
  VALUE o;
63
63
  o = rb_str_new(buf->buf, buf->len);
@@ -67,7 +67,7 @@ getRbString(buffer *buf)
67
67
 
68
68
 
69
69
  char *
70
- getString(buffer *buf)
70
+ getString(buffer_t *buf)
71
71
  {
72
72
  buf->buf[buf->len] = '\0';
73
73
  return buf->buf;
@@ -18,26 +18,31 @@ typedef struct {
18
18
  size_t buf_size;
19
19
  size_t len;
20
20
  size_t limit;
21
- } buffer;
21
+ } buffer_t;
22
22
 
23
- buffer *
23
+ buffer_t *
24
24
  new_buffer(size_t buf_size, size_t limit);
25
25
 
26
26
  buffer_result
27
- write2buf(buffer *buf, const char *c, size_t l);
27
+ write2buf(buffer_t *buf, const char *c, size_t l);
28
28
 
29
29
  void
30
- free_buffer(buffer *buf);
30
+ free_buffer(buffer_t *buf);
31
31
 
32
32
  VALUE
33
- getRbString(buffer *buf);
33
+ getRbString(buffer_t *buf);
34
34
 
35
35
  char *
36
- getString(buffer *buf);
37
-
36
+ getString(buffer_t *buf);
38
37
 
39
38
  #endif
40
39
 
41
40
 
42
41
 
43
42
 
43
+
44
+
45
+
46
+
47
+
48
+
@@ -7,24 +7,23 @@ typedef struct _client {
7
7
  int fd;
8
8
  char *remote_addr;
9
9
  uint32_t remote_port;
10
+
10
11
  uint8_t keep_alive;
11
- request *req;
12
- uint32_t body_length;
13
- int body_readed;
14
- void *body;
15
- int bad_request_code;
16
- request_body_type body_type;
17
- uint8_t complete;
12
+ char upgrade;
13
+ request *current_req;
14
+ request_queue *request_queue;
15
+
16
+ char complete;
18
17
 
19
- http_parser *http; // http req parser
20
- VALUE environ; // rack environ
21
- int status_code; // response status code
18
+ http_parser *http_parser; // http req parser
19
+ uint16_t status_code; // response status code
22
20
 
23
21
  VALUE http_status; // response status line
24
22
  VALUE headers; // http response headers
25
23
  uint8_t header_done; // header write status
26
24
  VALUE response; // rack response object
27
25
  VALUE response_iter; // rack response object
26
+ uint8_t chunked_response; // use Transfer-Encoding: chunked
28
27
  uint8_t content_length_set; // content_length_set flag
29
28
  uint32_t content_length; // content_length
30
29
  uint32_t write_bytes; // send body length
@@ -1,4 +1,7 @@
1
- /* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
2
5
  *
3
6
  * Permission is hereby granted, free of charge, to any person obtaining a copy
4
7
  * of this software and associated documentation files (the "Software"), to
@@ -19,70 +22,114 @@
19
22
  * IN THE SOFTWARE.
20
23
  */
21
24
  #include "http_parser.h"
22
- #ifdef _WIN32
23
- typedef __int8 int8_t;
24
- typedef unsigned __int8 uint8_t;
25
- typedef __int16 int16_t;
26
- typedef unsigned __int16 uint16_t;
27
- typedef __int16 int32_t;
28
- typedef unsigned __int32 uint32_t;
29
- #else
30
- #include <stdint.h>
31
- #endif
32
25
  #include <assert.h>
33
26
  #include <stddef.h>
27
+ #include <ctype.h>
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <limits.h>
31
+
32
+ #if __GNUC__ >= 3
33
+ # define likely(x) __builtin_expect(!!(x), 1)
34
+ # define unlikely(x) __builtin_expect(!!(x), 0)
35
+ #else
36
+ # define likely(x) (x)
37
+ # define unlikely(x) (x)
38
+ #endif
34
39
 
40
+ #ifndef ULLONG_MAX
41
+ # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
42
+ #endif
35
43
 
36
44
  #ifndef MIN
37
45
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
46
  #endif
39
47
 
48
+ #ifndef ARRAY_SIZE
49
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
50
+ #endif
40
51
 
41
- #define CALLBACK2(FOR) \
42
- do { \
43
- if (settings->on_##FOR) { \
44
- if (0 != settings->on_##FOR(parser)) return (p - data); \
45
- } \
46
- } while (0)
52
+ #ifndef BIT_AT
53
+ # define BIT_AT(a, i) \
54
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
55
+ (1 << ((unsigned int) (i) & 7))))
56
+ #endif
47
57
 
58
+ #ifndef ELEM_AT
59
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
60
+ #endif
48
61
 
49
- #define MARK(FOR) \
62
+ #if HTTP_PARSER_DEBUG
63
+ #define SET_ERRNO(e) \
50
64
  do { \
51
- FOR##_mark = p; \
65
+ parser->http_errno = (e); \
66
+ parser->error_lineno = __LINE__; \
52
67
  } while (0)
68
+ #else
69
+ #define SET_ERRNO(e) \
70
+ do { \
71
+ parser->http_errno = (e); \
72
+ } while(0)
73
+ #endif
53
74
 
54
- #define CALLBACK_NOCLEAR(FOR) \
75
+
76
+ /* Run the notify callback FOR, returning ER if it fails */
77
+ #define CALLBACK_NOTIFY_(FOR, ER) \
55
78
  do { \
56
- if (FOR##_mark) { \
57
- if (settings->on_##FOR) { \
58
- if (0 != settings->on_##FOR(parser, \
59
- FOR##_mark, \
60
- p - FOR##_mark, 1)) \
61
- { \
62
- return (p - data); \
63
- } \
79
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
80
+ \
81
+ if ( likely(settings->on_##FOR) ) { \
82
+ if (0 != settings->on_##FOR(parser)) { \
83
+ SET_ERRNO(HPE_CB_##FOR); \
84
+ } \
85
+ \
86
+ /* We either errored above or got paused; get out */ \
87
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
88
+ return (ER); \
64
89
  } \
65
90
  } \
66
91
  } while (0)
67
92
 
68
- #define CALLBACK_CLEAR(FOR) \
93
+ /* Run the notify callback FOR and consume the current byte */
94
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
95
+
96
+ /* Run the notify callback FOR and don't consume the current byte */
97
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
98
+
99
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
100
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
69
101
  do { \
70
- if (FOR##_mark) { \
71
- if (settings->on_##FOR) { \
72
- if (0 != settings->on_##FOR(parser, \
73
- FOR##_mark, \
74
- p - FOR##_mark, 0)) \
75
- { \
76
- return (p - data); \
102
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
103
+ \
104
+ if (likely(FOR##_mark)) { \
105
+ if (likely(settings->on_##FOR)) { \
106
+ if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
107
+ SET_ERRNO(HPE_CB_##FOR); \
108
+ } \
109
+ \
110
+ /* We either errored above or got paused; get out */ \
111
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
112
+ return (ER); \
77
113
  } \
78
114
  } \
115
+ FOR##_mark = NULL; \
79
116
  } \
80
117
  } while (0)
118
+
119
+ /* Run the data callback FOR and consume the current byte */
120
+ #define CALLBACK_DATA(FOR) \
121
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
122
+
123
+ /* Run the data callback FOR and don't consume the current byte */
124
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
125
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
81
126
 
82
- #define CALLBACK(FOR) \
127
+ /* Set the mark FOR; non-destructive if mark is already set */
128
+ #define MARK(FOR) \
83
129
  do { \
84
- CALLBACK_CLEAR(FOR); \
85
- FOR##_mark = NULL; \
130
+ if (!FOR##_mark) { \
131
+ FOR##_mark = p; \
132
+ } \
86
133
  } while (0)
87
134
 
88
135
 
@@ -97,31 +144,21 @@ do { \
97
144
 
98
145
 
99
146
  static const char *method_strings[] =
100
- { "DELETE"
101
- , "GET"
102
- , "HEAD"
103
- , "POST"
104
- , "PUT"
105
- , "CONNECT"
106
- , "OPTIONS"
107
- , "TRACE"
108
- , "COPY"
109
- , "LOCK"
110
- , "MKCOL"
111
- , "MOVE"
112
- , "PROPFIND"
113
- , "PROPPATCH"
114
- , "UNLOCK"
115
- , "REPORT"
116
- , "MKACTIVITY"
117
- , "CHECKOUT"
118
- , "MERGE"
147
+ {
148
+ #define XX(num, name, string) #string,
149
+ HTTP_METHOD_MAP(XX)
150
+ #undef XX
119
151
  };
120
152
 
121
153
 
122
- /* ' ', '_', '-' and all alpha-numeric ascii characters are accepted by acceptable_header.
123
- The 'A'-'Z' are lower-cased. */
124
- static const char acceptable_header[256] = {
154
+ /* Tokens as defined by RFC 2616. Upper-case letters map to their lower-case form.
155
+ * token = 1*<any CHAR except CTLs or separators>
156
+ * separators = "(" | ")" | "<" | ">" | "@"
157
+ * | "," | ";" | ":" | "\" | <">
158
+ * | "/" | "[" | "]" | "?" | "="
159
+ * | "{" | "}" | SP | HT
160
+ */
161
+ static const char tokens[256] = {
125
162
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
126
163
  0, 0, 0, 0, 0, 0, 0, 0,
127
164
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
@@ -131,9 +168,9 @@ static const char acceptable_header[256] = {
131
168
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
132
169
  0, 0, 0, 0, 0, 0, 0, 0,
133
170
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
134
- ' ', 0, 0, 0, 0, 0, 0, 0,
171
+ 0, '!', 0, '#', '$', '%', '&', '\'',
135
172
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
136
- 0, 0, 0, 0, 0, '-', 0, 0,
173
+ 0, 0, '*', '+', 0, '-', '.', 0,
137
174
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
138
175
  '0', '1', '2', '3', '4', '5', '6', '7',
139
176
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -145,15 +182,15 @@ static const char acceptable_header[256] = {
145
182
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
146
183
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
147
184
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
148
- 'x', 'y', 'z', 0, 0, 0, 0, '_',
185
+ 'x', 'y', 'z', 0, 0, 0, '^', '_',
149
186
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
150
- 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
187
+ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
151
188
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
152
189
  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
153
190
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
154
191
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
155
192
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
156
- 'x', 'y', 'z', 0, 0, 0, 0, 0 };
193
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
157
194
 
158
195
 
159
196
  static const int8_t unhex[256] =
@@ -168,40 +205,48 @@ static const int8_t unhex[256] =
168
205
  };
169
206
 
170
207
 
171
- static const uint8_t normal_url_char[256] = {
208
+ #if HTTP_PARSER_STRICT
209
+ # define T(v) 0
210
+ #else
211
+ # define T(v) v
212
+ #endif
213
+
214
+
215
+ static const uint8_t normal_url_char[32] = {
172
216
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
173
- 0, 0, 0, 0, 0, 0, 0, 0,
217
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
174
218
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
175
- 0, 0, 0, 0, 0, 0, 0, 0,
219
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
176
220
  /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
177
- 0, 0, 0, 0, 0, 0, 0, 0,
221
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
178
222
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
179
- 0, 0, 0, 0, 0, 0, 0, 0,
223
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
180
224
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
181
- 0, 1, 1, 0, 1, 1, 1, 1,
225
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
182
226
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
183
- 1, 1, 1, 1, 1, 1, 1, 1,
227
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
184
228
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
185
- 1, 1, 1, 1, 1, 1, 1, 1,
229
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
186
230
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
187
- 1, 1, 1, 1, 1, 1, 1, 0,
231
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
188
232
  /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
189
- 1, 1, 1, 1, 1, 1, 1, 1,
233
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
190
234
  /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
191
- 1, 1, 1, 1, 1, 1, 1, 1,
235
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
192
236
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
193
- 1, 1, 1, 1, 1, 1, 1, 1,
237
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
194
238
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
195
- 1, 1, 1, 1, 1, 1, 1, 1,
239
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
196
240
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
197
- 1, 1, 1, 1, 1, 1, 1, 1,
241
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
198
242
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
199
- 1, 1, 1, 1, 1, 1, 1, 1,
243
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
200
244
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
201
- 1, 1, 1, 1, 1, 1, 1, 1,
245
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
202
246
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
203
- 1, 1, 1, 1, 1, 1, 1, 0 };
247
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
204
248
 
249
+ #undef T
205
250
 
206
251
  enum state
207
252
  { s_dead = 1 /* important that this is > 0 */
@@ -229,8 +274,9 @@ enum state
229
274
  , s_req_schema
230
275
  , s_req_schema_slash
231
276
  , s_req_schema_slash_slash
232
- , s_req_host
233
- , s_req_port
277
+ , s_req_server_start
278
+ , s_req_server
279
+ , s_req_server_with_at
234
280
  , s_req_path
235
281
  , s_req_query_string_start
236
282
  , s_req_query_string
@@ -251,28 +297,35 @@ enum state
251
297
  , s_header_field
252
298
  , s_header_value_start
253
299
  , s_header_value
300
+ , s_header_value_lws
254
301
 
255
302
  , s_header_almost_done
256
303
 
304
+ , s_chunk_size_start
305
+ , s_chunk_size
306
+ , s_chunk_parameters
307
+ , s_chunk_size_almost_done
308
+
257
309
  , s_headers_almost_done
258
- /* Important: 's_headers_almost_done' must be the last 'header' state. All
310
+ , s_headers_done
311
+
312
+ /* Important: 's_headers_done' must be the last 'header' state. All
259
313
  * states beyond this must be 'body' states. It is used for overflow
260
314
  * checking. See the PARSING_HEADER() macro.
261
315
  */
262
- , s_chunk_size_start
263
- , s_chunk_size
264
- , s_chunk_size_almost_done
265
- , s_chunk_parameters
316
+
266
317
  , s_chunk_data
267
318
  , s_chunk_data_almost_done
268
319
  , s_chunk_data_done
269
320
 
270
321
  , s_body_identity
271
322
  , s_body_identity_eof
323
+
324
+ , s_message_done
272
325
  };
273
326
 
274
327
 
275
- #define PARSING_HEADER(state) (state <= s_headers_almost_done && 0 == (parser->flags & F_TRAILING))
328
+ #define PARSING_HEADER(state) (state <= s_headers_done)
276
329
 
277
330
 
278
331
  enum header_states
@@ -301,27 +354,59 @@ enum header_states
301
354
  , h_connection_close
302
355
  };
303
356
 
357
+ enum http_host_state
358
+ {
359
+ s_http_host_dead = 1
360
+ , s_http_userinfo_start
361
+ , s_http_userinfo
362
+ , s_http_host_start
363
+ , s_http_host_v6_start
364
+ , s_http_host
365
+ , s_http_host_v6
366
+ , s_http_host_v6_end
367
+ , s_http_host_port_start
368
+ , s_http_host_port
369
+ };
370
+
371
+ /* Macros for character classes; some depend on strict mode */
372
+ #define CR '\r'
373
+ #define LF '\n'
374
+ #define LOWER(c) (unsigned char)(c | 0x20)
375
+ #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
376
+ #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
377
+ #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
378
+ #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
379
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
380
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
381
+ (c) == ')')
382
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
383
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
384
+ (c) == '$' || (c) == ',')
304
385
 
305
- enum flags
306
- { F_CHUNKED = 1 << 0
307
- , F_CONNECTION_KEEP_ALIVE = 1 << 1
308
- , F_CONNECTION_CLOSE = 1 << 2
309
- , F_TRAILING = 1 << 3
310
- , F_UPGRADE = 1 << 4
311
- , F_SKIPBODY = 1 << 5
312
- };
313
-
314
-
315
- #define CR '\r'
316
- #define LF '\n'
317
- #define LOWER(c) (unsigned char)(c | 0x20)
386
+ #if HTTP_PARSER_STRICT
387
+ #define TOKEN(c) (tokens[(unsigned char)c])
388
+ #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
389
+ #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
390
+ #else
391
+ #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
392
+ #define IS_URL_CHAR(c) \
393
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
394
+ #define IS_HOST_CHAR(c) \
395
+ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
396
+ #endif
318
397
 
319
398
 
320
399
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
321
400
 
322
401
 
323
402
  #if HTTP_PARSER_STRICT
324
- # define STRICT_CHECK(cond) if (cond) goto error
403
+ # define STRICT_CHECK(cond) \
404
+ do { \
405
+ if (cond) { \
406
+ SET_ERRNO(HPE_STRICT); \
407
+ goto error; \
408
+ } \
409
+ } while (0)
325
410
  # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
326
411
  #else
327
412
  # define STRICT_CHECK(cond)
@@ -329,69 +414,260 @@ enum flags
329
414
  #endif
330
415
 
331
416
 
417
+ /* Map errno values to strings for human-readable output */
418
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
419
+ static struct {
420
+ const char *name;
421
+ const char *description;
422
+ } http_strerror_tab[] = {
423
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
424
+ };
425
+ #undef HTTP_STRERROR_GEN
426
+
427
+ int http_message_needs_eof(const http_parser *parser);
428
+
429
+ /* Our URL parser.
430
+ *
431
+ * This is designed to be shared by http_parser_execute() for URL validation,
432
+ * hence it has a state transition + byte-for-byte interface. In addition, it
433
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
434
+ * work of turning state transitions into URL components for its API.
435
+ *
436
+ * This function should only be invoked with non-space characters. It is
437
+ * assumed that the caller cares about (and can detect) the transition between
438
+ * URL and non-URL states by looking for these.
439
+ */
440
+ static enum state
441
+ parse_url_char(enum state s, const char ch)
442
+ {
443
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
444
+ return s_dead;
445
+ }
446
+
447
+ #if HTTP_PARSER_STRICT
448
+ if (ch == '\t' || ch == '\f') {
449
+ return s_dead;
450
+ }
451
+ #endif
452
+
453
+ switch (s) {
454
+ case s_req_spaces_before_url:
455
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
456
+ * All methods except CONNECT are followed by '/' or '*'.
457
+ */
458
+
459
+ if (ch == '/' || ch == '*') {
460
+ return s_req_path;
461
+ }
462
+
463
+ if (IS_ALPHA(ch)) {
464
+ return s_req_schema;
465
+ }
466
+
467
+ break;
468
+
469
+ case s_req_schema:
470
+ if (IS_ALPHA(ch)) {
471
+ return s;
472
+ }
473
+
474
+ if (ch == ':') {
475
+ return s_req_schema_slash;
476
+ }
477
+
478
+ break;
479
+
480
+ case s_req_schema_slash:
481
+ if (ch == '/') {
482
+ return s_req_schema_slash_slash;
483
+ }
484
+
485
+ break;
486
+
487
+ case s_req_schema_slash_slash:
488
+ if (ch == '/') {
489
+ return s_req_server_start;
490
+ }
491
+
492
+ break;
493
+
494
+ case s_req_server_with_at:
495
+ if (ch == '@') {
496
+ return s_dead;
497
+ }
498
+
499
+ /* FALLTHROUGH */
500
+ case s_req_server_start:
501
+ case s_req_server:
502
+ if (ch == '/') {
503
+ return s_req_path;
504
+ }
505
+
506
+ if (ch == '?') {
507
+ return s_req_query_string_start;
508
+ }
509
+
510
+ if (ch == '@') {
511
+ return s_req_server_with_at;
512
+ }
513
+
514
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
515
+ return s_req_server;
516
+ }
517
+
518
+ break;
519
+
520
+ case s_req_path:
521
+ if (IS_URL_CHAR(ch)) {
522
+ return s;
523
+ }
524
+
525
+ switch (ch) {
526
+ case '?':
527
+ return s_req_query_string_start;
528
+
529
+ case '#':
530
+ return s_req_fragment_start;
531
+ }
532
+
533
+ break;
534
+
535
+ case s_req_query_string_start:
536
+ case s_req_query_string:
537
+ if (IS_URL_CHAR(ch)) {
538
+ return s_req_query_string;
539
+ }
540
+
541
+ switch (ch) {
542
+ case '?':
543
+ /* allow extra '?' in query string */
544
+ return s_req_query_string;
545
+
546
+ case '#':
547
+ return s_req_fragment_start;
548
+ }
549
+
550
+ break;
551
+
552
+ case s_req_fragment_start:
553
+ if (IS_URL_CHAR(ch)) {
554
+ return s_req_fragment;
555
+ }
556
+
557
+ switch (ch) {
558
+ case '?':
559
+ return s_req_fragment;
560
+
561
+ case '#':
562
+ return s;
563
+ }
564
+
565
+ break;
566
+
567
+ case s_req_fragment:
568
+ if (IS_URL_CHAR(ch)) {
569
+ return s;
570
+ }
571
+
572
+ switch (ch) {
573
+ case '?':
574
+ case '#':
575
+ return s;
576
+ }
577
+
578
+ break;
579
+
580
+ default:
581
+ break;
582
+ }
583
+
584
+ /* We should never fall out of the switch above unless there's an error */
585
+ return s_dead;
586
+ }
587
+
332
588
  size_t http_parser_execute (http_parser *parser,
333
589
  const http_parser_settings *settings,
334
590
  const char *data,
335
- size_t len)
591
+ const size_t len)
336
592
  {
337
593
  char c, ch;
338
- const char *p = data, *pe;
339
- int64_t to_read;
594
+ int8_t unhex_val;
595
+ const char *p = data;
596
+ const char *header_field_mark = 0;
597
+ const char *header_value_mark = 0;
598
+ const char *url_mark = 0;
599
+ const char *body_mark = 0;
340
600
 
341
- enum state state = (enum state) parser->state;
342
- enum header_states header_state = (enum header_states) parser->header_state;
343
- uint64_t index = parser->index;
344
- uint64_t nread = parser->nread;
601
+ /* We're in an error state. Don't bother doing anything. */
602
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
603
+ return 0;
604
+ }
345
605
 
346
606
  if (len == 0) {
347
- if (state == s_body_identity_eof) {
348
- CALLBACK2(message_complete);
607
+ switch (parser->state) {
608
+ case s_body_identity_eof:
609
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
610
+ * we got paused.
611
+ */
612
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
613
+ return 0;
614
+
615
+ case s_dead:
616
+ case s_start_req_or_res:
617
+ case s_start_res:
618
+ case s_start_req:
619
+ return 0;
620
+
621
+ default:
622
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
623
+ return 1;
349
624
  }
350
- return 0;
351
625
  }
352
626
 
353
- /* technically we could combine all of these (except for url_mark) into one
354
- variable, saving stack space, but it seems more clear to have them
355
- separated. */
356
- const char *header_field_mark = 0;
357
- const char *header_value_mark = 0;
358
- const char *fragment_mark = 0;
359
- const char *query_string_mark = 0;
360
- const char *path_mark = 0;
361
- const char *url_mark = 0;
362
627
 
363
- if (state == s_header_field)
628
+ if (parser->state == s_header_field)
364
629
  header_field_mark = data;
365
- if (state == s_header_value)
630
+ if (parser->state == s_header_value)
366
631
  header_value_mark = data;
367
- if (state == s_req_fragment)
368
- fragment_mark = data;
369
- if (state == s_req_query_string)
370
- query_string_mark = data;
371
- if (state == s_req_path)
372
- path_mark = data;
373
- if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
374
- || state == s_req_schema_slash_slash || state == s_req_port
375
- || state == s_req_query_string_start || state == s_req_query_string
376
- || state == s_req_host
377
- || state == s_req_fragment_start || state == s_req_fragment)
632
+ switch (parser->state) {
633
+ case s_req_path:
634
+ case s_req_schema:
635
+ case s_req_schema_slash:
636
+ case s_req_schema_slash_slash:
637
+ case s_req_server_start:
638
+ case s_req_server:
639
+ case s_req_server_with_at:
640
+ case s_req_query_string_start:
641
+ case s_req_query_string:
642
+ case s_req_fragment_start:
643
+ case s_req_fragment:
378
644
  url_mark = data;
645
+ break;
646
+ }
379
647
 
380
- for (p=data, pe=data+len; p != pe; p++) {
648
+ for (p=data; likely(p != data + len); p++) {
381
649
  ch = *p;
382
650
 
383
- if (PARSING_HEADER(state)) {
384
- ++nread;
651
+ if ( likely(PARSING_HEADER(parser->state)) ) {
652
+ ++parser->nread;
385
653
  /* Buffer overflow attack */
386
- if (nread > HTTP_MAX_HEADER_SIZE) goto error;
654
+ if ( unlikely(parser->nread > HTTP_MAX_HEADER_SIZE) ) {
655
+ SET_ERRNO(HPE_HEADER_OVERFLOW);
656
+ goto error;
657
+ }
387
658
  }
388
659
 
389
- switch (state) {
660
+ reexecute_byte:
661
+ switch (parser->state) {
390
662
 
391
663
  case s_dead:
392
664
  /* this state is used after a 'Connection: close' message
393
665
  * the parser will error out if it reads another message
394
666
  */
667
+ if (ch == CR || ch == LF)
668
+ break;
669
+
670
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
395
671
  goto error;
396
672
 
397
673
  case s_start_req_or_res:
@@ -399,42 +675,46 @@ size_t http_parser_execute (http_parser *parser,
399
675
  if (ch == CR || ch == LF)
400
676
  break;
401
677
  parser->flags = 0;
402
- parser->content_length = -1;
678
+ parser->content_length = ULLONG_MAX;
403
679
 
404
- CALLBACK2(message_begin);
680
+ if (ch == 'H') {
681
+ parser->state = s_res_or_resp_H;
405
682
 
406
- if (ch == 'H')
407
- state = s_res_or_resp_H;
408
- else {
683
+ CALLBACK_NOTIFY(message_begin);
684
+ } else {
409
685
  parser->type = HTTP_REQUEST;
410
- goto start_req_method_assign;
686
+ parser->state = s_start_req;
687
+ goto reexecute_byte;
411
688
  }
689
+
412
690
  break;
413
691
  }
414
692
 
415
693
  case s_res_or_resp_H:
416
694
  if (ch == 'T') {
417
695
  parser->type = HTTP_RESPONSE;
418
- state = s_res_HT;
696
+ parser->state = s_res_HT;
419
697
  } else {
420
- if (ch != 'E') goto error;
698
+ if (ch != 'E') {
699
+ SET_ERRNO(HPE_INVALID_CONSTANT);
700
+ goto error;
701
+ }
702
+
421
703
  parser->type = HTTP_REQUEST;
422
704
  parser->method = HTTP_HEAD;
423
- index = 2;
424
- state = s_req_method;
705
+ parser->index = 2;
706
+ parser->state = s_req_method;
425
707
  }
426
708
  break;
427
709
 
428
710
  case s_start_res:
429
711
  {
430
712
  parser->flags = 0;
431
- parser->content_length = -1;
432
-
433
- CALLBACK2(message_begin);
713
+ parser->content_length = ULLONG_MAX;
434
714
 
435
715
  switch (ch) {
436
716
  case 'H':
437
- state = s_res_H;
717
+ parser->state = s_res_H;
438
718
  break;
439
719
 
440
720
  case CR:
@@ -442,105 +722,133 @@ size_t http_parser_execute (http_parser *parser,
442
722
  break;
443
723
 
444
724
  default:
725
+ SET_ERRNO(HPE_INVALID_CONSTANT);
445
726
  goto error;
446
727
  }
728
+
729
+ CALLBACK_NOTIFY(message_begin);
447
730
  break;
448
731
  }
449
732
 
450
733
  case s_res_H:
451
734
  STRICT_CHECK(ch != 'T');
452
- state = s_res_HT;
735
+ parser->state = s_res_HT;
453
736
  break;
454
737
 
455
738
  case s_res_HT:
456
739
  STRICT_CHECK(ch != 'T');
457
- state = s_res_HTT;
740
+ parser->state = s_res_HTT;
458
741
  break;
459
742
 
460
743
  case s_res_HTT:
461
744
  STRICT_CHECK(ch != 'P');
462
- state = s_res_HTTP;
745
+ parser->state = s_res_HTTP;
463
746
  break;
464
747
 
465
748
  case s_res_HTTP:
466
749
  STRICT_CHECK(ch != '/');
467
- state = s_res_first_http_major;
750
+ parser->state = s_res_first_http_major;
468
751
  break;
469
752
 
470
753
  case s_res_first_http_major:
471
- if (ch < '1' || ch > '9') goto error;
754
+ if (ch < '0' || ch > '9') {
755
+ SET_ERRNO(HPE_INVALID_VERSION);
756
+ goto error;
757
+ }
758
+
472
759
  parser->http_major = ch - '0';
473
- state = s_res_http_major;
760
+ parser->state = s_res_http_major;
474
761
  break;
475
762
 
476
763
  /* major HTTP version or dot */
477
764
  case s_res_http_major:
478
765
  {
479
766
  if (ch == '.') {
480
- state = s_res_first_http_minor;
767
+ parser->state = s_res_first_http_minor;
481
768
  break;
482
769
  }
483
770
 
484
- if (ch < '0' || ch > '9') goto error;
771
+ if (!IS_NUM(ch)) {
772
+ SET_ERRNO(HPE_INVALID_VERSION);
773
+ goto error;
774
+ }
485
775
 
486
776
  parser->http_major *= 10;
487
777
  parser->http_major += ch - '0';
488
778
 
489
- if (parser->http_major > 999) goto error;
779
+ if (parser->http_major > 999) {
780
+ SET_ERRNO(HPE_INVALID_VERSION);
781
+ goto error;
782
+ }
783
+
490
784
  break;
491
785
  }
492
786
 
493
787
  /* first digit of minor HTTP version */
494
788
  case s_res_first_http_minor:
495
- if (ch < '0' || ch > '9') goto error;
789
+ if (!IS_NUM(ch)) {
790
+ SET_ERRNO(HPE_INVALID_VERSION);
791
+ goto error;
792
+ }
793
+
496
794
  parser->http_minor = ch - '0';
497
- state = s_res_http_minor;
795
+ parser->state = s_res_http_minor;
498
796
  break;
499
797
 
500
798
  /* minor HTTP version or the space that ends the version field */
501
799
  case s_res_http_minor:
502
800
  {
503
801
  if (ch == ' ') {
504
- state = s_res_first_status_code;
802
+ parser->state = s_res_first_status_code;
505
803
  break;
506
804
  }
507
805
 
508
- if (ch < '0' || ch > '9') goto error;
806
+ if (!IS_NUM(ch)) {
807
+ SET_ERRNO(HPE_INVALID_VERSION);
808
+ goto error;
809
+ }
509
810
 
510
811
  parser->http_minor *= 10;
511
812
  parser->http_minor += ch - '0';
512
813
 
513
- if (parser->http_minor > 999) goto error;
814
+ if (parser->http_minor > 999) {
815
+ SET_ERRNO(HPE_INVALID_VERSION);
816
+ goto error;
817
+ }
818
+
514
819
  break;
515
820
  }
516
821
 
517
822
  case s_res_first_status_code:
518
823
  {
519
- if (ch < '0' || ch > '9') {
824
+ if (!IS_NUM(ch)) {
520
825
  if (ch == ' ') {
521
826
  break;
522
827
  }
828
+
829
+ SET_ERRNO(HPE_INVALID_STATUS);
523
830
  goto error;
524
831
  }
525
832
  parser->status_code = ch - '0';
526
- state = s_res_status_code;
833
+ parser->state = s_res_status_code;
527
834
  break;
528
835
  }
529
836
 
530
837
  case s_res_status_code:
531
838
  {
532
- if (ch < '0' || ch > '9') {
839
+ if (!IS_NUM(ch)) {
533
840
  switch (ch) {
534
841
  case ' ':
535
- state = s_res_status;
842
+ parser->state = s_res_status;
536
843
  break;
537
844
  case CR:
538
- state = s_res_line_almost_done;
845
+ parser->state = s_res_line_almost_done;
539
846
  break;
540
847
  case LF:
541
- state = s_header_field_start;
848
+ parser->state = s_header_field_start;
542
849
  break;
543
850
  default:
851
+ SET_ERRNO(HPE_INVALID_STATUS);
544
852
  goto error;
545
853
  }
546
854
  break;
@@ -549,7 +857,11 @@ size_t http_parser_execute (http_parser *parser,
549
857
  parser->status_code *= 10;
550
858
  parser->status_code += ch - '0';
551
859
 
552
- if (parser->status_code > 999) goto error;
860
+ if (parser->status_code > 999) {
861
+ SET_ERRNO(HPE_INVALID_STATUS);
862
+ goto error;
863
+ }
864
+
553
865
  break;
554
866
  }
555
867
 
@@ -557,19 +869,19 @@ size_t http_parser_execute (http_parser *parser,
557
869
  /* the human readable status. e.g. "NOT FOUND"
558
870
  * we are not humans so just ignore this */
559
871
  if (ch == CR) {
560
- state = s_res_line_almost_done;
872
+ parser->state = s_res_line_almost_done;
561
873
  break;
562
874
  }
563
875
 
564
876
  if (ch == LF) {
565
- state = s_header_field_start;
877
+ parser->state = s_header_field_start;
566
878
  break;
567
879
  }
568
880
  break;
569
881
 
570
882
  case s_res_line_almost_done:
571
883
  STRICT_CHECK(ch != LF);
572
- state = s_header_field_start;
884
+ parser->state = s_header_field_start;
573
885
  break;
574
886
 
575
887
  case s_start_req:
@@ -577,335 +889,177 @@ size_t http_parser_execute (http_parser *parser,
577
889
  if (ch == CR || ch == LF)
578
890
  break;
579
891
  parser->flags = 0;
580
- parser->content_length = -1;
892
+ parser->content_length = ULLONG_MAX;
581
893
 
582
- CALLBACK2(message_begin);
583
-
584
- if (ch < 'A' || 'Z' < ch) goto error;
894
+ if (!IS_ALPHA(ch)) {
895
+ SET_ERRNO(HPE_INVALID_METHOD);
896
+ goto error;
897
+ }
585
898
 
586
- start_req_method_assign:
587
899
  parser->method = (enum http_method) 0;
588
- index = 1;
900
+ parser->index = 1;
589
901
  switch (ch) {
590
902
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
591
903
  case 'D': parser->method = HTTP_DELETE; break;
592
904
  case 'G': parser->method = HTTP_GET; break;
593
905
  case 'H': parser->method = HTTP_HEAD; break;
594
906
  case 'L': parser->method = HTTP_LOCK; break;
595
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE */ break;
907
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
908
+ case 'N': parser->method = HTTP_NOTIFY; break;
596
909
  case 'O': parser->method = HTTP_OPTIONS; break;
597
- case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
910
+ case 'P': parser->method = HTTP_POST;
911
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
912
+ break;
598
913
  case 'R': parser->method = HTTP_REPORT; break;
914
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
599
915
  case 'T': parser->method = HTTP_TRACE; break;
600
- case 'U': parser->method = HTTP_UNLOCK; break;
601
- default: goto error;
916
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
917
+ default:
918
+ SET_ERRNO(HPE_INVALID_METHOD);
919
+ goto error;
602
920
  }
603
- state = s_req_method;
921
+ parser->state = s_req_method;
922
+
923
+ CALLBACK_NOTIFY(message_begin);
924
+
604
925
  break;
605
926
  }
606
927
 
607
928
  case s_req_method:
608
929
  {
609
- if (ch == '\0')
930
+ const char *matcher;
931
+ if (ch == '\0') {
932
+ SET_ERRNO(HPE_INVALID_METHOD);
610
933
  goto error;
934
+ }
611
935
 
612
- const char *matcher = method_strings[parser->method];
613
- if (ch == ' ' && matcher[index] == '\0') {
614
- state = s_req_spaces_before_url;
615
- } else if (ch == matcher[index]) {
616
- ; // nada
936
+ matcher = method_strings[parser->method];
937
+ if (ch == ' ' && matcher[parser->index] == '\0') {
938
+ parser->state = s_req_spaces_before_url;
939
+ } else if (ch == matcher[parser->index]) {
940
+ ; /* nada */
617
941
  } else if (parser->method == HTTP_CONNECT) {
618
- if (index == 1 && ch == 'H') {
942
+ if (parser->index == 1 && ch == 'H') {
619
943
  parser->method = HTTP_CHECKOUT;
620
- } else if (index == 2 && ch == 'P') {
944
+ } else if (parser->index == 2 && ch == 'P') {
621
945
  parser->method = HTTP_COPY;
946
+ } else {
947
+ goto error;
622
948
  }
623
949
  } else if (parser->method == HTTP_MKCOL) {
624
- if (index == 1 && ch == 'O') {
950
+ if (parser->index == 1 && ch == 'O') {
625
951
  parser->method = HTTP_MOVE;
626
- } else if (index == 1 && ch == 'E') {
952
+ } else if (parser->index == 1 && ch == 'E') {
627
953
  parser->method = HTTP_MERGE;
628
- } else if (index == 2 && ch == 'A') {
954
+ } else if (parser->index == 1 && ch == '-') {
955
+ parser->method = HTTP_MSEARCH;
956
+ } else if (parser->index == 2 && ch == 'A') {
629
957
  parser->method = HTTP_MKACTIVITY;
958
+ } else {
959
+ goto error;
960
+ }
961
+ } else if (parser->method == HTTP_SUBSCRIBE) {
962
+ if (parser->index == 1 && ch == 'E') {
963
+ parser->method = HTTP_SEARCH;
964
+ } else {
965
+ goto error;
966
+ }
967
+ } else if (parser->index == 1 && parser->method == HTTP_POST) {
968
+ if (ch == 'R') {
969
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
970
+ } else if (ch == 'U') {
971
+ parser->method = HTTP_PUT; /* or HTTP_PURGE */
972
+ } else if (ch == 'A') {
973
+ parser->method = HTTP_PATCH;
974
+ } else {
975
+ goto error;
976
+ }
977
+ } else if (parser->index == 2) {
978
+ if (parser->method == HTTP_PUT) {
979
+ if (ch == 'R') parser->method = HTTP_PURGE;
980
+ } else if (parser->method == HTTP_UNLOCK) {
981
+ if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
630
982
  }
631
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
632
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
633
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
634
- parser->method = HTTP_PUT;
635
- } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
983
+ } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
636
984
  parser->method = HTTP_PROPPATCH;
637
985
  } else {
986
+ SET_ERRNO(HPE_INVALID_METHOD);
638
987
  goto error;
639
988
  }
640
989
 
641
- ++index;
990
+ ++parser->index;
642
991
  break;
643
992
  }
993
+
644
994
  case s_req_spaces_before_url:
645
995
  {
646
996
  if (ch == ' ') break;
647
997
 
648
- if (ch == '/') {
649
- MARK(url);
650
- MARK(path);
651
- state = s_req_path;
652
- break;
998
+ MARK(url);
999
+ if (parser->method == HTTP_CONNECT) {
1000
+ parser->state = s_req_server_start;
653
1001
  }
654
1002
 
655
- c = LOWER(ch);
656
-
657
- if (c >= 'a' && c <= 'z') {
658
- MARK(url);
659
- state = s_req_schema;
660
- break;
1003
+ parser->state = parse_url_char((enum state)parser->state, ch);
1004
+ if (parser->state == s_dead) {
1005
+ SET_ERRNO(HPE_INVALID_URL);
1006
+ goto error;
661
1007
  }
662
1008
 
663
- goto error;
1009
+ break;
664
1010
  }
665
1011
 
666
1012
  case s_req_schema:
667
- {
668
- c = LOWER(ch);
669
-
670
- if (c >= 'a' && c <= 'z') break;
671
-
672
- if (ch == ':') {
673
- state = s_req_schema_slash;
674
- break;
675
- } else if (ch == '.') {
676
- state = s_req_host;
677
- break;
678
- }
679
-
680
- goto error;
681
- }
682
-
683
1013
  case s_req_schema_slash:
684
- STRICT_CHECK(ch != '/');
685
- state = s_req_schema_slash_slash;
686
- break;
687
-
688
1014
  case s_req_schema_slash_slash:
689
- STRICT_CHECK(ch != '/');
690
- state = s_req_host;
691
- break;
692
-
693
- case s_req_host:
694
- {
695
- c = LOWER(ch);
696
- if (c >= 'a' && c <= 'z') break;
697
- if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
698
- switch (ch) {
699
- case ':':
700
- state = s_req_port;
701
- break;
702
- case '/':
703
- MARK(path);
704
- state = s_req_path;
705
- break;
706
- case ' ':
707
- /* The request line looks like:
708
- * "GET http://foo.bar.com HTTP/1.1"
709
- * That is, there is no path.
710
- */
711
- CALLBACK(url);
712
- state = s_req_http_start;
713
- break;
714
- default:
715
- goto error;
716
- }
717
- break;
718
- }
719
-
720
- case s_req_port:
721
- {
722
- if (ch >= '0' && ch <= '9') break;
723
- switch (ch) {
724
- case '/':
725
- MARK(path);
726
- state = s_req_path;
727
- break;
728
- case ' ':
729
- /* The request line looks like:
730
- * "GET http://foo.bar.com:1234 HTTP/1.1"
731
- * That is, there is no path.
732
- */
733
- CALLBACK(url);
734
- state = s_req_http_start;
735
- break;
736
- default:
737
- goto error;
738
- }
739
- break;
740
- }
741
-
742
- case s_req_path:
1015
+ case s_req_server_start:
743
1016
  {
744
- if (normal_url_char[(unsigned char)ch]) break;
745
-
746
1017
  switch (ch) {
1018
+ /* No whitespace allowed here */
747
1019
  case ' ':
748
- CALLBACK(url);
749
- CALLBACK(path);
750
- state = s_req_http_start;
751
- break;
752
1020
  case CR:
753
- CALLBACK(url);
754
- CALLBACK(path);
755
- parser->http_minor = 9;
756
- state = s_req_line_almost_done;
757
- break;
758
1021
  case LF:
759
- CALLBACK(url);
760
- CALLBACK(path);
761
- parser->http_minor = 9;
762
- state = s_header_field_start;
763
- break;
764
- case '?':
765
- CALLBACK(path);
766
- state = s_req_query_string_start;
767
- break;
768
- case '#':
769
- CALLBACK(path);
770
- state = s_req_fragment_start;
771
- break;
772
- default:
1022
+ SET_ERRNO(HPE_INVALID_URL);
773
1023
  goto error;
774
- }
775
- break;
776
- }
777
-
778
- case s_req_query_string_start:
779
- {
780
- if (normal_url_char[(unsigned char)ch]) {
781
- MARK(query_string);
782
- state = s_req_query_string;
783
- break;
784
- }
785
-
786
- switch (ch) {
787
- case '?':
788
- break; // XXX ignore extra '?' ... is this right?
789
- case ' ':
790
- CALLBACK(url);
791
- state = s_req_http_start;
792
- break;
793
- case CR:
794
- CALLBACK(url);
795
- parser->http_minor = 9;
796
- state = s_req_line_almost_done;
797
- break;
798
- case LF:
799
- CALLBACK(url);
800
- parser->http_minor = 9;
801
- state = s_header_field_start;
802
- break;
803
- case '#':
804
- state = s_req_fragment_start;
805
- break;
806
1024
  default:
807
- goto error;
1025
+ parser->state = parse_url_char((enum state)parser->state, ch);
1026
+ if (parser->state == s_dead) {
1027
+ SET_ERRNO(HPE_INVALID_URL);
1028
+ goto error;
1029
+ }
808
1030
  }
809
- break;
810
- }
811
1031
 
812
- case s_req_query_string:
813
- {
814
- if (normal_url_char[(unsigned char)ch]) break;
815
-
816
- switch (ch) {
817
- case '?':
818
- // allow extra '?' in query string
819
- break;
820
- case ' ':
821
- CALLBACK(url);
822
- CALLBACK(query_string);
823
- state = s_req_http_start;
824
- break;
825
- case CR:
826
- CALLBACK(url);
827
- CALLBACK(query_string);
828
- parser->http_minor = 9;
829
- state = s_req_line_almost_done;
830
- break;
831
- case LF:
832
- CALLBACK(url);
833
- CALLBACK(query_string);
834
- parser->http_minor = 9;
835
- state = s_header_field_start;
836
- break;
837
- case '#':
838
- CALLBACK(query_string);
839
- state = s_req_fragment_start;
840
- break;
841
- default:
842
- goto error;
843
- }
844
1032
  break;
845
1033
  }
846
1034
 
1035
+ case s_req_server:
1036
+ case s_req_server_with_at:
1037
+ case s_req_path:
1038
+ case s_req_query_string_start:
1039
+ case s_req_query_string:
847
1040
  case s_req_fragment_start:
848
- {
849
- if (normal_url_char[(unsigned char)ch]) {
850
- MARK(fragment);
851
- state = s_req_fragment;
852
- break;
853
- }
854
-
855
- switch (ch) {
856
- case ' ':
857
- CALLBACK(url);
858
- state = s_req_http_start;
859
- break;
860
- case CR:
861
- CALLBACK(url);
862
- parser->http_minor = 9;
863
- state = s_req_line_almost_done;
864
- break;
865
- case LF:
866
- CALLBACK(url);
867
- parser->http_minor = 9;
868
- state = s_header_field_start;
869
- break;
870
- case '?':
871
- MARK(fragment);
872
- state = s_req_fragment;
873
- break;
874
- case '#':
875
- break;
876
- default:
877
- goto error;
878
- }
879
- break;
880
- }
881
-
882
1041
  case s_req_fragment:
883
1042
  {
884
- if (normal_url_char[(unsigned char)ch]) break;
885
-
886
1043
  switch (ch) {
887
1044
  case ' ':
888
- CALLBACK(url);
889
- CALLBACK(fragment);
890
- state = s_req_http_start;
1045
+ parser->state = s_req_http_start;
1046
+ CALLBACK_DATA(url);
891
1047
  break;
892
1048
  case CR:
893
- CALLBACK(url);
894
- CALLBACK(fragment);
895
- parser->http_minor = 9;
896
- state = s_req_line_almost_done;
897
- break;
898
1049
  case LF:
899
- CALLBACK(url);
900
- CALLBACK(fragment);
1050
+ parser->http_major = 0;
901
1051
  parser->http_minor = 9;
902
- state = s_header_field_start;
903
- break;
904
- case '?':
905
- case '#':
1052
+ parser->state = (ch == CR) ?
1053
+ s_req_line_almost_done :
1054
+ s_header_field_start;
1055
+ CALLBACK_DATA(url);
906
1056
  break;
907
1057
  default:
908
- goto error;
1058
+ parser->state = parse_url_char((enum state)parser->state, ch);
1059
+ if (parser->state == s_dead) {
1060
+ SET_ERRNO(HPE_INVALID_URL);
1061
+ goto error;
1062
+ }
909
1063
  }
910
1064
  break;
911
1065
  }
@@ -913,143 +1067,170 @@ size_t http_parser_execute (http_parser *parser,
913
1067
  case s_req_http_start:
914
1068
  switch (ch) {
915
1069
  case 'H':
916
- state = s_req_http_H;
1070
+ parser->state = s_req_http_H;
917
1071
  break;
918
1072
  case ' ':
919
1073
  break;
920
1074
  default:
1075
+ SET_ERRNO(HPE_INVALID_CONSTANT);
921
1076
  goto error;
922
1077
  }
923
1078
  break;
924
1079
 
925
1080
  case s_req_http_H:
926
1081
  STRICT_CHECK(ch != 'T');
927
- state = s_req_http_HT;
1082
+ parser->state = s_req_http_HT;
928
1083
  break;
929
1084
 
930
1085
  case s_req_http_HT:
931
1086
  STRICT_CHECK(ch != 'T');
932
- state = s_req_http_HTT;
1087
+ parser->state = s_req_http_HTT;
933
1088
  break;
934
1089
 
935
1090
  case s_req_http_HTT:
936
1091
  STRICT_CHECK(ch != 'P');
937
- state = s_req_http_HTTP;
1092
+ parser->state = s_req_http_HTTP;
938
1093
  break;
939
1094
 
940
1095
  case s_req_http_HTTP:
941
1096
  STRICT_CHECK(ch != '/');
942
- state = s_req_first_http_major;
1097
+ parser->state = s_req_first_http_major;
943
1098
  break;
944
1099
 
945
1100
  /* first digit of major HTTP version */
946
1101
  case s_req_first_http_major:
947
- if (ch < '1' || ch > '9') goto error;
1102
+ if (ch < '1' || ch > '9') {
1103
+ SET_ERRNO(HPE_INVALID_VERSION);
1104
+ goto error;
1105
+ }
1106
+
948
1107
  parser->http_major = ch - '0';
949
- state = s_req_http_major;
1108
+ parser->state = s_req_http_major;
950
1109
  break;
951
1110
 
952
1111
  /* major HTTP version or dot */
953
1112
  case s_req_http_major:
954
1113
  {
955
1114
  if (ch == '.') {
956
- state = s_req_first_http_minor;
1115
+ parser->state = s_req_first_http_minor;
957
1116
  break;
958
1117
  }
959
1118
 
960
- if (ch < '0' || ch > '9') goto error;
1119
+ if (!IS_NUM(ch)) {
1120
+ SET_ERRNO(HPE_INVALID_VERSION);
1121
+ goto error;
1122
+ }
961
1123
 
962
1124
  parser->http_major *= 10;
963
1125
  parser->http_major += ch - '0';
964
1126
 
965
- if (parser->http_major > 999) goto error;
1127
+ if (parser->http_major > 999) {
1128
+ SET_ERRNO(HPE_INVALID_VERSION);
1129
+ goto error;
1130
+ }
1131
+
966
1132
  break;
967
1133
  }
968
1134
 
969
1135
  /* first digit of minor HTTP version */
970
1136
  case s_req_first_http_minor:
971
- if (ch < '0' || ch > '9') goto error;
1137
+ if (!IS_NUM(ch)) {
1138
+ SET_ERRNO(HPE_INVALID_VERSION);
1139
+ goto error;
1140
+ }
1141
+
972
1142
  parser->http_minor = ch - '0';
973
- state = s_req_http_minor;
1143
+ parser->state = s_req_http_minor;
974
1144
  break;
975
1145
 
976
1146
  /* minor HTTP version or end of request line */
977
1147
  case s_req_http_minor:
978
1148
  {
979
1149
  if (ch == CR) {
980
- state = s_req_line_almost_done;
1150
+ parser->state = s_req_line_almost_done;
981
1151
  break;
982
1152
  }
983
1153
 
984
1154
  if (ch == LF) {
985
- state = s_header_field_start;
1155
+ parser->state = s_header_field_start;
986
1156
  break;
987
1157
  }
988
1158
 
989
1159
  /* XXX allow spaces after digit? */
990
1160
 
991
- if (ch < '0' || ch > '9') goto error;
1161
+ if (!IS_NUM(ch)) {
1162
+ SET_ERRNO(HPE_INVALID_VERSION);
1163
+ goto error;
1164
+ }
992
1165
 
993
1166
  parser->http_minor *= 10;
994
1167
  parser->http_minor += ch - '0';
995
1168
 
996
- if (parser->http_minor > 999) goto error;
1169
+ if (parser->http_minor > 999) {
1170
+ SET_ERRNO(HPE_INVALID_VERSION);
1171
+ goto error;
1172
+ }
1173
+
997
1174
  break;
998
1175
  }
999
1176
 
1000
1177
  /* end of request line */
1001
1178
  case s_req_line_almost_done:
1002
1179
  {
1003
- if (ch != LF) goto error;
1004
- state = s_header_field_start;
1180
+ if (ch != LF) {
1181
+ SET_ERRNO(HPE_LF_EXPECTED);
1182
+ goto error;
1183
+ }
1184
+
1185
+ parser->state = s_header_field_start;
1005
1186
  break;
1006
1187
  }
1007
1188
 
1008
1189
  case s_header_field_start:
1009
1190
  {
1010
1191
  if (ch == CR) {
1011
- state = s_headers_almost_done;
1192
+ parser->state = s_headers_almost_done;
1012
1193
  break;
1013
1194
  }
1014
1195
 
1015
1196
  if (ch == LF) {
1016
1197
  /* they might be just sending \n instead of \r\n so this would be
1017
1198
  * the second \n to denote the end of headers*/
1018
- state = s_headers_almost_done;
1019
- goto headers_almost_done;
1199
+ parser->state = s_headers_almost_done;
1200
+ goto reexecute_byte;
1020
1201
  }
1021
1202
 
1022
- if (parser->maybe_ml && (ch == ' '|| ch == '\t')) goto s_header_value_start_;
1023
- c = LOWER(ch);
1024
-
1025
- if (c < 'a' || 'z' < c) goto error;
1026
-
1027
- parser->maybe_ml = 0;
1203
+ c = TOKEN(ch);
1204
+
1205
+ if (!c) {
1206
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1207
+ goto error;
1208
+ }
1028
1209
 
1029
1210
  MARK(header_field);
1030
1211
 
1031
- index = 0;
1032
- state = s_header_field;
1212
+ parser->index = 0;
1213
+ parser->state = s_header_field;
1033
1214
 
1034
1215
  switch (c) {
1035
1216
  case 'c':
1036
- header_state = h_C;
1217
+ parser->header_state = h_C;
1037
1218
  break;
1038
1219
 
1039
1220
  case 'p':
1040
- header_state = h_matching_proxy_connection;
1221
+ parser->header_state = h_matching_proxy_connection;
1041
1222
  break;
1042
1223
 
1043
1224
  case 't':
1044
- header_state = h_matching_transfer_encoding;
1225
+ parser->header_state = h_matching_transfer_encoding;
1045
1226
  break;
1046
1227
 
1047
1228
  case 'u':
1048
- header_state = h_matching_upgrade;
1229
+ parser->header_state = h_matching_upgrade;
1049
1230
  break;
1050
1231
 
1051
1232
  default:
1052
- header_state = h_general;
1233
+ parser->header_state = h_general;
1053
1234
  break;
1054
1235
  }
1055
1236
  break;
@@ -1057,34 +1238,34 @@ size_t http_parser_execute (http_parser *parser,
1057
1238
 
1058
1239
  case s_header_field:
1059
1240
  {
1060
- c = acceptable_header[(unsigned char)ch];
1241
+ c = TOKEN(ch);
1061
1242
 
1062
1243
  if (c) {
1063
- switch (header_state) {
1244
+ switch (parser->header_state) {
1064
1245
  case h_general:
1065
1246
  break;
1066
1247
 
1067
1248
  case h_C:
1068
- index++;
1069
- header_state = (c == 'o' ? h_CO : h_general);
1249
+ parser->index++;
1250
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1070
1251
  break;
1071
1252
 
1072
1253
  case h_CO:
1073
- index++;
1074
- header_state = (c == 'n' ? h_CON : h_general);
1254
+ parser->index++;
1255
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1075
1256
  break;
1076
1257
 
1077
1258
  case h_CON:
1078
- index++;
1259
+ parser->index++;
1079
1260
  switch (c) {
1080
1261
  case 'n':
1081
- header_state = h_matching_connection;
1262
+ parser->header_state = h_matching_connection;
1082
1263
  break;
1083
1264
  case 't':
1084
- header_state = h_matching_content_length;
1265
+ parser->header_state = h_matching_content_length;
1085
1266
  break;
1086
1267
  default:
1087
- header_state = h_general;
1268
+ parser->header_state = h_general;
1088
1269
  break;
1089
1270
  }
1090
1271
  break;
@@ -1092,60 +1273,60 @@ size_t http_parser_execute (http_parser *parser,
1092
1273
  /* connection */
1093
1274
 
1094
1275
  case h_matching_connection:
1095
- index++;
1096
- if (index > sizeof(CONNECTION)-1
1097
- || c != CONNECTION[index]) {
1098
- header_state = h_general;
1099
- } else if (index == sizeof(CONNECTION)-2) {
1100
- header_state = h_connection;
1276
+ parser->index++;
1277
+ if (parser->index > sizeof(CONNECTION)-1
1278
+ || c != CONNECTION[parser->index]) {
1279
+ parser->header_state = h_general;
1280
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1281
+ parser->header_state = h_connection;
1101
1282
  }
1102
1283
  break;
1103
1284
 
1104
1285
  /* proxy-connection */
1105
1286
 
1106
1287
  case h_matching_proxy_connection:
1107
- index++;
1108
- if (index > sizeof(PROXY_CONNECTION)-1
1109
- || c != PROXY_CONNECTION[index]) {
1110
- header_state = h_general;
1111
- } else if (index == sizeof(PROXY_CONNECTION)-2) {
1112
- header_state = h_connection;
1288
+ parser->index++;
1289
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1290
+ || c != PROXY_CONNECTION[parser->index]) {
1291
+ parser->header_state = h_general;
1292
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1293
+ parser->header_state = h_connection;
1113
1294
  }
1114
1295
  break;
1115
1296
 
1116
1297
  /* content-length */
1117
1298
 
1118
1299
  case h_matching_content_length:
1119
- index++;
1120
- if (index > sizeof(CONTENT_LENGTH)-1
1121
- || c != CONTENT_LENGTH[index]) {
1122
- header_state = h_general;
1123
- } else if (index == sizeof(CONTENT_LENGTH)-2) {
1124
- header_state = h_content_length;
1300
+ parser->index++;
1301
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1302
+ || c != CONTENT_LENGTH[parser->index]) {
1303
+ parser->header_state = h_general;
1304
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1305
+ parser->header_state = h_content_length;
1125
1306
  }
1126
1307
  break;
1127
1308
 
1128
1309
  /* transfer-encoding */
1129
1310
 
1130
1311
  case h_matching_transfer_encoding:
1131
- index++;
1132
- if (index > sizeof(TRANSFER_ENCODING)-1
1133
- || c != TRANSFER_ENCODING[index]) {
1134
- header_state = h_general;
1135
- } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1136
- header_state = h_transfer_encoding;
1312
+ parser->index++;
1313
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1314
+ || c != TRANSFER_ENCODING[parser->index]) {
1315
+ parser->header_state = h_general;
1316
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1317
+ parser->header_state = h_transfer_encoding;
1137
1318
  }
1138
1319
  break;
1139
1320
 
1140
1321
  /* upgrade */
1141
1322
 
1142
1323
  case h_matching_upgrade:
1143
- index++;
1144
- if (index > sizeof(UPGRADE)-1
1145
- || c != UPGRADE[index]) {
1146
- header_state = h_general;
1147
- } else if (index == sizeof(UPGRADE)-2) {
1148
- header_state = h_upgrade;
1324
+ parser->index++;
1325
+ if (parser->index > sizeof(UPGRADE)-1
1326
+ || c != UPGRADE[parser->index]) {
1327
+ parser->header_state = h_general;
1328
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1329
+ parser->header_state = h_upgrade;
1149
1330
  }
1150
1331
  break;
1151
1332
 
@@ -1153,7 +1334,7 @@ size_t http_parser_execute (http_parser *parser,
1153
1334
  case h_content_length:
1154
1335
  case h_transfer_encoding:
1155
1336
  case h_upgrade:
1156
- if (ch != ' ') header_state = h_general;
1337
+ if (ch != ' ') parser->header_state = h_general;
1157
1338
  break;
1158
1339
 
1159
1340
  default:
@@ -1164,92 +1345,89 @@ size_t http_parser_execute (http_parser *parser,
1164
1345
  }
1165
1346
 
1166
1347
  if (ch == ':') {
1167
- CALLBACK(header_field);
1168
- state = s_header_value_start;
1348
+ parser->state = s_header_value_start;
1349
+ CALLBACK_DATA(header_field);
1169
1350
  break;
1170
1351
  }
1171
1352
 
1172
1353
  if (ch == CR) {
1173
- state = s_header_almost_done;
1174
- CALLBACK(header_field);
1354
+ parser->state = s_header_almost_done;
1355
+ CALLBACK_DATA(header_field);
1175
1356
  break;
1176
1357
  }
1177
1358
 
1178
1359
  if (ch == LF) {
1179
- CALLBACK(header_field);
1180
- state = s_header_field_start;
1360
+ parser->state = s_header_field_start;
1361
+ CALLBACK_DATA(header_field);
1181
1362
  break;
1182
1363
  }
1183
1364
 
1365
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1184
1366
  goto error;
1185
1367
  }
1186
1368
 
1187
1369
  case s_header_value_start:
1188
- s_header_value_start_:
1189
1370
  {
1190
- if (!parser->maybe_ml && (ch == ' ' || ch == '\t')) break;
1371
+ if (ch == ' ' || ch == '\t') break;
1191
1372
 
1192
1373
  MARK(header_value);
1193
1374
 
1194
- state = s_header_value;
1195
- index = 0;
1196
-
1197
- c = acceptable_header[(unsigned char)ch];
1375
+ parser->state = s_header_value;
1376
+ parser->index = 0;
1198
1377
 
1199
- if (!c) {
1200
- if (ch == CR) {
1201
- CALLBACK(header_value);
1202
- header_state = h_general;
1203
- state = s_header_almost_done;
1204
- parser->maybe_ml = 1;
1205
- break;
1206
- }
1207
-
1208
- if (ch == LF) {
1209
- CALLBACK(header_value);
1210
- state = s_header_field_start;
1211
- parser->maybe_ml = 1;
1212
- break;
1213
- }
1378
+ if (ch == CR) {
1379
+ parser->header_state = h_general;
1380
+ parser->state = s_header_almost_done;
1381
+ CALLBACK_DATA(header_value);
1382
+ break;
1383
+ }
1214
1384
 
1215
- header_state = h_general;
1385
+ if (ch == LF) {
1386
+ parser->state = s_header_field_start;
1387
+ CALLBACK_DATA(header_value);
1216
1388
  break;
1217
1389
  }
1218
1390
 
1219
- switch (header_state) {
1391
+ c = LOWER(ch);
1392
+
1393
+ switch (parser->header_state) {
1220
1394
  case h_upgrade:
1221
1395
  parser->flags |= F_UPGRADE;
1222
- header_state = h_general;
1396
+ parser->header_state = h_general;
1223
1397
  break;
1224
1398
 
1225
1399
  case h_transfer_encoding:
1226
1400
  /* looking for 'Transfer-Encoding: chunked' */
1227
1401
  if ('c' == c) {
1228
- header_state = h_matching_transfer_encoding_chunked;
1402
+ parser->header_state = h_matching_transfer_encoding_chunked;
1229
1403
  } else {
1230
- header_state = h_general;
1404
+ parser->header_state = h_general;
1231
1405
  }
1232
1406
  break;
1233
1407
 
1234
1408
  case h_content_length:
1235
- if (ch < '0' || ch > '9') goto error;
1409
+ if (!IS_NUM(ch)) {
1410
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1411
+ goto error;
1412
+ }
1413
+
1236
1414
  parser->content_length = ch - '0';
1237
1415
  break;
1238
1416
 
1239
1417
  case h_connection:
1240
1418
  /* looking for 'Connection: keep-alive' */
1241
1419
  if (c == 'k') {
1242
- header_state = h_matching_connection_keep_alive;
1420
+ parser->header_state = h_matching_connection_keep_alive;
1243
1421
  /* looking for 'Connection: close' */
1244
1422
  } else if (c == 'c') {
1245
- header_state = h_matching_connection_close;
1423
+ parser->header_state = h_matching_connection_close;
1246
1424
  } else {
1247
- header_state = h_general;
1425
+ parser->header_state = h_general;
1248
1426
  }
1249
1427
  break;
1250
1428
 
1251
1429
  default:
1252
- header_state = h_general;
1430
+ parser->header_state = h_general;
1253
1431
  break;
1254
1432
  }
1255
1433
  break;
@@ -1257,30 +1435,22 @@ size_t http_parser_execute (http_parser *parser,
1257
1435
 
1258
1436
  case s_header_value:
1259
1437
  {
1260
- c = acceptable_header[(unsigned char)ch];
1261
-
1262
- if (!c) {
1263
- if (ch == CR) {
1264
- CALLBACK(header_value);
1265
- state = s_header_almost_done;
1266
- if(header_state == h_general){
1267
- parser->maybe_ml = 1;
1268
- }
1269
-
1270
- break;
1271
- }
1272
1438
 
1273
- if (ch == LF) {
1274
- CALLBACK(header_value);
1275
- if(header_state == h_general){
1276
- parser->maybe_ml = 1;
1277
- }
1278
- goto header_almost_done;
1279
- }
1439
+ if (ch == CR) {
1440
+ parser->state = s_header_almost_done;
1441
+ CALLBACK_DATA(header_value);
1280
1442
  break;
1281
1443
  }
1282
1444
 
1283
- switch (header_state) {
1445
+ if (ch == LF) {
1446
+ parser->state = s_header_almost_done;
1447
+ CALLBACK_DATA_NOADVANCE(header_value);
1448
+ goto reexecute_byte;
1449
+ }
1450
+
1451
+ c = LOWER(ch);
1452
+
1453
+ switch (parser->header_state) {
1284
1454
  case h_general:
1285
1455
  break;
1286
1456
 
@@ -1290,65 +1460,83 @@ size_t http_parser_execute (http_parser *parser,
1290
1460
  break;
1291
1461
 
1292
1462
  case h_content_length:
1293
- if (ch < '0' || ch > '9') goto error;
1294
- parser->content_length *= 10;
1295
- parser->content_length += ch - '0';
1463
+ {
1464
+ uint64_t t;
1465
+
1466
+ if (ch == ' ') break;
1467
+
1468
+ if (!IS_NUM(ch)) {
1469
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1470
+ goto error;
1471
+ }
1472
+
1473
+ t = parser->content_length;
1474
+ t *= 10;
1475
+ t += ch - '0';
1476
+
1477
+ /* Overflow? */
1478
+ if (t < parser->content_length || t == ULLONG_MAX) {
1479
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1480
+ goto error;
1481
+ }
1482
+
1483
+ parser->content_length = t;
1296
1484
  break;
1485
+ }
1297
1486
 
1298
1487
  /* Transfer-Encoding: chunked */
1299
1488
  case h_matching_transfer_encoding_chunked:
1300
- index++;
1301
- if (index > sizeof(CHUNKED)-1
1302
- || c != CHUNKED[index]) {
1303
- header_state = h_general;
1304
- } else if (index == sizeof(CHUNKED)-2) {
1305
- header_state = h_transfer_encoding_chunked;
1489
+ parser->index++;
1490
+ if (parser->index > sizeof(CHUNKED)-1
1491
+ || c != CHUNKED[parser->index]) {
1492
+ parser->header_state = h_general;
1493
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1494
+ parser->header_state = h_transfer_encoding_chunked;
1306
1495
  }
1307
1496
  break;
1308
1497
 
1309
1498
  /* looking for 'Connection: keep-alive' */
1310
1499
  case h_matching_connection_keep_alive:
1311
- index++;
1312
- if (index > sizeof(KEEP_ALIVE)-1
1313
- || c != KEEP_ALIVE[index]) {
1314
- header_state = h_general;
1315
- } else if (index == sizeof(KEEP_ALIVE)-2) {
1316
- header_state = h_connection_keep_alive;
1500
+ parser->index++;
1501
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1502
+ || c != KEEP_ALIVE[parser->index]) {
1503
+ parser->header_state = h_general;
1504
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1505
+ parser->header_state = h_connection_keep_alive;
1317
1506
  }
1318
1507
  break;
1319
1508
 
1320
1509
  /* looking for 'Connection: close' */
1321
1510
  case h_matching_connection_close:
1322
- index++;
1323
- if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1324
- header_state = h_general;
1325
- } else if (index == sizeof(CLOSE)-2) {
1326
- header_state = h_connection_close;
1511
+ parser->index++;
1512
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1513
+ parser->header_state = h_general;
1514
+ } else if (parser->index == sizeof(CLOSE)-2) {
1515
+ parser->header_state = h_connection_close;
1327
1516
  }
1328
1517
  break;
1329
1518
 
1330
1519
  case h_transfer_encoding_chunked:
1331
1520
  case h_connection_keep_alive:
1332
1521
  case h_connection_close:
1333
- if (ch != ' ') header_state = h_general;
1522
+ if (ch != ' ') parser->header_state = h_general;
1334
1523
  break;
1335
1524
 
1336
1525
  default:
1337
- state = s_header_value;
1338
- header_state = h_general;
1526
+ parser->state = s_header_value;
1527
+ parser->header_state = h_general;
1339
1528
  break;
1340
1529
  }
1341
1530
  break;
1342
1531
  }
1343
1532
 
1344
1533
  case s_header_almost_done:
1345
- header_almost_done:
1346
1534
  {
1347
1535
  STRICT_CHECK(ch != LF);
1348
1536
 
1349
- state = s_header_field_start;
1537
+ parser->state = s_header_value_lws;
1350
1538
 
1351
- switch (header_state) {
1539
+ switch (parser->header_state) {
1352
1540
  case h_connection_keep_alive:
1353
1541
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
1354
1542
  break;
@@ -1361,33 +1549,47 @@ size_t http_parser_execute (http_parser *parser,
1361
1549
  default:
1362
1550
  break;
1363
1551
  }
1552
+
1553
+ break;
1554
+ }
1555
+
1556
+ case s_header_value_lws:
1557
+ {
1558
+ if (ch == ' ' || ch == '\t')
1559
+ parser->state = s_header_value_start;
1560
+ else
1561
+ {
1562
+ parser->state = s_header_field_start;
1563
+ goto reexecute_byte;
1564
+ }
1364
1565
  break;
1365
1566
  }
1366
1567
 
1367
1568
  case s_headers_almost_done:
1368
- headers_almost_done:
1369
1569
  {
1370
- parser->maybe_ml = 0;
1371
1570
  STRICT_CHECK(ch != LF);
1372
1571
 
1373
1572
  if (parser->flags & F_TRAILING) {
1374
1573
  /* End of a chunked request */
1375
- CALLBACK2(message_complete);
1376
- state = NEW_MESSAGE();
1574
+ parser->state = NEW_MESSAGE();
1575
+ CALLBACK_NOTIFY(message_complete);
1377
1576
  break;
1378
1577
  }
1379
1578
 
1380
- nread = 0;
1579
+ parser->state = s_headers_done;
1381
1580
 
1382
- if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1383
- parser->upgrade = 1;
1384
- }
1581
+ /* Set this here so that on_headers_complete() callbacks can see it */
1582
+ parser->upgrade =
1583
+ (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1385
1584
 
1386
1585
  /* Here we call the headers_complete callback. This is somewhat
1387
1586
  * different than other callbacks because if the user returns 1, we
1388
1587
  * will interpret that as saying that this message has no body. This
1389
1588
  * is needed for the annoying case of recieving a response to a HEAD
1390
1589
  * request.
1590
+ *
1591
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1592
+ * we have to simulate it by handling a change in errno below.
1391
1593
  */
1392
1594
  if (settings->on_headers_complete) {
1393
1595
  switch (settings->on_headers_complete(parser)) {
@@ -1399,38 +1601,54 @@ size_t http_parser_execute (http_parser *parser,
1399
1601
  break;
1400
1602
 
1401
1603
  default:
1604
+ SET_ERRNO(HPE_CB_headers_complete);
1402
1605
  return p - data; /* Error */
1403
1606
  }
1404
1607
  }
1405
1608
 
1406
- // Exit, the rest of the connect is in a different protocol.
1609
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1610
+ return p - data;
1611
+ }
1612
+
1613
+ goto reexecute_byte;
1614
+ }
1615
+
1616
+ case s_headers_done:
1617
+ {
1618
+ STRICT_CHECK(ch != LF);
1619
+
1620
+ parser->nread = 0;
1621
+
1622
+ /* Exit, the rest of the connect is in a different protocol. */
1407
1623
  if (parser->upgrade) {
1408
- CALLBACK2(message_complete);
1409
- return (p - data);
1624
+ parser->state = NEW_MESSAGE();
1625
+ CALLBACK_NOTIFY(message_complete);
1626
+ return (p - data) + 1;
1410
1627
  }
1411
1628
 
1412
1629
  if (parser->flags & F_SKIPBODY) {
1413
- CALLBACK2(message_complete);
1414
- state = NEW_MESSAGE();
1630
+ parser->state = NEW_MESSAGE();
1631
+ CALLBACK_NOTIFY(message_complete);
1415
1632
  } else if (parser->flags & F_CHUNKED) {
1416
1633
  /* chunked encoding - ignore Content-Length header */
1417
- state = s_chunk_size_start;
1634
+ parser->state = s_chunk_size_start;
1418
1635
  } else {
1419
1636
  if (parser->content_length == 0) {
1420
1637
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1421
- CALLBACK2(message_complete);
1422
- state = NEW_MESSAGE();
1423
- } else if (parser->content_length > 0) {
1638
+ parser->state = NEW_MESSAGE();
1639
+ CALLBACK_NOTIFY(message_complete);
1640
+ } else if (parser->content_length != ULLONG_MAX) {
1424
1641
  /* Content-Length header given and non-zero */
1425
- state = s_body_identity;
1642
+ parser->state = s_body_identity;
1426
1643
  } else {
1427
- if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1644
+ if (parser->type == HTTP_REQUEST ||
1645
+ !http_message_needs_eof(parser)) {
1428
1646
  /* Assume content-length 0 - read the next */
1429
- CALLBACK2(message_complete);
1430
- state = NEW_MESSAGE();
1647
+ parser->state = NEW_MESSAGE();
1648
+ CALLBACK_NOTIFY(message_complete);
1431
1649
  } else {
1432
1650
  /* Read body until EOF */
1433
- state = s_body_identity_eof;
1651
+ parser->state = s_body_identity_eof;
1434
1652
  }
1435
1653
  }
1436
1654
  }
@@ -1439,59 +1657,103 @@ size_t http_parser_execute (http_parser *parser,
1439
1657
  }
1440
1658
 
1441
1659
  case s_body_identity:
1442
- to_read = MIN(pe - p, (int64_t)parser->content_length);
1443
- if (to_read > 0) {
1444
- if (settings->on_body) settings->on_body(parser, p, to_read, 0);
1445
- p += to_read - 1;
1446
- parser->content_length -= to_read;
1447
- if (parser->content_length == 0) {
1448
- CALLBACK2(message_complete);
1449
- state = NEW_MESSAGE();
1450
- }
1660
+ {
1661
+ uint64_t to_read = MIN(parser->content_length,
1662
+ (uint64_t) ((data + len) - p));
1663
+
1664
+ assert(parser->content_length != 0
1665
+ && parser->content_length != ULLONG_MAX);
1666
+
1667
+ /* The difference between advancing content_length and p is because
1668
+ * the latter will automatically advance on the next loop iteration.
1669
+ * Further, if content_length ends up at 0, we want to see the last
1670
+ * byte again for our message complete callback.
1671
+ */
1672
+ MARK(body);
1673
+ parser->content_length -= to_read;
1674
+ p += to_read - 1;
1675
+
1676
+ if (parser->content_length == 0) {
1677
+ parser->state = s_message_done;
1678
+
1679
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1680
+ *
1681
+ * The alternative to doing this is to wait for the next byte to
1682
+ * trigger the data callback, just as in every other case. The
1683
+ * problem with this is that this makes it difficult for the test
1684
+ * harness to distinguish between complete-on-EOF and
1685
+ * complete-on-length. It's not clear that this distinction is
1686
+ * important for applications, but let's keep it for now.
1687
+ */
1688
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1689
+ goto reexecute_byte;
1451
1690
  }
1691
+
1452
1692
  break;
1693
+ }
1453
1694
 
1454
1695
  /* read until EOF */
1455
1696
  case s_body_identity_eof:
1456
- to_read = pe - p;
1457
- if (to_read > 0) {
1458
- if (settings->on_body) settings->on_body(parser, p, to_read, 0);
1459
- p += to_read - 1;
1460
- }
1697
+ MARK(body);
1698
+ p = data + len - 1;
1699
+
1700
+ break;
1701
+
1702
+ case s_message_done:
1703
+ parser->state = NEW_MESSAGE();
1704
+ CALLBACK_NOTIFY(message_complete);
1461
1705
  break;
1462
1706
 
1463
1707
  case s_chunk_size_start:
1464
1708
  {
1709
+ assert(parser->nread == 1);
1465
1710
  assert(parser->flags & F_CHUNKED);
1466
1711
 
1467
- c = unhex[(unsigned char)ch];
1468
- if (c == -1) goto error;
1469
- parser->content_length = c;
1470
- state = s_chunk_size;
1712
+ unhex_val = unhex[(unsigned char)ch];
1713
+ if (unhex_val == -1) {
1714
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1715
+ goto error;
1716
+ }
1717
+
1718
+ parser->content_length = unhex_val;
1719
+ parser->state = s_chunk_size;
1471
1720
  break;
1472
1721
  }
1473
1722
 
1474
1723
  case s_chunk_size:
1475
1724
  {
1725
+ uint64_t t;
1726
+
1476
1727
  assert(parser->flags & F_CHUNKED);
1477
1728
 
1478
1729
  if (ch == CR) {
1479
- state = s_chunk_size_almost_done;
1730
+ parser->state = s_chunk_size_almost_done;
1480
1731
  break;
1481
1732
  }
1482
1733
 
1483
- c = unhex[(unsigned char)ch];
1734
+ unhex_val = unhex[(unsigned char)ch];
1484
1735
 
1485
- if (c == -1) {
1736
+ if (unhex_val == -1) {
1486
1737
  if (ch == ';' || ch == ' ') {
1487
- state = s_chunk_parameters;
1738
+ parser->state = s_chunk_parameters;
1488
1739
  break;
1489
1740
  }
1741
+
1742
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1743
+ goto error;
1744
+ }
1745
+
1746
+ t = parser->content_length;
1747
+ t *= 16;
1748
+ t += unhex_val;
1749
+
1750
+ /* Overflow? */
1751
+ if (t < parser->content_length || t == ULLONG_MAX) {
1752
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1490
1753
  goto error;
1491
1754
  }
1492
1755
 
1493
- parser->content_length *= 16;
1494
- parser->content_length += c;
1756
+ parser->content_length = t;
1495
1757
  break;
1496
1758
  }
1497
1759
 
@@ -1500,7 +1762,7 @@ size_t http_parser_execute (http_parser *parser,
1500
1762
  assert(parser->flags & F_CHUNKED);
1501
1763
  /* chunk parameters are ignored. TODO check for overflow */
1502
1764
  if (ch == CR) {
1503
- state = s_chunk_size_almost_done;
1765
+ parser->state = s_chunk_size_almost_done;
1504
1766
  break;
1505
1767
  }
1506
1768
  break;
@@ -1511,105 +1773,418 @@ size_t http_parser_execute (http_parser *parser,
1511
1773
  assert(parser->flags & F_CHUNKED);
1512
1774
  STRICT_CHECK(ch != LF);
1513
1775
 
1776
+ parser->nread = 0;
1777
+
1514
1778
  if (parser->content_length == 0) {
1515
1779
  parser->flags |= F_TRAILING;
1516
- state = s_header_field_start;
1780
+ parser->state = s_header_field_start;
1517
1781
  } else {
1518
- state = s_chunk_data;
1782
+ parser->state = s_chunk_data;
1519
1783
  }
1520
1784
  break;
1521
1785
  }
1522
1786
 
1523
1787
  case s_chunk_data:
1524
1788
  {
1525
- assert(parser->flags & F_CHUNKED);
1789
+ uint64_t to_read = MIN(parser->content_length,
1790
+ (uint64_t) ((data + len) - p));
1526
1791
 
1527
- to_read = MIN(pe - p, (int64_t)(parser->content_length));
1792
+ assert(parser->flags & F_CHUNKED);
1793
+ assert(parser->content_length != 0
1794
+ && parser->content_length != ULLONG_MAX);
1528
1795
 
1529
- if (to_read > 0) {
1530
- if (settings->on_body) settings->on_body(parser, p, to_read, 0);
1531
- p += to_read - 1;
1532
- }
1796
+ /* See the explanation in s_body_identity for why the content
1797
+ * length and data pointers are managed this way.
1798
+ */
1799
+ MARK(body);
1800
+ parser->content_length -= to_read;
1801
+ p += to_read - 1;
1533
1802
 
1534
- if (to_read == parser->content_length) {
1535
- state = s_chunk_data_almost_done;
1803
+ if (parser->content_length == 0) {
1804
+ parser->state = s_chunk_data_almost_done;
1536
1805
  }
1537
1806
 
1538
- parser->content_length -= to_read;
1539
1807
  break;
1540
1808
  }
1541
1809
 
1542
1810
  case s_chunk_data_almost_done:
1543
1811
  assert(parser->flags & F_CHUNKED);
1812
+ assert(parser->content_length == 0);
1544
1813
  STRICT_CHECK(ch != CR);
1545
- state = s_chunk_data_done;
1814
+ parser->state = s_chunk_data_done;
1815
+ CALLBACK_DATA(body);
1546
1816
  break;
1547
1817
 
1548
1818
  case s_chunk_data_done:
1549
1819
  assert(parser->flags & F_CHUNKED);
1550
1820
  STRICT_CHECK(ch != LF);
1551
- state = s_chunk_size_start;
1821
+ parser->nread = 0;
1822
+ parser->state = s_chunk_size_start;
1552
1823
  break;
1553
1824
 
1554
1825
  default:
1555
1826
  assert(0 && "unhandled state");
1827
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1556
1828
  goto error;
1557
1829
  }
1558
1830
  }
1559
1831
 
1560
- CALLBACK_NOCLEAR(header_field);
1561
- CALLBACK_NOCLEAR(header_value);
1562
- CALLBACK_NOCLEAR(fragment);
1563
- CALLBACK_NOCLEAR(query_string);
1564
- CALLBACK_NOCLEAR(path);
1565
- CALLBACK_NOCLEAR(url);
1832
+ /* Run callbacks for any marks that we have leftover after we ran out of
1833
+ * bytes. There should be at most one of these set, so it's OK to invoke
1834
+ * them in series (unset marks will not result in callbacks).
1835
+ *
1836
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
1837
+ * overflowed 'data' and this allows us to correct for the off-by-one that
1838
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1839
+ * value that's in-bounds).
1840
+ */
1841
+
1842
+ assert(((header_field_mark ? 1 : 0) +
1843
+ (header_value_mark ? 1 : 0) +
1844
+ (url_mark ? 1 : 0) +
1845
+ (body_mark ? 1 : 0)) <= 1);
1566
1846
 
1567
- parser->state = state;
1568
- parser->header_state = header_state;
1569
- parser->index = index;
1570
- parser->nread = nread;
1847
+ CALLBACK_DATA_NOADVANCE(header_field);
1848
+ CALLBACK_DATA_NOADVANCE(header_value);
1849
+ CALLBACK_DATA_NOADVANCE(url);
1850
+ CALLBACK_DATA_NOADVANCE(body);
1571
1851
 
1572
1852
  return len;
1573
1853
 
1574
1854
  error:
1855
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1856
+ SET_ERRNO(HPE_UNKNOWN);
1857
+ }
1858
+
1575
1859
  return (p - data);
1576
1860
  }
1577
1861
 
1578
1862
 
1863
+ /* Does the parser need to see an EOF to find the end of the message? */
1864
+ int
1865
+ http_message_needs_eof (const http_parser *parser)
1866
+ {
1867
+ if (parser->type == HTTP_REQUEST) {
1868
+ return 0;
1869
+ }
1870
+
1871
+ /* See RFC 2616 section 4.4 */
1872
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1873
+ parser->status_code == 204 || /* No Content */
1874
+ parser->status_code == 304 || /* Not Modified */
1875
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1876
+ return 0;
1877
+ }
1878
+
1879
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1880
+ return 0;
1881
+ }
1882
+
1883
+ return 1;
1884
+ }
1885
+
1886
+
1579
1887
  int
1580
- http_should_keep_alive (http_parser *parser)
1888
+ http_should_keep_alive (const http_parser *parser)
1581
1889
  {
1582
1890
  if (parser->http_major > 0 && parser->http_minor > 0) {
1583
1891
  /* HTTP/1.1 */
1584
1892
  if (parser->flags & F_CONNECTION_CLOSE) {
1585
1893
  return 0;
1586
- } else {
1587
- return 1;
1588
1894
  }
1589
1895
  } else {
1590
1896
  /* HTTP/1.0 or earlier */
1591
- if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1592
- return 1;
1593
- } else {
1897
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1594
1898
  return 0;
1595
1899
  }
1596
1900
  }
1901
+
1902
+ return !http_message_needs_eof(parser);
1597
1903
  }
1598
1904
 
1599
1905
 
1600
- const char * http_method_str (enum http_method m)
1906
+ const char *
1907
+ http_method_str (enum http_method m)
1601
1908
  {
1602
- return method_strings[m];
1909
+ return ELEM_AT(method_strings, m, "<unknown>");
1603
1910
  }
1604
1911
 
1605
1912
 
1606
1913
  void
1607
1914
  http_parser_init (http_parser *parser, enum http_parser_type t)
1608
1915
  {
1916
+ void *data = parser->data; /* preserve application data */
1917
+ memset(parser, 0, sizeof(*parser));
1918
+ parser->data = data;
1609
1919
  parser->type = t;
1610
1920
  parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1611
- parser->nread = 0;
1612
- parser->upgrade = 0;
1613
- parser->flags = 0;
1614
- parser->method = 0;
1921
+ parser->http_errno = HPE_OK;
1922
+ }
1923
+
1924
+ const char *
1925
+ http_errno_name(enum http_errno err) {
1926
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1927
+ return http_strerror_tab[err].name;
1928
+ }
1929
+
1930
+ const char *
1931
+ http_errno_description(enum http_errno err) {
1932
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1933
+ return http_strerror_tab[err].description;
1934
+ }
1935
+
1936
+ static enum http_host_state
1937
+ http_parse_host_char(enum http_host_state s, const char ch) {
1938
+ switch(s) {
1939
+ case s_http_userinfo:
1940
+ case s_http_userinfo_start:
1941
+ if (ch == '@') {
1942
+ return s_http_host_start;
1943
+ }
1944
+
1945
+ if (IS_USERINFO_CHAR(ch)) {
1946
+ return s_http_userinfo;
1947
+ }
1948
+ break;
1949
+
1950
+ case s_http_host_start:
1951
+ if (ch == '[') {
1952
+ return s_http_host_v6_start;
1953
+ }
1954
+
1955
+ if (IS_HOST_CHAR(ch)) {
1956
+ return s_http_host;
1957
+ }
1958
+
1959
+ break;
1960
+
1961
+ case s_http_host:
1962
+ if (IS_HOST_CHAR(ch)) {
1963
+ return s_http_host;
1964
+ }
1965
+
1966
+ /* FALLTHROUGH */
1967
+ case s_http_host_v6_end:
1968
+ if (ch == ':') {
1969
+ return s_http_host_port_start;
1970
+ }
1971
+
1972
+ break;
1973
+
1974
+ case s_http_host_v6:
1975
+ if (ch == ']') {
1976
+ return s_http_host_v6_end;
1977
+ }
1978
+
1979
+ /* FALLTHROUGH */
1980
+ case s_http_host_v6_start:
1981
+ if (IS_HEX(ch) || ch == ':') {
1982
+ return s_http_host_v6;
1983
+ }
1984
+
1985
+ break;
1986
+
1987
+ case s_http_host_port:
1988
+ case s_http_host_port_start:
1989
+ if (IS_NUM(ch)) {
1990
+ return s_http_host_port;
1991
+ }
1992
+
1993
+ break;
1994
+
1995
+ default:
1996
+ break;
1997
+ }
1998
+ return s_http_host_dead;
1999
+ }
2000
+
2001
+ static int
2002
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2003
+ enum http_host_state s;
2004
+
2005
+ const char *p;
2006
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2007
+
2008
+ u->field_data[UF_HOST].len = 0;
2009
+
2010
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
2011
+
2012
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2013
+ enum http_host_state new_s = http_parse_host_char(s, *p);
2014
+
2015
+ if (new_s == s_http_host_dead) {
2016
+ return 1;
2017
+ }
2018
+
2019
+ switch(new_s) {
2020
+ case s_http_host:
2021
+ if (s != s_http_host) {
2022
+ u->field_data[UF_HOST].off = p - buf;
2023
+ }
2024
+ u->field_data[UF_HOST].len++;
2025
+ break;
2026
+
2027
+ case s_http_host_v6:
2028
+ if (s != s_http_host_v6) {
2029
+ u->field_data[UF_HOST].off = p - buf;
2030
+ }
2031
+ u->field_data[UF_HOST].len++;
2032
+ break;
2033
+
2034
+ case s_http_host_port:
2035
+ if (s != s_http_host_port) {
2036
+ u->field_data[UF_PORT].off = p - buf;
2037
+ u->field_data[UF_PORT].len = 0;
2038
+ u->field_set |= (1 << UF_PORT);
2039
+ }
2040
+ u->field_data[UF_PORT].len++;
2041
+ break;
2042
+
2043
+ case s_http_userinfo:
2044
+ if (s != s_http_userinfo) {
2045
+ u->field_data[UF_USERINFO].off = p - buf ;
2046
+ u->field_data[UF_USERINFO].len = 0;
2047
+ u->field_set |= (1 << UF_USERINFO);
2048
+ }
2049
+ u->field_data[UF_USERINFO].len++;
2050
+ break;
2051
+
2052
+ default:
2053
+ break;
2054
+ }
2055
+ s = new_s;
2056
+ }
2057
+
2058
+ /* Make sure we don't end somewhere unexpected */
2059
+ switch (s) {
2060
+ case s_http_host_start:
2061
+ case s_http_host_v6_start:
2062
+ case s_http_host_v6:
2063
+ case s_http_host_port_start:
2064
+ case s_http_userinfo:
2065
+ case s_http_userinfo_start:
2066
+ return 1;
2067
+ default:
2068
+ break;
2069
+ }
2070
+
2071
+ return 0;
2072
+ }
2073
+
2074
+ int
2075
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2076
+ struct http_parser_url *u)
2077
+ {
2078
+ enum state s;
2079
+ const char *p;
2080
+ enum http_parser_url_fields uf, old_uf;
2081
+ int found_at = 0;
2082
+
2083
+ u->port = u->field_set = 0;
2084
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2085
+ uf = old_uf = UF_MAX;
2086
+
2087
+ for (p = buf; p < buf + buflen; p++) {
2088
+ s = parse_url_char(s, *p);
2089
+
2090
+ /* Figure out the next field that we're operating on */
2091
+ switch (s) {
2092
+ case s_dead:
2093
+ return 1;
2094
+
2095
+ /* Skip delimiters */
2096
+ case s_req_schema_slash:
2097
+ case s_req_schema_slash_slash:
2098
+ case s_req_server_start:
2099
+ case s_req_query_string_start:
2100
+ case s_req_fragment_start:
2101
+ continue;
2102
+
2103
+ case s_req_schema:
2104
+ uf = UF_SCHEMA;
2105
+ break;
2106
+
2107
+ case s_req_server_with_at:
2108
+ found_at = 1;
2109
+
2110
+ /* FALLTHROUGH */
2111
+ case s_req_server:
2112
+ uf = UF_HOST;
2113
+ break;
2114
+
2115
+ case s_req_path:
2116
+ uf = UF_PATH;
2117
+ break;
2118
+
2119
+ case s_req_query_string:
2120
+ uf = UF_QUERY;
2121
+ break;
2122
+
2123
+ case s_req_fragment:
2124
+ uf = UF_FRAGMENT;
2125
+ break;
2126
+
2127
+ default:
2128
+ assert(!"Unexpected state");
2129
+ return 1;
2130
+ }
2131
+
2132
+ /* Nothing's changed; soldier on */
2133
+ if (uf == old_uf) {
2134
+ u->field_data[uf].len++;
2135
+ continue;
2136
+ }
2137
+
2138
+ u->field_data[uf].off = p - buf;
2139
+ u->field_data[uf].len = 1;
2140
+
2141
+ u->field_set |= (1 << uf);
2142
+ old_uf = uf;
2143
+ }
2144
+
2145
+ /* host must be present if there is a schema */
2146
+ /* parsing http:///toto will fail */
2147
+ if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2148
+ if (http_parse_host(buf, u, found_at) != 0) {
2149
+ return 1;
2150
+ }
2151
+ }
2152
+
2153
+ /* CONNECT requests can only contain "hostname:port" */
2154
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2155
+ return 1;
2156
+ }
2157
+
2158
+ if (u->field_set & (1 << UF_PORT)) {
2159
+ /* Don't bother with endp; we've already validated the string */
2160
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2161
+
2162
+ /* Ports have a max value of 2^16 - 1 (0xffff) */
2163
+ if (v > 0xffff) {
2164
+ return 1;
2165
+ }
2166
+
2167
+ u->port = (uint16_t) v;
2168
+ }
2169
+
2170
+ return 0;
2171
+ }
2172
+
2173
+ void
2174
+ http_parser_pause(http_parser *parser, int paused) {
2175
+ /* Users should only be pausing/unpausing a parser that is not in an error
2176
+ * state. In non-debug builds, there's not much that we can do about this
2177
+ * other than ignore it.
2178
+ */
2179
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2180
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2181
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2182
+ } else {
2183
+ assert(0 && "Attempting to pause parser in error state");
2184
+ }
2185
+ }
2186
+
2187
+ int
2188
+ http_body_is_final(const struct http_parser *parser) {
2189
+ return parser->state == s_message_done;
1615
2190
  }