bossan 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,12 +2,12 @@
2
2
 
3
3
  #define LIMIT_MAX 1024 * 1024 * 1024
4
4
 
5
- buffer *
5
+ buffer_t *
6
6
  new_buffer(size_t buf_size, size_t limit)
7
7
  {
8
- buffer *buf;
9
- buf = ruby_xmalloc(sizeof(buffer));
10
- memset(buf, 0, sizeof(buffer));
8
+ buffer_t *buf;
9
+ buf = ruby_xmalloc(sizeof(buffer_t));
10
+ memset(buf, 0, sizeof(buffer_t));
11
11
  buf->buf = ruby_xmalloc(sizeof(char) * buf_size);
12
12
  buf->buf_size = buf_size;
13
13
  if(limit){
@@ -20,7 +20,7 @@ new_buffer(size_t buf_size, size_t limit)
20
20
 
21
21
 
22
22
  buffer_result
23
- write2buf(buffer *buf, const char *c, size_t l)
23
+ write2buf(buffer_t *buf, const char *c, size_t l)
24
24
  {
25
25
  size_t newl;
26
26
  char *newbuf;
@@ -49,7 +49,7 @@ write2buf(buffer *buf, const char *c, size_t l)
49
49
 
50
50
 
51
51
  void
52
- free_buffer(buffer *buf)
52
+ free_buffer(buffer_t *buf)
53
53
  {
54
54
  ruby_xfree(buf->buf);
55
55
  ruby_xfree(buf);
@@ -57,7 +57,7 @@ free_buffer(buffer *buf)
57
57
 
58
58
 
59
59
  VALUE
60
- getRbString(buffer *buf)
60
+ getRbString(buffer_t *buf)
61
61
  {
62
62
  VALUE o;
63
63
  o = rb_str_new(buf->buf, buf->len);
@@ -67,7 +67,7 @@ getRbString(buffer *buf)
67
67
 
68
68
 
69
69
  char *
70
- getString(buffer *buf)
70
+ getString(buffer_t *buf)
71
71
  {
72
72
  buf->buf[buf->len] = '\0';
73
73
  return buf->buf;
@@ -18,26 +18,31 @@ typedef struct {
18
18
  size_t buf_size;
19
19
  size_t len;
20
20
  size_t limit;
21
- } buffer;
21
+ } buffer_t;
22
22
 
23
- buffer *
23
+ buffer_t *
24
24
  new_buffer(size_t buf_size, size_t limit);
25
25
 
26
26
  buffer_result
27
- write2buf(buffer *buf, const char *c, size_t l);
27
+ write2buf(buffer_t *buf, const char *c, size_t l);
28
28
 
29
29
  void
30
- free_buffer(buffer *buf);
30
+ free_buffer(buffer_t *buf);
31
31
 
32
32
  VALUE
33
- getRbString(buffer *buf);
33
+ getRbString(buffer_t *buf);
34
34
 
35
35
  char *
36
- getString(buffer *buf);
37
-
36
+ getString(buffer_t *buf);
38
37
 
39
38
  #endif
40
39
 
41
40
 
42
41
 
43
42
 
43
+
44
+
45
+
46
+
47
+
48
+
@@ -7,24 +7,23 @@ typedef struct _client {
7
7
  int fd;
8
8
  char *remote_addr;
9
9
  uint32_t remote_port;
10
+
10
11
  uint8_t keep_alive;
11
- request *req;
12
- uint32_t body_length;
13
- int body_readed;
14
- void *body;
15
- int bad_request_code;
16
- request_body_type body_type;
17
- uint8_t complete;
12
+ char upgrade;
13
+ request *current_req;
14
+ request_queue *request_queue;
15
+
16
+ char complete;
18
17
 
19
- http_parser *http; // http req parser
20
- VALUE environ; // rack environ
21
- int status_code; // response status code
18
+ http_parser *http_parser; // http req parser
19
+ uint16_t status_code; // response status code
22
20
 
23
21
  VALUE http_status; // response status line
24
22
  VALUE headers; // http response headers
25
23
  uint8_t header_done; // header write status
26
24
  VALUE response; // rack response object
27
25
  VALUE response_iter; // rack response object
26
+ uint8_t chunked_response; // use Transfer-Encoding: chunked
28
27
  uint8_t content_length_set; // content_length_set flag
29
28
  uint32_t content_length; // content_length
30
29
  uint32_t write_bytes; // send body length
@@ -1,4 +1,7 @@
1
- /* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
2
5
  *
3
6
  * Permission is hereby granted, free of charge, to any person obtaining a copy
4
7
  * of this software and associated documentation files (the "Software"), to
@@ -19,70 +22,114 @@
19
22
  * IN THE SOFTWARE.
20
23
  */
21
24
  #include "http_parser.h"
22
- #ifdef _WIN32
23
- typedef __int8 int8_t;
24
- typedef unsigned __int8 uint8_t;
25
- typedef __int16 int16_t;
26
- typedef unsigned __int16 uint16_t;
27
- typedef __int16 int32_t;
28
- typedef unsigned __int32 uint32_t;
29
- #else
30
- #include <stdint.h>
31
- #endif
32
25
  #include <assert.h>
33
26
  #include <stddef.h>
27
+ #include <ctype.h>
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <limits.h>
31
+
32
+ #if __GNUC__ >= 3
33
+ # define likely(x) __builtin_expect(!!(x), 1)
34
+ # define unlikely(x) __builtin_expect(!!(x), 0)
35
+ #else
36
+ # define likely(x) (x)
37
+ # define unlikely(x) (x)
38
+ #endif
34
39
 
40
+ #ifndef ULLONG_MAX
41
+ # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
42
+ #endif
35
43
 
36
44
  #ifndef MIN
37
45
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
46
  #endif
39
47
 
48
+ #ifndef ARRAY_SIZE
49
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
50
+ #endif
40
51
 
41
- #define CALLBACK2(FOR) \
42
- do { \
43
- if (settings->on_##FOR) { \
44
- if (0 != settings->on_##FOR(parser)) return (p - data); \
45
- } \
46
- } while (0)
52
+ #ifndef BIT_AT
53
+ # define BIT_AT(a, i) \
54
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
55
+ (1 << ((unsigned int) (i) & 7))))
56
+ #endif
47
57
 
58
+ #ifndef ELEM_AT
59
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
60
+ #endif
48
61
 
49
- #define MARK(FOR) \
62
+ #if HTTP_PARSER_DEBUG
63
+ #define SET_ERRNO(e) \
50
64
  do { \
51
- FOR##_mark = p; \
65
+ parser->http_errno = (e); \
66
+ parser->error_lineno = __LINE__; \
52
67
  } while (0)
68
+ #else
69
+ #define SET_ERRNO(e) \
70
+ do { \
71
+ parser->http_errno = (e); \
72
+ } while(0)
73
+ #endif
53
74
 
54
- #define CALLBACK_NOCLEAR(FOR) \
75
+
76
+ /* Run the notify callback FOR, returning ER if it fails */
77
+ #define CALLBACK_NOTIFY_(FOR, ER) \
55
78
  do { \
56
- if (FOR##_mark) { \
57
- if (settings->on_##FOR) { \
58
- if (0 != settings->on_##FOR(parser, \
59
- FOR##_mark, \
60
- p - FOR##_mark, 1)) \
61
- { \
62
- return (p - data); \
63
- } \
79
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
80
+ \
81
+ if ( likely(settings->on_##FOR) ) { \
82
+ if (0 != settings->on_##FOR(parser)) { \
83
+ SET_ERRNO(HPE_CB_##FOR); \
84
+ } \
85
+ \
86
+ /* We either errored above or got paused; get out */ \
87
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
88
+ return (ER); \
64
89
  } \
65
90
  } \
66
91
  } while (0)
67
92
 
68
- #define CALLBACK_CLEAR(FOR) \
93
+ /* Run the notify callback FOR and consume the current byte */
94
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
95
+
96
+ /* Run the notify callback FOR and don't consume the current byte */
97
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
98
+
99
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
100
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
69
101
  do { \
70
- if (FOR##_mark) { \
71
- if (settings->on_##FOR) { \
72
- if (0 != settings->on_##FOR(parser, \
73
- FOR##_mark, \
74
- p - FOR##_mark, 0)) \
75
- { \
76
- return (p - data); \
102
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
103
+ \
104
+ if (likely(FOR##_mark)) { \
105
+ if (likely(settings->on_##FOR)) { \
106
+ if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
107
+ SET_ERRNO(HPE_CB_##FOR); \
108
+ } \
109
+ \
110
+ /* We either errored above or got paused; get out */ \
111
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
112
+ return (ER); \
77
113
  } \
78
114
  } \
115
+ FOR##_mark = NULL; \
79
116
  } \
80
117
  } while (0)
118
+
119
+ /* Run the data callback FOR and consume the current byte */
120
+ #define CALLBACK_DATA(FOR) \
121
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
122
+
123
+ /* Run the data callback FOR and don't consume the current byte */
124
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
125
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
81
126
 
82
- #define CALLBACK(FOR) \
127
+ /* Set the mark FOR; non-destructive if mark is already set */
128
+ #define MARK(FOR) \
83
129
  do { \
84
- CALLBACK_CLEAR(FOR); \
85
- FOR##_mark = NULL; \
130
+ if (!FOR##_mark) { \
131
+ FOR##_mark = p; \
132
+ } \
86
133
  } while (0)
87
134
 
88
135
 
@@ -97,31 +144,21 @@ do { \
97
144
 
98
145
 
99
146
  static const char *method_strings[] =
100
- { "DELETE"
101
- , "GET"
102
- , "HEAD"
103
- , "POST"
104
- , "PUT"
105
- , "CONNECT"
106
- , "OPTIONS"
107
- , "TRACE"
108
- , "COPY"
109
- , "LOCK"
110
- , "MKCOL"
111
- , "MOVE"
112
- , "PROPFIND"
113
- , "PROPPATCH"
114
- , "UNLOCK"
115
- , "REPORT"
116
- , "MKACTIVITY"
117
- , "CHECKOUT"
118
- , "MERGE"
147
+ {
148
+ #define XX(num, name, string) #string,
149
+ HTTP_METHOD_MAP(XX)
150
+ #undef XX
119
151
  };
120
152
 
121
153
 
122
- /* ' ', '_', '-' and all alpha-numeric ascii characters are accepted by acceptable_header.
123
- The 'A'-'Z' are lower-cased. */
124
- static const char acceptable_header[256] = {
154
+ /* Tokens as defined by rfc 2616. Also lowercases them.
155
+ * token = 1*<any CHAR except CTLs or separators>
156
+ * separators = "(" | ")" | "<" | ">" | "@"
157
+ * | "," | ";" | ":" | "\" | <">
158
+ * | "/" | "[" | "]" | "?" | "="
159
+ * | "{" | "}" | SP | HT
160
+ */
161
+ static const char tokens[256] = {
125
162
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
126
163
  0, 0, 0, 0, 0, 0, 0, 0,
127
164
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
@@ -131,9 +168,9 @@ static const char acceptable_header[256] = {
131
168
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
132
169
  0, 0, 0, 0, 0, 0, 0, 0,
133
170
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
134
- ' ', 0, 0, 0, 0, 0, 0, 0,
171
+ 0, '!', 0, '#', '$', '%', '&', '\'',
135
172
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
136
- 0, 0, 0, 0, 0, '-', 0, 0,
173
+ 0, 0, '*', '+', 0, '-', '.', 0,
137
174
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
138
175
  '0', '1', '2', '3', '4', '5', '6', '7',
139
176
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -145,15 +182,15 @@ static const char acceptable_header[256] = {
145
182
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
146
183
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
147
184
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
148
- 'x', 'y', 'z', 0, 0, 0, 0, '_',
185
+ 'x', 'y', 'z', 0, 0, 0, '^', '_',
149
186
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
150
- 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
187
+ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
151
188
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
152
189
  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
153
190
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
154
191
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
155
192
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
156
- 'x', 'y', 'z', 0, 0, 0, 0, 0 };
193
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
157
194
 
158
195
 
159
196
  static const int8_t unhex[256] =
@@ -168,40 +205,48 @@ static const int8_t unhex[256] =
168
205
  };
169
206
 
170
207
 
171
- static const uint8_t normal_url_char[256] = {
208
+ #if HTTP_PARSER_STRICT
209
+ # define T(v) 0
210
+ #else
211
+ # define T(v) v
212
+ #endif
213
+
214
+
215
+ static const uint8_t normal_url_char[32] = {
172
216
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
173
- 0, 0, 0, 0, 0, 0, 0, 0,
217
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
174
218
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
175
- 0, 0, 0, 0, 0, 0, 0, 0,
219
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
176
220
  /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
177
- 0, 0, 0, 0, 0, 0, 0, 0,
221
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
178
222
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
179
- 0, 0, 0, 0, 0, 0, 0, 0,
223
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
180
224
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
181
- 0, 1, 1, 0, 1, 1, 1, 1,
225
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
182
226
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
183
- 1, 1, 1, 1, 1, 1, 1, 1,
227
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
184
228
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
185
- 1, 1, 1, 1, 1, 1, 1, 1,
229
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
186
230
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
187
- 1, 1, 1, 1, 1, 1, 1, 0,
231
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
188
232
  /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
189
- 1, 1, 1, 1, 1, 1, 1, 1,
233
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
190
234
  /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
191
- 1, 1, 1, 1, 1, 1, 1, 1,
235
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
192
236
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
193
- 1, 1, 1, 1, 1, 1, 1, 1,
237
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
194
238
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
195
- 1, 1, 1, 1, 1, 1, 1, 1,
239
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
196
240
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
197
- 1, 1, 1, 1, 1, 1, 1, 1,
241
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
198
242
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
199
- 1, 1, 1, 1, 1, 1, 1, 1,
243
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
200
244
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
201
- 1, 1, 1, 1, 1, 1, 1, 1,
245
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
202
246
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
203
- 1, 1, 1, 1, 1, 1, 1, 0 };
247
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
204
248
 
249
+ #undef T
205
250
 
206
251
  enum state
207
252
  { s_dead = 1 /* important that this is > 0 */
@@ -229,8 +274,9 @@ enum state
229
274
  , s_req_schema
230
275
  , s_req_schema_slash
231
276
  , s_req_schema_slash_slash
232
- , s_req_host
233
- , s_req_port
277
+ , s_req_server_start
278
+ , s_req_server
279
+ , s_req_server_with_at
234
280
  , s_req_path
235
281
  , s_req_query_string_start
236
282
  , s_req_query_string
@@ -251,28 +297,35 @@ enum state
251
297
  , s_header_field
252
298
  , s_header_value_start
253
299
  , s_header_value
300
+ , s_header_value_lws
254
301
 
255
302
  , s_header_almost_done
256
303
 
304
+ , s_chunk_size_start
305
+ , s_chunk_size
306
+ , s_chunk_parameters
307
+ , s_chunk_size_almost_done
308
+
257
309
  , s_headers_almost_done
258
- /* Important: 's_headers_almost_done' must be the last 'header' state. All
310
+ , s_headers_done
311
+
312
+ /* Important: 's_headers_done' must be the last 'header' state. All
259
313
  * states beyond this must be 'body' states. It is used for overflow
260
314
  * checking. See the PARSING_HEADER() macro.
261
315
  */
262
- , s_chunk_size_start
263
- , s_chunk_size
264
- , s_chunk_size_almost_done
265
- , s_chunk_parameters
316
+
266
317
  , s_chunk_data
267
318
  , s_chunk_data_almost_done
268
319
  , s_chunk_data_done
269
320
 
270
321
  , s_body_identity
271
322
  , s_body_identity_eof
323
+
324
+ , s_message_done
272
325
  };
273
326
 
274
327
 
275
- #define PARSING_HEADER(state) (state <= s_headers_almost_done && 0 == (parser->flags & F_TRAILING))
328
+ #define PARSING_HEADER(state) (state <= s_headers_done)
276
329
 
277
330
 
278
331
  enum header_states
@@ -301,27 +354,59 @@ enum header_states
301
354
  , h_connection_close
302
355
  };
303
356
 
357
+ enum http_host_state
358
+ {
359
+ s_http_host_dead = 1
360
+ , s_http_userinfo_start
361
+ , s_http_userinfo
362
+ , s_http_host_start
363
+ , s_http_host_v6_start
364
+ , s_http_host
365
+ , s_http_host_v6
366
+ , s_http_host_v6_end
367
+ , s_http_host_port_start
368
+ , s_http_host_port
369
+ };
370
+
371
+ /* Macros for character classes; depends on strict-mode */
372
+ #define CR '\r'
373
+ #define LF '\n'
374
+ #define LOWER(c) (unsigned char)(c | 0x20)
375
+ #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
376
+ #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
377
+ #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
378
+ #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
379
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
380
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
381
+ (c) == ')')
382
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
383
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
384
+ (c) == '$' || (c) == ',')
304
385
 
305
- enum flags
306
- { F_CHUNKED = 1 << 0
307
- , F_CONNECTION_KEEP_ALIVE = 1 << 1
308
- , F_CONNECTION_CLOSE = 1 << 2
309
- , F_TRAILING = 1 << 3
310
- , F_UPGRADE = 1 << 4
311
- , F_SKIPBODY = 1 << 5
312
- };
313
-
314
-
315
- #define CR '\r'
316
- #define LF '\n'
317
- #define LOWER(c) (unsigned char)(c | 0x20)
386
+ #if HTTP_PARSER_STRICT
387
+ #define TOKEN(c) (tokens[(unsigned char)c])
388
+ #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
389
+ #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
390
+ #else
391
+ #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
392
+ #define IS_URL_CHAR(c) \
393
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
394
+ #define IS_HOST_CHAR(c) \
395
+ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
396
+ #endif
318
397
 
319
398
 
320
399
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
321
400
 
322
401
 
323
402
  #if HTTP_PARSER_STRICT
324
- # define STRICT_CHECK(cond) if (cond) goto error
403
+ # define STRICT_CHECK(cond) \
404
+ do { \
405
+ if (cond) { \
406
+ SET_ERRNO(HPE_STRICT); \
407
+ goto error; \
408
+ } \
409
+ } while (0)
325
410
  # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
326
411
  #else
327
412
  # define STRICT_CHECK(cond)
@@ -329,69 +414,260 @@ enum flags
329
414
  #endif
330
415
 
331
416
 
417
+ /* Map errno values to strings for human-readable output */
418
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
419
+ static struct {
420
+ const char *name;
421
+ const char *description;
422
+ } http_strerror_tab[] = {
423
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
424
+ };
425
+ #undef HTTP_STRERROR_GEN
426
+
427
+ int http_message_needs_eof(const http_parser *parser);
428
+
429
+ /* Our URL parser.
430
+ *
431
+ * This is designed to be shared by http_parser_execute() for URL validation,
432
+ * hence it has a state transition + byte-for-byte interface. In addition, it
433
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
434
+ * work of turning state transitions URL components for its API.
435
+ *
436
+ * This function should only be invoked with non-space characters. It is
437
+ * assumed that the caller cares about (and can detect) the transition between
438
+ * URL and non-URL states by looking for these.
439
+ */
440
+ static enum state
441
+ parse_url_char(enum state s, const char ch)
442
+ {
443
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
444
+ return s_dead;
445
+ }
446
+
447
+ #if HTTP_PARSER_STRICT
448
+ if (ch == '\t' || ch == '\f') {
449
+ return s_dead;
450
+ }
451
+ #endif
452
+
453
+ switch (s) {
454
+ case s_req_spaces_before_url:
455
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
456
+ * All methods except CONNECT are followed by '/' or '*'.
457
+ */
458
+
459
+ if (ch == '/' || ch == '*') {
460
+ return s_req_path;
461
+ }
462
+
463
+ if (IS_ALPHA(ch)) {
464
+ return s_req_schema;
465
+ }
466
+
467
+ break;
468
+
469
+ case s_req_schema:
470
+ if (IS_ALPHA(ch)) {
471
+ return s;
472
+ }
473
+
474
+ if (ch == ':') {
475
+ return s_req_schema_slash;
476
+ }
477
+
478
+ break;
479
+
480
+ case s_req_schema_slash:
481
+ if (ch == '/') {
482
+ return s_req_schema_slash_slash;
483
+ }
484
+
485
+ break;
486
+
487
+ case s_req_schema_slash_slash:
488
+ if (ch == '/') {
489
+ return s_req_server_start;
490
+ }
491
+
492
+ break;
493
+
494
+ case s_req_server_with_at:
495
+ if (ch == '@') {
496
+ return s_dead;
497
+ }
498
+
499
+ /* FALLTHROUGH */
500
+ case s_req_server_start:
501
+ case s_req_server:
502
+ if (ch == '/') {
503
+ return s_req_path;
504
+ }
505
+
506
+ if (ch == '?') {
507
+ return s_req_query_string_start;
508
+ }
509
+
510
+ if (ch == '@') {
511
+ return s_req_server_with_at;
512
+ }
513
+
514
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
515
+ return s_req_server;
516
+ }
517
+
518
+ break;
519
+
520
+ case s_req_path:
521
+ if (IS_URL_CHAR(ch)) {
522
+ return s;
523
+ }
524
+
525
+ switch (ch) {
526
+ case '?':
527
+ return s_req_query_string_start;
528
+
529
+ case '#':
530
+ return s_req_fragment_start;
531
+ }
532
+
533
+ break;
534
+
535
+ case s_req_query_string_start:
536
+ case s_req_query_string:
537
+ if (IS_URL_CHAR(ch)) {
538
+ return s_req_query_string;
539
+ }
540
+
541
+ switch (ch) {
542
+ case '?':
543
+ /* allow extra '?' in query string */
544
+ return s_req_query_string;
545
+
546
+ case '#':
547
+ return s_req_fragment_start;
548
+ }
549
+
550
+ break;
551
+
552
+ case s_req_fragment_start:
553
+ if (IS_URL_CHAR(ch)) {
554
+ return s_req_fragment;
555
+ }
556
+
557
+ switch (ch) {
558
+ case '?':
559
+ return s_req_fragment;
560
+
561
+ case '#':
562
+ return s;
563
+ }
564
+
565
+ break;
566
+
567
+ case s_req_fragment:
568
+ if (IS_URL_CHAR(ch)) {
569
+ return s;
570
+ }
571
+
572
+ switch (ch) {
573
+ case '?':
574
+ case '#':
575
+ return s;
576
+ }
577
+
578
+ break;
579
+
580
+ default:
581
+ break;
582
+ }
583
+
584
+ /* We should never fall out of the switch above unless there's an error */
585
+ return s_dead;
586
+ }
587
+
332
588
  size_t http_parser_execute (http_parser *parser,
333
589
  const http_parser_settings *settings,
334
590
  const char *data,
335
- size_t len)
591
+ const size_t len)
336
592
  {
337
593
  char c, ch;
338
- const char *p = data, *pe;
339
- int64_t to_read;
594
+ int8_t unhex_val;
595
+ const char *p = data;
596
+ const char *header_field_mark = 0;
597
+ const char *header_value_mark = 0;
598
+ const char *url_mark = 0;
599
+ const char *body_mark = 0;
340
600
 
341
- enum state state = (enum state) parser->state;
342
- enum header_states header_state = (enum header_states) parser->header_state;
343
- uint64_t index = parser->index;
344
- uint64_t nread = parser->nread;
601
+ /* We're in an error state. Don't bother doing anything. */
602
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
603
+ return 0;
604
+ }
345
605
 
346
606
  if (len == 0) {
347
- if (state == s_body_identity_eof) {
348
- CALLBACK2(message_complete);
607
+ switch (parser->state) {
608
+ case s_body_identity_eof:
609
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
610
+ * we got paused.
611
+ */
612
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
613
+ return 0;
614
+
615
+ case s_dead:
616
+ case s_start_req_or_res:
617
+ case s_start_res:
618
+ case s_start_req:
619
+ return 0;
620
+
621
+ default:
622
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
623
+ return 1;
349
624
  }
350
- return 0;
351
625
  }
352
626
 
353
- /* technically we could combine all of these (except for url_mark) into one
354
- variable, saving stack space, but it seems more clear to have them
355
- separated. */
356
- const char *header_field_mark = 0;
357
- const char *header_value_mark = 0;
358
- const char *fragment_mark = 0;
359
- const char *query_string_mark = 0;
360
- const char *path_mark = 0;
361
- const char *url_mark = 0;
362
627
 
363
- if (state == s_header_field)
628
+ if (parser->state == s_header_field)
364
629
  header_field_mark = data;
365
- if (state == s_header_value)
630
+ if (parser->state == s_header_value)
366
631
  header_value_mark = data;
367
- if (state == s_req_fragment)
368
- fragment_mark = data;
369
- if (state == s_req_query_string)
370
- query_string_mark = data;
371
- if (state == s_req_path)
372
- path_mark = data;
373
- if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
374
- || state == s_req_schema_slash_slash || state == s_req_port
375
- || state == s_req_query_string_start || state == s_req_query_string
376
- || state == s_req_host
377
- || state == s_req_fragment_start || state == s_req_fragment)
632
+ switch (parser->state) {
633
+ case s_req_path:
634
+ case s_req_schema:
635
+ case s_req_schema_slash:
636
+ case s_req_schema_slash_slash:
637
+ case s_req_server_start:
638
+ case s_req_server:
639
+ case s_req_server_with_at:
640
+ case s_req_query_string_start:
641
+ case s_req_query_string:
642
+ case s_req_fragment_start:
643
+ case s_req_fragment:
378
644
  url_mark = data;
645
+ break;
646
+ }
379
647
 
380
- for (p=data, pe=data+len; p != pe; p++) {
648
+ for (p=data; likely(p != data + len); p++) {
381
649
  ch = *p;
382
650
 
383
- if (PARSING_HEADER(state)) {
384
- ++nread;
651
+ if ( likely(PARSING_HEADER(parser->state)) ) {
652
+ ++parser->nread;
385
653
  /* Buffer overflow attack */
386
- if (nread > HTTP_MAX_HEADER_SIZE) goto error;
654
+ if ( unlikely(parser->nread > HTTP_MAX_HEADER_SIZE) ) {
655
+ SET_ERRNO(HPE_HEADER_OVERFLOW);
656
+ goto error;
657
+ }
387
658
  }
388
659
 
389
- switch (state) {
660
+ reexecute_byte:
661
+ switch (parser->state) {
390
662
 
391
663
  case s_dead:
392
664
  /* this state is used after a 'Connection: close' message
393
665
  * the parser will error out if it reads another message
394
666
  */
667
+ if (ch == CR || ch == LF)
668
+ break;
669
+
670
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
395
671
  goto error;
396
672
 
397
673
  case s_start_req_or_res:
@@ -399,42 +675,46 @@ size_t http_parser_execute (http_parser *parser,
399
675
  if (ch == CR || ch == LF)
400
676
  break;
401
677
  parser->flags = 0;
402
- parser->content_length = -1;
678
+ parser->content_length = ULLONG_MAX;
403
679
 
404
- CALLBACK2(message_begin);
680
+ if (ch == 'H') {
681
+ parser->state = s_res_or_resp_H;
405
682
 
406
- if (ch == 'H')
407
- state = s_res_or_resp_H;
408
- else {
683
+ CALLBACK_NOTIFY(message_begin);
684
+ } else {
409
685
  parser->type = HTTP_REQUEST;
410
- goto start_req_method_assign;
686
+ parser->state = s_start_req;
687
+ goto reexecute_byte;
411
688
  }
689
+
412
690
  break;
413
691
  }
414
692
 
415
693
  case s_res_or_resp_H:
416
694
  if (ch == 'T') {
417
695
  parser->type = HTTP_RESPONSE;
418
- state = s_res_HT;
696
+ parser->state = s_res_HT;
419
697
  } else {
420
- if (ch != 'E') goto error;
698
+ if (ch != 'E') {
699
+ SET_ERRNO(HPE_INVALID_CONSTANT);
700
+ goto error;
701
+ }
702
+
421
703
  parser->type = HTTP_REQUEST;
422
704
  parser->method = HTTP_HEAD;
423
- index = 2;
424
- state = s_req_method;
705
+ parser->index = 2;
706
+ parser->state = s_req_method;
425
707
  }
426
708
  break;
427
709
 
428
710
  case s_start_res:
429
711
  {
430
712
  parser->flags = 0;
431
- parser->content_length = -1;
432
-
433
- CALLBACK2(message_begin);
713
+ parser->content_length = ULLONG_MAX;
434
714
 
435
715
  switch (ch) {
436
716
  case 'H':
437
- state = s_res_H;
717
+ parser->state = s_res_H;
438
718
  break;
439
719
 
440
720
  case CR:
@@ -442,105 +722,133 @@ size_t http_parser_execute (http_parser *parser,
442
722
  break;
443
723
 
444
724
  default:
725
+ SET_ERRNO(HPE_INVALID_CONSTANT);
445
726
  goto error;
446
727
  }
728
+
729
+ CALLBACK_NOTIFY(message_begin);
447
730
  break;
448
731
  }
449
732
 
450
733
  case s_res_H:
451
734
  STRICT_CHECK(ch != 'T');
452
- state = s_res_HT;
735
+ parser->state = s_res_HT;
453
736
  break;
454
737
 
455
738
  case s_res_HT:
456
739
  STRICT_CHECK(ch != 'T');
457
- state = s_res_HTT;
740
+ parser->state = s_res_HTT;
458
741
  break;
459
742
 
460
743
  case s_res_HTT:
461
744
  STRICT_CHECK(ch != 'P');
462
- state = s_res_HTTP;
745
+ parser->state = s_res_HTTP;
463
746
  break;
464
747
 
465
748
  case s_res_HTTP:
466
749
  STRICT_CHECK(ch != '/');
467
- state = s_res_first_http_major;
750
+ parser->state = s_res_first_http_major;
468
751
  break;
469
752
 
470
753
  case s_res_first_http_major:
471
- if (ch < '1' || ch > '9') goto error;
754
+ if (ch < '0' || ch > '9') {
755
+ SET_ERRNO(HPE_INVALID_VERSION);
756
+ goto error;
757
+ }
758
+
472
759
  parser->http_major = ch - '0';
473
- state = s_res_http_major;
760
+ parser->state = s_res_http_major;
474
761
  break;
475
762
 
476
763
  /* major HTTP version or dot */
477
764
  case s_res_http_major:
478
765
  {
479
766
  if (ch == '.') {
480
- state = s_res_first_http_minor;
767
+ parser->state = s_res_first_http_minor;
481
768
  break;
482
769
  }
483
770
 
484
- if (ch < '0' || ch > '9') goto error;
771
+ if (!IS_NUM(ch)) {
772
+ SET_ERRNO(HPE_INVALID_VERSION);
773
+ goto error;
774
+ }
485
775
 
486
776
  parser->http_major *= 10;
487
777
  parser->http_major += ch - '0';
488
778
 
489
- if (parser->http_major > 999) goto error;
779
+ if (parser->http_major > 999) {
780
+ SET_ERRNO(HPE_INVALID_VERSION);
781
+ goto error;
782
+ }
783
+
490
784
  break;
491
785
  }
492
786
 
493
787
  /* first digit of minor HTTP version */
494
788
  case s_res_first_http_minor:
495
- if (ch < '0' || ch > '9') goto error;
789
+ if (!IS_NUM(ch)) {
790
+ SET_ERRNO(HPE_INVALID_VERSION);
791
+ goto error;
792
+ }
793
+
496
794
  parser->http_minor = ch - '0';
497
- state = s_res_http_minor;
795
+ parser->state = s_res_http_minor;
498
796
  break;
499
797
 
500
798
  /* minor HTTP version or end of request line */
501
799
  case s_res_http_minor:
502
800
  {
503
801
  if (ch == ' ') {
504
- state = s_res_first_status_code;
802
+ parser->state = s_res_first_status_code;
505
803
  break;
506
804
  }
507
805
 
508
- if (ch < '0' || ch > '9') goto error;
806
+ if (!IS_NUM(ch)) {
807
+ SET_ERRNO(HPE_INVALID_VERSION);
808
+ goto error;
809
+ }
509
810
 
510
811
  parser->http_minor *= 10;
511
812
  parser->http_minor += ch - '0';
512
813
 
513
- if (parser->http_minor > 999) goto error;
814
+ if (parser->http_minor > 999) {
815
+ SET_ERRNO(HPE_INVALID_VERSION);
816
+ goto error;
817
+ }
818
+
514
819
  break;
515
820
  }
516
821
 
517
822
  case s_res_first_status_code:
518
823
  {
519
- if (ch < '0' || ch > '9') {
824
+ if (!IS_NUM(ch)) {
520
825
  if (ch == ' ') {
521
826
  break;
522
827
  }
828
+
829
+ SET_ERRNO(HPE_INVALID_STATUS);
523
830
  goto error;
524
831
  }
525
832
  parser->status_code = ch - '0';
526
- state = s_res_status_code;
833
+ parser->state = s_res_status_code;
527
834
  break;
528
835
  }
529
836
 
530
837
  case s_res_status_code:
531
838
  {
532
- if (ch < '0' || ch > '9') {
839
+ if (!IS_NUM(ch)) {
533
840
  switch (ch) {
534
841
  case ' ':
535
- state = s_res_status;
842
+ parser->state = s_res_status;
536
843
  break;
537
844
  case CR:
538
- state = s_res_line_almost_done;
845
+ parser->state = s_res_line_almost_done;
539
846
  break;
540
847
  case LF:
541
- state = s_header_field_start;
848
+ parser->state = s_header_field_start;
542
849
  break;
543
850
  default:
851
+ SET_ERRNO(HPE_INVALID_STATUS);
544
852
  goto error;
545
853
  }
546
854
  break;
@@ -549,7 +857,11 @@ size_t http_parser_execute (http_parser *parser,
549
857
  parser->status_code *= 10;
550
858
  parser->status_code += ch - '0';
551
859
 
552
- if (parser->status_code > 999) goto error;
860
+ if (parser->status_code > 999) {
861
+ SET_ERRNO(HPE_INVALID_STATUS);
862
+ goto error;
863
+ }
864
+
553
865
  break;
554
866
  }
555
867
 
@@ -557,19 +869,19 @@ size_t http_parser_execute (http_parser *parser,
557
869
  /* the human readable status. e.g. "NOT FOUND"
558
870
  * we are not humans so just ignore this */
559
871
  if (ch == CR) {
560
- state = s_res_line_almost_done;
872
+ parser->state = s_res_line_almost_done;
561
873
  break;
562
874
  }
563
875
 
564
876
  if (ch == LF) {
565
- state = s_header_field_start;
877
+ parser->state = s_header_field_start;
566
878
  break;
567
879
  }
568
880
  break;
569
881
 
570
882
  case s_res_line_almost_done:
571
883
  STRICT_CHECK(ch != LF);
572
- state = s_header_field_start;
884
+ parser->state = s_header_field_start;
573
885
  break;
574
886
 
575
887
  case s_start_req:
@@ -577,335 +889,177 @@ size_t http_parser_execute (http_parser *parser,
577
889
  if (ch == CR || ch == LF)
578
890
  break;
579
891
  parser->flags = 0;
580
- parser->content_length = -1;
892
+ parser->content_length = ULLONG_MAX;
581
893
 
582
- CALLBACK2(message_begin);
583
-
584
- if (ch < 'A' || 'Z' < ch) goto error;
894
+ if (!IS_ALPHA(ch)) {
895
+ SET_ERRNO(HPE_INVALID_METHOD);
896
+ goto error;
897
+ }
585
898
 
586
- start_req_method_assign:
587
899
  parser->method = (enum http_method) 0;
588
- index = 1;
900
+ parser->index = 1;
589
901
  switch (ch) {
590
902
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
591
903
  case 'D': parser->method = HTTP_DELETE; break;
592
904
  case 'G': parser->method = HTTP_GET; break;
593
905
  case 'H': parser->method = HTTP_HEAD; break;
594
906
  case 'L': parser->method = HTTP_LOCK; break;
595
- case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE */ break;
907
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
908
+ case 'N': parser->method = HTTP_NOTIFY; break;
596
909
  case 'O': parser->method = HTTP_OPTIONS; break;
597
- case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
910
+ case 'P': parser->method = HTTP_POST;
911
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
912
+ break;
598
913
  case 'R': parser->method = HTTP_REPORT; break;
914
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
599
915
  case 'T': parser->method = HTTP_TRACE; break;
600
- case 'U': parser->method = HTTP_UNLOCK; break;
601
- default: goto error;
916
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
917
+ default:
918
+ SET_ERRNO(HPE_INVALID_METHOD);
919
+ goto error;
602
920
  }
603
- state = s_req_method;
921
+ parser->state = s_req_method;
922
+
923
+ CALLBACK_NOTIFY(message_begin);
924
+
604
925
  break;
605
926
  }
606
927
 
607
928
  case s_req_method:
608
929
  {
609
- if (ch == '\0')
930
+ const char *matcher;
931
+ if (ch == '\0') {
932
+ SET_ERRNO(HPE_INVALID_METHOD);
610
933
  goto error;
934
+ }
611
935
 
612
- const char *matcher = method_strings[parser->method];
613
- if (ch == ' ' && matcher[index] == '\0') {
614
- state = s_req_spaces_before_url;
615
- } else if (ch == matcher[index]) {
616
- ; // nada
936
+ matcher = method_strings[parser->method];
937
+ if (ch == ' ' && matcher[parser->index] == '\0') {
938
+ parser->state = s_req_spaces_before_url;
939
+ } else if (ch == matcher[parser->index]) {
940
+ ; /* nada */
617
941
  } else if (parser->method == HTTP_CONNECT) {
618
- if (index == 1 && ch == 'H') {
942
+ if (parser->index == 1 && ch == 'H') {
619
943
  parser->method = HTTP_CHECKOUT;
620
- } else if (index == 2 && ch == 'P') {
944
+ } else if (parser->index == 2 && ch == 'P') {
621
945
  parser->method = HTTP_COPY;
946
+ } else {
947
+ goto error;
622
948
  }
623
949
  } else if (parser->method == HTTP_MKCOL) {
624
- if (index == 1 && ch == 'O') {
950
+ if (parser->index == 1 && ch == 'O') {
625
951
  parser->method = HTTP_MOVE;
626
- } else if (index == 1 && ch == 'E') {
952
+ } else if (parser->index == 1 && ch == 'E') {
627
953
  parser->method = HTTP_MERGE;
628
- } else if (index == 2 && ch == 'A') {
954
+ } else if (parser->index == 1 && ch == '-') {
955
+ parser->method = HTTP_MSEARCH;
956
+ } else if (parser->index == 2 && ch == 'A') {
629
957
  parser->method = HTTP_MKACTIVITY;
958
+ } else {
959
+ goto error;
960
+ }
961
+ } else if (parser->method == HTTP_SUBSCRIBE) {
962
+ if (parser->index == 1 && ch == 'E') {
963
+ parser->method = HTTP_SEARCH;
964
+ } else {
965
+ goto error;
966
+ }
967
+ } else if (parser->index == 1 && parser->method == HTTP_POST) {
968
+ if (ch == 'R') {
969
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
970
+ } else if (ch == 'U') {
971
+ parser->method = HTTP_PUT; /* or HTTP_PURGE */
972
+ } else if (ch == 'A') {
973
+ parser->method = HTTP_PATCH;
974
+ } else {
975
+ goto error;
976
+ }
977
+ } else if (parser->index == 2) {
978
+ if (parser->method == HTTP_PUT) {
979
+ if (ch == 'R') parser->method = HTTP_PURGE;
980
+ } else if (parser->method == HTTP_UNLOCK) {
981
+ if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
630
982
  }
631
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
632
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
633
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
634
- parser->method = HTTP_PUT;
635
- } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
983
+ } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
636
984
  parser->method = HTTP_PROPPATCH;
637
985
  } else {
986
+ SET_ERRNO(HPE_INVALID_METHOD);
638
987
  goto error;
639
988
  }
640
989
 
641
- ++index;
990
+ ++parser->index;
642
991
  break;
643
992
  }
993
+
644
994
  case s_req_spaces_before_url:
645
995
  {
646
996
  if (ch == ' ') break;
647
997
 
648
- if (ch == '/') {
649
- MARK(url);
650
- MARK(path);
651
- state = s_req_path;
652
- break;
998
+ MARK(url);
999
+ if (parser->method == HTTP_CONNECT) {
1000
+ parser->state = s_req_server_start;
653
1001
  }
654
1002
 
655
- c = LOWER(ch);
656
-
657
- if (c >= 'a' && c <= 'z') {
658
- MARK(url);
659
- state = s_req_schema;
660
- break;
1003
+ parser->state = parse_url_char((enum state)parser->state, ch);
1004
+ if (parser->state == s_dead) {
1005
+ SET_ERRNO(HPE_INVALID_URL);
1006
+ goto error;
661
1007
  }
662
1008
 
663
- goto error;
1009
+ break;
664
1010
  }
665
1011
 
666
1012
  case s_req_schema:
667
- {
668
- c = LOWER(ch);
669
-
670
- if (c >= 'a' && c <= 'z') break;
671
-
672
- if (ch == ':') {
673
- state = s_req_schema_slash;
674
- break;
675
- } else if (ch == '.') {
676
- state = s_req_host;
677
- break;
678
- }
679
-
680
- goto error;
681
- }
682
-
683
1013
  case s_req_schema_slash:
684
- STRICT_CHECK(ch != '/');
685
- state = s_req_schema_slash_slash;
686
- break;
687
-
688
1014
  case s_req_schema_slash_slash:
689
- STRICT_CHECK(ch != '/');
690
- state = s_req_host;
691
- break;
692
-
693
- case s_req_host:
694
- {
695
- c = LOWER(ch);
696
- if (c >= 'a' && c <= 'z') break;
697
- if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
698
- switch (ch) {
699
- case ':':
700
- state = s_req_port;
701
- break;
702
- case '/':
703
- MARK(path);
704
- state = s_req_path;
705
- break;
706
- case ' ':
707
- /* The request line looks like:
708
- * "GET http://foo.bar.com HTTP/1.1"
709
- * That is, there is no path.
710
- */
711
- CALLBACK(url);
712
- state = s_req_http_start;
713
- break;
714
- default:
715
- goto error;
716
- }
717
- break;
718
- }
719
-
720
- case s_req_port:
721
- {
722
- if (ch >= '0' && ch <= '9') break;
723
- switch (ch) {
724
- case '/':
725
- MARK(path);
726
- state = s_req_path;
727
- break;
728
- case ' ':
729
- /* The request line looks like:
730
- * "GET http://foo.bar.com:1234 HTTP/1.1"
731
- * That is, there is no path.
732
- */
733
- CALLBACK(url);
734
- state = s_req_http_start;
735
- break;
736
- default:
737
- goto error;
738
- }
739
- break;
740
- }
741
-
742
- case s_req_path:
1015
+ case s_req_server_start:
743
1016
  {
744
- if (normal_url_char[(unsigned char)ch]) break;
745
-
746
1017
  switch (ch) {
1018
+ /* No whitespace allowed here */
747
1019
  case ' ':
748
- CALLBACK(url);
749
- CALLBACK(path);
750
- state = s_req_http_start;
751
- break;
752
1020
  case CR:
753
- CALLBACK(url);
754
- CALLBACK(path);
755
- parser->http_minor = 9;
756
- state = s_req_line_almost_done;
757
- break;
758
1021
  case LF:
759
- CALLBACK(url);
760
- CALLBACK(path);
761
- parser->http_minor = 9;
762
- state = s_header_field_start;
763
- break;
764
- case '?':
765
- CALLBACK(path);
766
- state = s_req_query_string_start;
767
- break;
768
- case '#':
769
- CALLBACK(path);
770
- state = s_req_fragment_start;
771
- break;
772
- default:
1022
+ SET_ERRNO(HPE_INVALID_URL);
773
1023
  goto error;
774
- }
775
- break;
776
- }
777
-
778
- case s_req_query_string_start:
779
- {
780
- if (normal_url_char[(unsigned char)ch]) {
781
- MARK(query_string);
782
- state = s_req_query_string;
783
- break;
784
- }
785
-
786
- switch (ch) {
787
- case '?':
788
- break; // XXX ignore extra '?' ... is this right?
789
- case ' ':
790
- CALLBACK(url);
791
- state = s_req_http_start;
792
- break;
793
- case CR:
794
- CALLBACK(url);
795
- parser->http_minor = 9;
796
- state = s_req_line_almost_done;
797
- break;
798
- case LF:
799
- CALLBACK(url);
800
- parser->http_minor = 9;
801
- state = s_header_field_start;
802
- break;
803
- case '#':
804
- state = s_req_fragment_start;
805
- break;
806
1024
  default:
807
- goto error;
1025
+ parser->state = parse_url_char((enum state)parser->state, ch);
1026
+ if (parser->state == s_dead) {
1027
+ SET_ERRNO(HPE_INVALID_URL);
1028
+ goto error;
1029
+ }
808
1030
  }
809
- break;
810
- }
811
1031
 
812
- case s_req_query_string:
813
- {
814
- if (normal_url_char[(unsigned char)ch]) break;
815
-
816
- switch (ch) {
817
- case '?':
818
- // allow extra '?' in query string
819
- break;
820
- case ' ':
821
- CALLBACK(url);
822
- CALLBACK(query_string);
823
- state = s_req_http_start;
824
- break;
825
- case CR:
826
- CALLBACK(url);
827
- CALLBACK(query_string);
828
- parser->http_minor = 9;
829
- state = s_req_line_almost_done;
830
- break;
831
- case LF:
832
- CALLBACK(url);
833
- CALLBACK(query_string);
834
- parser->http_minor = 9;
835
- state = s_header_field_start;
836
- break;
837
- case '#':
838
- CALLBACK(query_string);
839
- state = s_req_fragment_start;
840
- break;
841
- default:
842
- goto error;
843
- }
844
1032
  break;
845
1033
  }
846
1034
 
1035
+ case s_req_server:
1036
+ case s_req_server_with_at:
1037
+ case s_req_path:
1038
+ case s_req_query_string_start:
1039
+ case s_req_query_string:
847
1040
  case s_req_fragment_start:
848
- {
849
- if (normal_url_char[(unsigned char)ch]) {
850
- MARK(fragment);
851
- state = s_req_fragment;
852
- break;
853
- }
854
-
855
- switch (ch) {
856
- case ' ':
857
- CALLBACK(url);
858
- state = s_req_http_start;
859
- break;
860
- case CR:
861
- CALLBACK(url);
862
- parser->http_minor = 9;
863
- state = s_req_line_almost_done;
864
- break;
865
- case LF:
866
- CALLBACK(url);
867
- parser->http_minor = 9;
868
- state = s_header_field_start;
869
- break;
870
- case '?':
871
- MARK(fragment);
872
- state = s_req_fragment;
873
- break;
874
- case '#':
875
- break;
876
- default:
877
- goto error;
878
- }
879
- break;
880
- }
881
-
882
1041
  case s_req_fragment:
883
1042
  {
884
- if (normal_url_char[(unsigned char)ch]) break;
885
-
886
1043
  switch (ch) {
887
1044
  case ' ':
888
- CALLBACK(url);
889
- CALLBACK(fragment);
890
- state = s_req_http_start;
1045
+ parser->state = s_req_http_start;
1046
+ CALLBACK_DATA(url);
891
1047
  break;
892
1048
  case CR:
893
- CALLBACK(url);
894
- CALLBACK(fragment);
895
- parser->http_minor = 9;
896
- state = s_req_line_almost_done;
897
- break;
898
1049
  case LF:
899
- CALLBACK(url);
900
- CALLBACK(fragment);
1050
+ parser->http_major = 0;
901
1051
  parser->http_minor = 9;
902
- state = s_header_field_start;
903
- break;
904
- case '?':
905
- case '#':
1052
+ parser->state = (ch == CR) ?
1053
+ s_req_line_almost_done :
1054
+ s_header_field_start;
1055
+ CALLBACK_DATA(url);
906
1056
  break;
907
1057
  default:
908
- goto error;
1058
+ parser->state = parse_url_char((enum state)parser->state, ch);
1059
+ if (parser->state == s_dead) {
1060
+ SET_ERRNO(HPE_INVALID_URL);
1061
+ goto error;
1062
+ }
909
1063
  }
910
1064
  break;
911
1065
  }
@@ -913,143 +1067,170 @@ size_t http_parser_execute (http_parser *parser,
913
1067
  case s_req_http_start:
914
1068
  switch (ch) {
915
1069
  case 'H':
916
- state = s_req_http_H;
1070
+ parser->state = s_req_http_H;
917
1071
  break;
918
1072
  case ' ':
919
1073
  break;
920
1074
  default:
1075
+ SET_ERRNO(HPE_INVALID_CONSTANT);
921
1076
  goto error;
922
1077
  }
923
1078
  break;
924
1079
 
925
1080
  case s_req_http_H:
926
1081
  STRICT_CHECK(ch != 'T');
927
- state = s_req_http_HT;
1082
+ parser->state = s_req_http_HT;
928
1083
  break;
929
1084
 
930
1085
  case s_req_http_HT:
931
1086
  STRICT_CHECK(ch != 'T');
932
- state = s_req_http_HTT;
1087
+ parser->state = s_req_http_HTT;
933
1088
  break;
934
1089
 
935
1090
  case s_req_http_HTT:
936
1091
  STRICT_CHECK(ch != 'P');
937
- state = s_req_http_HTTP;
1092
+ parser->state = s_req_http_HTTP;
938
1093
  break;
939
1094
 
940
1095
  case s_req_http_HTTP:
941
1096
  STRICT_CHECK(ch != '/');
942
- state = s_req_first_http_major;
1097
+ parser->state = s_req_first_http_major;
943
1098
  break;
944
1099
 
945
1100
  /* first digit of major HTTP version */
946
1101
  case s_req_first_http_major:
947
- if (ch < '1' || ch > '9') goto error;
1102
+ if (ch < '1' || ch > '9') {
1103
+ SET_ERRNO(HPE_INVALID_VERSION);
1104
+ goto error;
1105
+ }
1106
+
948
1107
  parser->http_major = ch - '0';
949
- state = s_req_http_major;
1108
+ parser->state = s_req_http_major;
950
1109
  break;
951
1110
 
952
1111
  /* major HTTP version or dot */
953
1112
  case s_req_http_major:
954
1113
  {
955
1114
  if (ch == '.') {
956
- state = s_req_first_http_minor;
1115
+ parser->state = s_req_first_http_minor;
957
1116
  break;
958
1117
  }
959
1118
 
960
- if (ch < '0' || ch > '9') goto error;
1119
+ if (!IS_NUM(ch)) {
1120
+ SET_ERRNO(HPE_INVALID_VERSION);
1121
+ goto error;
1122
+ }
961
1123
 
962
1124
  parser->http_major *= 10;
963
1125
  parser->http_major += ch - '0';
964
1126
 
965
- if (parser->http_major > 999) goto error;
1127
+ if (parser->http_major > 999) {
1128
+ SET_ERRNO(HPE_INVALID_VERSION);
1129
+ goto error;
1130
+ }
1131
+
966
1132
  break;
967
1133
  }
968
1134
 
969
1135
  /* first digit of minor HTTP version */
970
1136
  case s_req_first_http_minor:
971
- if (ch < '0' || ch > '9') goto error;
1137
+ if (!IS_NUM(ch)) {
1138
+ SET_ERRNO(HPE_INVALID_VERSION);
1139
+ goto error;
1140
+ }
1141
+
972
1142
  parser->http_minor = ch - '0';
973
- state = s_req_http_minor;
1143
+ parser->state = s_req_http_minor;
974
1144
  break;
975
1145
 
976
1146
  /* minor HTTP version or end of request line */
977
1147
  case s_req_http_minor:
978
1148
  {
979
1149
  if (ch == CR) {
980
- state = s_req_line_almost_done;
1150
+ parser->state = s_req_line_almost_done;
981
1151
  break;
982
1152
  }
983
1153
 
984
1154
  if (ch == LF) {
985
- state = s_header_field_start;
1155
+ parser->state = s_header_field_start;
986
1156
  break;
987
1157
  }
988
1158
 
989
1159
  /* XXX allow spaces after digit? */
990
1160
 
991
- if (ch < '0' || ch > '9') goto error;
1161
+ if (!IS_NUM(ch)) {
1162
+ SET_ERRNO(HPE_INVALID_VERSION);
1163
+ goto error;
1164
+ }
992
1165
 
993
1166
  parser->http_minor *= 10;
994
1167
  parser->http_minor += ch - '0';
995
1168
 
996
- if (parser->http_minor > 999) goto error;
1169
+ if (parser->http_minor > 999) {
1170
+ SET_ERRNO(HPE_INVALID_VERSION);
1171
+ goto error;
1172
+ }
1173
+
997
1174
  break;
998
1175
  }
999
1176
 
1000
1177
  /* end of request line */
1001
1178
  case s_req_line_almost_done:
1002
1179
  {
1003
- if (ch != LF) goto error;
1004
- state = s_header_field_start;
1180
+ if (ch != LF) {
1181
+ SET_ERRNO(HPE_LF_EXPECTED);
1182
+ goto error;
1183
+ }
1184
+
1185
+ parser->state = s_header_field_start;
1005
1186
  break;
1006
1187
  }
1007
1188
 
1008
1189
  case s_header_field_start:
1009
1190
  {
1010
1191
  if (ch == CR) {
1011
- state = s_headers_almost_done;
1192
+ parser->state = s_headers_almost_done;
1012
1193
  break;
1013
1194
  }
1014
1195
 
1015
1196
  if (ch == LF) {
1016
1197
  /* they might be just sending \n instead of \r\n so this would be
1017
1198
  * the second \n to denote the end of headers*/
1018
- state = s_headers_almost_done;
1019
- goto headers_almost_done;
1199
+ parser->state = s_headers_almost_done;
1200
+ goto reexecute_byte;
1020
1201
  }
1021
1202
 
1022
- if (parser->maybe_ml && (ch == ' '|| ch == '\t')) goto s_header_value_start_;
1023
- c = LOWER(ch);
1024
-
1025
- if (c < 'a' || 'z' < c) goto error;
1026
-
1027
- parser->maybe_ml = 0;
1203
+ c = TOKEN(ch);
1204
+
1205
+ if (!c) {
1206
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1207
+ goto error;
1208
+ }
1028
1209
 
1029
1210
  MARK(header_field);
1030
1211
 
1031
- index = 0;
1032
- state = s_header_field;
1212
+ parser->index = 0;
1213
+ parser->state = s_header_field;
1033
1214
 
1034
1215
  switch (c) {
1035
1216
  case 'c':
1036
- header_state = h_C;
1217
+ parser->header_state = h_C;
1037
1218
  break;
1038
1219
 
1039
1220
  case 'p':
1040
- header_state = h_matching_proxy_connection;
1221
+ parser->header_state = h_matching_proxy_connection;
1041
1222
  break;
1042
1223
 
1043
1224
  case 't':
1044
- header_state = h_matching_transfer_encoding;
1225
+ parser->header_state = h_matching_transfer_encoding;
1045
1226
  break;
1046
1227
 
1047
1228
  case 'u':
1048
- header_state = h_matching_upgrade;
1229
+ parser->header_state = h_matching_upgrade;
1049
1230
  break;
1050
1231
 
1051
1232
  default:
1052
- header_state = h_general;
1233
+ parser->header_state = h_general;
1053
1234
  break;
1054
1235
  }
1055
1236
  break;
@@ -1057,34 +1238,34 @@ size_t http_parser_execute (http_parser *parser,
1057
1238
 
1058
1239
  case s_header_field:
1059
1240
  {
1060
- c = acceptable_header[(unsigned char)ch];
1241
+ c = TOKEN(ch);
1061
1242
 
1062
1243
  if (c) {
1063
- switch (header_state) {
1244
+ switch (parser->header_state) {
1064
1245
  case h_general:
1065
1246
  break;
1066
1247
 
1067
1248
  case h_C:
1068
- index++;
1069
- header_state = (c == 'o' ? h_CO : h_general);
1249
+ parser->index++;
1250
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1070
1251
  break;
1071
1252
 
1072
1253
  case h_CO:
1073
- index++;
1074
- header_state = (c == 'n' ? h_CON : h_general);
1254
+ parser->index++;
1255
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1075
1256
  break;
1076
1257
 
1077
1258
  case h_CON:
1078
- index++;
1259
+ parser->index++;
1079
1260
  switch (c) {
1080
1261
  case 'n':
1081
- header_state = h_matching_connection;
1262
+ parser->header_state = h_matching_connection;
1082
1263
  break;
1083
1264
  case 't':
1084
- header_state = h_matching_content_length;
1265
+ parser->header_state = h_matching_content_length;
1085
1266
  break;
1086
1267
  default:
1087
- header_state = h_general;
1268
+ parser->header_state = h_general;
1088
1269
  break;
1089
1270
  }
1090
1271
  break;
@@ -1092,60 +1273,60 @@ size_t http_parser_execute (http_parser *parser,
1092
1273
  /* connection */
1093
1274
 
1094
1275
  case h_matching_connection:
1095
- index++;
1096
- if (index > sizeof(CONNECTION)-1
1097
- || c != CONNECTION[index]) {
1098
- header_state = h_general;
1099
- } else if (index == sizeof(CONNECTION)-2) {
1100
- header_state = h_connection;
1276
+ parser->index++;
1277
+ if (parser->index > sizeof(CONNECTION)-1
1278
+ || c != CONNECTION[parser->index]) {
1279
+ parser->header_state = h_general;
1280
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1281
+ parser->header_state = h_connection;
1101
1282
  }
1102
1283
  break;
1103
1284
 
1104
1285
  /* proxy-connection */
1105
1286
 
1106
1287
  case h_matching_proxy_connection:
1107
- index++;
1108
- if (index > sizeof(PROXY_CONNECTION)-1
1109
- || c != PROXY_CONNECTION[index]) {
1110
- header_state = h_general;
1111
- } else if (index == sizeof(PROXY_CONNECTION)-2) {
1112
- header_state = h_connection;
1288
+ parser->index++;
1289
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1290
+ || c != PROXY_CONNECTION[parser->index]) {
1291
+ parser->header_state = h_general;
1292
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1293
+ parser->header_state = h_connection;
1113
1294
  }
1114
1295
  break;
1115
1296
 
1116
1297
  /* content-length */
1117
1298
 
1118
1299
  case h_matching_content_length:
1119
- index++;
1120
- if (index > sizeof(CONTENT_LENGTH)-1
1121
- || c != CONTENT_LENGTH[index]) {
1122
- header_state = h_general;
1123
- } else if (index == sizeof(CONTENT_LENGTH)-2) {
1124
- header_state = h_content_length;
1300
+ parser->index++;
1301
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1302
+ || c != CONTENT_LENGTH[parser->index]) {
1303
+ parser->header_state = h_general;
1304
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1305
+ parser->header_state = h_content_length;
1125
1306
  }
1126
1307
  break;
1127
1308
 
1128
1309
  /* transfer-encoding */
1129
1310
 
1130
1311
  case h_matching_transfer_encoding:
1131
- index++;
1132
- if (index > sizeof(TRANSFER_ENCODING)-1
1133
- || c != TRANSFER_ENCODING[index]) {
1134
- header_state = h_general;
1135
- } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1136
- header_state = h_transfer_encoding;
1312
+ parser->index++;
1313
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1314
+ || c != TRANSFER_ENCODING[parser->index]) {
1315
+ parser->header_state = h_general;
1316
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1317
+ parser->header_state = h_transfer_encoding;
1137
1318
  }
1138
1319
  break;
1139
1320
 
1140
1321
  /* upgrade */
1141
1322
 
1142
1323
  case h_matching_upgrade:
1143
- index++;
1144
- if (index > sizeof(UPGRADE)-1
1145
- || c != UPGRADE[index]) {
1146
- header_state = h_general;
1147
- } else if (index == sizeof(UPGRADE)-2) {
1148
- header_state = h_upgrade;
1324
+ parser->index++;
1325
+ if (parser->index > sizeof(UPGRADE)-1
1326
+ || c != UPGRADE[parser->index]) {
1327
+ parser->header_state = h_general;
1328
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1329
+ parser->header_state = h_upgrade;
1149
1330
  }
1150
1331
  break;
1151
1332
 
@@ -1153,7 +1334,7 @@ size_t http_parser_execute (http_parser *parser,
1153
1334
  case h_content_length:
1154
1335
  case h_transfer_encoding:
1155
1336
  case h_upgrade:
1156
- if (ch != ' ') header_state = h_general;
1337
+ if (ch != ' ') parser->header_state = h_general;
1157
1338
  break;
1158
1339
 
1159
1340
  default:
@@ -1164,92 +1345,89 @@ size_t http_parser_execute (http_parser *parser,
1164
1345
  }
1165
1346
 
1166
1347
  if (ch == ':') {
1167
- CALLBACK(header_field);
1168
- state = s_header_value_start;
1348
+ parser->state = s_header_value_start;
1349
+ CALLBACK_DATA(header_field);
1169
1350
  break;
1170
1351
  }
1171
1352
 
1172
1353
  if (ch == CR) {
1173
- state = s_header_almost_done;
1174
- CALLBACK(header_field);
1354
+ parser->state = s_header_almost_done;
1355
+ CALLBACK_DATA(header_field);
1175
1356
  break;
1176
1357
  }
1177
1358
 
1178
1359
  if (ch == LF) {
1179
- CALLBACK(header_field);
1180
- state = s_header_field_start;
1360
+ parser->state = s_header_field_start;
1361
+ CALLBACK_DATA(header_field);
1181
1362
  break;
1182
1363
  }
1183
1364
 
1365
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1184
1366
  goto error;
1185
1367
  }
1186
1368
 
1187
1369
  case s_header_value_start:
1188
- s_header_value_start_:
1189
1370
  {
1190
- if (!parser->maybe_ml && (ch == ' ' || ch == '\t')) break;
1371
+ if (ch == ' ' || ch == '\t') break;
1191
1372
 
1192
1373
  MARK(header_value);
1193
1374
 
1194
- state = s_header_value;
1195
- index = 0;
1196
-
1197
- c = acceptable_header[(unsigned char)ch];
1375
+ parser->state = s_header_value;
1376
+ parser->index = 0;
1198
1377
 
1199
- if (!c) {
1200
- if (ch == CR) {
1201
- CALLBACK(header_value);
1202
- header_state = h_general;
1203
- state = s_header_almost_done;
1204
- parser->maybe_ml = 1;
1205
- break;
1206
- }
1207
-
1208
- if (ch == LF) {
1209
- CALLBACK(header_value);
1210
- state = s_header_field_start;
1211
- parser->maybe_ml = 1;
1212
- break;
1213
- }
1378
+ if (ch == CR) {
1379
+ parser->header_state = h_general;
1380
+ parser->state = s_header_almost_done;
1381
+ CALLBACK_DATA(header_value);
1382
+ break;
1383
+ }
1214
1384
 
1215
- header_state = h_general;
1385
+ if (ch == LF) {
1386
+ parser->state = s_header_field_start;
1387
+ CALLBACK_DATA(header_value);
1216
1388
  break;
1217
1389
  }
1218
1390
 
1219
- switch (header_state) {
1391
+ c = LOWER(ch);
1392
+
1393
+ switch (parser->header_state) {
1220
1394
  case h_upgrade:
1221
1395
  parser->flags |= F_UPGRADE;
1222
- header_state = h_general;
1396
+ parser->header_state = h_general;
1223
1397
  break;
1224
1398
 
1225
1399
  case h_transfer_encoding:
1226
1400
  /* looking for 'Transfer-Encoding: chunked' */
1227
1401
  if ('c' == c) {
1228
- header_state = h_matching_transfer_encoding_chunked;
1402
+ parser->header_state = h_matching_transfer_encoding_chunked;
1229
1403
  } else {
1230
- header_state = h_general;
1404
+ parser->header_state = h_general;
1231
1405
  }
1232
1406
  break;
1233
1407
 
1234
1408
  case h_content_length:
1235
- if (ch < '0' || ch > '9') goto error;
1409
+ if (!IS_NUM(ch)) {
1410
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1411
+ goto error;
1412
+ }
1413
+
1236
1414
  parser->content_length = ch - '0';
1237
1415
  break;
1238
1416
 
1239
1417
  case h_connection:
1240
1418
  /* looking for 'Connection: keep-alive' */
1241
1419
  if (c == 'k') {
1242
- header_state = h_matching_connection_keep_alive;
1420
+ parser->header_state = h_matching_connection_keep_alive;
1243
1421
  /* looking for 'Connection: close' */
1244
1422
  } else if (c == 'c') {
1245
- header_state = h_matching_connection_close;
1423
+ parser->header_state = h_matching_connection_close;
1246
1424
  } else {
1247
- header_state = h_general;
1425
+ parser->header_state = h_general;
1248
1426
  }
1249
1427
  break;
1250
1428
 
1251
1429
  default:
1252
- header_state = h_general;
1430
+ parser->header_state = h_general;
1253
1431
  break;
1254
1432
  }
1255
1433
  break;
@@ -1257,30 +1435,22 @@ size_t http_parser_execute (http_parser *parser,
1257
1435
 
1258
1436
  case s_header_value:
1259
1437
  {
1260
- c = acceptable_header[(unsigned char)ch];
1261
-
1262
- if (!c) {
1263
- if (ch == CR) {
1264
- CALLBACK(header_value);
1265
- state = s_header_almost_done;
1266
- if(header_state == h_general){
1267
- parser->maybe_ml = 1;
1268
- }
1269
-
1270
- break;
1271
- }
1272
1438
 
1273
- if (ch == LF) {
1274
- CALLBACK(header_value);
1275
- if(header_state == h_general){
1276
- parser->maybe_ml = 1;
1277
- }
1278
- goto header_almost_done;
1279
- }
1439
+ if (ch == CR) {
1440
+ parser->state = s_header_almost_done;
1441
+ CALLBACK_DATA(header_value);
1280
1442
  break;
1281
1443
  }
1282
1444
 
1283
- switch (header_state) {
1445
+ if (ch == LF) {
1446
+ parser->state = s_header_almost_done;
1447
+ CALLBACK_DATA_NOADVANCE(header_value);
1448
+ goto reexecute_byte;
1449
+ }
1450
+
1451
+ c = LOWER(ch);
1452
+
1453
+ switch (parser->header_state) {
1284
1454
  case h_general:
1285
1455
  break;
1286
1456
 
@@ -1290,65 +1460,83 @@ size_t http_parser_execute (http_parser *parser,
1290
1460
  break;
1291
1461
 
1292
1462
  case h_content_length:
1293
- if (ch < '0' || ch > '9') goto error;
1294
- parser->content_length *= 10;
1295
- parser->content_length += ch - '0';
1463
+ {
1464
+ uint64_t t;
1465
+
1466
+ if (ch == ' ') break;
1467
+
1468
+ if (!IS_NUM(ch)) {
1469
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1470
+ goto error;
1471
+ }
1472
+
1473
+ t = parser->content_length;
1474
+ t *= 10;
1475
+ t += ch - '0';
1476
+
1477
+ /* Overflow? */
1478
+ if (t < parser->content_length || t == ULLONG_MAX) {
1479
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1480
+ goto error;
1481
+ }
1482
+
1483
+ parser->content_length = t;
1296
1484
  break;
1485
+ }
1297
1486
 
1298
1487
  /* Transfer-Encoding: chunked */
1299
1488
  case h_matching_transfer_encoding_chunked:
1300
- index++;
1301
- if (index > sizeof(CHUNKED)-1
1302
- || c != CHUNKED[index]) {
1303
- header_state = h_general;
1304
- } else if (index == sizeof(CHUNKED)-2) {
1305
- header_state = h_transfer_encoding_chunked;
1489
+ parser->index++;
1490
+ if (parser->index > sizeof(CHUNKED)-1
1491
+ || c != CHUNKED[parser->index]) {
1492
+ parser->header_state = h_general;
1493
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1494
+ parser->header_state = h_transfer_encoding_chunked;
1306
1495
  }
1307
1496
  break;
1308
1497
 
1309
1498
  /* looking for 'Connection: keep-alive' */
1310
1499
  case h_matching_connection_keep_alive:
1311
- index++;
1312
- if (index > sizeof(KEEP_ALIVE)-1
1313
- || c != KEEP_ALIVE[index]) {
1314
- header_state = h_general;
1315
- } else if (index == sizeof(KEEP_ALIVE)-2) {
1316
- header_state = h_connection_keep_alive;
1500
+ parser->index++;
1501
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1502
+ || c != KEEP_ALIVE[parser->index]) {
1503
+ parser->header_state = h_general;
1504
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1505
+ parser->header_state = h_connection_keep_alive;
1317
1506
  }
1318
1507
  break;
1319
1508
 
1320
1509
  /* looking for 'Connection: close' */
1321
1510
  case h_matching_connection_close:
1322
- index++;
1323
- if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1324
- header_state = h_general;
1325
- } else if (index == sizeof(CLOSE)-2) {
1326
- header_state = h_connection_close;
1511
+ parser->index++;
1512
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1513
+ parser->header_state = h_general;
1514
+ } else if (parser->index == sizeof(CLOSE)-2) {
1515
+ parser->header_state = h_connection_close;
1327
1516
  }
1328
1517
  break;
1329
1518
 
1330
1519
  case h_transfer_encoding_chunked:
1331
1520
  case h_connection_keep_alive:
1332
1521
  case h_connection_close:
1333
- if (ch != ' ') header_state = h_general;
1522
+ if (ch != ' ') parser->header_state = h_general;
1334
1523
  break;
1335
1524
 
1336
1525
  default:
1337
- state = s_header_value;
1338
- header_state = h_general;
1526
+ parser->state = s_header_value;
1527
+ parser->header_state = h_general;
1339
1528
  break;
1340
1529
  }
1341
1530
  break;
1342
1531
  }
1343
1532
 
1344
1533
  case s_header_almost_done:
1345
- header_almost_done:
1346
1534
  {
1347
1535
  STRICT_CHECK(ch != LF);
1348
1536
 
1349
- state = s_header_field_start;
1537
+ parser->state = s_header_value_lws;
1350
1538
 
1351
- switch (header_state) {
1539
+ switch (parser->header_state) {
1352
1540
  case h_connection_keep_alive:
1353
1541
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
1354
1542
  break;
@@ -1361,33 +1549,47 @@ size_t http_parser_execute (http_parser *parser,
1361
1549
  default:
1362
1550
  break;
1363
1551
  }
1552
+
1553
+ break;
1554
+ }
1555
+
1556
+ case s_header_value_lws:
1557
+ {
1558
+ if (ch == ' ' || ch == '\t')
1559
+ parser->state = s_header_value_start;
1560
+ else
1561
+ {
1562
+ parser->state = s_header_field_start;
1563
+ goto reexecute_byte;
1564
+ }
1364
1565
  break;
1365
1566
  }
1366
1567
 
1367
1568
  case s_headers_almost_done:
1368
- headers_almost_done:
1369
1569
  {
1370
- parser->maybe_ml = 0;
1371
1570
  STRICT_CHECK(ch != LF);
1372
1571
 
1373
1572
  if (parser->flags & F_TRAILING) {
1374
1573
  /* End of a chunked request */
1375
- CALLBACK2(message_complete);
1376
- state = NEW_MESSAGE();
1574
+ parser->state = NEW_MESSAGE();
1575
+ CALLBACK_NOTIFY(message_complete);
1377
1576
  break;
1378
1577
  }
1379
1578
 
1380
- nread = 0;
1579
+ parser->state = s_headers_done;
1381
1580
 
1382
- if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1383
- parser->upgrade = 1;
1384
- }
1581
+ /* Set this here so that on_headers_complete() callbacks can see it */
1582
+ parser->upgrade =
1583
+ (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1385
1584
 
1386
1585
  /* Here we call the headers_complete callback. This is somewhat
1387
1586
  * different than other callbacks because if the user returns 1, we
1388
1587
  * will interpret that as saying that this message has no body. This
1389
1588
  * is needed for the annoying case of recieving a response to a HEAD
1390
1589
  * request.
1590
+ *
1591
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1592
+ * we have to simulate it by handling a change in errno below.
1391
1593
  */
1392
1594
  if (settings->on_headers_complete) {
1393
1595
  switch (settings->on_headers_complete(parser)) {
@@ -1399,38 +1601,54 @@ size_t http_parser_execute (http_parser *parser,
1399
1601
  break;
1400
1602
 
1401
1603
  default:
1604
+ SET_ERRNO(HPE_CB_headers_complete);
1402
1605
  return p - data; /* Error */
1403
1606
  }
1404
1607
  }
1405
1608
 
1406
- // Exit, the rest of the connect is in a different protocol.
1609
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1610
+ return p - data;
1611
+ }
1612
+
1613
+ goto reexecute_byte;
1614
+ }
1615
+
1616
+ case s_headers_done:
1617
+ {
1618
+ STRICT_CHECK(ch != LF);
1619
+
1620
+ parser->nread = 0;
1621
+
1622
+ /* Exit, the rest of the connect is in a different protocol. */
1407
1623
  if (parser->upgrade) {
1408
- CALLBACK2(message_complete);
1409
- return (p - data);
1624
+ parser->state = NEW_MESSAGE();
1625
+ CALLBACK_NOTIFY(message_complete);
1626
+ return (p - data) + 1;
1410
1627
  }
1411
1628
 
1412
1629
  if (parser->flags & F_SKIPBODY) {
1413
- CALLBACK2(message_complete);
1414
- state = NEW_MESSAGE();
1630
+ parser->state = NEW_MESSAGE();
1631
+ CALLBACK_NOTIFY(message_complete);
1415
1632
  } else if (parser->flags & F_CHUNKED) {
1416
1633
  /* chunked encoding - ignore Content-Length header */
1417
- state = s_chunk_size_start;
1634
+ parser->state = s_chunk_size_start;
1418
1635
  } else {
1419
1636
  if (parser->content_length == 0) {
1420
1637
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1421
- CALLBACK2(message_complete);
1422
- state = NEW_MESSAGE();
1423
- } else if (parser->content_length > 0) {
1638
+ parser->state = NEW_MESSAGE();
1639
+ CALLBACK_NOTIFY(message_complete);
1640
+ } else if (parser->content_length != ULLONG_MAX) {
1424
1641
  /* Content-Length header given and non-zero */
1425
- state = s_body_identity;
1642
+ parser->state = s_body_identity;
1426
1643
  } else {
1427
- if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1644
+ if (parser->type == HTTP_REQUEST ||
1645
+ !http_message_needs_eof(parser)) {
1428
1646
  /* Assume content-length 0 - read the next */
1429
- CALLBACK2(message_complete);
1430
- state = NEW_MESSAGE();
1647
+ parser->state = NEW_MESSAGE();
1648
+ CALLBACK_NOTIFY(message_complete);
1431
1649
  } else {
1432
1650
  /* Read body until EOF */
1433
- state = s_body_identity_eof;
1651
+ parser->state = s_body_identity_eof;
1434
1652
  }
1435
1653
  }
1436
1654
  }
@@ -1439,59 +1657,103 @@ size_t http_parser_execute (http_parser *parser,
1439
1657
  }
1440
1658
 
1441
1659
  case s_body_identity:
1442
- to_read = MIN(pe - p, (int64_t)parser->content_length);
1443
- if (to_read > 0) {
1444
- if (settings->on_body) settings->on_body(parser, p, to_read, 0);
1445
- p += to_read - 1;
1446
- parser->content_length -= to_read;
1447
- if (parser->content_length == 0) {
1448
- CALLBACK2(message_complete);
1449
- state = NEW_MESSAGE();
1450
- }
1660
+ {
1661
+ uint64_t to_read = MIN(parser->content_length,
1662
+ (uint64_t) ((data + len) - p));
1663
+
1664
+ assert(parser->content_length != 0
1665
+ && parser->content_length != ULLONG_MAX);
1666
+
1667
+ /* The difference between advancing content_length and p is because
1668
+ * the latter will automaticaly advance on the next loop iteration.
1669
+ * Further, if content_length ends up at 0, we want to see the last
1670
+ * byte again for our message complete callback.
1671
+ */
1672
+ MARK(body);
1673
+ parser->content_length -= to_read;
1674
+ p += to_read - 1;
1675
+
1676
+ if (parser->content_length == 0) {
1677
+ parser->state = s_message_done;
1678
+
1679
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1680
+ *
1681
+ * The alternative to doing this is to wait for the next byte to
1682
+ * trigger the data callback, just as in every other case. The
1683
+ * problem with this is that this makes it difficult for the test
1684
+ * harness to distinguish between complete-on-EOF and
1685
+ * complete-on-length. It's not clear that this distinction is
1686
+ * important for applications, but let's keep it for now.
1687
+ */
1688
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1689
+ goto reexecute_byte;
1451
1690
  }
1691
+
1452
1692
  break;
1693
+ }
1453
1694
 
1454
1695
  /* read until EOF */
1455
1696
  case s_body_identity_eof:
1456
- to_read = pe - p;
1457
- if (to_read > 0) {
1458
- if (settings->on_body) settings->on_body(parser, p, to_read, 0);
1459
- p += to_read - 1;
1460
- }
1697
+ MARK(body);
1698
+ p = data + len - 1;
1699
+
1700
+ break;
1701
+
1702
+ case s_message_done:
1703
+ parser->state = NEW_MESSAGE();
1704
+ CALLBACK_NOTIFY(message_complete);
1461
1705
  break;
1462
1706
 
1463
1707
  case s_chunk_size_start:
1464
1708
  {
1709
+ assert(parser->nread == 1);
1465
1710
  assert(parser->flags & F_CHUNKED);
1466
1711
 
1467
- c = unhex[(unsigned char)ch];
1468
- if (c == -1) goto error;
1469
- parser->content_length = c;
1470
- state = s_chunk_size;
1712
+ unhex_val = unhex[(unsigned char)ch];
1713
+ if (unhex_val == -1) {
1714
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1715
+ goto error;
1716
+ }
1717
+
1718
+ parser->content_length = unhex_val;
1719
+ parser->state = s_chunk_size;
1471
1720
  break;
1472
1721
  }
1473
1722
 
1474
1723
  case s_chunk_size:
1475
1724
  {
1725
+ uint64_t t;
1726
+
1476
1727
  assert(parser->flags & F_CHUNKED);
1477
1728
 
1478
1729
  if (ch == CR) {
1479
- state = s_chunk_size_almost_done;
1730
+ parser->state = s_chunk_size_almost_done;
1480
1731
  break;
1481
1732
  }
1482
1733
 
1483
- c = unhex[(unsigned char)ch];
1734
+ unhex_val = unhex[(unsigned char)ch];
1484
1735
 
1485
- if (c == -1) {
1736
+ if (unhex_val == -1) {
1486
1737
  if (ch == ';' || ch == ' ') {
1487
- state = s_chunk_parameters;
1738
+ parser->state = s_chunk_parameters;
1488
1739
  break;
1489
1740
  }
1741
+
1742
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1743
+ goto error;
1744
+ }
1745
+
1746
+ t = parser->content_length;
1747
+ t *= 16;
1748
+ t += unhex_val;
1749
+
1750
+ /* Overflow? */
1751
+ if (t < parser->content_length || t == ULLONG_MAX) {
1752
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1490
1753
  goto error;
1491
1754
  }
1492
1755
 
1493
- parser->content_length *= 16;
1494
- parser->content_length += c;
1756
+ parser->content_length = t;
1495
1757
  break;
1496
1758
  }
1497
1759
 
@@ -1500,7 +1762,7 @@ size_t http_parser_execute (http_parser *parser,
1500
1762
  assert(parser->flags & F_CHUNKED);
1501
1763
  /* just ignore this shit. TODO check for overflow */
1502
1764
  if (ch == CR) {
1503
- state = s_chunk_size_almost_done;
1765
+ parser->state = s_chunk_size_almost_done;
1504
1766
  break;
1505
1767
  }
1506
1768
  break;
@@ -1511,105 +1773,418 @@ size_t http_parser_execute (http_parser *parser,
1511
1773
  assert(parser->flags & F_CHUNKED);
1512
1774
  STRICT_CHECK(ch != LF);
1513
1775
 
1776
+ parser->nread = 0;
1777
+
1514
1778
  if (parser->content_length == 0) {
1515
1779
  parser->flags |= F_TRAILING;
1516
- state = s_header_field_start;
1780
+ parser->state = s_header_field_start;
1517
1781
  } else {
1518
- state = s_chunk_data;
1782
+ parser->state = s_chunk_data;
1519
1783
  }
1520
1784
  break;
1521
1785
  }
1522
1786
 
1523
1787
  case s_chunk_data:
1524
1788
  {
1525
- assert(parser->flags & F_CHUNKED);
1789
+ uint64_t to_read = MIN(parser->content_length,
1790
+ (uint64_t) ((data + len) - p));
1526
1791
 
1527
- to_read = MIN(pe - p, (int64_t)(parser->content_length));
1792
+ assert(parser->flags & F_CHUNKED);
1793
+ assert(parser->content_length != 0
1794
+ && parser->content_length != ULLONG_MAX);
1528
1795
 
1529
- if (to_read > 0) {
1530
- if (settings->on_body) settings->on_body(parser, p, to_read, 0);
1531
- p += to_read - 1;
1532
- }
1796
+ /* See the explanation in s_body_identity for why the content
1797
+ * length and data pointers are managed this way.
1798
+ */
1799
+ MARK(body);
1800
+ parser->content_length -= to_read;
1801
+ p += to_read - 1;
1533
1802
 
1534
- if (to_read == parser->content_length) {
1535
- state = s_chunk_data_almost_done;
1803
+ if (parser->content_length == 0) {
1804
+ parser->state = s_chunk_data_almost_done;
1536
1805
  }
1537
1806
 
1538
- parser->content_length -= to_read;
1539
1807
  break;
1540
1808
  }
1541
1809
 
1542
1810
  case s_chunk_data_almost_done:
1543
1811
  assert(parser->flags & F_CHUNKED);
1812
+ assert(parser->content_length == 0);
1544
1813
  STRICT_CHECK(ch != CR);
1545
- state = s_chunk_data_done;
1814
+ parser->state = s_chunk_data_done;
1815
+ CALLBACK_DATA(body);
1546
1816
  break;
1547
1817
 
1548
1818
  case s_chunk_data_done:
1549
1819
  assert(parser->flags & F_CHUNKED);
1550
1820
  STRICT_CHECK(ch != LF);
1551
- state = s_chunk_size_start;
1821
+ parser->nread = 0;
1822
+ parser->state = s_chunk_size_start;
1552
1823
  break;
1553
1824
 
1554
1825
  default:
1555
1826
  assert(0 && "unhandled state");
1827
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1556
1828
  goto error;
1557
1829
  }
1558
1830
  }
1559
1831
 
1560
- CALLBACK_NOCLEAR(header_field);
1561
- CALLBACK_NOCLEAR(header_value);
1562
- CALLBACK_NOCLEAR(fragment);
1563
- CALLBACK_NOCLEAR(query_string);
1564
- CALLBACK_NOCLEAR(path);
1565
- CALLBACK_NOCLEAR(url);
1832
+ /* Run callbacks for any marks that we have leftover after we ran our of
1833
+ * bytes. There should be at most one of these set, so it's OK to invoke
1834
+ * them in series (unset marks will not result in callbacks).
1835
+ *
1836
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
1837
+ * overflowed 'data' and this allows us to correct for the off-by-one that
1838
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1839
+ * value that's in-bounds).
1840
+ */
1841
+
1842
+ assert(((header_field_mark ? 1 : 0) +
1843
+ (header_value_mark ? 1 : 0) +
1844
+ (url_mark ? 1 : 0) +
1845
+ (body_mark ? 1 : 0)) <= 1);
1566
1846
 
1567
- parser->state = state;
1568
- parser->header_state = header_state;
1569
- parser->index = index;
1570
- parser->nread = nread;
1847
+ CALLBACK_DATA_NOADVANCE(header_field);
1848
+ CALLBACK_DATA_NOADVANCE(header_value);
1849
+ CALLBACK_DATA_NOADVANCE(url);
1850
+ CALLBACK_DATA_NOADVANCE(body);
1571
1851
 
1572
1852
  return len;
1573
1853
 
1574
1854
  error:
1855
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1856
+ SET_ERRNO(HPE_UNKNOWN);
1857
+ }
1858
+
1575
1859
  return (p - data);
1576
1860
  }
1577
1861
 
1578
1862
 
1863
+ /* Does the parser need to see an EOF to find the end of the message? */
1864
+ int
1865
+ http_message_needs_eof (const http_parser *parser)
1866
+ {
1867
+ if (parser->type == HTTP_REQUEST) {
1868
+ return 0;
1869
+ }
1870
+
1871
+ /* See RFC 2616 section 4.4 */
1872
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1873
+ parser->status_code == 204 || /* No Content */
1874
+ parser->status_code == 304 || /* Not Modified */
1875
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1876
+ return 0;
1877
+ }
1878
+
1879
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1880
+ return 0;
1881
+ }
1882
+
1883
+ return 1;
1884
+ }
1885
+
1886
+
1579
1887
  int
1580
- http_should_keep_alive (http_parser *parser)
1888
+ http_should_keep_alive (const http_parser *parser)
1581
1889
  {
1582
1890
  if (parser->http_major > 0 && parser->http_minor > 0) {
1583
1891
  /* HTTP/1.1 */
1584
1892
  if (parser->flags & F_CONNECTION_CLOSE) {
1585
1893
  return 0;
1586
- } else {
1587
- return 1;
1588
1894
  }
1589
1895
  } else {
1590
1896
  /* HTTP/1.0 or earlier */
1591
- if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1592
- return 1;
1593
- } else {
1897
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1594
1898
  return 0;
1595
1899
  }
1596
1900
  }
1901
+
1902
+ return !http_message_needs_eof(parser);
1597
1903
  }
1598
1904
 
1599
1905
 
1600
- const char * http_method_str (enum http_method m)
1906
+ const char *
1907
+ http_method_str (enum http_method m)
1601
1908
  {
1602
- return method_strings[m];
1909
+ return ELEM_AT(method_strings, m, "<unknown>");
1603
1910
  }
1604
1911
 
1605
1912
 
1606
1913
  void
1607
1914
  http_parser_init (http_parser *parser, enum http_parser_type t)
1608
1915
  {
1916
+ void *data = parser->data; /* preserve application data */
1917
+ memset(parser, 0, sizeof(*parser));
1918
+ parser->data = data;
1609
1919
  parser->type = t;
1610
1920
  parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1611
- parser->nread = 0;
1612
- parser->upgrade = 0;
1613
- parser->flags = 0;
1614
- parser->method = 0;
1921
+ parser->http_errno = HPE_OK;
1922
+ }
1923
+
1924
+ const char *
1925
+ http_errno_name(enum http_errno err) {
1926
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1927
+ return http_strerror_tab[err].name;
1928
+ }
1929
+
1930
+ const char *
1931
+ http_errno_description(enum http_errno err) {
1932
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1933
+ return http_strerror_tab[err].description;
1934
+ }
1935
+
1936
+ static enum http_host_state
1937
+ http_parse_host_char(enum http_host_state s, const char ch) {
1938
+ switch(s) {
1939
+ case s_http_userinfo:
1940
+ case s_http_userinfo_start:
1941
+ if (ch == '@') {
1942
+ return s_http_host_start;
1943
+ }
1944
+
1945
+ if (IS_USERINFO_CHAR(ch)) {
1946
+ return s_http_userinfo;
1947
+ }
1948
+ break;
1949
+
1950
+ case s_http_host_start:
1951
+ if (ch == '[') {
1952
+ return s_http_host_v6_start;
1953
+ }
1954
+
1955
+ if (IS_HOST_CHAR(ch)) {
1956
+ return s_http_host;
1957
+ }
1958
+
1959
+ break;
1960
+
1961
+ case s_http_host:
1962
+ if (IS_HOST_CHAR(ch)) {
1963
+ return s_http_host;
1964
+ }
1965
+
1966
+ /* FALLTHROUGH */
1967
+ case s_http_host_v6_end:
1968
+ if (ch == ':') {
1969
+ return s_http_host_port_start;
1970
+ }
1971
+
1972
+ break;
1973
+
1974
+ case s_http_host_v6:
1975
+ if (ch == ']') {
1976
+ return s_http_host_v6_end;
1977
+ }
1978
+
1979
+ /* FALLTHROUGH */
1980
+ case s_http_host_v6_start:
1981
+ if (IS_HEX(ch) || ch == ':') {
1982
+ return s_http_host_v6;
1983
+ }
1984
+
1985
+ break;
1986
+
1987
+ case s_http_host_port:
1988
+ case s_http_host_port_start:
1989
+ if (IS_NUM(ch)) {
1990
+ return s_http_host_port;
1991
+ }
1992
+
1993
+ break;
1994
+
1995
+ default:
1996
+ break;
1997
+ }
1998
+ return s_http_host_dead;
1999
+ }
2000
+
2001
+ static int
2002
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
2003
+ enum http_host_state s;
2004
+
2005
+ const char *p;
2006
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
2007
+
2008
+ u->field_data[UF_HOST].len = 0;
2009
+
2010
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
2011
+
2012
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2013
+ enum http_host_state new_s = http_parse_host_char(s, *p);
2014
+
2015
+ if (new_s == s_http_host_dead) {
2016
+ return 1;
2017
+ }
2018
+
2019
+ switch(new_s) {
2020
+ case s_http_host:
2021
+ if (s != s_http_host) {
2022
+ u->field_data[UF_HOST].off = p - buf;
2023
+ }
2024
+ u->field_data[UF_HOST].len++;
2025
+ break;
2026
+
2027
+ case s_http_host_v6:
2028
+ if (s != s_http_host_v6) {
2029
+ u->field_data[UF_HOST].off = p - buf;
2030
+ }
2031
+ u->field_data[UF_HOST].len++;
2032
+ break;
2033
+
2034
+ case s_http_host_port:
2035
+ if (s != s_http_host_port) {
2036
+ u->field_data[UF_PORT].off = p - buf;
2037
+ u->field_data[UF_PORT].len = 0;
2038
+ u->field_set |= (1 << UF_PORT);
2039
+ }
2040
+ u->field_data[UF_PORT].len++;
2041
+ break;
2042
+
2043
+ case s_http_userinfo:
2044
+ if (s != s_http_userinfo) {
2045
+ u->field_data[UF_USERINFO].off = p - buf ;
2046
+ u->field_data[UF_USERINFO].len = 0;
2047
+ u->field_set |= (1 << UF_USERINFO);
2048
+ }
2049
+ u->field_data[UF_USERINFO].len++;
2050
+ break;
2051
+
2052
+ default:
2053
+ break;
2054
+ }
2055
+ s = new_s;
2056
+ }
2057
+
2058
+ /* Make sure we don't end somewhere unexpected */
2059
+ switch (s) {
2060
+ case s_http_host_start:
2061
+ case s_http_host_v6_start:
2062
+ case s_http_host_v6:
2063
+ case s_http_host_port_start:
2064
+ case s_http_userinfo:
2065
+ case s_http_userinfo_start:
2066
+ return 1;
2067
+ default:
2068
+ break;
2069
+ }
2070
+
2071
+ return 0;
2072
+ }
2073
+
2074
+ int
2075
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2076
+ struct http_parser_url *u)
2077
+ {
2078
+ enum state s;
2079
+ const char *p;
2080
+ enum http_parser_url_fields uf, old_uf;
2081
+ int found_at = 0;
2082
+
2083
+ u->port = u->field_set = 0;
2084
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2085
+ uf = old_uf = UF_MAX;
2086
+
2087
+ for (p = buf; p < buf + buflen; p++) {
2088
+ s = parse_url_char(s, *p);
2089
+
2090
+ /* Figure out the next field that we're operating on */
2091
+ switch (s) {
2092
+ case s_dead:
2093
+ return 1;
2094
+
2095
+ /* Skip delimeters */
2096
+ case s_req_schema_slash:
2097
+ case s_req_schema_slash_slash:
2098
+ case s_req_server_start:
2099
+ case s_req_query_string_start:
2100
+ case s_req_fragment_start:
2101
+ continue;
2102
+
2103
+ case s_req_schema:
2104
+ uf = UF_SCHEMA;
2105
+ break;
2106
+
2107
+ case s_req_server_with_at:
2108
+ found_at = 1;
2109
+
2110
+ /* FALLTROUGH */
2111
+ case s_req_server:
2112
+ uf = UF_HOST;
2113
+ break;
2114
+
2115
+ case s_req_path:
2116
+ uf = UF_PATH;
2117
+ break;
2118
+
2119
+ case s_req_query_string:
2120
+ uf = UF_QUERY;
2121
+ break;
2122
+
2123
+ case s_req_fragment:
2124
+ uf = UF_FRAGMENT;
2125
+ break;
2126
+
2127
+ default:
2128
+ assert(!"Unexpected state");
2129
+ return 1;
2130
+ }
2131
+
2132
+ /* Nothing's changed; soldier on */
2133
+ if (uf == old_uf) {
2134
+ u->field_data[uf].len++;
2135
+ continue;
2136
+ }
2137
+
2138
+ u->field_data[uf].off = p - buf;
2139
+ u->field_data[uf].len = 1;
2140
+
2141
+ u->field_set |= (1 << uf);
2142
+ old_uf = uf;
2143
+ }
2144
+
2145
+ /* host must be present if there is a schema */
2146
+ /* parsing http:///toto will fail */
2147
+ if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2148
+ if (http_parse_host(buf, u, found_at) != 0) {
2149
+ return 1;
2150
+ }
2151
+ }
2152
+
2153
+ /* CONNECT requests can only contain "hostname:port" */
2154
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2155
+ return 1;
2156
+ }
2157
+
2158
+ if (u->field_set & (1 << UF_PORT)) {
2159
+ /* Don't bother with endp; we've already validated the string */
2160
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2161
+
2162
+ /* Ports have a max value of 2^16 */
2163
+ if (v > 0xffff) {
2164
+ return 1;
2165
+ }
2166
+
2167
+ u->port = (uint16_t) v;
2168
+ }
2169
+
2170
+ return 0;
2171
+ }
2172
+
2173
+ void
2174
+ http_parser_pause(http_parser *parser, int paused) {
2175
+ /* Users should only be pausing/unpausing a parser that is not in an error
2176
+ * state. In non-debug builds, there's not much that we can do about this
2177
+ * other than ignore it.
2178
+ */
2179
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2180
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2181
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2182
+ } else {
2183
+ assert(0 && "Attempting to pause parser in error state");
2184
+ }
2185
+ }
2186
+
2187
+ int
2188
+ http_body_is_final(const struct http_parser *parser) {
2189
+ return parser->state == s_message_done;
1615
2190
  }