http_parser.rb 0.5.3 → 0.6.0.beta.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. data/.gitmodules +3 -3
  2. data/Gemfile +1 -1
  3. data/Gemfile.lock +9 -2
  4. data/README.md +50 -45
  5. data/bench/standalone.rb +23 -0
  6. data/bench/thin.rb +1 -0
  7. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +66 -58
  8. data/ext/ruby_http_parser/ruby_http_parser.c +10 -41
  9. data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
  10. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +5 -1
  11. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +133 -1
  12. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +6 -0
  13. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +1029 -615
  14. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
  15. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +177 -43
  16. data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
  17. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPHeadersCompleteCallback.java +13 -0
  19. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +4 -1
  20. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
  21. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +2 -2
  22. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +6 -6
  23. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPHeadersCompleteCallback.java +12 -0
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +715 -637
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +1 -1
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +71 -21
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +1 -1
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +1 -0
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +2 -1
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +1 -0
  32. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +6 -17
  33. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +1 -0
  34. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +1 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +1 -0
  36. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +80 -9
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +2 -1
  39. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +1141 -210
  40. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +230 -71
  41. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +32 -0
  42. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +5 -1
  43. data/ext/ruby_http_parser/vendor/http-parser/README.md +9 -2
  44. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +1029 -615
  45. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +79 -0
  46. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +145 -16
  47. data/ext/ruby_http_parser/vendor/http-parser/test.c +1065 -141
  48. data/http_parser.rb.gemspec +3 -1
  49. data/spec/parser_spec.rb +41 -17
  50. data/spec/support/requests.json +236 -24
  51. data/spec/support/responses.json +182 -36
  52. data/tasks/compile.rake +2 -2
  53. data/tasks/fixtures.rake +7 -1
  54. metadata +57 -19
  55. data/ext/ruby_http_parser/vendor/http-parser-java/compile +0 -1
  56. data/ext/ruby_http_parser/vendor/http-parser-java/test_permutations +0 -1
  57. data/ext/ruby_http_parser/vendor/http-parser-java/test_unit +0 -1
  58. data/ext/ruby_http_parser/vendor/http-parser-java/test_utf8 +0 -1
@@ -17,9 +17,6 @@ typedef struct ParserWrapper {
17
17
  ryah_http_parser parser;
18
18
 
19
19
  VALUE request_url;
20
- VALUE request_path;
21
- VALUE query_string;
22
- VALUE fragment;
23
20
 
24
21
  VALUE headers;
25
22
 
@@ -49,9 +46,6 @@ void ParserWrapper_init(ParserWrapper *wrapper) {
49
46
  wrapper->parser.http_minor = 0;
50
47
 
51
48
  wrapper->request_url = Qnil;
52
- wrapper->request_path = Qnil;
53
- wrapper->query_string = Qnil;
54
- wrapper->fragment = Qnil;
55
49
 
56
50
  wrapper->upgrade_data = Qnil;
57
51
 
@@ -66,9 +60,6 @@ void ParserWrapper_mark(void *data) {
66
60
  if(data) {
67
61
  ParserWrapper *wrapper = (ParserWrapper *) data;
68
62
  rb_gc_mark_maybe(wrapper->request_url);
69
- rb_gc_mark_maybe(wrapper->request_path);
70
- rb_gc_mark_maybe(wrapper->query_string);
71
- rb_gc_mark_maybe(wrapper->fragment);
72
63
  rb_gc_mark_maybe(wrapper->upgrade_data);
73
64
  rb_gc_mark_maybe(wrapper->headers);
74
65
  rb_gc_mark_maybe(wrapper->on_message_begin);
@@ -111,9 +102,6 @@ int on_message_begin(ryah_http_parser *parser) {
111
102
  GET_WRAPPER(wrapper, parser);
112
103
 
113
104
  wrapper->request_url = rb_str_new2("");
114
- wrapper->request_path = rb_str_new2("");
115
- wrapper->query_string = rb_str_new2("");
116
- wrapper->fragment = rb_str_new2("");
117
105
  wrapper->headers = rb_hash_new();
118
106
  wrapper->upgrade_data = rb_str_new2("");
119
107
 
@@ -139,24 +127,6 @@ int on_url(ryah_http_parser *parser, const char *at, size_t length) {
139
127
  return 0;
140
128
  }
141
129
 
142
- int on_path(ryah_http_parser *parser, const char *at, size_t length) {
143
- GET_WRAPPER(wrapper, parser);
144
- rb_str_cat(wrapper->request_path, at, length);
145
- return 0;
146
- }
147
-
148
- int on_query_string(ryah_http_parser *parser, const char *at, size_t length) {
149
- GET_WRAPPER(wrapper, parser);
150
- rb_str_cat(wrapper->query_string, at, length);
151
- return 0;
152
- }
153
-
154
- int on_fragment(ryah_http_parser *parser, const char *at, size_t length) {
155
- GET_WRAPPER(wrapper, parser);
156
- rb_str_cat(wrapper->fragment, at, length);
157
- return 0;
158
- }
159
-
160
130
  int on_header_field(ryah_http_parser *parser, const char *at, size_t length) {
161
131
  GET_WRAPPER(wrapper, parser);
162
132
 
@@ -278,10 +248,7 @@ int on_message_complete(ryah_http_parser *parser) {
278
248
 
279
249
  static ryah_http_parser_settings settings = {
280
250
  .on_message_begin = on_message_begin,
281
- .on_path = on_path,
282
- .on_query_string = on_query_string,
283
251
  .on_url = on_url,
284
- .on_fragment = on_fragment,
285
252
  .on_header_field = on_header_field,
286
253
  .on_header_value = on_header_value,
287
254
  .on_headers_complete = on_headers_complete,
@@ -318,6 +285,10 @@ VALUE ResponseParser_alloc(VALUE klass) {
318
285
  return Parser_alloc_by_type(klass, HTTP_RESPONSE);
319
286
  }
320
287
 
288
+ VALUE Parser_strict_p(VALUE klass) {
289
+ return HTTP_PARSER_STRICT == 1 ? Qtrue : Qfalse;
290
+ }
291
+
321
292
  VALUE Parser_initialize(int argc, VALUE *argv, VALUE self) {
322
293
  ParserWrapper *wrapper = NULL;
323
294
  DATA_GET(self, ParserWrapper, wrapper);
@@ -349,11 +320,14 @@ VALUE Parser_execute(VALUE self, VALUE data) {
349
320
  size_t nparsed = ryah_http_parser_execute(&wrapper->parser, &settings, ptr, len);
350
321
 
351
322
  if (wrapper->parser.upgrade) {
352
- rb_str_cat(wrapper->upgrade_data, ptr + nparsed + 1, len - nparsed - 1);
323
+ if (RTEST(wrapper->stopped))
324
+ nparsed += 1;
325
+
326
+ rb_str_cat(wrapper->upgrade_data, ptr + nparsed, len - nparsed);
353
327
 
354
328
  } else if (nparsed != (size_t)len) {
355
329
  if (!RTEST(wrapper->stopped) && !RTEST(wrapper->completed))
356
- rb_raise(eParserError, "Could not parse data entirely");
330
+ rb_raise(eParserError, "Could not parse data entirely (%zu != %zu)", nparsed, len);
357
331
  else
358
332
  nparsed += 1; // error states fail on the current character
359
333
  }
@@ -465,9 +439,6 @@ VALUE Parser_status_code(VALUE self) {
465
439
  }
466
440
 
467
441
  DEFINE_GETTER(request_url);
468
- DEFINE_GETTER(request_path);
469
- DEFINE_GETTER(query_string);
470
- DEFINE_GETTER(fragment);
471
442
  DEFINE_GETTER(headers);
472
443
  DEFINE_GETTER(upgrade_data);
473
444
  DEFINE_GETTER(header_value_type);
@@ -515,6 +486,7 @@ void Init_ruby_http_parser() {
515
486
  rb_define_alloc_func(cRequestParser, RequestParser_alloc);
516
487
  rb_define_alloc_func(cResponseParser, ResponseParser_alloc);
517
488
 
489
+ rb_define_singleton_method(cParser, "strict?", Parser_strict_p, 0);
518
490
  rb_define_method(cParser, "initialize", Parser_initialize, -1);
519
491
 
520
492
  rb_define_method(cParser, "on_message_begin=", Parser_set_on_message_begin, 1);
@@ -534,9 +506,6 @@ void Init_ruby_http_parser() {
534
506
  rb_define_method(cParser, "status_code", Parser_status_code, 0);
535
507
 
536
508
  rb_define_method(cParser, "request_url", Parser_request_url, 0);
537
- rb_define_method(cParser, "request_path", Parser_request_path, 0);
538
- rb_define_method(cParser, "query_string", Parser_query_string, 0);
539
- rb_define_method(cParser, "fragment", Parser_fragment, 0);
540
509
  rb_define_method(cParser, "headers", Parser_headers, 0);
541
510
  rb_define_method(cParser, "upgrade_data", Parser_upgrade_data, 0);
542
511
  rb_define_method(cParser, "header_value_type", Parser_header_value_type, 0);
@@ -0,0 +1,32 @@
1
+ # Authors ordered by first contribution.
2
+ Ryan Dahl <ry@tinyclouds.org>
3
+ Jeremy Hinegardner <jeremy@hinegardner.org>
4
+ Sergey Shepelev <temotor@gmail.com>
5
+ Joe Damato <ice799@gmail.com>
6
+ tomika <tomika_nospam@freemail.hu>
7
+ Phoenix Sol <phoenix@burninglabs.com>
8
+ Cliff Frey <cliff@meraki.com>
9
+ Ewen Cheslack-Postava <ewencp@cs.stanford.edu>
10
+ Santiago Gala <sgala@apache.org>
11
+ Tim Becker <tim.becker@syngenio.de>
12
+ Jeff Terrace <jterrace@gmail.com>
13
+ Ben Noordhuis <info@bnoordhuis.nl>
14
+ Nathan Rajlich <nathan@tootallnate.net>
15
+ Mark Nottingham <mnot@mnot.net>
16
+ Aman Gupta <aman@tmm1.net>
17
+ Tim Becker <tim.becker@kuriositaet.de>
18
+ Sean Cunningham <sean.cunningham@mandiant.com>
19
+ Peter Griess <pg@std.in>
20
+ Salman Haq <salman.haq@asti-usa.com>
21
+ Cliff Frey <clifffrey@gmail.com>
22
+ Jon Kolb <jon@b0g.us>
23
+ Fouad Mardini <f.mardini@gmail.com>
24
+ Paul Querna <pquerna@apache.org>
25
+ Felix Geisendörfer <felix@debuggable.com>
26
+ koichik <koichik@improvement.jp>
27
+ Andre Caron <andre.l.caron@gmail.com>
28
+ Ivo Raisr <ivosh@ivosh.net>
29
+ James McLaughlin <jamie@lacewing-project.org>
30
+ David Gwynne <loki@animata.net>
31
+ LE ROUX Thomas <thomas@procheo.fr>
32
+ Randy Rizun <rrizun@ortivawireless.com>
@@ -23,7 +23,11 @@ IN THE SOFTWARE.
23
23
  This code mainly based on code with the following license:
24
24
 
25
25
 
26
- Copyright Joyent, Inc. and other Node contributors. All rights reserved.
26
+ http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright
27
+ Igor Sysoev.
28
+
29
+ Additional changes are licensed under the same terms as NGINX and
30
+ copyright Joyent, Inc. and other Node contributors. All rights reserved.
27
31
 
28
32
  Permission is hereby granted, free of charge, to any person obtaining a copy
29
33
  of this software and associated documentation files (the "Software"), to
@@ -24,7 +24,7 @@ The parser extracts the following information from HTTP messages:
24
24
  * Response status code
25
25
  * Transfer-Encoding
26
26
  * HTTP version
27
- * Request path, query string, fragment
27
+ * Request URL
28
28
  * Message body
29
29
 
30
30
  Building
@@ -49,3 +49,135 @@ Usage
49
49
  help or have suggestions, feel free to contact me at
50
50
  (tim.becker@kuriositaet.de).
51
51
 
52
+
53
+ One `http_parser` object is used per TCP connection. Initialize the struct
54
+ using `http_parser_init()` and set the callbacks. That might look something
55
+ like this for a request parser:
56
+
57
+ http_parser_settings settings;
58
+ settings.on_path = my_path_callback;
59
+ settings.on_header_field = my_header_field_callback;
60
+ /* ... */
61
+
62
+ http_parser *parser = malloc(sizeof(http_parser));
63
+ http_parser_init(parser, HTTP_REQUEST);
64
+ parser->data = my_socket;
65
+
66
+ When data is received on the socket execute the parser and check for errors.
67
+
68
+ size_t len = 80*1024, nparsed;
69
+ char buf[len];
70
+ ssize_t recved;
71
+
72
+ recved = recv(fd, buf, len, 0);
73
+
74
+ if (recved < 0) {
75
+ /* Handle error. */
76
+ }
77
+
78
+ /* Start up / continue the parser.
79
+ * Note we pass recved==0 to signal that EOF has been received.
80
+ */
81
+ nparsed = http_parser_execute(parser, &settings, buf, recved);
82
+
83
+ if (parser->upgrade) {
84
+ /* handle new protocol */
85
+ } else if (nparsed != recved) {
86
+ /* Handle error. Usually just close the connection. */
87
+ }
88
+
89
+ HTTP needs to know where the end of the stream is. For example, sometimes
90
+ servers send responses without Content-Length and expect the client to
91
+ consume input (for the body) until EOF. To tell http_parser about EOF, give
92
+ `0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
93
+ can still be encountered during an EOF, so one must still be prepared
94
+ to receive them.
95
+
96
+ Scalar valued message information such as `status_code`, `method`, and the
97
+ HTTP version are stored in the parser structure. This data is only
98
+ temporarily stored in `http_parser` and gets reset on each new message. If
99
+ this information is needed later, copy it out of the structure during the
100
+ `headers_complete` callback.
101
+
102
+ The parser decodes the transfer-encoding for both requests and responses
103
+ transparently. That is, a chunked encoding is decoded before being sent to
104
+ the on_body callback.
105
+
106
+
107
+ The Special Problem of Upgrade
108
+ ------------------------------
109
+
110
+ HTTP supports upgrading the connection to a different protocol. An
111
+ increasingly common example of this is the Web Socket protocol which sends
112
+ a request like
113
+
114
+ GET /demo HTTP/1.1
115
+ Upgrade: WebSocket
116
+ Connection: Upgrade
117
+ Host: example.com
118
+ Origin: http://example.com
119
+ WebSocket-Protocol: sample
120
+
121
+ followed by non-HTTP data.
122
+
123
+ (See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
124
+ information on the Web Socket protocol.)
125
+
126
+ To support this, the parser will treat this as a normal HTTP message without a
127
+ body, issuing both on_headers_complete and on_message_complete callbacks. However,
128
+ http_parser_execute() will stop parsing at the end of the headers and return.
129
+
130
+ The user is expected to check if `parser->upgrade` has been set to 1 after
131
+ `http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
132
+ offset by the return value of `http_parser_execute()`.
133
+
134
+
135
+ Callbacks
136
+ ---------
137
+
138
+ During the `http_parser_execute()` call, the callbacks set in
139
+ `http_parser_settings` will be executed. The parser maintains state and
140
+ never looks behind, so buffering the data is not necessary. If you need to
141
+ save certain data for later usage, you can do that from the callbacks.
142
+
143
+ There are two types of callbacks:
144
+
145
+ * notification `typedef int (*http_cb) (http_parser*);`
146
+ Callbacks: on_message_begin, on_headers_complete, on_message_complete.
147
+ * data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
148
+ Callbacks: (requests only) on_url,
149
+ (common) on_header_field, on_header_value, on_body;
150
+
151
+ Callbacks must return 0 on success. Returning a non-zero value indicates
152
+ error to the parser, making it exit immediately.
153
+
154
+ In case you parse HTTP message in chunks (i.e. `read()` request line
155
+ from socket, parse, read half headers, parse, etc) your data callbacks
156
+ may be called more than once. Http-parser guarantees that data pointer is only
157
+ valid for the lifetime of callback. You can also `read()` into a heap allocated
158
+ buffer to avoid copying memory around if this fits your application.
159
+
160
+ Reading headers may be a tricky task if you read/parse headers partially.
161
+ Basically, you need to remember whether last header callback was field or value
162
+ and apply following logic:
163
+
164
+ (on_header_field and on_header_value shortened to on_h_*)
165
+ ------------------------ ------------ --------------------------------------------
166
+ | State (prev. callback) | Callback | Description/action |
167
+ ------------------------ ------------ --------------------------------------------
168
+ | nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
169
+ | | | into it |
170
+ ------------------------ ------------ --------------------------------------------
171
+ | value | on_h_field | New header started. |
172
+ | | | Copy current name,value buffers to headers |
173
+ | | | list and allocate new buffer for new name |
174
+ ------------------------ ------------ --------------------------------------------
175
+ | field | on_h_field | Previous name continues. Reallocate name |
176
+ | | | buffer and append callback data to it |
177
+ ------------------------ ------------ --------------------------------------------
178
+ | field | on_h_value | Value for current header started. Allocate |
179
+ | | | new buffer and copy callback data to it |
180
+ ------------------------ ------------ --------------------------------------------
181
+ | value | on_h_value | Value continues. Reallocate value buffer |
182
+ | | | and append callback data to it |
183
+ ------------------------ ------------ --------------------------------------------
@@ -1,4 +1,10 @@
1
+ decide how to handle errs per default:
2
+ - ry: "set state to dead", return `read`
3
+ - current: call on_error w/ details, if no on_error handler set,
4
+ throw Exception, else call on_error and behave like orig...
5
+
1
6
  some tests from test.c left to port
7
+ (scan ...)
2
8
  documentation
3
9
 
4
10
  hi level callback interface
@@ -1,4 +1,7 @@
1
- /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
2
5
  *
3
6
  * Permission is hereby granted, free of charge, to any person obtaining a copy
4
7
  * of this software and associated documentation files (the "Software"), to
@@ -18,48 +21,94 @@
18
21
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
22
  * IN THE SOFTWARE.
20
23
  */
21
- #include <http_parser.h>
24
+ #include "http_parser.h"
22
25
  #include <assert.h>
23
26
  #include <stddef.h>
27
+ #include <ctype.h>
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <limits.h>
24
31
 
32
+ #ifndef ULLONG_MAX
33
+ # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
+ #endif
25
35
 
26
36
  #ifndef MIN
27
37
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
28
38
  #endif
29
39
 
30
40
 
31
- #define CALLBACK2(FOR) \
41
+ #if HTTP_PARSER_DEBUG
42
+ #define SET_ERRNO(e) \
32
43
  do { \
33
- if (settings->on_##FOR) { \
34
- if (0 != settings->on_##FOR(parser)) return (p - data); \
35
- } \
44
+ parser->http_errno = (e); \
45
+ parser->error_lineno = __LINE__; \
36
46
  } while (0)
47
+ #else
48
+ #define SET_ERRNO(e) \
49
+ do { \
50
+ parser->http_errno = (e); \
51
+ } while(0)
52
+ #endif
37
53
 
38
54
 
39
- #define MARK(FOR) \
55
+ /* Run the notify callback FOR, returning ER if it fails */
56
+ #define CALLBACK_NOTIFY_(FOR, ER) \
40
57
  do { \
41
- FOR##_mark = p; \
58
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
59
+ \
60
+ if (settings->on_##FOR) { \
61
+ if (0 != settings->on_##FOR(parser)) { \
62
+ SET_ERRNO(HPE_CB_##FOR); \
63
+ } \
64
+ \
65
+ /* We either errored above or got paused; get out */ \
66
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
67
+ return (ER); \
68
+ } \
69
+ } \
42
70
  } while (0)
43
71
 
44
- #define CALLBACK_NOCLEAR(FOR) \
72
+ /* Run the notify callback FOR and consume the current byte */
73
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
74
+
75
+ /* Run the notify callback FOR and don't consume the current byte */
76
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
77
+
78
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
79
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
45
80
  do { \
81
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
82
+ \
46
83
  if (FOR##_mark) { \
47
84
  if (settings->on_##FOR) { \
48
- if (0 != settings->on_##FOR(parser, \
49
- FOR##_mark, \
50
- p - FOR##_mark)) \
51
- { \
52
- return (p - data); \
85
+ if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
86
+ SET_ERRNO(HPE_CB_##FOR); \
87
+ } \
88
+ \
89
+ /* We either errored above or got paused; get out */ \
90
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
91
+ return (ER); \
53
92
  } \
54
93
  } \
94
+ FOR##_mark = NULL; \
55
95
  } \
56
96
  } while (0)
97
+
98
+ /* Run the data callback FOR and consume the current byte */
99
+ #define CALLBACK_DATA(FOR) \
100
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
57
101
 
102
+ /* Run the data callback FOR and don't consume the current byte */
103
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
104
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
58
105
 
59
- #define CALLBACK(FOR) \
106
+ /* Set the mark FOR; non-destructive if mark is already set */
107
+ #define MARK(FOR) \
60
108
  do { \
61
- CALLBACK_NOCLEAR(FOR); \
62
- FOR##_mark = NULL; \
109
+ if (!FOR##_mark) { \
110
+ FOR##_mark = p; \
111
+ } \
63
112
  } while (0)
64
113
 
65
114
 
@@ -97,6 +146,8 @@ static const char *method_strings[] =
97
146
  , "NOTIFY"
98
147
  , "SUBSCRIBE"
99
148
  , "UNSUBSCRIBE"
149
+ , "PATCH"
150
+ , "PURGE"
100
151
  };
101
152
 
102
153
 
@@ -117,9 +168,9 @@ static const char tokens[256] = {
117
168
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
118
169
  0, 0, 0, 0, 0, 0, 0, 0,
119
170
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
120
- ' ', '!', '"', '#', '$', '%', '&', '\'',
171
+ 0, '!', 0, '#', '$', '%', '&', '\'',
121
172
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
122
- 0, 0, '*', '+', 0, '-', '.', '/',
173
+ 0, 0, '*', '+', 0, '-', '.', 0,
123
174
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
124
175
  '0', '1', '2', '3', '4', '5', '6', '7',
125
176
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -139,7 +190,7 @@ static const char tokens[256] = {
139
190
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
140
191
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
141
192
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
142
- 'x', 'y', 'z', 0, '|', '}', '~', 0 };
193
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
143
194
 
144
195
 
145
196
  static const int8_t unhex[256] =
@@ -186,28 +237,7 @@ static const uint8_t normal_url_char[256] = {
186
237
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
187
238
  1, 1, 1, 1, 1, 1, 1, 1,
188
239
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
189
- 1, 1, 1, 1, 1, 1, 1, 0,
190
-
191
- /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
192
- encoded paths. This is out of spec, but clients generate this and most other
193
- HTTP servers support it. We should, too. */
194
-
195
- 1, 1, 1, 1, 1, 1, 1, 1,
196
- 1, 1, 1, 1, 1, 1, 1, 1,
197
- 1, 1, 1, 1, 1, 1, 1, 1,
198
- 1, 1, 1, 1, 1, 1, 1, 1,
199
- 1, 1, 1, 1, 1, 1, 1, 1,
200
- 1, 1, 1, 1, 1, 1, 1, 1,
201
- 1, 1, 1, 1, 1, 1, 1, 1,
202
- 1, 1, 1, 1, 1, 1, 1, 1,
203
- 1, 1, 1, 1, 1, 1, 1, 1,
204
- 1, 1, 1, 1, 1, 1, 1, 1,
205
- 1, 1, 1, 1, 1, 1, 1, 1,
206
- 1, 1, 1, 1, 1, 1, 1, 1,
207
- 1, 1, 1, 1, 1, 1, 1, 1,
208
- 1, 1, 1, 1, 1, 1, 1, 1,
209
- 1, 1, 1, 1, 1, 1, 1, 1,
210
- 1, 1, 1, 1, 1, 1, 1, 1 };
240
+ 1, 1, 1, 1, 1, 1, 1, 0, };
211
241
 
212
242
 
213
243
  enum state
@@ -236,7 +266,12 @@ enum state
236
266
  , s_req_schema
237
267
  , s_req_schema_slash
238
268
  , s_req_schema_slash_slash
269
+ , s_req_host_start
270
+ , s_req_host_v6_start
271
+ , s_req_host_v6
272
+ , s_req_host_v6_end
239
273
  , s_req_host
274
+ , s_req_port_start
240
275
  , s_req_port
241
276
  , s_req_path
242
277
  , s_req_query_string_start
@@ -258,6 +293,7 @@ enum state
258
293
  , s_header_field
259
294
  , s_header_value_start
260
295
  , s_header_value
296
+ , s_header_value_lws
261
297
 
262
298
  , s_header_almost_done
263
299
 
@@ -265,9 +301,11 @@ enum state
265
301
  , s_chunk_size
266
302
  , s_chunk_parameters
267
303
  , s_chunk_size_almost_done
268
-
304
+
269
305
  , s_headers_almost_done
270
- /* Important: 's_headers_almost_done' must be the last 'header' state. All
306
+ , s_headers_done
307
+
308
+ /* Important: 's_headers_done' must be the last 'header' state. All
271
309
  * states beyond this must be 'body' states. It is used for overflow
272
310
  * checking. See the PARSING_HEADER() macro.
273
311
  */
@@ -278,10 +316,12 @@ enum state
278
316
 
279
317
  , s_body_identity
280
318
  , s_body_identity_eof
319
+
320
+ , s_message_done
281
321
  };
282
322
 
283
323
 
284
- #define PARSING_HEADER(state) (state <= s_headers_almost_done)
324
+ #define PARSING_HEADER(state) (state <= s_headers_done)
285
325
 
286
326
 
287
327
  enum header_states
@@ -311,27 +351,39 @@ enum header_states
311
351
  };
312
352
 
313
353
 
314
- enum flags
315
- { F_CHUNKED = 1 << 0
316
- , F_CONNECTION_KEEP_ALIVE = 1 << 1
317
- , F_CONNECTION_CLOSE = 1 << 2
318
- , F_TRAILING = 1 << 3
319
- , F_UPGRADE = 1 << 4
320
- , F_SKIPBODY = 1 << 5
321
- };
322
-
354
+ /* Macros for character classes; depends on strict-mode */
355
+ #define CR '\r'
356
+ #define LF '\n'
357
+ #define LOWER(c) (unsigned char)(c | 0x20)
358
+ #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
359
+ #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
360
+ #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
361
+ #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
323
362
 
324
- #define CR '\r'
325
- #define LF '\n'
326
- #define LOWER(c) (unsigned char)(c | 0x20)
327
- #define TOKEN(c) tokens[(unsigned char)c]
363
+ #if HTTP_PARSER_STRICT
364
+ #define TOKEN(c) (tokens[(unsigned char)c])
365
+ #define IS_URL_CHAR(c) (normal_url_char[(unsigned char) (c)])
366
+ #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
367
+ #else
368
+ #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
369
+ #define IS_URL_CHAR(c) \
370
+ (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
371
+ #define IS_HOST_CHAR(c) \
372
+ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
373
+ #endif
328
374
 
329
375
 
330
376
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
331
377
 
332
378
 
333
379
  #if HTTP_PARSER_STRICT
334
- # define STRICT_CHECK(cond) if (cond) goto error
380
+ # define STRICT_CHECK(cond) \
381
+ do { \
382
+ if (cond) { \
383
+ SET_ERRNO(HPE_STRICT); \
384
+ goto error; \
385
+ } \
386
+ } while (0)
335
387
  # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
336
388
  #else
337
389
  # define STRICT_CHECK(cond)
@@ -339,24 +391,228 @@ enum flags
339
391
  #endif
340
392
 
341
393
 
394
+ /* Map errno values to strings for human-readable output */
395
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
396
+ static struct {
397
+ const char *name;
398
+ const char *description;
399
+ } http_strerror_tab[] = {
400
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
401
+ };
402
+ #undef HTTP_STRERROR_GEN
403
+
404
+ int http_message_needs_eof(http_parser *parser);
405
+
406
+ /* Our URL parser.
407
+ *
408
+ * This is designed to be shared by http_parser_execute() for URL validation,
409
+ * hence it has a state transition + byte-for-byte interface. In addition, it
410
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
411
+ * work of turning state transitions into URL components for its API.
412
+ *
413
+ * This function should only be invoked with non-space characters. It is
414
+ * assumed that the caller cares about (and can detect) the transition between
415
+ * URL and non-URL states by looking for these.
416
+ */
417
+ static enum state
418
+ parse_url_char(enum state s, const char ch)
419
+ {
420
+ assert(!isspace(ch));
421
+
422
+ switch (s) {
423
+ case s_req_spaces_before_url:
424
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
425
+ * All methods except CONNECT are followed by '/' or '*'.
426
+ */
427
+
428
+ if (ch == '/' || ch == '*') {
429
+ return s_req_path;
430
+ }
431
+
432
+ if (IS_ALPHA(ch)) {
433
+ return s_req_schema;
434
+ }
435
+
436
+ break;
437
+
438
+ case s_req_schema:
439
+ if (IS_ALPHA(ch)) {
440
+ return s;
441
+ }
442
+
443
+ if (ch == ':') {
444
+ return s_req_schema_slash;
445
+ }
446
+
447
+ break;
448
+
449
+ case s_req_schema_slash:
450
+ if (ch == '/') {
451
+ return s_req_schema_slash_slash;
452
+ }
453
+
454
+ break;
455
+
456
+ case s_req_schema_slash_slash:
457
+ if (ch == '/') {
458
+ return s_req_host_start;
459
+ }
460
+
461
+ break;
462
+
463
+ case s_req_host_start:
464
+ if (ch == '[') {
465
+ return s_req_host_v6_start;
466
+ }
467
+
468
+ if (IS_HOST_CHAR(ch)) {
469
+ return s_req_host;
470
+ }
471
+
472
+ break;
473
+
474
+ case s_req_host:
475
+ if (IS_HOST_CHAR(ch)) {
476
+ return s_req_host;
477
+ }
478
+
479
+ /* FALLTHROUGH */
480
+ case s_req_host_v6_end:
481
+ switch (ch) {
482
+ case ':':
483
+ return s_req_port_start;
484
+
485
+ case '/':
486
+ return s_req_path;
487
+
488
+ case '?':
489
+ return s_req_query_string_start;
490
+ }
491
+
492
+ break;
493
+
494
+ case s_req_host_v6:
495
+ if (ch == ']') {
496
+ return s_req_host_v6_end;
497
+ }
498
+
499
+ /* FALLTHROUGH */
500
+ case s_req_host_v6_start:
501
+ if (IS_HEX(ch) || ch == ':') {
502
+ return s_req_host_v6;
503
+ }
504
+ break;
505
+
506
+ case s_req_port:
507
+ switch (ch) {
508
+ case '/':
509
+ return s_req_path;
510
+
511
+ case '?':
512
+ return s_req_query_string_start;
513
+ }
514
+
515
+ /* FALLTHROUGH */
516
+ case s_req_port_start:
517
+ if (IS_NUM(ch)) {
518
+ return s_req_port;
519
+ }
520
+
521
+ break;
522
+
523
+ case s_req_path:
524
+ if (IS_URL_CHAR(ch)) {
525
+ return s;
526
+ }
527
+
528
+ switch (ch) {
529
+ case '?':
530
+ return s_req_query_string_start;
531
+
532
+ case '#':
533
+ return s_req_fragment_start;
534
+ }
535
+
536
+ break;
537
+
538
+ case s_req_query_string_start:
539
+ case s_req_query_string:
540
+ if (IS_URL_CHAR(ch)) {
541
+ return s_req_query_string;
542
+ }
543
+
544
+ switch (ch) {
545
+ case '?':
546
+ /* allow extra '?' in query string */
547
+ return s_req_query_string;
548
+
549
+ case '#':
550
+ return s_req_fragment_start;
551
+ }
552
+
553
+ break;
554
+
555
+ case s_req_fragment_start:
556
+ if (IS_URL_CHAR(ch)) {
557
+ return s_req_fragment;
558
+ }
559
+
560
+ switch (ch) {
561
+ case '?':
562
+ return s_req_fragment;
563
+
564
+ case '#':
565
+ return s;
566
+ }
567
+
568
+ break;
569
+
570
+ case s_req_fragment:
571
+ if (IS_URL_CHAR(ch)) {
572
+ return s;
573
+ }
574
+
575
+ switch (ch) {
576
+ case '?':
577
+ case '#':
578
+ return s;
579
+ }
580
+
581
+ break;
582
+
583
+ default:
584
+ break;
585
+ }
586
+
587
+ /* We should never fall out of the switch above unless there's an error */
588
+ return s_dead;
589
+ }
590
+
342
591
  size_t http_parser_execute (http_parser *parser,
343
592
  const http_parser_settings *settings,
344
593
  const char *data,
345
594
  size_t len)
346
595
  {
347
596
  char c, ch;
348
- const char *p = data, *pe;
349
- int64_t to_read;
597
+ int8_t unhex_val;
598
+ const char *p = data;
599
+ const char *header_field_mark = 0;
600
+ const char *header_value_mark = 0;
601
+ const char *url_mark = 0;
602
+ const char *body_mark = 0;
350
603
 
351
- enum state state = (enum state) parser->state;
352
- enum header_states header_state = (enum header_states) parser->header_state;
353
- uint64_t index = parser->index;
354
- uint64_t nread = parser->nread;
604
+ /* We're in an error state. Don't bother doing anything. */
605
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
606
+ return 0;
607
+ }
355
608
 
356
609
  if (len == 0) {
357
- switch (state) {
610
+ switch (parser->state) {
358
611
  case s_body_identity_eof:
359
- CALLBACK2(message_complete);
612
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
613
+ * we got paused.
614
+ */
615
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
360
616
  return 0;
361
617
 
362
618
  case s_dead:
@@ -366,52 +622,59 @@ size_t http_parser_execute (http_parser *parser,
366
622
  return 0;
367
623
 
368
624
  default:
369
- return 1; // error
625
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
626
+ return 1;
370
627
  }
371
628
  }
372
629
 
373
- /* technically we could combine all of these (except for url_mark) into one
374
- variable, saving stack space, but it seems more clear to have them
375
- separated. */
376
- const char *header_field_mark = 0;
377
- const char *header_value_mark = 0;
378
- const char *fragment_mark = 0;
379
- const char *query_string_mark = 0;
380
- const char *path_mark = 0;
381
- const char *url_mark = 0;
382
630
 
383
- if (state == s_header_field)
631
+ if (parser->state == s_header_field)
384
632
  header_field_mark = data;
385
- if (state == s_header_value)
633
+ if (parser->state == s_header_value)
386
634
  header_value_mark = data;
387
- if (state == s_req_fragment)
388
- fragment_mark = data;
389
- if (state == s_req_query_string)
390
- query_string_mark = data;
391
- if (state == s_req_path)
392
- path_mark = data;
393
- if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
394
- || state == s_req_schema_slash_slash || state == s_req_port
395
- || state == s_req_query_string_start || state == s_req_query_string
396
- || state == s_req_host
397
- || state == s_req_fragment_start || state == s_req_fragment)
635
+ switch (parser->state) {
636
+ case s_req_path:
637
+ case s_req_schema:
638
+ case s_req_schema_slash:
639
+ case s_req_schema_slash_slash:
640
+ case s_req_host_start:
641
+ case s_req_host_v6_start:
642
+ case s_req_host_v6:
643
+ case s_req_host_v6_end:
644
+ case s_req_host:
645
+ case s_req_port_start:
646
+ case s_req_port:
647
+ case s_req_query_string_start:
648
+ case s_req_query_string:
649
+ case s_req_fragment_start:
650
+ case s_req_fragment:
398
651
  url_mark = data;
652
+ break;
653
+ }
399
654
 
400
- for (p=data, pe=data+len; p != pe; p++) {
655
+ for (p=data; p != data + len; p++) {
401
656
  ch = *p;
402
657
 
403
- if (PARSING_HEADER(state)) {
404
- ++nread;
658
+ if (PARSING_HEADER(parser->state)) {
659
+ ++parser->nread;
405
660
  /* Buffer overflow attack */
406
- if (nread > HTTP_MAX_HEADER_SIZE) goto error;
661
+ if (parser->nread > HTTP_MAX_HEADER_SIZE) {
662
+ SET_ERRNO(HPE_HEADER_OVERFLOW);
663
+ goto error;
664
+ }
407
665
  }
408
666
 
409
- switch (state) {
667
+ reexecute_byte:
668
+ switch (parser->state) {
410
669
 
411
670
  case s_dead:
412
671
  /* this state is used after a 'Connection: close' message
413
672
  * the parser will error out if it reads another message
414
673
  */
674
+ if (ch == CR || ch == LF)
675
+ break;
676
+
677
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
415
678
  goto error;
416
679
 
417
680
  case s_start_req_or_res:
@@ -419,42 +682,46 @@ size_t http_parser_execute (http_parser *parser,
419
682
  if (ch == CR || ch == LF)
420
683
  break;
421
684
  parser->flags = 0;
422
- parser->content_length = -1;
685
+ parser->content_length = ULLONG_MAX;
423
686
 
424
- CALLBACK2(message_begin);
687
+ if (ch == 'H') {
688
+ parser->state = s_res_or_resp_H;
425
689
 
426
- if (ch == 'H')
427
- state = s_res_or_resp_H;
428
- else {
690
+ CALLBACK_NOTIFY(message_begin);
691
+ } else {
429
692
  parser->type = HTTP_REQUEST;
430
- goto start_req_method_assign;
693
+ parser->state = s_start_req;
694
+ goto reexecute_byte;
431
695
  }
696
+
432
697
  break;
433
698
  }
434
699
 
435
700
  case s_res_or_resp_H:
436
701
  if (ch == 'T') {
437
702
  parser->type = HTTP_RESPONSE;
438
- state = s_res_HT;
703
+ parser->state = s_res_HT;
439
704
  } else {
440
- if (ch != 'E') goto error;
705
+ if (ch != 'E') {
706
+ SET_ERRNO(HPE_INVALID_CONSTANT);
707
+ goto error;
708
+ }
709
+
441
710
  parser->type = HTTP_REQUEST;
442
711
  parser->method = HTTP_HEAD;
443
- index = 2;
444
- state = s_req_method;
712
+ parser->index = 2;
713
+ parser->state = s_req_method;
445
714
  }
446
715
  break;
447
716
 
448
717
  case s_start_res:
449
718
  {
450
719
  parser->flags = 0;
451
- parser->content_length = -1;
452
-
453
- CALLBACK2(message_begin);
720
+ parser->content_length = ULLONG_MAX;
454
721
 
455
722
  switch (ch) {
456
723
  case 'H':
457
- state = s_res_H;
724
+ parser->state = s_res_H;
458
725
  break;
459
726
 
460
727
  case CR:
@@ -462,105 +729,133 @@ size_t http_parser_execute (http_parser *parser,
462
729
  break;
463
730
 
464
731
  default:
732
+ SET_ERRNO(HPE_INVALID_CONSTANT);
465
733
  goto error;
466
734
  }
735
+
736
+ CALLBACK_NOTIFY(message_begin);
467
737
  break;
468
738
  }
469
739
 
470
740
  case s_res_H:
471
741
  STRICT_CHECK(ch != 'T');
472
- state = s_res_HT;
742
+ parser->state = s_res_HT;
473
743
  break;
474
744
 
475
745
  case s_res_HT:
476
746
  STRICT_CHECK(ch != 'T');
477
- state = s_res_HTT;
747
+ parser->state = s_res_HTT;
478
748
  break;
479
749
 
480
750
  case s_res_HTT:
481
751
  STRICT_CHECK(ch != 'P');
482
- state = s_res_HTTP;
752
+ parser->state = s_res_HTTP;
483
753
  break;
484
754
 
485
755
  case s_res_HTTP:
486
756
  STRICT_CHECK(ch != '/');
487
- state = s_res_first_http_major;
757
+ parser->state = s_res_first_http_major;
488
758
  break;
489
759
 
490
760
  case s_res_first_http_major:
491
- if (ch < '1' || ch > '9') goto error;
761
+ if (ch < '0' || ch > '9') {
762
+ SET_ERRNO(HPE_INVALID_VERSION);
763
+ goto error;
764
+ }
765
+
492
766
  parser->http_major = ch - '0';
493
- state = s_res_http_major;
767
+ parser->state = s_res_http_major;
494
768
  break;
495
769
 
496
770
  /* major HTTP version or dot */
497
771
  case s_res_http_major:
498
772
  {
499
773
  if (ch == '.') {
500
- state = s_res_first_http_minor;
774
+ parser->state = s_res_first_http_minor;
501
775
  break;
502
776
  }
503
777
 
504
- if (ch < '0' || ch > '9') goto error;
778
+ if (!IS_NUM(ch)) {
779
+ SET_ERRNO(HPE_INVALID_VERSION);
780
+ goto error;
781
+ }
505
782
 
506
783
  parser->http_major *= 10;
507
784
  parser->http_major += ch - '0';
508
785
 
509
- if (parser->http_major > 999) goto error;
786
+ if (parser->http_major > 999) {
787
+ SET_ERRNO(HPE_INVALID_VERSION);
788
+ goto error;
789
+ }
790
+
510
791
  break;
511
792
  }
512
793
 
513
794
  /* first digit of minor HTTP version */
514
795
  case s_res_first_http_minor:
515
- if (ch < '0' || ch > '9') goto error;
796
+ if (!IS_NUM(ch)) {
797
+ SET_ERRNO(HPE_INVALID_VERSION);
798
+ goto error;
799
+ }
800
+
516
801
  parser->http_minor = ch - '0';
517
- state = s_res_http_minor;
802
+ parser->state = s_res_http_minor;
518
803
  break;
519
804
 
520
805
  /* minor HTTP version or end of request line */
521
806
  case s_res_http_minor:
522
807
  {
523
808
  if (ch == ' ') {
524
- state = s_res_first_status_code;
809
+ parser->state = s_res_first_status_code;
525
810
  break;
526
811
  }
527
812
 
528
- if (ch < '0' || ch > '9') goto error;
813
+ if (!IS_NUM(ch)) {
814
+ SET_ERRNO(HPE_INVALID_VERSION);
815
+ goto error;
816
+ }
529
817
 
530
818
  parser->http_minor *= 10;
531
819
  parser->http_minor += ch - '0';
532
820
 
533
- if (parser->http_minor > 999) goto error;
821
+ if (parser->http_minor > 999) {
822
+ SET_ERRNO(HPE_INVALID_VERSION);
823
+ goto error;
824
+ }
825
+
534
826
  break;
535
827
  }
536
828
 
537
829
  case s_res_first_status_code:
538
830
  {
539
- if (ch < '0' || ch > '9') {
831
+ if (!IS_NUM(ch)) {
540
832
  if (ch == ' ') {
541
833
  break;
542
834
  }
835
+
836
+ SET_ERRNO(HPE_INVALID_STATUS);
543
837
  goto error;
544
838
  }
545
839
  parser->status_code = ch - '0';
546
- state = s_res_status_code;
840
+ parser->state = s_res_status_code;
547
841
  break;
548
842
  }
549
843
 
550
844
  case s_res_status_code:
551
845
  {
552
- if (ch < '0' || ch > '9') {
846
+ if (!IS_NUM(ch)) {
553
847
  switch (ch) {
554
848
  case ' ':
555
- state = s_res_status;
849
+ parser->state = s_res_status;
556
850
  break;
557
851
  case CR:
558
- state = s_res_line_almost_done;
852
+ parser->state = s_res_line_almost_done;
559
853
  break;
560
854
  case LF:
561
- state = s_header_field_start;
855
+ parser->state = s_header_field_start;
562
856
  break;
563
857
  default:
858
+ SET_ERRNO(HPE_INVALID_STATUS);
564
859
  goto error;
565
860
  }
566
861
  break;
@@ -569,7 +864,11 @@ size_t http_parser_execute (http_parser *parser,
569
864
  parser->status_code *= 10;
570
865
  parser->status_code += ch - '0';
571
866
 
572
- if (parser->status_code > 999) goto error;
867
+ if (parser->status_code > 999) {
868
+ SET_ERRNO(HPE_INVALID_STATUS);
869
+ goto error;
870
+ }
871
+
573
872
  break;
574
873
  }
575
874
 
@@ -577,19 +876,19 @@ size_t http_parser_execute (http_parser *parser,
577
876
  /* the human readable status. e.g. "NOT FOUND"
578
877
  * we are not humans so just ignore this */
579
878
  if (ch == CR) {
580
- state = s_res_line_almost_done;
879
+ parser->state = s_res_line_almost_done;
581
880
  break;
582
881
  }
583
882
 
584
883
  if (ch == LF) {
585
- state = s_header_field_start;
884
+ parser->state = s_header_field_start;
586
885
  break;
587
886
  }
588
887
  break;
589
888
 
590
889
  case s_res_line_almost_done:
591
890
  STRICT_CHECK(ch != LF);
592
- state = s_header_field_start;
891
+ parser->state = s_header_field_start;
593
892
  break;
594
893
 
595
894
  case s_start_req:
@@ -597,15 +896,15 @@ size_t http_parser_execute (http_parser *parser,
597
896
  if (ch == CR || ch == LF)
598
897
  break;
599
898
  parser->flags = 0;
600
- parser->content_length = -1;
899
+ parser->content_length = ULLONG_MAX;
601
900
 
602
- CALLBACK2(message_begin);
603
-
604
- if (ch < 'A' || 'Z' < ch) goto error;
901
+ if (!IS_ALPHA(ch)) {
902
+ SET_ERRNO(HPE_INVALID_METHOD);
903
+ goto error;
904
+ }
605
905
 
606
- start_req_method_assign:
607
906
  parser->method = (enum http_method) 0;
608
- index = 1;
907
+ parser->index = 1;
609
908
  switch (ch) {
610
909
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
611
910
  case 'D': parser->method = HTTP_DELETE; break;
@@ -615,342 +914,157 @@ size_t http_parser_execute (http_parser *parser,
615
914
  case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
616
915
  case 'N': parser->method = HTTP_NOTIFY; break;
617
916
  case 'O': parser->method = HTTP_OPTIONS; break;
618
- case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
917
+ case 'P': parser->method = HTTP_POST;
918
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
919
+ break;
619
920
  case 'R': parser->method = HTTP_REPORT; break;
620
921
  case 'S': parser->method = HTTP_SUBSCRIBE; break;
621
922
  case 'T': parser->method = HTTP_TRACE; break;
622
923
  case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
623
- default: goto error;
924
+ default:
925
+ SET_ERRNO(HPE_INVALID_METHOD);
926
+ goto error;
624
927
  }
625
- state = s_req_method;
928
+ parser->state = s_req_method;
929
+
930
+ CALLBACK_NOTIFY(message_begin);
931
+
626
932
  break;
627
933
  }
628
934
 
629
935
  case s_req_method:
630
936
  {
631
- if (ch == '\0')
937
+ const char *matcher;
938
+ if (ch == '\0') {
939
+ SET_ERRNO(HPE_INVALID_METHOD);
632
940
  goto error;
941
+ }
633
942
 
634
- const char *matcher = method_strings[parser->method];
635
- if (ch == ' ' && matcher[index] == '\0') {
636
- state = s_req_spaces_before_url;
637
- } else if (ch == matcher[index]) {
943
+ matcher = method_strings[parser->method];
944
+ if (ch == ' ' && matcher[parser->index] == '\0') {
945
+ parser->state = s_req_spaces_before_url;
946
+ } else if (ch == matcher[parser->index]) {
638
947
  ; /* nada */
639
948
  } else if (parser->method == HTTP_CONNECT) {
640
- if (index == 1 && ch == 'H') {
949
+ if (parser->index == 1 && ch == 'H') {
641
950
  parser->method = HTTP_CHECKOUT;
642
- } else if (index == 2 && ch == 'P') {
951
+ } else if (parser->index == 2 && ch == 'P') {
643
952
  parser->method = HTTP_COPY;
953
+ } else {
954
+ goto error;
644
955
  }
645
956
  } else if (parser->method == HTTP_MKCOL) {
646
- if (index == 1 && ch == 'O') {
957
+ if (parser->index == 1 && ch == 'O') {
647
958
  parser->method = HTTP_MOVE;
648
- } else if (index == 1 && ch == 'E') {
959
+ } else if (parser->index == 1 && ch == 'E') {
649
960
  parser->method = HTTP_MERGE;
650
- } else if (index == 1 && ch == '-') {
961
+ } else if (parser->index == 1 && ch == '-') {
651
962
  parser->method = HTTP_MSEARCH;
652
- } else if (index == 2 && ch == 'A') {
963
+ } else if (parser->index == 2 && ch == 'A') {
653
964
  parser->method = HTTP_MKACTIVITY;
965
+ } else {
966
+ goto error;
654
967
  }
655
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
656
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
657
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
658
- parser->method = HTTP_PUT;
659
- } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
660
- parser->method = HTTP_UNSUBSCRIBE;
661
- } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
968
+ } else if (parser->index == 1 && parser->method == HTTP_POST) {
969
+ if (ch == 'R') {
970
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
971
+ } else if (ch == 'U') {
972
+ parser->method = HTTP_PUT; /* or HTTP_PURGE */
973
+ } else if (ch == 'A') {
974
+ parser->method = HTTP_PATCH;
975
+ } else {
976
+ goto error;
977
+ }
978
+ } else if (parser->index == 2) {
979
+ if (parser->method == HTTP_PUT) {
980
+ if (ch == 'R') parser->method = HTTP_PURGE;
981
+ } else if (parser->method == HTTP_UNLOCK) {
982
+ if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
983
+ }
984
+ } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
662
985
  parser->method = HTTP_PROPPATCH;
663
986
  } else {
987
+ SET_ERRNO(HPE_INVALID_METHOD);
664
988
  goto error;
665
989
  }
666
990
 
667
- ++index;
991
+ ++parser->index;
668
992
  break;
669
993
  }
994
+
670
995
  case s_req_spaces_before_url:
671
996
  {
672
997
  if (ch == ' ') break;
673
998
 
674
- if (ch == '/' || ch == '*') {
675
- MARK(url);
676
- MARK(path);
677
- state = s_req_path;
678
- break;
999
+ MARK(url);
1000
+ if (parser->method == HTTP_CONNECT) {
1001
+ parser->state = s_req_host_start;
679
1002
  }
680
1003
 
681
- c = LOWER(ch);
682
-
683
- if (c >= 'a' && c <= 'z') {
684
- MARK(url);
685
- state = s_req_schema;
686
- break;
1004
+ parser->state = parse_url_char((enum state)parser->state, ch);
1005
+ if (parser->state == s_dead) {
1006
+ SET_ERRNO(HPE_INVALID_URL);
1007
+ goto error;
687
1008
  }
688
1009
 
689
- goto error;
1010
+ break;
690
1011
  }
691
1012
 
692
1013
  case s_req_schema:
693
- {
694
- c = LOWER(ch);
695
-
696
- if (c >= 'a' && c <= 'z') break;
697
-
698
- if (ch == ':') {
699
- state = s_req_schema_slash;
700
- break;
701
- } else if (ch == '.') {
702
- state = s_req_host;
703
- break;
704
- } else if ('0' <= ch && ch <= '9') {
705
- state = s_req_host;
706
- break;
707
- }
708
-
709
- goto error;
710
- }
711
-
712
1014
  case s_req_schema_slash:
713
- STRICT_CHECK(ch != '/');
714
- state = s_req_schema_slash_slash;
715
- break;
716
-
717
1015
  case s_req_schema_slash_slash:
718
- STRICT_CHECK(ch != '/');
719
- state = s_req_host;
720
- break;
721
-
722
- case s_req_host:
723
- {
724
- c = LOWER(ch);
725
- if (c >= 'a' && c <= 'z') break;
726
- if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
727
- switch (ch) {
728
- case ':':
729
- state = s_req_port;
730
- break;
731
- case '/':
732
- MARK(path);
733
- state = s_req_path;
734
- break;
735
- case ' ':
736
- /* The request line looks like:
737
- * "GET http://foo.bar.com HTTP/1.1"
738
- * That is, there is no path.
739
- */
740
- CALLBACK(url);
741
- state = s_req_http_start;
742
- break;
743
- case '?':
744
- state = s_req_query_string_start;
745
- break;
746
- default:
747
- goto error;
748
- }
749
- break;
750
- }
751
-
752
- case s_req_port:
1016
+ case s_req_host_start:
1017
+ case s_req_host_v6_start:
1018
+ case s_req_host_v6:
1019
+ case s_req_port_start:
753
1020
  {
754
- if (ch >= '0' && ch <= '9') break;
755
1021
  switch (ch) {
756
- case '/':
757
- MARK(path);
758
- state = s_req_path;
759
- break;
1022
+ /* No whitespace allowed here */
760
1023
  case ' ':
761
- /* The request line looks like:
762
- * "GET http://foo.bar.com:1234 HTTP/1.1"
763
- * That is, there is no path.
764
- */
765
- CALLBACK(url);
766
- state = s_req_http_start;
767
- break;
768
- case '?':
769
- state = s_req_query_string_start;
770
- break;
771
- default:
772
- goto error;
773
- }
774
- break;
775
- }
776
-
777
- case s_req_path:
778
- {
779
- if (normal_url_char[(unsigned char)ch]) break;
780
-
781
- switch (ch) {
782
- case ' ':
783
- CALLBACK(url);
784
- CALLBACK(path);
785
- state = s_req_http_start;
786
- break;
787
1024
  case CR:
788
- CALLBACK(url);
789
- CALLBACK(path);
790
- parser->http_major = 0;
791
- parser->http_minor = 9;
792
- state = s_req_line_almost_done;
793
- break;
794
1025
  case LF:
795
- CALLBACK(url);
796
- CALLBACK(path);
797
- parser->http_major = 0;
798
- parser->http_minor = 9;
799
- state = s_header_field_start;
800
- break;
801
- case '?':
802
- CALLBACK(path);
803
- state = s_req_query_string_start;
804
- break;
805
- case '#':
806
- CALLBACK(path);
807
- state = s_req_fragment_start;
808
- break;
809
- default:
1026
+ SET_ERRNO(HPE_INVALID_URL);
810
1027
  goto error;
811
- }
812
- break;
813
- }
814
-
815
- case s_req_query_string_start:
816
- {
817
- if (normal_url_char[(unsigned char)ch]) {
818
- MARK(query_string);
819
- state = s_req_query_string;
820
- break;
821
- }
822
-
823
- switch (ch) {
824
- case '?':
825
- break; /* XXX ignore extra '?' ... is this right? */
826
- case ' ':
827
- CALLBACK(url);
828
- state = s_req_http_start;
829
- break;
830
- case CR:
831
- CALLBACK(url);
832
- parser->http_major = 0;
833
- parser->http_minor = 9;
834
- state = s_req_line_almost_done;
835
- break;
836
- case LF:
837
- CALLBACK(url);
838
- parser->http_major = 0;
839
- parser->http_minor = 9;
840
- state = s_header_field_start;
841
- break;
842
- case '#':
843
- state = s_req_fragment_start;
844
- break;
845
1028
  default:
846
- goto error;
1029
+ parser->state = parse_url_char((enum state)parser->state, ch);
1030
+ if (parser->state == s_dead) {
1031
+ SET_ERRNO(HPE_INVALID_URL);
1032
+ goto error;
1033
+ }
847
1034
  }
848
- break;
849
- }
850
1035
 
851
- case s_req_query_string:
852
- {
853
- if (normal_url_char[(unsigned char)ch]) break;
854
-
855
- switch (ch) {
856
- case '?':
857
- /* allow extra '?' in query string */
858
- break;
859
- case ' ':
860
- CALLBACK(url);
861
- CALLBACK(query_string);
862
- state = s_req_http_start;
863
- break;
864
- case CR:
865
- CALLBACK(url);
866
- CALLBACK(query_string);
867
- parser->http_major = 0;
868
- parser->http_minor = 9;
869
- state = s_req_line_almost_done;
870
- break;
871
- case LF:
872
- CALLBACK(url);
873
- CALLBACK(query_string);
874
- parser->http_major = 0;
875
- parser->http_minor = 9;
876
- state = s_header_field_start;
877
- break;
878
- case '#':
879
- CALLBACK(query_string);
880
- state = s_req_fragment_start;
881
- break;
882
- default:
883
- goto error;
884
- }
885
1036
  break;
886
1037
  }
887
1038
 
1039
+ case s_req_host:
1040
+ case s_req_host_v6_end:
1041
+ case s_req_port:
1042
+ case s_req_path:
1043
+ case s_req_query_string_start:
1044
+ case s_req_query_string:
888
1045
  case s_req_fragment_start:
889
- {
890
- if (normal_url_char[(unsigned char)ch]) {
891
- MARK(fragment);
892
- state = s_req_fragment;
893
- break;
894
- }
895
-
896
- switch (ch) {
897
- case ' ':
898
- CALLBACK(url);
899
- state = s_req_http_start;
900
- break;
901
- case CR:
902
- CALLBACK(url);
903
- parser->http_major = 0;
904
- parser->http_minor = 9;
905
- state = s_req_line_almost_done;
906
- break;
907
- case LF:
908
- CALLBACK(url);
909
- parser->http_major = 0;
910
- parser->http_minor = 9;
911
- state = s_header_field_start;
912
- break;
913
- case '?':
914
- MARK(fragment);
915
- state = s_req_fragment;
916
- break;
917
- case '#':
918
- break;
919
- default:
920
- goto error;
921
- }
922
- break;
923
- }
924
-
925
1046
  case s_req_fragment:
926
1047
  {
927
- if (normal_url_char[(unsigned char)ch]) break;
928
-
929
1048
  switch (ch) {
930
1049
  case ' ':
931
- CALLBACK(url);
932
- CALLBACK(fragment);
933
- state = s_req_http_start;
1050
+ parser->state = s_req_http_start;
1051
+ CALLBACK_DATA(url);
934
1052
  break;
935
1053
  case CR:
936
- CALLBACK(url);
937
- CALLBACK(fragment);
938
- parser->http_major = 0;
939
- parser->http_minor = 9;
940
- state = s_req_line_almost_done;
941
- break;
942
1054
  case LF:
943
- CALLBACK(url);
944
- CALLBACK(fragment);
945
1055
  parser->http_major = 0;
946
1056
  parser->http_minor = 9;
947
- state = s_header_field_start;
948
- break;
949
- case '?':
950
- case '#':
1057
+ parser->state = (ch == CR) ?
1058
+ s_req_line_almost_done :
1059
+ s_header_field_start;
1060
+ CALLBACK_DATA(url);
951
1061
  break;
952
1062
  default:
953
- goto error;
1063
+ parser->state = parse_url_char((enum state)parser->state, ch);
1064
+ if (parser->state == s_dead) {
1065
+ SET_ERRNO(HPE_INVALID_URL);
1066
+ goto error;
1067
+ }
954
1068
  }
955
1069
  break;
956
1070
  }
@@ -958,140 +1072,170 @@ size_t http_parser_execute (http_parser *parser,
958
1072
  case s_req_http_start:
959
1073
  switch (ch) {
960
1074
  case 'H':
961
- state = s_req_http_H;
1075
+ parser->state = s_req_http_H;
962
1076
  break;
963
1077
  case ' ':
964
1078
  break;
965
1079
  default:
1080
+ SET_ERRNO(HPE_INVALID_CONSTANT);
966
1081
  goto error;
967
1082
  }
968
1083
  break;
969
1084
 
970
1085
  case s_req_http_H:
971
1086
  STRICT_CHECK(ch != 'T');
972
- state = s_req_http_HT;
1087
+ parser->state = s_req_http_HT;
973
1088
  break;
974
1089
 
975
1090
  case s_req_http_HT:
976
1091
  STRICT_CHECK(ch != 'T');
977
- state = s_req_http_HTT;
1092
+ parser->state = s_req_http_HTT;
978
1093
  break;
979
1094
 
980
1095
  case s_req_http_HTT:
981
1096
  STRICT_CHECK(ch != 'P');
982
- state = s_req_http_HTTP;
1097
+ parser->state = s_req_http_HTTP;
983
1098
  break;
984
1099
 
985
1100
  case s_req_http_HTTP:
986
1101
  STRICT_CHECK(ch != '/');
987
- state = s_req_first_http_major;
1102
+ parser->state = s_req_first_http_major;
988
1103
  break;
989
1104
 
990
1105
  /* first digit of major HTTP version */
991
1106
  case s_req_first_http_major:
992
- if (ch < '1' || ch > '9') goto error;
1107
+ if (ch < '1' || ch > '9') {
1108
+ SET_ERRNO(HPE_INVALID_VERSION);
1109
+ goto error;
1110
+ }
1111
+
993
1112
  parser->http_major = ch - '0';
994
- state = s_req_http_major;
1113
+ parser->state = s_req_http_major;
995
1114
  break;
996
1115
 
997
1116
  /* major HTTP version or dot */
998
1117
  case s_req_http_major:
999
1118
  {
1000
1119
  if (ch == '.') {
1001
- state = s_req_first_http_minor;
1120
+ parser->state = s_req_first_http_minor;
1002
1121
  break;
1003
1122
  }
1004
1123
 
1005
- if (ch < '0' || ch > '9') goto error;
1124
+ if (!IS_NUM(ch)) {
1125
+ SET_ERRNO(HPE_INVALID_VERSION);
1126
+ goto error;
1127
+ }
1006
1128
 
1007
1129
  parser->http_major *= 10;
1008
1130
  parser->http_major += ch - '0';
1009
1131
 
1010
- if (parser->http_major > 999) goto error;
1132
+ if (parser->http_major > 999) {
1133
+ SET_ERRNO(HPE_INVALID_VERSION);
1134
+ goto error;
1135
+ }
1136
+
1011
1137
  break;
1012
1138
  }
1013
1139
 
1014
1140
  /* first digit of minor HTTP version */
1015
1141
  case s_req_first_http_minor:
1016
- if (ch < '0' || ch > '9') goto error;
1142
+ if (!IS_NUM(ch)) {
1143
+ SET_ERRNO(HPE_INVALID_VERSION);
1144
+ goto error;
1145
+ }
1146
+
1017
1147
  parser->http_minor = ch - '0';
1018
- state = s_req_http_minor;
1148
+ parser->state = s_req_http_minor;
1019
1149
  break;
1020
1150
 
1021
1151
  /* minor HTTP version or end of request line */
1022
1152
  case s_req_http_minor:
1023
1153
  {
1024
1154
  if (ch == CR) {
1025
- state = s_req_line_almost_done;
1155
+ parser->state = s_req_line_almost_done;
1026
1156
  break;
1027
1157
  }
1028
1158
 
1029
1159
  if (ch == LF) {
1030
- state = s_header_field_start;
1160
+ parser->state = s_header_field_start;
1031
1161
  break;
1032
1162
  }
1033
1163
 
1034
1164
  /* XXX allow spaces after digit? */
1035
1165
 
1036
- if (ch < '0' || ch > '9') goto error;
1166
+ if (!IS_NUM(ch)) {
1167
+ SET_ERRNO(HPE_INVALID_VERSION);
1168
+ goto error;
1169
+ }
1037
1170
 
1038
1171
  parser->http_minor *= 10;
1039
1172
  parser->http_minor += ch - '0';
1040
1173
 
1041
- if (parser->http_minor > 999) goto error;
1174
+ if (parser->http_minor > 999) {
1175
+ SET_ERRNO(HPE_INVALID_VERSION);
1176
+ goto error;
1177
+ }
1178
+
1042
1179
  break;
1043
1180
  }
1044
1181
 
1045
1182
  /* end of request line */
1046
1183
  case s_req_line_almost_done:
1047
1184
  {
1048
- if (ch != LF) goto error;
1049
- state = s_header_field_start;
1185
+ if (ch != LF) {
1186
+ SET_ERRNO(HPE_LF_EXPECTED);
1187
+ goto error;
1188
+ }
1189
+
1190
+ parser->state = s_header_field_start;
1050
1191
  break;
1051
1192
  }
1052
1193
 
1053
1194
  case s_header_field_start:
1054
1195
  {
1055
1196
  if (ch == CR) {
1056
- state = s_headers_almost_done;
1197
+ parser->state = s_headers_almost_done;
1057
1198
  break;
1058
1199
  }
1059
1200
 
1060
1201
  if (ch == LF) {
1061
1202
  /* they might be just sending \n instead of \r\n so this would be
1062
1203
  * the second \n to denote the end of headers*/
1063
- state = s_headers_almost_done;
1064
- goto headers_almost_done;
1204
+ parser->state = s_headers_almost_done;
1205
+ goto reexecute_byte;
1065
1206
  }
1066
1207
 
1067
1208
  c = TOKEN(ch);
1068
1209
 
1069
- if (!c) goto error;
1210
+ if (!c) {
1211
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1212
+ goto error;
1213
+ }
1070
1214
 
1071
1215
  MARK(header_field);
1072
1216
 
1073
- index = 0;
1074
- state = s_header_field;
1217
+ parser->index = 0;
1218
+ parser->state = s_header_field;
1075
1219
 
1076
1220
  switch (c) {
1077
1221
  case 'c':
1078
- header_state = h_C;
1222
+ parser->header_state = h_C;
1079
1223
  break;
1080
1224
 
1081
1225
  case 'p':
1082
- header_state = h_matching_proxy_connection;
1226
+ parser->header_state = h_matching_proxy_connection;
1083
1227
  break;
1084
1228
 
1085
1229
  case 't':
1086
- header_state = h_matching_transfer_encoding;
1230
+ parser->header_state = h_matching_transfer_encoding;
1087
1231
  break;
1088
1232
 
1089
1233
  case 'u':
1090
- header_state = h_matching_upgrade;
1234
+ parser->header_state = h_matching_upgrade;
1091
1235
  break;
1092
1236
 
1093
1237
  default:
1094
- header_state = h_general;
1238
+ parser->header_state = h_general;
1095
1239
  break;
1096
1240
  }
1097
1241
  break;
@@ -1102,31 +1246,31 @@ size_t http_parser_execute (http_parser *parser,
1102
1246
  c = TOKEN(ch);
1103
1247
 
1104
1248
  if (c) {
1105
- switch (header_state) {
1249
+ switch (parser->header_state) {
1106
1250
  case h_general:
1107
1251
  break;
1108
1252
 
1109
1253
  case h_C:
1110
- index++;
1111
- header_state = (c == 'o' ? h_CO : h_general);
1254
+ parser->index++;
1255
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1112
1256
  break;
1113
1257
 
1114
1258
  case h_CO:
1115
- index++;
1116
- header_state = (c == 'n' ? h_CON : h_general);
1259
+ parser->index++;
1260
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1117
1261
  break;
1118
1262
 
1119
1263
  case h_CON:
1120
- index++;
1264
+ parser->index++;
1121
1265
  switch (c) {
1122
1266
  case 'n':
1123
- header_state = h_matching_connection;
1267
+ parser->header_state = h_matching_connection;
1124
1268
  break;
1125
1269
  case 't':
1126
- header_state = h_matching_content_length;
1270
+ parser->header_state = h_matching_content_length;
1127
1271
  break;
1128
1272
  default:
1129
- header_state = h_general;
1273
+ parser->header_state = h_general;
1130
1274
  break;
1131
1275
  }
1132
1276
  break;
@@ -1134,60 +1278,60 @@ size_t http_parser_execute (http_parser *parser,
1134
1278
  /* connection */
1135
1279
 
1136
1280
  case h_matching_connection:
1137
- index++;
1138
- if (index > sizeof(CONNECTION)-1
1139
- || c != CONNECTION[index]) {
1140
- header_state = h_general;
1141
- } else if (index == sizeof(CONNECTION)-2) {
1142
- header_state = h_connection;
1281
+ parser->index++;
1282
+ if (parser->index > sizeof(CONNECTION)-1
1283
+ || c != CONNECTION[parser->index]) {
1284
+ parser->header_state = h_general;
1285
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1286
+ parser->header_state = h_connection;
1143
1287
  }
1144
1288
  break;
1145
1289
 
1146
1290
  /* proxy-connection */
1147
1291
 
1148
1292
  case h_matching_proxy_connection:
1149
- index++;
1150
- if (index > sizeof(PROXY_CONNECTION)-1
1151
- || c != PROXY_CONNECTION[index]) {
1152
- header_state = h_general;
1153
- } else if (index == sizeof(PROXY_CONNECTION)-2) {
1154
- header_state = h_connection;
1293
+ parser->index++;
1294
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1295
+ || c != PROXY_CONNECTION[parser->index]) {
1296
+ parser->header_state = h_general;
1297
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1298
+ parser->header_state = h_connection;
1155
1299
  }
1156
1300
  break;
1157
1301
 
1158
1302
  /* content-length */
1159
1303
 
1160
1304
  case h_matching_content_length:
1161
- index++;
1162
- if (index > sizeof(CONTENT_LENGTH)-1
1163
- || c != CONTENT_LENGTH[index]) {
1164
- header_state = h_general;
1165
- } else if (index == sizeof(CONTENT_LENGTH)-2) {
1166
- header_state = h_content_length;
1305
+ parser->index++;
1306
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1307
+ || c != CONTENT_LENGTH[parser->index]) {
1308
+ parser->header_state = h_general;
1309
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1310
+ parser->header_state = h_content_length;
1167
1311
  }
1168
1312
  break;
1169
1313
 
1170
1314
  /* transfer-encoding */
1171
1315
 
1172
1316
  case h_matching_transfer_encoding:
1173
- index++;
1174
- if (index > sizeof(TRANSFER_ENCODING)-1
1175
- || c != TRANSFER_ENCODING[index]) {
1176
- header_state = h_general;
1177
- } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1178
- header_state = h_transfer_encoding;
1317
+ parser->index++;
1318
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1319
+ || c != TRANSFER_ENCODING[parser->index]) {
1320
+ parser->header_state = h_general;
1321
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1322
+ parser->header_state = h_transfer_encoding;
1179
1323
  }
1180
1324
  break;
1181
1325
 
1182
1326
  /* upgrade */
1183
1327
 
1184
1328
  case h_matching_upgrade:
1185
- index++;
1186
- if (index > sizeof(UPGRADE)-1
1187
- || c != UPGRADE[index]) {
1188
- header_state = h_general;
1189
- } else if (index == sizeof(UPGRADE)-2) {
1190
- header_state = h_upgrade;
1329
+ parser->index++;
1330
+ if (parser->index > sizeof(UPGRADE)-1
1331
+ || c != UPGRADE[parser->index]) {
1332
+ parser->header_state = h_general;
1333
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1334
+ parser->header_state = h_upgrade;
1191
1335
  }
1192
1336
  break;
1193
1337
 
@@ -1195,7 +1339,7 @@ size_t http_parser_execute (http_parser *parser,
1195
1339
  case h_content_length:
1196
1340
  case h_transfer_encoding:
1197
1341
  case h_upgrade:
1198
- if (ch != ' ') header_state = h_general;
1342
+ if (ch != ' ') parser->header_state = h_general;
1199
1343
  break;
1200
1344
 
1201
1345
  default:
@@ -1206,84 +1350,89 @@ size_t http_parser_execute (http_parser *parser,
1206
1350
  }
1207
1351
 
1208
1352
  if (ch == ':') {
1209
- CALLBACK(header_field);
1210
- state = s_header_value_start;
1353
+ parser->state = s_header_value_start;
1354
+ CALLBACK_DATA(header_field);
1211
1355
  break;
1212
1356
  }
1213
1357
 
1214
1358
  if (ch == CR) {
1215
- state = s_header_almost_done;
1216
- CALLBACK(header_field);
1359
+ parser->state = s_header_almost_done;
1360
+ CALLBACK_DATA(header_field);
1217
1361
  break;
1218
1362
  }
1219
1363
 
1220
1364
  if (ch == LF) {
1221
- CALLBACK(header_field);
1222
- state = s_header_field_start;
1365
+ parser->state = s_header_field_start;
1366
+ CALLBACK_DATA(header_field);
1223
1367
  break;
1224
1368
  }
1225
1369
 
1370
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1226
1371
  goto error;
1227
1372
  }
1228
1373
 
1229
1374
  case s_header_value_start:
1230
1375
  {
1231
- if (ch == ' ') break;
1376
+ if (ch == ' ' || ch == '\t') break;
1232
1377
 
1233
1378
  MARK(header_value);
1234
1379
 
1235
- state = s_header_value;
1236
- index = 0;
1237
-
1238
- c = LOWER(ch);
1380
+ parser->state = s_header_value;
1381
+ parser->index = 0;
1239
1382
 
1240
1383
  if (ch == CR) {
1241
- CALLBACK(header_value);
1242
- header_state = h_general;
1243
- state = s_header_almost_done;
1384
+ parser->header_state = h_general;
1385
+ parser->state = s_header_almost_done;
1386
+ CALLBACK_DATA(header_value);
1244
1387
  break;
1245
1388
  }
1246
1389
 
1247
1390
  if (ch == LF) {
1248
- CALLBACK(header_value);
1249
- state = s_header_field_start;
1391
+ parser->state = s_header_field_start;
1392
+ CALLBACK_DATA(header_value);
1250
1393
  break;
1251
1394
  }
1252
1395
 
1253
- switch (header_state) {
1396
+ c = LOWER(ch);
1397
+
1398
+ switch (parser->header_state) {
1254
1399
  case h_upgrade:
1255
1400
  parser->flags |= F_UPGRADE;
1256
- header_state = h_general;
1401
+ parser->header_state = h_general;
1257
1402
  break;
1258
1403
 
1259
1404
  case h_transfer_encoding:
1260
1405
  /* looking for 'Transfer-Encoding: chunked' */
1261
1406
  if ('c' == c) {
1262
- header_state = h_matching_transfer_encoding_chunked;
1407
+ parser->header_state = h_matching_transfer_encoding_chunked;
1263
1408
  } else {
1264
- header_state = h_general;
1409
+ parser->header_state = h_general;
1265
1410
  }
1266
1411
  break;
1267
1412
 
1268
1413
  case h_content_length:
1269
- if (ch < '0' || ch > '9') goto error;
1414
+ if (!IS_NUM(ch)) {
1415
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1416
+ goto error;
1417
+ }
1418
+
1270
1419
  parser->content_length = ch - '0';
1271
1420
  break;
1272
1421
 
1273
1422
  case h_connection:
1274
1423
  /* looking for 'Connection: keep-alive' */
1275
1424
  if (c == 'k') {
1276
- header_state = h_matching_connection_keep_alive;
1425
+ parser->header_state = h_matching_connection_keep_alive;
1277
1426
  /* looking for 'Connection: close' */
1278
1427
  } else if (c == 'c') {
1279
- header_state = h_matching_connection_close;
1428
+ parser->header_state = h_matching_connection_close;
1280
1429
  } else {
1281
- header_state = h_general;
1430
+ parser->header_state = h_general;
1282
1431
  }
1283
1432
  break;
1284
1433
 
1285
1434
  default:
1286
- header_state = h_general;
1435
+ parser->header_state = h_general;
1287
1436
  break;
1288
1437
  }
1289
1438
  break;
@@ -1291,20 +1440,22 @@ size_t http_parser_execute (http_parser *parser,
1291
1440
 
1292
1441
  case s_header_value:
1293
1442
  {
1294
- c = LOWER(ch);
1295
1443
 
1296
1444
  if (ch == CR) {
1297
- CALLBACK(header_value);
1298
- state = s_header_almost_done;
1445
+ parser->state = s_header_almost_done;
1446
+ CALLBACK_DATA(header_value);
1299
1447
  break;
1300
1448
  }
1301
1449
 
1302
1450
  if (ch == LF) {
1303
- CALLBACK(header_value);
1304
- goto header_almost_done;
1451
+ parser->state = s_header_almost_done;
1452
+ CALLBACK_DATA_NOADVANCE(header_value);
1453
+ goto reexecute_byte;
1305
1454
  }
1306
1455
 
1307
- switch (header_state) {
1456
+ c = LOWER(ch);
1457
+
1458
+ switch (parser->header_state) {
1308
1459
  case h_general:
1309
1460
  break;
1310
1461
 
@@ -1314,66 +1465,83 @@ size_t http_parser_execute (http_parser *parser,
1314
1465
  break;
1315
1466
 
1316
1467
  case h_content_length:
1468
+ {
1469
+ uint64_t t;
1470
+
1317
1471
  if (ch == ' ') break;
1318
- if (ch < '0' || ch > '9') goto error;
1319
- parser->content_length *= 10;
1320
- parser->content_length += ch - '0';
1472
+
1473
+ if (!IS_NUM(ch)) {
1474
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1475
+ goto error;
1476
+ }
1477
+
1478
+ t = parser->content_length;
1479
+ t *= 10;
1480
+ t += ch - '0';
1481
+
1482
+ /* Overflow? */
1483
+ if (t < parser->content_length || t == ULLONG_MAX) {
1484
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1485
+ goto error;
1486
+ }
1487
+
1488
+ parser->content_length = t;
1321
1489
  break;
1490
+ }
1322
1491
 
1323
1492
  /* Transfer-Encoding: chunked */
1324
1493
  case h_matching_transfer_encoding_chunked:
1325
- index++;
1326
- if (index > sizeof(CHUNKED)-1
1327
- || c != CHUNKED[index]) {
1328
- header_state = h_general;
1329
- } else if (index == sizeof(CHUNKED)-2) {
1330
- header_state = h_transfer_encoding_chunked;
1494
+ parser->index++;
1495
+ if (parser->index > sizeof(CHUNKED)-1
1496
+ || c != CHUNKED[parser->index]) {
1497
+ parser->header_state = h_general;
1498
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1499
+ parser->header_state = h_transfer_encoding_chunked;
1331
1500
  }
1332
1501
  break;
1333
1502
 
1334
1503
  /* looking for 'Connection: keep-alive' */
1335
1504
  case h_matching_connection_keep_alive:
1336
- index++;
1337
- if (index > sizeof(KEEP_ALIVE)-1
1338
- || c != KEEP_ALIVE[index]) {
1339
- header_state = h_general;
1340
- } else if (index == sizeof(KEEP_ALIVE)-2) {
1341
- header_state = h_connection_keep_alive;
1505
+ parser->index++;
1506
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1507
+ || c != KEEP_ALIVE[parser->index]) {
1508
+ parser->header_state = h_general;
1509
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1510
+ parser->header_state = h_connection_keep_alive;
1342
1511
  }
1343
1512
  break;
1344
1513
 
1345
1514
  /* looking for 'Connection: close' */
1346
1515
  case h_matching_connection_close:
1347
- index++;
1348
- if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1349
- header_state = h_general;
1350
- } else if (index == sizeof(CLOSE)-2) {
1351
- header_state = h_connection_close;
1516
+ parser->index++;
1517
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1518
+ parser->header_state = h_general;
1519
+ } else if (parser->index == sizeof(CLOSE)-2) {
1520
+ parser->header_state = h_connection_close;
1352
1521
  }
1353
1522
  break;
1354
1523
 
1355
1524
  case h_transfer_encoding_chunked:
1356
1525
  case h_connection_keep_alive:
1357
1526
  case h_connection_close:
1358
- if (ch != ' ') header_state = h_general;
1527
+ if (ch != ' ') parser->header_state = h_general;
1359
1528
  break;
1360
1529
 
1361
1530
  default:
1362
- state = s_header_value;
1363
- header_state = h_general;
1531
+ parser->state = s_header_value;
1532
+ parser->header_state = h_general;
1364
1533
  break;
1365
1534
  }
1366
1535
  break;
1367
1536
  }
1368
1537
 
1369
1538
  case s_header_almost_done:
1370
- header_almost_done:
1371
1539
  {
1372
1540
  STRICT_CHECK(ch != LF);
1373
1541
 
1374
- state = s_header_field_start;
1542
+ parser->state = s_header_value_lws;
1375
1543
 
1376
- switch (header_state) {
1544
+ switch (parser->header_state) {
1377
1545
  case h_connection_keep_alive:
1378
1546
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
1379
1547
  break;
@@ -1386,32 +1554,47 @@ size_t http_parser_execute (http_parser *parser,
1386
1554
  default:
1387
1555
  break;
1388
1556
  }
1557
+
1558
+ break;
1559
+ }
1560
+
1561
+ case s_header_value_lws:
1562
+ {
1563
+ if (ch == ' ' || ch == '\t')
1564
+ parser->state = s_header_value_start;
1565
+ else
1566
+ {
1567
+ parser->state = s_header_field_start;
1568
+ goto reexecute_byte;
1569
+ }
1389
1570
  break;
1390
1571
  }
1391
1572
 
1392
1573
  case s_headers_almost_done:
1393
- headers_almost_done:
1394
1574
  {
1395
1575
  STRICT_CHECK(ch != LF);
1396
1576
 
1397
1577
  if (parser->flags & F_TRAILING) {
1398
1578
  /* End of a chunked request */
1399
- CALLBACK2(message_complete);
1400
- state = NEW_MESSAGE();
1579
+ parser->state = NEW_MESSAGE();
1580
+ CALLBACK_NOTIFY(message_complete);
1401
1581
  break;
1402
1582
  }
1403
1583
 
1404
- nread = 0;
1584
+ parser->state = s_headers_done;
1405
1585
 
1406
- if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1407
- parser->upgrade = 1;
1408
- }
1586
+ /* Set this here so that on_headers_complete() callbacks can see it */
1587
+ parser->upgrade =
1588
+ (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1409
1589
 
1410
1590
  /* Here we call the headers_complete callback. This is somewhat
1411
1591
  * different than other callbacks because if the user returns 1, we
1412
1592
  * will interpret that as saying that this message has no body. This
1413
1593
  * is needed for the annoying case of receiving a response to a HEAD
1414
1594
  * request.
1595
+ *
1596
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1597
+ * we have to simulate it by handling a change in errno below.
1415
1598
  */
1416
1599
  if (settings->on_headers_complete) {
1417
1600
  switch (settings->on_headers_complete(parser)) {
@@ -1423,39 +1606,54 @@ size_t http_parser_execute (http_parser *parser,
1423
1606
  break;
1424
1607
 
1425
1608
  default:
1426
- parser->state = state;
1609
+ SET_ERRNO(HPE_CB_headers_complete);
1427
1610
  return p - data; /* Error */
1428
1611
  }
1429
1612
  }
1430
1613
 
1614
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1615
+ return p - data;
1616
+ }
1617
+
1618
+ goto reexecute_byte;
1619
+ }
1620
+
1621
+ case s_headers_done:
1622
+ {
1623
+ STRICT_CHECK(ch != LF);
1624
+
1625
+ parser->nread = 0;
1626
+
1431
1627
  /* Exit, the rest of the connect is in a different protocol. */
1432
1628
  if (parser->upgrade) {
1433
- CALLBACK2(message_complete);
1434
- return (p - data);
1629
+ parser->state = NEW_MESSAGE();
1630
+ CALLBACK_NOTIFY(message_complete);
1631
+ return (p - data) + 1;
1435
1632
  }
1436
1633
 
1437
1634
  if (parser->flags & F_SKIPBODY) {
1438
- CALLBACK2(message_complete);
1439
- state = NEW_MESSAGE();
1635
+ parser->state = NEW_MESSAGE();
1636
+ CALLBACK_NOTIFY(message_complete);
1440
1637
  } else if (parser->flags & F_CHUNKED) {
1441
1638
  /* chunked encoding - ignore Content-Length header */
1442
- state = s_chunk_size_start;
1639
+ parser->state = s_chunk_size_start;
1443
1640
  } else {
1444
1641
  if (parser->content_length == 0) {
1445
1642
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1446
- CALLBACK2(message_complete);
1447
- state = NEW_MESSAGE();
1448
- } else if (parser->content_length > 0) {
1643
+ parser->state = NEW_MESSAGE();
1644
+ CALLBACK_NOTIFY(message_complete);
1645
+ } else if (parser->content_length != ULLONG_MAX) {
1449
1646
  /* Content-Length header given and non-zero */
1450
- state = s_body_identity;
1647
+ parser->state = s_body_identity;
1451
1648
  } else {
1452
- if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1649
+ if (parser->type == HTTP_REQUEST ||
1650
+ !http_message_needs_eof(parser)) {
1453
1651
  /* Assume content-length 0 - read the next */
1454
- CALLBACK2(message_complete);
1455
- state = NEW_MESSAGE();
1652
+ parser->state = NEW_MESSAGE();
1653
+ CALLBACK_NOTIFY(message_complete);
1456
1654
  } else {
1457
1655
  /* Read body until EOF */
1458
- state = s_body_identity_eof;
1656
+ parser->state = s_body_identity_eof;
1459
1657
  }
1460
1658
  }
1461
1659
  }
@@ -1464,60 +1662,103 @@ size_t http_parser_execute (http_parser *parser,
1464
1662
  }
1465
1663
 
1466
1664
  case s_body_identity:
1467
- to_read = MIN(pe - p, (int64_t)parser->content_length);
1468
- if (to_read > 0) {
1469
- if (settings->on_body) settings->on_body(parser, p, to_read);
1470
- p += to_read - 1;
1471
- parser->content_length -= to_read;
1472
- if (parser->content_length == 0) {
1473
- CALLBACK2(message_complete);
1474
- state = NEW_MESSAGE();
1475
- }
1665
+ {
1666
+ uint64_t to_read = MIN(parser->content_length,
1667
+ (uint64_t) ((data + len) - p));
1668
+
1669
+ assert(parser->content_length != 0
1670
+ && parser->content_length != ULLONG_MAX);
1671
+
1672
+ /* The difference between advancing content_length and p is because
1673
+ * the latter will automaticaly advance on the next loop iteration.
1674
+ * Further, if content_length ends up at 0, we want to see the last
1675
+ * byte again for our message complete callback.
1676
+ */
1677
+ MARK(body);
1678
+ parser->content_length -= to_read;
1679
+ p += to_read - 1;
1680
+
1681
+ if (parser->content_length == 0) {
1682
+ parser->state = s_message_done;
1683
+
1684
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1685
+ *
1686
+ * The alternative to doing this is to wait for the next byte to
1687
+ * trigger the data callback, just as in every other case. The
1688
+ * problem with this is that this makes it difficult for the test
1689
+ * harness to distinguish between complete-on-EOF and
1690
+ * complete-on-length. It's not clear that this distinction is
1691
+ * important for applications, but let's keep it for now.
1692
+ */
1693
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1694
+ goto reexecute_byte;
1476
1695
  }
1696
+
1477
1697
  break;
1698
+ }
1478
1699
 
1479
1700
  /* read until EOF */
1480
1701
  case s_body_identity_eof:
1481
- to_read = pe - p;
1482
- if (to_read > 0) {
1483
- if (settings->on_body) settings->on_body(parser, p, to_read);
1484
- p += to_read - 1;
1485
- }
1702
+ MARK(body);
1703
+ p = data + len - 1;
1704
+
1705
+ break;
1706
+
1707
+ case s_message_done:
1708
+ parser->state = NEW_MESSAGE();
1709
+ CALLBACK_NOTIFY(message_complete);
1486
1710
  break;
1487
1711
 
1488
1712
  case s_chunk_size_start:
1489
1713
  {
1490
- assert(nread == 1);
1714
+ assert(parser->nread == 1);
1491
1715
  assert(parser->flags & F_CHUNKED);
1492
1716
 
1493
- c = unhex[(unsigned char)ch];
1494
- if (c == -1) goto error;
1495
- parser->content_length = c;
1496
- state = s_chunk_size;
1717
+ unhex_val = unhex[(unsigned char)ch];
1718
+ if (unhex_val == -1) {
1719
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1720
+ goto error;
1721
+ }
1722
+
1723
+ parser->content_length = unhex_val;
1724
+ parser->state = s_chunk_size;
1497
1725
  break;
1498
1726
  }
1499
1727
 
1500
1728
  case s_chunk_size:
1501
1729
  {
1730
+ uint64_t t;
1731
+
1502
1732
  assert(parser->flags & F_CHUNKED);
1503
1733
 
1504
1734
  if (ch == CR) {
1505
- state = s_chunk_size_almost_done;
1735
+ parser->state = s_chunk_size_almost_done;
1506
1736
  break;
1507
1737
  }
1508
1738
 
1509
- c = unhex[(unsigned char)ch];
1739
+ unhex_val = unhex[(unsigned char)ch];
1510
1740
 
1511
- if (c == -1) {
1741
+ if (unhex_val == -1) {
1512
1742
  if (ch == ';' || ch == ' ') {
1513
- state = s_chunk_parameters;
1743
+ parser->state = s_chunk_parameters;
1514
1744
  break;
1515
1745
  }
1746
+
1747
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1748
+ goto error;
1749
+ }
1750
+
1751
+ t = parser->content_length;
1752
+ t *= 16;
1753
+ t += unhex_val;
1754
+
1755
+ /* Overflow? */
1756
+ if (t < parser->content_length || t == ULLONG_MAX) {
1757
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1516
1758
  goto error;
1517
1759
  }
1518
1760
 
1519
- parser->content_length *= 16;
1520
- parser->content_length += c;
1761
+ parser->content_length = t;
1521
1762
  break;
1522
1763
  }
1523
1764
 
@@ -1526,7 +1767,7 @@ size_t http_parser_execute (http_parser *parser,
1526
1767
  assert(parser->flags & F_CHUNKED);
1527
1768
  /* just ignore this shit. TODO check for overflow */
1528
1769
  if (ch == CR) {
1529
- state = s_chunk_size_almost_done;
1770
+ parser->state = s_chunk_size_almost_done;
1530
1771
  break;
1531
1772
  }
1532
1773
  break;
@@ -1537,74 +1778,117 @@ size_t http_parser_execute (http_parser *parser,
1537
1778
  assert(parser->flags & F_CHUNKED);
1538
1779
  STRICT_CHECK(ch != LF);
1539
1780
 
1540
- nread = 0;
1781
+ parser->nread = 0;
1541
1782
 
1542
1783
  if (parser->content_length == 0) {
1543
1784
  parser->flags |= F_TRAILING;
1544
- state = s_header_field_start;
1785
+ parser->state = s_header_field_start;
1545
1786
  } else {
1546
- state = s_chunk_data;
1787
+ parser->state = s_chunk_data;
1547
1788
  }
1548
1789
  break;
1549
1790
  }
1550
1791
 
1551
1792
  case s_chunk_data:
1552
1793
  {
1553
- assert(parser->flags & F_CHUNKED);
1794
+ uint64_t to_read = MIN(parser->content_length,
1795
+ (uint64_t) ((data + len) - p));
1554
1796
 
1555
- to_read = MIN(pe - p, (int64_t)(parser->content_length));
1797
+ assert(parser->flags & F_CHUNKED);
1798
+ assert(parser->content_length != 0
1799
+ && parser->content_length != ULLONG_MAX);
1556
1800
 
1557
- if (to_read > 0) {
1558
- if (settings->on_body) settings->on_body(parser, p, to_read);
1559
- p += to_read - 1;
1560
- }
1801
+ /* See the explanation in s_body_identity for why the content
1802
+ * length and data pointers are managed this way.
1803
+ */
1804
+ MARK(body);
1805
+ parser->content_length -= to_read;
1806
+ p += to_read - 1;
1561
1807
 
1562
- if (to_read == parser->content_length) {
1563
- state = s_chunk_data_almost_done;
1808
+ if (parser->content_length == 0) {
1809
+ parser->state = s_chunk_data_almost_done;
1564
1810
  }
1565
1811
 
1566
- parser->content_length -= to_read;
1567
1812
  break;
1568
1813
  }
1569
1814
 
1570
1815
  case s_chunk_data_almost_done:
1571
1816
  assert(parser->flags & F_CHUNKED);
1817
+ assert(parser->content_length == 0);
1572
1818
  STRICT_CHECK(ch != CR);
1573
- state = s_chunk_data_done;
1819
+ parser->state = s_chunk_data_done;
1820
+ CALLBACK_DATA(body);
1574
1821
  break;
1575
1822
 
1576
1823
  case s_chunk_data_done:
1577
1824
  assert(parser->flags & F_CHUNKED);
1578
1825
  STRICT_CHECK(ch != LF);
1579
- state = s_chunk_size_start;
1826
+ parser->nread = 0;
1827
+ parser->state = s_chunk_size_start;
1580
1828
  break;
1581
1829
 
1582
1830
  default:
1583
1831
  assert(0 && "unhandled state");
1832
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1584
1833
  goto error;
1585
1834
  }
1586
1835
  }
1587
1836
 
1588
- CALLBACK_NOCLEAR(header_field);
1589
- CALLBACK_NOCLEAR(header_value);
1590
- CALLBACK_NOCLEAR(fragment);
1591
- CALLBACK_NOCLEAR(query_string);
1592
- CALLBACK_NOCLEAR(path);
1593
- CALLBACK_NOCLEAR(url);
1837
+ /* Run callbacks for any marks that we have leftover after we ran out of
1838
+ * bytes. There should be at most one of these set, so it's OK to invoke
1839
+ * them in series (unset marks will not result in callbacks).
1840
+ *
1841
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
1842
+ * overflowed 'data' and this allows us to correct for the off-by-one that
1843
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1844
+ * value that's in-bounds).
1845
+ */
1594
1846
 
1595
- parser->state = state;
1596
- parser->header_state = header_state;
1597
- parser->index = index;
1598
- parser->nread = nread;
1847
+ assert(((header_field_mark ? 1 : 0) +
1848
+ (header_value_mark ? 1 : 0) +
1849
+ (url_mark ? 1 : 0) +
1850
+ (body_mark ? 1 : 0)) <= 1);
1851
+
1852
+ CALLBACK_DATA_NOADVANCE(header_field);
1853
+ CALLBACK_DATA_NOADVANCE(header_value);
1854
+ CALLBACK_DATA_NOADVANCE(url);
1855
+ CALLBACK_DATA_NOADVANCE(body);
1599
1856
 
1600
1857
  return len;
1601
1858
 
1602
1859
  error:
1603
- parser->state = s_dead;
1860
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1861
+ SET_ERRNO(HPE_UNKNOWN);
1862
+ }
1863
+
1604
1864
  return (p - data);
1605
1865
  }
1606
1866
 
1607
1867
 
1868
+ /* Does the parser need to see an EOF to find the end of the message? */
1869
+ int
1870
+ http_message_needs_eof (http_parser *parser)
1871
+ {
1872
+ if (parser->type == HTTP_REQUEST) {
1873
+ return 0;
1874
+ }
1875
+
1876
+ /* See RFC 2616 section 4.4 */
1877
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1878
+ parser->status_code == 204 || /* No Content */
1879
+ parser->status_code == 304 || /* Not Modified */
1880
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1881
+ return 0;
1882
+ }
1883
+
1884
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1885
+ return 0;
1886
+ }
1887
+
1888
+ return 1;
1889
+ }
1890
+
1891
+
1608
1892
  int
1609
1893
  http_should_keep_alive (http_parser *parser)
1610
1894
  {
@@ -1612,17 +1896,15 @@ http_should_keep_alive (http_parser *parser)
1612
1896
  /* HTTP/1.1 */
1613
1897
  if (parser->flags & F_CONNECTION_CLOSE) {
1614
1898
  return 0;
1615
- } else {
1616
- return 1;
1617
1899
  }
1618
1900
  } else {
1619
1901
  /* HTTP/1.0 or earlier */
1620
- if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1621
- return 1;
1622
- } else {
1902
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1623
1903
  return 0;
1624
1904
  }
1625
1905
  }
1906
+
1907
+ return !http_message_needs_eof(parser);
1626
1908
  }
1627
1909
 
1628
1910
 
@@ -1635,10 +1917,142 @@ const char * http_method_str (enum http_method m)
1635
1917
  void
1636
1918
  http_parser_init (http_parser *parser, enum http_parser_type t)
1637
1919
  {
1920
+ void *data = parser->data; /* preserve application data */
1921
+ memset(parser, 0, sizeof(*parser));
1922
+ parser->data = data;
1638
1923
  parser->type = t;
1639
1924
  parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1640
- parser->nread = 0;
1641
- parser->upgrade = 0;
1642
- parser->flags = 0;
1643
- parser->method = 0;
1925
+ parser->http_errno = HPE_OK;
1926
+ }
1927
+
1928
+ const char *
1929
+ http_errno_name(enum http_errno err) {
1930
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1931
+ return http_strerror_tab[err].name;
1932
+ }
1933
+
1934
+ const char *
1935
+ http_errno_description(enum http_errno err) {
1936
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1937
+ return http_strerror_tab[err].description;
1938
+ }
1939
+
1940
+ int
1941
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1942
+ struct http_parser_url *u)
1943
+ {
1944
+ enum state s;
1945
+ const char *p;
1946
+ enum http_parser_url_fields uf, old_uf;
1947
+
1948
+ u->port = u->field_set = 0;
1949
+ s = is_connect ? s_req_host_start : s_req_spaces_before_url;
1950
+ uf = old_uf = UF_MAX;
1951
+
1952
+ for (p = buf; p < buf + buflen; p++) {
1953
+ s = parse_url_char(s, *p);
1954
+
1955
+ /* Figure out the next field that we're operating on */
1956
+ switch (s) {
1957
+ case s_dead:
1958
+ return 1;
1959
+
1960
+ /* Skip delimiters */
1961
+ case s_req_schema_slash:
1962
+ case s_req_schema_slash_slash:
1963
+ case s_req_host_start:
1964
+ case s_req_host_v6_start:
1965
+ case s_req_host_v6_end:
1966
+ case s_req_port_start:
1967
+ case s_req_query_string_start:
1968
+ case s_req_fragment_start:
1969
+ continue;
1970
+
1971
+ case s_req_schema:
1972
+ uf = UF_SCHEMA;
1973
+ break;
1974
+
1975
+ case s_req_host:
1976
+ case s_req_host_v6:
1977
+ uf = UF_HOST;
1978
+ break;
1979
+
1980
+ case s_req_port:
1981
+ uf = UF_PORT;
1982
+ break;
1983
+
1984
+ case s_req_path:
1985
+ uf = UF_PATH;
1986
+ break;
1987
+
1988
+ case s_req_query_string:
1989
+ uf = UF_QUERY;
1990
+ break;
1991
+
1992
+ case s_req_fragment:
1993
+ uf = UF_FRAGMENT;
1994
+ break;
1995
+
1996
+ default:
1997
+ assert(!"Unexpected state");
1998
+ return 1;
1999
+ }
2000
+
2001
+ /* Nothing's changed; soldier on */
2002
+ if (uf == old_uf) {
2003
+ u->field_data[uf].len++;
2004
+ continue;
2005
+ }
2006
+
2007
+ u->field_data[uf].off = p - buf;
2008
+ u->field_data[uf].len = 1;
2009
+
2010
+ u->field_set |= (1 << uf);
2011
+ old_uf = uf;
2012
+ }
2013
+
2014
+ /* CONNECT requests can only contain "hostname:port" */
2015
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2016
+ return 1;
2017
+ }
2018
+
2019
+ /* Make sure we don't end somewhere unexpected */
2020
+ switch (s) {
2021
+ case s_req_host_v6_start:
2022
+ case s_req_host_v6:
2023
+ case s_req_host_v6_end:
2024
+ case s_req_host:
2025
+ case s_req_port_start:
2026
+ return 1;
2027
+ default:
2028
+ break;
2029
+ }
2030
+
2031
+ if (u->field_set & (1 << UF_PORT)) {
2032
+ /* Don't bother with endp; we've already validated the string */
2033
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2034
+
2035
+ /* Ports have a max value of 2^16 - 1 */
2036
+ if (v > 0xffff) {
2037
+ return 1;
2038
+ }
2039
+
2040
+ u->port = (uint16_t) v;
2041
+ }
2042
+
2043
+ return 0;
2044
+ }
2045
+
2046
+ void
2047
+ http_parser_pause(http_parser *parser, int paused) {
2048
+ /* Users should only be pausing/unpausing a parser that is not in an error
2049
+ * state. In non-debug builds, there's not much that we can do about this
2050
+ * other than ignore it.
2051
+ */
2052
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2053
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2054
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2055
+ } else {
2056
+ assert(0 && "Attempting to pause parser in error state");
2057
+ }
1644
2058
  }