http_parser.rb 0.5.3 → 0.6.0.beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. data/.gitmodules +3 -3
  2. data/Gemfile +1 -1
  3. data/Gemfile.lock +9 -2
  4. data/README.md +50 -45
  5. data/bench/standalone.rb +23 -0
  6. data/bench/thin.rb +1 -0
  7. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +66 -58
  8. data/ext/ruby_http_parser/ruby_http_parser.c +10 -41
  9. data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
  10. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +5 -1
  11. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +133 -1
  12. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +6 -0
  13. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +1029 -615
  14. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
  15. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +177 -43
  16. data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
  17. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPHeadersCompleteCallback.java +13 -0
  19. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +4 -1
  20. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
  21. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +2 -2
  22. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +6 -6
  23. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPHeadersCompleteCallback.java +12 -0
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +715 -637
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +1 -1
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +71 -21
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +1 -1
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +1 -0
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +2 -1
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +1 -0
  32. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +6 -17
  33. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +1 -0
  34. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +1 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +1 -0
  36. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +80 -9
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +2 -1
  39. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +1141 -210
  40. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +230 -71
  41. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +32 -0
  42. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +5 -1
  43. data/ext/ruby_http_parser/vendor/http-parser/README.md +9 -2
  44. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +1029 -615
  45. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +79 -0
  46. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +145 -16
  47. data/ext/ruby_http_parser/vendor/http-parser/test.c +1065 -141
  48. data/http_parser.rb.gemspec +3 -1
  49. data/spec/parser_spec.rb +41 -17
  50. data/spec/support/requests.json +236 -24
  51. data/spec/support/responses.json +182 -36
  52. data/tasks/compile.rake +2 -2
  53. data/tasks/fixtures.rake +7 -1
  54. metadata +57 -19
  55. data/ext/ruby_http_parser/vendor/http-parser-java/compile +0 -1
  56. data/ext/ruby_http_parser/vendor/http-parser-java/test_permutations +0 -1
  57. data/ext/ruby_http_parser/vendor/http-parser-java/test_unit +0 -1
  58. data/ext/ruby_http_parser/vendor/http-parser-java/test_utf8 +0 -1
@@ -17,9 +17,6 @@ typedef struct ParserWrapper {
17
17
  ryah_http_parser parser;
18
18
 
19
19
  VALUE request_url;
20
- VALUE request_path;
21
- VALUE query_string;
22
- VALUE fragment;
23
20
 
24
21
  VALUE headers;
25
22
 
@@ -49,9 +46,6 @@ void ParserWrapper_init(ParserWrapper *wrapper) {
49
46
  wrapper->parser.http_minor = 0;
50
47
 
51
48
  wrapper->request_url = Qnil;
52
- wrapper->request_path = Qnil;
53
- wrapper->query_string = Qnil;
54
- wrapper->fragment = Qnil;
55
49
 
56
50
  wrapper->upgrade_data = Qnil;
57
51
 
@@ -66,9 +60,6 @@ void ParserWrapper_mark(void *data) {
66
60
  if(data) {
67
61
  ParserWrapper *wrapper = (ParserWrapper *) data;
68
62
  rb_gc_mark_maybe(wrapper->request_url);
69
- rb_gc_mark_maybe(wrapper->request_path);
70
- rb_gc_mark_maybe(wrapper->query_string);
71
- rb_gc_mark_maybe(wrapper->fragment);
72
63
  rb_gc_mark_maybe(wrapper->upgrade_data);
73
64
  rb_gc_mark_maybe(wrapper->headers);
74
65
  rb_gc_mark_maybe(wrapper->on_message_begin);
@@ -111,9 +102,6 @@ int on_message_begin(ryah_http_parser *parser) {
111
102
  GET_WRAPPER(wrapper, parser);
112
103
 
113
104
  wrapper->request_url = rb_str_new2("");
114
- wrapper->request_path = rb_str_new2("");
115
- wrapper->query_string = rb_str_new2("");
116
- wrapper->fragment = rb_str_new2("");
117
105
  wrapper->headers = rb_hash_new();
118
106
  wrapper->upgrade_data = rb_str_new2("");
119
107
 
@@ -139,24 +127,6 @@ int on_url(ryah_http_parser *parser, const char *at, size_t length) {
139
127
  return 0;
140
128
  }
141
129
 
142
- int on_path(ryah_http_parser *parser, const char *at, size_t length) {
143
- GET_WRAPPER(wrapper, parser);
144
- rb_str_cat(wrapper->request_path, at, length);
145
- return 0;
146
- }
147
-
148
- int on_query_string(ryah_http_parser *parser, const char *at, size_t length) {
149
- GET_WRAPPER(wrapper, parser);
150
- rb_str_cat(wrapper->query_string, at, length);
151
- return 0;
152
- }
153
-
154
- int on_fragment(ryah_http_parser *parser, const char *at, size_t length) {
155
- GET_WRAPPER(wrapper, parser);
156
- rb_str_cat(wrapper->fragment, at, length);
157
- return 0;
158
- }
159
-
160
130
  int on_header_field(ryah_http_parser *parser, const char *at, size_t length) {
161
131
  GET_WRAPPER(wrapper, parser);
162
132
 
@@ -278,10 +248,7 @@ int on_message_complete(ryah_http_parser *parser) {
278
248
 
279
249
  static ryah_http_parser_settings settings = {
280
250
  .on_message_begin = on_message_begin,
281
- .on_path = on_path,
282
- .on_query_string = on_query_string,
283
251
  .on_url = on_url,
284
- .on_fragment = on_fragment,
285
252
  .on_header_field = on_header_field,
286
253
  .on_header_value = on_header_value,
287
254
  .on_headers_complete = on_headers_complete,
@@ -318,6 +285,10 @@ VALUE ResponseParser_alloc(VALUE klass) {
318
285
  return Parser_alloc_by_type(klass, HTTP_RESPONSE);
319
286
  }
320
287
 
288
+ VALUE Parser_strict_p(VALUE klass) {
289
+ return HTTP_PARSER_STRICT == 1 ? Qtrue : Qfalse;
290
+ }
291
+
321
292
  VALUE Parser_initialize(int argc, VALUE *argv, VALUE self) {
322
293
  ParserWrapper *wrapper = NULL;
323
294
  DATA_GET(self, ParserWrapper, wrapper);
@@ -349,11 +320,14 @@ VALUE Parser_execute(VALUE self, VALUE data) {
349
320
  size_t nparsed = ryah_http_parser_execute(&wrapper->parser, &settings, ptr, len);
350
321
 
351
322
  if (wrapper->parser.upgrade) {
352
- rb_str_cat(wrapper->upgrade_data, ptr + nparsed + 1, len - nparsed - 1);
323
+ if (RTEST(wrapper->stopped))
324
+ nparsed += 1;
325
+
326
+ rb_str_cat(wrapper->upgrade_data, ptr + nparsed, len - nparsed);
353
327
 
354
328
  } else if (nparsed != (size_t)len) {
355
329
  if (!RTEST(wrapper->stopped) && !RTEST(wrapper->completed))
356
- rb_raise(eParserError, "Could not parse data entirely");
330
+ rb_raise(eParserError, "Could not parse data entirely (%zu != %zu)", nparsed, len);
357
331
  else
358
332
  nparsed += 1; // error states fail on the current character
359
333
  }
@@ -465,9 +439,6 @@ VALUE Parser_status_code(VALUE self) {
465
439
  }
466
440
 
467
441
  DEFINE_GETTER(request_url);
468
- DEFINE_GETTER(request_path);
469
- DEFINE_GETTER(query_string);
470
- DEFINE_GETTER(fragment);
471
442
  DEFINE_GETTER(headers);
472
443
  DEFINE_GETTER(upgrade_data);
473
444
  DEFINE_GETTER(header_value_type);
@@ -515,6 +486,7 @@ void Init_ruby_http_parser() {
515
486
  rb_define_alloc_func(cRequestParser, RequestParser_alloc);
516
487
  rb_define_alloc_func(cResponseParser, ResponseParser_alloc);
517
488
 
489
+ rb_define_singleton_method(cParser, "strict?", Parser_strict_p, 0);
518
490
  rb_define_method(cParser, "initialize", Parser_initialize, -1);
519
491
 
520
492
  rb_define_method(cParser, "on_message_begin=", Parser_set_on_message_begin, 1);
@@ -534,9 +506,6 @@ void Init_ruby_http_parser() {
534
506
  rb_define_method(cParser, "status_code", Parser_status_code, 0);
535
507
 
536
508
  rb_define_method(cParser, "request_url", Parser_request_url, 0);
537
- rb_define_method(cParser, "request_path", Parser_request_path, 0);
538
- rb_define_method(cParser, "query_string", Parser_query_string, 0);
539
- rb_define_method(cParser, "fragment", Parser_fragment, 0);
540
509
  rb_define_method(cParser, "headers", Parser_headers, 0);
541
510
  rb_define_method(cParser, "upgrade_data", Parser_upgrade_data, 0);
542
511
  rb_define_method(cParser, "header_value_type", Parser_header_value_type, 0);
@@ -0,0 +1,32 @@
1
+ # Authors ordered by first contribution.
2
+ Ryan Dahl <ry@tinyclouds.org>
3
+ Jeremy Hinegardner <jeremy@hinegardner.org>
4
+ Sergey Shepelev <temotor@gmail.com>
5
+ Joe Damato <ice799@gmail.com>
6
+ tomika <tomika_nospam@freemail.hu>
7
+ Phoenix Sol <phoenix@burninglabs.com>
8
+ Cliff Frey <cliff@meraki.com>
9
+ Ewen Cheslack-Postava <ewencp@cs.stanford.edu>
10
+ Santiago Gala <sgala@apache.org>
11
+ Tim Becker <tim.becker@syngenio.de>
12
+ Jeff Terrace <jterrace@gmail.com>
13
+ Ben Noordhuis <info@bnoordhuis.nl>
14
+ Nathan Rajlich <nathan@tootallnate.net>
15
+ Mark Nottingham <mnot@mnot.net>
16
+ Aman Gupta <aman@tmm1.net>
17
+ Tim Becker <tim.becker@kuriositaet.de>
18
+ Sean Cunningham <sean.cunningham@mandiant.com>
19
+ Peter Griess <pg@std.in>
20
+ Salman Haq <salman.haq@asti-usa.com>
21
+ Cliff Frey <clifffrey@gmail.com>
22
+ Jon Kolb <jon@b0g.us>
23
+ Fouad Mardini <f.mardini@gmail.com>
24
+ Paul Querna <pquerna@apache.org>
25
+ Felix Geisendörfer <felix@debuggable.com>
26
+ koichik <koichik@improvement.jp>
27
+ Andre Caron <andre.l.caron@gmail.com>
28
+ Ivo Raisr <ivosh@ivosh.net>
29
+ James McLaughlin <jamie@lacewing-project.org>
30
+ David Gwynne <loki@animata.net>
31
+ LE ROUX Thomas <thomas@procheo.fr>
32
+ Randy Rizun <rrizun@ortivawireless.com>
@@ -23,7 +23,11 @@ IN THE SOFTWARE.
23
23
  This code mainly based on code with the following license:
24
24
 
25
25
 
26
- Copyright Joyent, Inc. and other Node contributors. All rights reserved.
26
+ http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright
27
+ Igor Sysoev.
28
+
29
+ Additional changes are licensed under the same terms as NGINX and
30
+ copyright Joyent, Inc. and other Node contributors. All rights reserved.
27
31
 
28
32
  Permission is hereby granted, free of charge, to any person obtaining a copy
29
33
  of this software and associated documentation files (the "Software"), to
@@ -24,7 +24,7 @@ The parser extracts the following information from HTTP messages:
24
24
  * Response status code
25
25
  * Transfer-Encoding
26
26
  * HTTP version
27
- * Request path, query string, fragment
27
+ * Request URL
28
28
  * Message body
29
29
 
30
30
  Building
@@ -49,3 +49,135 @@ Usage
49
49
  help or have suggestions, feel free to contact me at
50
50
  (tim.becker@kuriositaet.de).
51
51
 
52
+
53
+ One `http_parser` object is used per TCP connection. Initialize the struct
54
+ using `http_parser_init()` and set the callbacks. That might look something
55
+ like this for a request parser:
56
+
57
+ http_parser_settings settings;
58
+ settings.on_url = my_url_callback;
59
+ settings.on_header_field = my_header_field_callback;
60
+ /* ... */
61
+
62
+ http_parser *parser = malloc(sizeof(http_parser));
63
+ http_parser_init(parser, HTTP_REQUEST);
64
+ parser->data = my_socket;
65
+
66
+ When data is received on the socket execute the parser and check for errors.
67
+
68
+ size_t len = 80*1024, nparsed;
69
+ char buf[len];
70
+ ssize_t recved;
71
+
72
+ recved = recv(fd, buf, len, 0);
73
+
74
+ if (recved < 0) {
75
+ /* Handle error. */
76
+ }
77
+
78
+ /* Start up / continue the parser.
79
+ * Note we pass recved==0 to signal that EOF has been received.
80
+ */
81
+ nparsed = http_parser_execute(parser, &settings, buf, recved);
82
+
83
+ if (parser->upgrade) {
84
+ /* handle new protocol */
85
+ } else if (nparsed != recved) {
86
+ /* Handle error. Usually just close the connection. */
87
+ }
88
+
89
+ HTTP needs to know where the end of the stream is. For example, sometimes
90
+ servers send responses without Content-Length and expect the client to
91
+ consume input (for the body) until EOF. To tell http_parser about EOF, give
92
+ `0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
93
+ can still be encountered during an EOF, so one must still be prepared
94
+ to receive them.
95
+
96
+ Scalar valued message information such as `status_code`, `method`, and the
97
+ HTTP version are stored in the parser structure. This data is only
98
+ temporarily stored in `http_parser` and gets reset on each new message. If
99
+ this information is needed later, copy it out of the structure during the
100
+ `headers_complete` callback.
101
+
102
+ The parser decodes the transfer-encoding for both requests and responses
103
+ transparently. That is, a chunked encoding is decoded before being sent to
104
+ the on_body callback.
105
+
106
+
107
+ The Special Problem of Upgrade
108
+ ------------------------------
109
+
110
+ HTTP supports upgrading the connection to a different protocol. An
111
+ increasingly common example of this is the Web Socket protocol which sends
112
+ a request like
113
+
114
+ GET /demo HTTP/1.1
115
+ Upgrade: WebSocket
116
+ Connection: Upgrade
117
+ Host: example.com
118
+ Origin: http://example.com
119
+ WebSocket-Protocol: sample
120
+
121
+ followed by non-HTTP data.
122
+
123
+ (See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
124
+ information on the Web Socket protocol.)
125
+
126
+ To support this, the parser will treat this as a normal HTTP message without a
127
+ body, issuing both on_headers_complete and on_message_complete callbacks. However,
128
+ http_parser_execute() will stop parsing at the end of the headers and return.
129
+
130
+ The user is expected to check if `parser->upgrade` has been set to 1 after
131
+ `http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
132
+ offset by the return value of `http_parser_execute()`.
133
+
134
+
135
+ Callbacks
136
+ ---------
137
+
138
+ During the `http_parser_execute()` call, the callbacks set in
139
+ `http_parser_settings` will be executed. The parser maintains state and
140
+ never looks behind, so buffering the data is not necessary. If you need to
141
+ save certain data for later usage, you can do that from the callbacks.
142
+
143
+ There are two types of callbacks:
144
+
145
+ * notification `typedef int (*http_cb) (http_parser*);`
146
+ Callbacks: on_message_begin, on_headers_complete, on_message_complete.
147
+ * data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
148
+ Callbacks: (requests only) on_url,
149
+ (common) on_header_field, on_header_value, on_body;
150
+
151
+ Callbacks must return 0 on success. Returning a non-zero value indicates
152
+ error to the parser, making it exit immediately.
153
+
154
+ In case you parse an HTTP message in chunks (i.e. `read()` request line
155
+ from socket, parse, read half headers, parse, etc) your data callbacks
156
+ may be called more than once. Http-parser guarantees that data pointer is only
157
+ valid for the lifetime of callback. You can also `read()` into a heap allocated
158
+ buffer to avoid copying memory around if this fits your application.
159
+
160
+ Reading headers may be a tricky task if you read/parse headers partially.
161
+ Basically, you need to remember whether last header callback was field or value
162
+ and apply following logic:
163
+
164
+ (on_header_field and on_header_value shortened to on_h_*)
165
+ ------------------------ ------------ --------------------------------------------
166
+ | State (prev. callback) | Callback | Description/action |
167
+ ------------------------ ------------ --------------------------------------------
168
+ | nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
169
+ | | | into it |
170
+ ------------------------ ------------ --------------------------------------------
171
+ | value | on_h_field | New header started. |
172
+ | | | Copy current name,value buffers to headers |
173
+ | | | list and allocate new buffer for new name |
174
+ ------------------------ ------------ --------------------------------------------
175
+ | field | on_h_field | Previous name continues. Reallocate name |
176
+ | | | buffer and append callback data to it |
177
+ ------------------------ ------------ --------------------------------------------
178
+ | field | on_h_value | Value for current header started. Allocate |
179
+ | | | new buffer and copy callback data to it |
180
+ ------------------------ ------------ --------------------------------------------
181
+ | value | on_h_value | Value continues. Reallocate value buffer |
182
+ | | | and append callback data to it |
183
+ ------------------------ ------------ --------------------------------------------
@@ -1,4 +1,10 @@
1
+ decide how to handle errs per default:
2
+ - ry: "set state to dead", return `read`
3
+ - current: call on_error w/ details, if no on_error handler set,
4
+ throw Exception, else call on_error and behave like orig...
5
+
1
6
  some tests from test.c left to port
7
+ (scan ...)
2
8
  documentation
3
9
 
4
10
  hi level callback interface
@@ -1,4 +1,7 @@
1
- /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
2
5
  *
3
6
  * Permission is hereby granted, free of charge, to any person obtaining a copy
4
7
  * of this software and associated documentation files (the "Software"), to
@@ -18,48 +21,94 @@
18
21
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
22
  * IN THE SOFTWARE.
20
23
  */
21
- #include <http_parser.h>
24
+ #include "http_parser.h"
22
25
  #include <assert.h>
23
26
  #include <stddef.h>
27
+ #include <ctype.h>
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <limits.h>
24
31
 
32
+ #ifndef ULLONG_MAX
33
+ # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
+ #endif
25
35
 
26
36
  #ifndef MIN
27
37
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
28
38
  #endif
29
39
 
30
40
 
31
- #define CALLBACK2(FOR) \
41
+ #if HTTP_PARSER_DEBUG
42
+ #define SET_ERRNO(e) \
32
43
  do { \
33
- if (settings->on_##FOR) { \
34
- if (0 != settings->on_##FOR(parser)) return (p - data); \
35
- } \
44
+ parser->http_errno = (e); \
45
+ parser->error_lineno = __LINE__; \
36
46
  } while (0)
47
+ #else
48
+ #define SET_ERRNO(e) \
49
+ do { \
50
+ parser->http_errno = (e); \
51
+ } while(0)
52
+ #endif
37
53
 
38
54
 
39
- #define MARK(FOR) \
55
+ /* Run the notify callback FOR, returning ER if it fails */
56
+ #define CALLBACK_NOTIFY_(FOR, ER) \
40
57
  do { \
41
- FOR##_mark = p; \
58
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
59
+ \
60
+ if (settings->on_##FOR) { \
61
+ if (0 != settings->on_##FOR(parser)) { \
62
+ SET_ERRNO(HPE_CB_##FOR); \
63
+ } \
64
+ \
65
+ /* We either errored above or got paused; get out */ \
66
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
67
+ return (ER); \
68
+ } \
69
+ } \
42
70
  } while (0)
43
71
 
44
- #define CALLBACK_NOCLEAR(FOR) \
72
+ /* Run the notify callback FOR and consume the current byte */
73
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
74
+
75
+ /* Run the notify callback FOR and don't consume the current byte */
76
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
77
+
78
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
79
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
45
80
  do { \
81
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
82
+ \
46
83
  if (FOR##_mark) { \
47
84
  if (settings->on_##FOR) { \
48
- if (0 != settings->on_##FOR(parser, \
49
- FOR##_mark, \
50
- p - FOR##_mark)) \
51
- { \
52
- return (p - data); \
85
+ if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
86
+ SET_ERRNO(HPE_CB_##FOR); \
87
+ } \
88
+ \
89
+ /* We either errored above or got paused; get out */ \
90
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
91
+ return (ER); \
53
92
  } \
54
93
  } \
94
+ FOR##_mark = NULL; \
55
95
  } \
56
96
  } while (0)
97
+
98
+ /* Run the data callback FOR and consume the current byte */
99
+ #define CALLBACK_DATA(FOR) \
100
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
57
101
 
102
+ /* Run the data callback FOR and don't consume the current byte */
103
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
104
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
58
105
 
59
- #define CALLBACK(FOR) \
106
+ /* Set the mark FOR; non-destructive if mark is already set */
107
+ #define MARK(FOR) \
60
108
  do { \
61
- CALLBACK_NOCLEAR(FOR); \
62
- FOR##_mark = NULL; \
109
+ if (!FOR##_mark) { \
110
+ FOR##_mark = p; \
111
+ } \
63
112
  } while (0)
64
113
 
65
114
 
@@ -97,6 +146,8 @@ static const char *method_strings[] =
97
146
  , "NOTIFY"
98
147
  , "SUBSCRIBE"
99
148
  , "UNSUBSCRIBE"
149
+ , "PATCH"
150
+ , "PURGE"
100
151
  };
101
152
 
102
153
 
@@ -117,9 +168,9 @@ static const char tokens[256] = {
117
168
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
118
169
  0, 0, 0, 0, 0, 0, 0, 0,
119
170
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
120
- ' ', '!', '"', '#', '$', '%', '&', '\'',
171
+ 0, '!', 0, '#', '$', '%', '&', '\'',
121
172
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
122
- 0, 0, '*', '+', 0, '-', '.', '/',
173
+ 0, 0, '*', '+', 0, '-', '.', 0,
123
174
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
124
175
  '0', '1', '2', '3', '4', '5', '6', '7',
125
176
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -139,7 +190,7 @@ static const char tokens[256] = {
139
190
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
140
191
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
141
192
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
142
- 'x', 'y', 'z', 0, '|', '}', '~', 0 };
193
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
143
194
 
144
195
 
145
196
  static const int8_t unhex[256] =
@@ -186,28 +237,7 @@ static const uint8_t normal_url_char[256] = {
186
237
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
187
238
  1, 1, 1, 1, 1, 1, 1, 1,
188
239
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
189
- 1, 1, 1, 1, 1, 1, 1, 0,
190
-
191
- /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
192
- encoded paths. This is out of spec, but clients generate this and most other
193
- HTTP servers support it. We should, too. */
194
-
195
- 1, 1, 1, 1, 1, 1, 1, 1,
196
- 1, 1, 1, 1, 1, 1, 1, 1,
197
- 1, 1, 1, 1, 1, 1, 1, 1,
198
- 1, 1, 1, 1, 1, 1, 1, 1,
199
- 1, 1, 1, 1, 1, 1, 1, 1,
200
- 1, 1, 1, 1, 1, 1, 1, 1,
201
- 1, 1, 1, 1, 1, 1, 1, 1,
202
- 1, 1, 1, 1, 1, 1, 1, 1,
203
- 1, 1, 1, 1, 1, 1, 1, 1,
204
- 1, 1, 1, 1, 1, 1, 1, 1,
205
- 1, 1, 1, 1, 1, 1, 1, 1,
206
- 1, 1, 1, 1, 1, 1, 1, 1,
207
- 1, 1, 1, 1, 1, 1, 1, 1,
208
- 1, 1, 1, 1, 1, 1, 1, 1,
209
- 1, 1, 1, 1, 1, 1, 1, 1,
210
- 1, 1, 1, 1, 1, 1, 1, 1 };
240
+ 1, 1, 1, 1, 1, 1, 1, 0, };
211
241
 
212
242
 
213
243
  enum state
@@ -236,7 +266,12 @@ enum state
236
266
  , s_req_schema
237
267
  , s_req_schema_slash
238
268
  , s_req_schema_slash_slash
269
+ , s_req_host_start
270
+ , s_req_host_v6_start
271
+ , s_req_host_v6
272
+ , s_req_host_v6_end
239
273
  , s_req_host
274
+ , s_req_port_start
240
275
  , s_req_port
241
276
  , s_req_path
242
277
  , s_req_query_string_start
@@ -258,6 +293,7 @@ enum state
258
293
  , s_header_field
259
294
  , s_header_value_start
260
295
  , s_header_value
296
+ , s_header_value_lws
261
297
 
262
298
  , s_header_almost_done
263
299
 
@@ -265,9 +301,11 @@ enum state
265
301
  , s_chunk_size
266
302
  , s_chunk_parameters
267
303
  , s_chunk_size_almost_done
268
-
304
+
269
305
  , s_headers_almost_done
270
- /* Important: 's_headers_almost_done' must be the last 'header' state. All
306
+ , s_headers_done
307
+
308
+ /* Important: 's_headers_done' must be the last 'header' state. All
271
309
  * states beyond this must be 'body' states. It is used for overflow
272
310
  * checking. See the PARSING_HEADER() macro.
273
311
  */
@@ -278,10 +316,12 @@ enum state
278
316
 
279
317
  , s_body_identity
280
318
  , s_body_identity_eof
319
+
320
+ , s_message_done
281
321
  };
282
322
 
283
323
 
284
- #define PARSING_HEADER(state) (state <= s_headers_almost_done)
324
+ #define PARSING_HEADER(state) (state <= s_headers_done)
285
325
 
286
326
 
287
327
  enum header_states
@@ -311,27 +351,39 @@ enum header_states
311
351
  };
312
352
 
313
353
 
314
- enum flags
315
- { F_CHUNKED = 1 << 0
316
- , F_CONNECTION_KEEP_ALIVE = 1 << 1
317
- , F_CONNECTION_CLOSE = 1 << 2
318
- , F_TRAILING = 1 << 3
319
- , F_UPGRADE = 1 << 4
320
- , F_SKIPBODY = 1 << 5
321
- };
322
-
354
+ /* Macros for character classes; depends on strict-mode */
355
+ #define CR '\r'
356
+ #define LF '\n'
357
+ #define LOWER(c) (unsigned char)(c | 0x20)
358
+ #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
359
+ #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
360
+ #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
361
+ #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
323
362
 
324
- #define CR '\r'
325
- #define LF '\n'
326
- #define LOWER(c) (unsigned char)(c | 0x20)
327
- #define TOKEN(c) tokens[(unsigned char)c]
363
+ #if HTTP_PARSER_STRICT
364
+ #define TOKEN(c) (tokens[(unsigned char)c])
365
+ #define IS_URL_CHAR(c) (normal_url_char[(unsigned char) (c)])
366
+ #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
367
+ #else
368
+ #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
369
+ #define IS_URL_CHAR(c) \
370
+ (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
371
+ #define IS_HOST_CHAR(c) \
372
+ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
373
+ #endif
328
374
 
329
375
 
330
376
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
331
377
 
332
378
 
333
379
  #if HTTP_PARSER_STRICT
334
- # define STRICT_CHECK(cond) if (cond) goto error
380
+ # define STRICT_CHECK(cond) \
381
+ do { \
382
+ if (cond) { \
383
+ SET_ERRNO(HPE_STRICT); \
384
+ goto error; \
385
+ } \
386
+ } while (0)
335
387
  # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
336
388
  #else
337
389
  # define STRICT_CHECK(cond)
@@ -339,24 +391,228 @@ enum flags
339
391
  #endif
340
392
 
341
393
 
394
+ /* Map errno values to strings for human-readable output */
395
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
396
+ static struct {
397
+ const char *name;
398
+ const char *description;
399
+ } http_strerror_tab[] = {
400
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
401
+ };
402
+ #undef HTTP_STRERROR_GEN
403
+
404
+ int http_message_needs_eof(http_parser *parser);
405
+
406
+ /* Our URL parser.
407
+ *
408
+ * This is designed to be shared by http_parser_execute() for URL validation,
409
+ * hence it has a state transition + byte-for-byte interface. In addition, it
410
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
411
+ * work of turning state transitions into URL components for its API.
412
+ *
413
+ * This function should only be invoked with non-space characters. It is
414
+ * assumed that the caller cares about (and can detect) the transition between
415
+ * URL and non-URL states by looking for these.
416
+ */
417
+ static enum state
418
+ parse_url_char(enum state s, const char ch)
419
+ {
420
+ assert(!isspace(ch));
421
+
422
+ switch (s) {
423
+ case s_req_spaces_before_url:
424
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
425
+ * All methods except CONNECT are followed by '/' or '*'.
426
+ */
427
+
428
+ if (ch == '/' || ch == '*') {
429
+ return s_req_path;
430
+ }
431
+
432
+ if (IS_ALPHA(ch)) {
433
+ return s_req_schema;
434
+ }
435
+
436
+ break;
437
+
438
+ case s_req_schema:
439
+ if (IS_ALPHA(ch)) {
440
+ return s;
441
+ }
442
+
443
+ if (ch == ':') {
444
+ return s_req_schema_slash;
445
+ }
446
+
447
+ break;
448
+
449
+ case s_req_schema_slash:
450
+ if (ch == '/') {
451
+ return s_req_schema_slash_slash;
452
+ }
453
+
454
+ break;
455
+
456
+ case s_req_schema_slash_slash:
457
+ if (ch == '/') {
458
+ return s_req_host_start;
459
+ }
460
+
461
+ break;
462
+
463
+ case s_req_host_start:
464
+ if (ch == '[') {
465
+ return s_req_host_v6_start;
466
+ }
467
+
468
+ if (IS_HOST_CHAR(ch)) {
469
+ return s_req_host;
470
+ }
471
+
472
+ break;
473
+
474
+ case s_req_host:
475
+ if (IS_HOST_CHAR(ch)) {
476
+ return s_req_host;
477
+ }
478
+
479
+ /* FALLTHROUGH */
480
+ case s_req_host_v6_end:
481
+ switch (ch) {
482
+ case ':':
483
+ return s_req_port_start;
484
+
485
+ case '/':
486
+ return s_req_path;
487
+
488
+ case '?':
489
+ return s_req_query_string_start;
490
+ }
491
+
492
+ break;
493
+
494
+ case s_req_host_v6:
495
+ if (ch == ']') {
496
+ return s_req_host_v6_end;
497
+ }
498
+
499
+ /* FALLTHROUGH */
500
+ case s_req_host_v6_start:
501
+ if (IS_HEX(ch) || ch == ':') {
502
+ return s_req_host_v6;
503
+ }
504
+ break;
505
+
506
+ case s_req_port:
507
+ switch (ch) {
508
+ case '/':
509
+ return s_req_path;
510
+
511
+ case '?':
512
+ return s_req_query_string_start;
513
+ }
514
+
515
+ /* FALLTHROUGH */
516
+ case s_req_port_start:
517
+ if (IS_NUM(ch)) {
518
+ return s_req_port;
519
+ }
520
+
521
+ break;
522
+
523
+ case s_req_path:
524
+ if (IS_URL_CHAR(ch)) {
525
+ return s;
526
+ }
527
+
528
+ switch (ch) {
529
+ case '?':
530
+ return s_req_query_string_start;
531
+
532
+ case '#':
533
+ return s_req_fragment_start;
534
+ }
535
+
536
+ break;
537
+
538
+ case s_req_query_string_start:
539
+ case s_req_query_string:
540
+ if (IS_URL_CHAR(ch)) {
541
+ return s_req_query_string;
542
+ }
543
+
544
+ switch (ch) {
545
+ case '?':
546
+ /* allow extra '?' in query string */
547
+ return s_req_query_string;
548
+
549
+ case '#':
550
+ return s_req_fragment_start;
551
+ }
552
+
553
+ break;
554
+
555
+ case s_req_fragment_start:
556
+ if (IS_URL_CHAR(ch)) {
557
+ return s_req_fragment;
558
+ }
559
+
560
+ switch (ch) {
561
+ case '?':
562
+ return s_req_fragment;
563
+
564
+ case '#':
565
+ return s;
566
+ }
567
+
568
+ break;
569
+
570
+ case s_req_fragment:
571
+ if (IS_URL_CHAR(ch)) {
572
+ return s;
573
+ }
574
+
575
+ switch (ch) {
576
+ case '?':
577
+ case '#':
578
+ return s;
579
+ }
580
+
581
+ break;
582
+
583
+ default:
584
+ break;
585
+ }
586
+
587
+ /* We should never fall out of the switch above unless there's an error */
588
+ return s_dead;
589
+ }
590
+
342
591
  size_t http_parser_execute (http_parser *parser,
343
592
  const http_parser_settings *settings,
344
593
  const char *data,
345
594
  size_t len)
346
595
  {
347
596
  char c, ch;
348
- const char *p = data, *pe;
349
- int64_t to_read;
597
+ int8_t unhex_val;
598
+ const char *p = data;
599
+ const char *header_field_mark = 0;
600
+ const char *header_value_mark = 0;
601
+ const char *url_mark = 0;
602
+ const char *body_mark = 0;
350
603
 
351
- enum state state = (enum state) parser->state;
352
- enum header_states header_state = (enum header_states) parser->header_state;
353
- uint64_t index = parser->index;
354
- uint64_t nread = parser->nread;
604
+ /* We're in an error state. Don't bother doing anything. */
605
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
606
+ return 0;
607
+ }
355
608
 
356
609
  if (len == 0) {
357
- switch (state) {
610
+ switch (parser->state) {
358
611
  case s_body_identity_eof:
359
- CALLBACK2(message_complete);
612
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
613
+ * we got paused.
614
+ */
615
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
360
616
  return 0;
361
617
 
362
618
  case s_dead:
@@ -366,52 +622,59 @@ size_t http_parser_execute (http_parser *parser,
366
622
  return 0;
367
623
 
368
624
  default:
369
- return 1; // error
625
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
626
+ return 1;
370
627
  }
371
628
  }
372
629
 
373
- /* technically we could combine all of these (except for url_mark) into one
374
- variable, saving stack space, but it seems more clear to have them
375
- separated. */
376
- const char *header_field_mark = 0;
377
- const char *header_value_mark = 0;
378
- const char *fragment_mark = 0;
379
- const char *query_string_mark = 0;
380
- const char *path_mark = 0;
381
- const char *url_mark = 0;
382
630
 
383
- if (state == s_header_field)
631
+ if (parser->state == s_header_field)
384
632
  header_field_mark = data;
385
- if (state == s_header_value)
633
+ if (parser->state == s_header_value)
386
634
  header_value_mark = data;
387
- if (state == s_req_fragment)
388
- fragment_mark = data;
389
- if (state == s_req_query_string)
390
- query_string_mark = data;
391
- if (state == s_req_path)
392
- path_mark = data;
393
- if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
394
- || state == s_req_schema_slash_slash || state == s_req_port
395
- || state == s_req_query_string_start || state == s_req_query_string
396
- || state == s_req_host
397
- || state == s_req_fragment_start || state == s_req_fragment)
635
+ switch (parser->state) {
636
+ case s_req_path:
637
+ case s_req_schema:
638
+ case s_req_schema_slash:
639
+ case s_req_schema_slash_slash:
640
+ case s_req_host_start:
641
+ case s_req_host_v6_start:
642
+ case s_req_host_v6:
643
+ case s_req_host_v6_end:
644
+ case s_req_host:
645
+ case s_req_port_start:
646
+ case s_req_port:
647
+ case s_req_query_string_start:
648
+ case s_req_query_string:
649
+ case s_req_fragment_start:
650
+ case s_req_fragment:
398
651
  url_mark = data;
652
+ break;
653
+ }
399
654
 
400
- for (p=data, pe=data+len; p != pe; p++) {
655
+ for (p=data; p != data + len; p++) {
401
656
  ch = *p;
402
657
 
403
- if (PARSING_HEADER(state)) {
404
- ++nread;
658
+ if (PARSING_HEADER(parser->state)) {
659
+ ++parser->nread;
405
660
  /* Buffer overflow attack */
406
- if (nread > HTTP_MAX_HEADER_SIZE) goto error;
661
+ if (parser->nread > HTTP_MAX_HEADER_SIZE) {
662
+ SET_ERRNO(HPE_HEADER_OVERFLOW);
663
+ goto error;
664
+ }
407
665
  }
408
666
 
409
- switch (state) {
667
+ reexecute_byte:
668
+ switch (parser->state) {
410
669
 
411
670
  case s_dead:
412
671
  /* this state is used after a 'Connection: close' message
413
672
  * the parser will error out if it reads another message
414
673
  */
674
+ if (ch == CR || ch == LF)
675
+ break;
676
+
677
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
415
678
  goto error;
416
679
 
417
680
  case s_start_req_or_res:
@@ -419,42 +682,46 @@ size_t http_parser_execute (http_parser *parser,
419
682
  if (ch == CR || ch == LF)
420
683
  break;
421
684
  parser->flags = 0;
422
- parser->content_length = -1;
685
+ parser->content_length = ULLONG_MAX;
423
686
 
424
- CALLBACK2(message_begin);
687
+ if (ch == 'H') {
688
+ parser->state = s_res_or_resp_H;
425
689
 
426
- if (ch == 'H')
427
- state = s_res_or_resp_H;
428
- else {
690
+ CALLBACK_NOTIFY(message_begin);
691
+ } else {
429
692
  parser->type = HTTP_REQUEST;
430
- goto start_req_method_assign;
693
+ parser->state = s_start_req;
694
+ goto reexecute_byte;
431
695
  }
696
+
432
697
  break;
433
698
  }
434
699
 
435
700
  case s_res_or_resp_H:
436
701
  if (ch == 'T') {
437
702
  parser->type = HTTP_RESPONSE;
438
- state = s_res_HT;
703
+ parser->state = s_res_HT;
439
704
  } else {
440
- if (ch != 'E') goto error;
705
+ if (ch != 'E') {
706
+ SET_ERRNO(HPE_INVALID_CONSTANT);
707
+ goto error;
708
+ }
709
+
441
710
  parser->type = HTTP_REQUEST;
442
711
  parser->method = HTTP_HEAD;
443
- index = 2;
444
- state = s_req_method;
712
+ parser->index = 2;
713
+ parser->state = s_req_method;
445
714
  }
446
715
  break;
447
716
 
448
717
  case s_start_res:
449
718
  {
450
719
  parser->flags = 0;
451
- parser->content_length = -1;
452
-
453
- CALLBACK2(message_begin);
720
+ parser->content_length = ULLONG_MAX;
454
721
 
455
722
  switch (ch) {
456
723
  case 'H':
457
- state = s_res_H;
724
+ parser->state = s_res_H;
458
725
  break;
459
726
 
460
727
  case CR:
@@ -462,105 +729,133 @@ size_t http_parser_execute (http_parser *parser,
462
729
  break;
463
730
 
464
731
  default:
732
+ SET_ERRNO(HPE_INVALID_CONSTANT);
465
733
  goto error;
466
734
  }
735
+
736
+ CALLBACK_NOTIFY(message_begin);
467
737
  break;
468
738
  }
469
739
 
470
740
  case s_res_H:
471
741
  STRICT_CHECK(ch != 'T');
472
- state = s_res_HT;
742
+ parser->state = s_res_HT;
473
743
  break;
474
744
 
475
745
  case s_res_HT:
476
746
  STRICT_CHECK(ch != 'T');
477
- state = s_res_HTT;
747
+ parser->state = s_res_HTT;
478
748
  break;
479
749
 
480
750
  case s_res_HTT:
481
751
  STRICT_CHECK(ch != 'P');
482
- state = s_res_HTTP;
752
+ parser->state = s_res_HTTP;
483
753
  break;
484
754
 
485
755
  case s_res_HTTP:
486
756
  STRICT_CHECK(ch != '/');
487
- state = s_res_first_http_major;
757
+ parser->state = s_res_first_http_major;
488
758
  break;
489
759
 
490
760
  case s_res_first_http_major:
491
- if (ch < '1' || ch > '9') goto error;
761
+ if (ch < '0' || ch > '9') {
762
+ SET_ERRNO(HPE_INVALID_VERSION);
763
+ goto error;
764
+ }
765
+
492
766
  parser->http_major = ch - '0';
493
- state = s_res_http_major;
767
+ parser->state = s_res_http_major;
494
768
  break;
495
769
 
496
770
  /* major HTTP version or dot */
497
771
  case s_res_http_major:
498
772
  {
499
773
  if (ch == '.') {
500
- state = s_res_first_http_minor;
774
+ parser->state = s_res_first_http_minor;
501
775
  break;
502
776
  }
503
777
 
504
- if (ch < '0' || ch > '9') goto error;
778
+ if (!IS_NUM(ch)) {
779
+ SET_ERRNO(HPE_INVALID_VERSION);
780
+ goto error;
781
+ }
505
782
 
506
783
  parser->http_major *= 10;
507
784
  parser->http_major += ch - '0';
508
785
 
509
- if (parser->http_major > 999) goto error;
786
+ if (parser->http_major > 999) {
787
+ SET_ERRNO(HPE_INVALID_VERSION);
788
+ goto error;
789
+ }
790
+
510
791
  break;
511
792
  }
512
793
 
513
794
  /* first digit of minor HTTP version */
514
795
  case s_res_first_http_minor:
515
- if (ch < '0' || ch > '9') goto error;
796
+ if (!IS_NUM(ch)) {
797
+ SET_ERRNO(HPE_INVALID_VERSION);
798
+ goto error;
799
+ }
800
+
516
801
  parser->http_minor = ch - '0';
517
- state = s_res_http_minor;
802
+ parser->state = s_res_http_minor;
518
803
  break;
519
804
 
520
805
  /* minor HTTP version or end of request line */
521
806
  case s_res_http_minor:
522
807
  {
523
808
  if (ch == ' ') {
524
- state = s_res_first_status_code;
809
+ parser->state = s_res_first_status_code;
525
810
  break;
526
811
  }
527
812
 
528
- if (ch < '0' || ch > '9') goto error;
813
+ if (!IS_NUM(ch)) {
814
+ SET_ERRNO(HPE_INVALID_VERSION);
815
+ goto error;
816
+ }
529
817
 
530
818
  parser->http_minor *= 10;
531
819
  parser->http_minor += ch - '0';
532
820
 
533
- if (parser->http_minor > 999) goto error;
821
+ if (parser->http_minor > 999) {
822
+ SET_ERRNO(HPE_INVALID_VERSION);
823
+ goto error;
824
+ }
825
+
534
826
  break;
535
827
  }
536
828
 
537
829
  case s_res_first_status_code:
538
830
  {
539
- if (ch < '0' || ch > '9') {
831
+ if (!IS_NUM(ch)) {
540
832
  if (ch == ' ') {
541
833
  break;
542
834
  }
835
+
836
+ SET_ERRNO(HPE_INVALID_STATUS);
543
837
  goto error;
544
838
  }
545
839
  parser->status_code = ch - '0';
546
- state = s_res_status_code;
840
+ parser->state = s_res_status_code;
547
841
  break;
548
842
  }
549
843
 
550
844
  case s_res_status_code:
551
845
  {
552
- if (ch < '0' || ch > '9') {
846
+ if (!IS_NUM(ch)) {
553
847
  switch (ch) {
554
848
  case ' ':
555
- state = s_res_status;
849
+ parser->state = s_res_status;
556
850
  break;
557
851
  case CR:
558
- state = s_res_line_almost_done;
852
+ parser->state = s_res_line_almost_done;
559
853
  break;
560
854
  case LF:
561
- state = s_header_field_start;
855
+ parser->state = s_header_field_start;
562
856
  break;
563
857
  default:
858
+ SET_ERRNO(HPE_INVALID_STATUS);
564
859
  goto error;
565
860
  }
566
861
  break;
@@ -569,7 +864,11 @@ size_t http_parser_execute (http_parser *parser,
569
864
  parser->status_code *= 10;
570
865
  parser->status_code += ch - '0';
571
866
 
572
- if (parser->status_code > 999) goto error;
867
+ if (parser->status_code > 999) {
868
+ SET_ERRNO(HPE_INVALID_STATUS);
869
+ goto error;
870
+ }
871
+
573
872
  break;
574
873
  }
575
874
 
@@ -577,19 +876,19 @@ size_t http_parser_execute (http_parser *parser,
577
876
  /* the human readable status. e.g. "NOT FOUND"
578
877
  * we are not humans so just ignore this */
579
878
  if (ch == CR) {
580
- state = s_res_line_almost_done;
879
+ parser->state = s_res_line_almost_done;
581
880
  break;
582
881
  }
583
882
 
584
883
  if (ch == LF) {
585
- state = s_header_field_start;
884
+ parser->state = s_header_field_start;
586
885
  break;
587
886
  }
588
887
  break;
589
888
 
590
889
  case s_res_line_almost_done:
591
890
  STRICT_CHECK(ch != LF);
592
- state = s_header_field_start;
891
+ parser->state = s_header_field_start;
593
892
  break;
594
893
 
595
894
  case s_start_req:
@@ -597,15 +896,15 @@ size_t http_parser_execute (http_parser *parser,
597
896
  if (ch == CR || ch == LF)
598
897
  break;
599
898
  parser->flags = 0;
600
- parser->content_length = -1;
899
+ parser->content_length = ULLONG_MAX;
601
900
 
602
- CALLBACK2(message_begin);
603
-
604
- if (ch < 'A' || 'Z' < ch) goto error;
901
+ if (!IS_ALPHA(ch)) {
902
+ SET_ERRNO(HPE_INVALID_METHOD);
903
+ goto error;
904
+ }
605
905
 
606
- start_req_method_assign:
607
906
  parser->method = (enum http_method) 0;
608
- index = 1;
907
+ parser->index = 1;
609
908
  switch (ch) {
610
909
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
611
910
  case 'D': parser->method = HTTP_DELETE; break;
@@ -615,342 +914,157 @@ size_t http_parser_execute (http_parser *parser,
615
914
  case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
616
915
  case 'N': parser->method = HTTP_NOTIFY; break;
617
916
  case 'O': parser->method = HTTP_OPTIONS; break;
618
- case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
917
+ case 'P': parser->method = HTTP_POST;
918
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
919
+ break;
619
920
  case 'R': parser->method = HTTP_REPORT; break;
620
921
  case 'S': parser->method = HTTP_SUBSCRIBE; break;
621
922
  case 'T': parser->method = HTTP_TRACE; break;
622
923
  case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
623
- default: goto error;
924
+ default:
925
+ SET_ERRNO(HPE_INVALID_METHOD);
926
+ goto error;
624
927
  }
625
- state = s_req_method;
928
+ parser->state = s_req_method;
929
+
930
+ CALLBACK_NOTIFY(message_begin);
931
+
626
932
  break;
627
933
  }
628
934
 
629
935
  case s_req_method:
630
936
  {
631
- if (ch == '\0')
937
+ const char *matcher;
938
+ if (ch == '\0') {
939
+ SET_ERRNO(HPE_INVALID_METHOD);
632
940
  goto error;
941
+ }
633
942
 
634
- const char *matcher = method_strings[parser->method];
635
- if (ch == ' ' && matcher[index] == '\0') {
636
- state = s_req_spaces_before_url;
637
- } else if (ch == matcher[index]) {
943
+ matcher = method_strings[parser->method];
944
+ if (ch == ' ' && matcher[parser->index] == '\0') {
945
+ parser->state = s_req_spaces_before_url;
946
+ } else if (ch == matcher[parser->index]) {
638
947
  ; /* nada */
639
948
  } else if (parser->method == HTTP_CONNECT) {
640
- if (index == 1 && ch == 'H') {
949
+ if (parser->index == 1 && ch == 'H') {
641
950
  parser->method = HTTP_CHECKOUT;
642
- } else if (index == 2 && ch == 'P') {
951
+ } else if (parser->index == 2 && ch == 'P') {
643
952
  parser->method = HTTP_COPY;
953
+ } else {
954
+ goto error;
644
955
  }
645
956
  } else if (parser->method == HTTP_MKCOL) {
646
- if (index == 1 && ch == 'O') {
957
+ if (parser->index == 1 && ch == 'O') {
647
958
  parser->method = HTTP_MOVE;
648
- } else if (index == 1 && ch == 'E') {
959
+ } else if (parser->index == 1 && ch == 'E') {
649
960
  parser->method = HTTP_MERGE;
650
- } else if (index == 1 && ch == '-') {
961
+ } else if (parser->index == 1 && ch == '-') {
651
962
  parser->method = HTTP_MSEARCH;
652
- } else if (index == 2 && ch == 'A') {
963
+ } else if (parser->index == 2 && ch == 'A') {
653
964
  parser->method = HTTP_MKACTIVITY;
965
+ } else {
966
+ goto error;
654
967
  }
655
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
656
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
657
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
658
- parser->method = HTTP_PUT;
659
- } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
660
- parser->method = HTTP_UNSUBSCRIBE;
661
- } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
968
+ } else if (parser->index == 1 && parser->method == HTTP_POST) {
969
+ if (ch == 'R') {
970
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
971
+ } else if (ch == 'U') {
972
+ parser->method = HTTP_PUT; /* or HTTP_PURGE */
973
+ } else if (ch == 'A') {
974
+ parser->method = HTTP_PATCH;
975
+ } else {
976
+ goto error;
977
+ }
978
+ } else if (parser->index == 2) {
979
+ if (parser->method == HTTP_PUT) {
980
+ if (ch == 'R') parser->method = HTTP_PURGE;
981
+ } else if (parser->method == HTTP_UNLOCK) {
982
+ if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
983
+ }
984
+ } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
662
985
  parser->method = HTTP_PROPPATCH;
663
986
  } else {
987
+ SET_ERRNO(HPE_INVALID_METHOD);
664
988
  goto error;
665
989
  }
666
990
 
667
- ++index;
991
+ ++parser->index;
668
992
  break;
669
993
  }
994
+
670
995
  case s_req_spaces_before_url:
671
996
  {
672
997
  if (ch == ' ') break;
673
998
 
674
- if (ch == '/' || ch == '*') {
675
- MARK(url);
676
- MARK(path);
677
- state = s_req_path;
678
- break;
999
+ MARK(url);
1000
+ if (parser->method == HTTP_CONNECT) {
1001
+ parser->state = s_req_host_start;
679
1002
  }
680
1003
 
681
- c = LOWER(ch);
682
-
683
- if (c >= 'a' && c <= 'z') {
684
- MARK(url);
685
- state = s_req_schema;
686
- break;
1004
+ parser->state = parse_url_char((enum state)parser->state, ch);
1005
+ if (parser->state == s_dead) {
1006
+ SET_ERRNO(HPE_INVALID_URL);
1007
+ goto error;
687
1008
  }
688
1009
 
689
- goto error;
1010
+ break;
690
1011
  }
691
1012
 
692
1013
  case s_req_schema:
693
- {
694
- c = LOWER(ch);
695
-
696
- if (c >= 'a' && c <= 'z') break;
697
-
698
- if (ch == ':') {
699
- state = s_req_schema_slash;
700
- break;
701
- } else if (ch == '.') {
702
- state = s_req_host;
703
- break;
704
- } else if ('0' <= ch && ch <= '9') {
705
- state = s_req_host;
706
- break;
707
- }
708
-
709
- goto error;
710
- }
711
-
712
1014
  case s_req_schema_slash:
713
- STRICT_CHECK(ch != '/');
714
- state = s_req_schema_slash_slash;
715
- break;
716
-
717
1015
  case s_req_schema_slash_slash:
718
- STRICT_CHECK(ch != '/');
719
- state = s_req_host;
720
- break;
721
-
722
- case s_req_host:
723
- {
724
- c = LOWER(ch);
725
- if (c >= 'a' && c <= 'z') break;
726
- if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
727
- switch (ch) {
728
- case ':':
729
- state = s_req_port;
730
- break;
731
- case '/':
732
- MARK(path);
733
- state = s_req_path;
734
- break;
735
- case ' ':
736
- /* The request line looks like:
737
- * "GET http://foo.bar.com HTTP/1.1"
738
- * That is, there is no path.
739
- */
740
- CALLBACK(url);
741
- state = s_req_http_start;
742
- break;
743
- case '?':
744
- state = s_req_query_string_start;
745
- break;
746
- default:
747
- goto error;
748
- }
749
- break;
750
- }
751
-
752
- case s_req_port:
1016
+ case s_req_host_start:
1017
+ case s_req_host_v6_start:
1018
+ case s_req_host_v6:
1019
+ case s_req_port_start:
753
1020
  {
754
- if (ch >= '0' && ch <= '9') break;
755
1021
  switch (ch) {
756
- case '/':
757
- MARK(path);
758
- state = s_req_path;
759
- break;
1022
+ /* No whitespace allowed here */
760
1023
  case ' ':
761
- /* The request line looks like:
762
- * "GET http://foo.bar.com:1234 HTTP/1.1"
763
- * That is, there is no path.
764
- */
765
- CALLBACK(url);
766
- state = s_req_http_start;
767
- break;
768
- case '?':
769
- state = s_req_query_string_start;
770
- break;
771
- default:
772
- goto error;
773
- }
774
- break;
775
- }
776
-
777
- case s_req_path:
778
- {
779
- if (normal_url_char[(unsigned char)ch]) break;
780
-
781
- switch (ch) {
782
- case ' ':
783
- CALLBACK(url);
784
- CALLBACK(path);
785
- state = s_req_http_start;
786
- break;
787
1024
  case CR:
788
- CALLBACK(url);
789
- CALLBACK(path);
790
- parser->http_major = 0;
791
- parser->http_minor = 9;
792
- state = s_req_line_almost_done;
793
- break;
794
1025
  case LF:
795
- CALLBACK(url);
796
- CALLBACK(path);
797
- parser->http_major = 0;
798
- parser->http_minor = 9;
799
- state = s_header_field_start;
800
- break;
801
- case '?':
802
- CALLBACK(path);
803
- state = s_req_query_string_start;
804
- break;
805
- case '#':
806
- CALLBACK(path);
807
- state = s_req_fragment_start;
808
- break;
809
- default:
1026
+ SET_ERRNO(HPE_INVALID_URL);
810
1027
  goto error;
811
- }
812
- break;
813
- }
814
-
815
- case s_req_query_string_start:
816
- {
817
- if (normal_url_char[(unsigned char)ch]) {
818
- MARK(query_string);
819
- state = s_req_query_string;
820
- break;
821
- }
822
-
823
- switch (ch) {
824
- case '?':
825
- break; /* XXX ignore extra '?' ... is this right? */
826
- case ' ':
827
- CALLBACK(url);
828
- state = s_req_http_start;
829
- break;
830
- case CR:
831
- CALLBACK(url);
832
- parser->http_major = 0;
833
- parser->http_minor = 9;
834
- state = s_req_line_almost_done;
835
- break;
836
- case LF:
837
- CALLBACK(url);
838
- parser->http_major = 0;
839
- parser->http_minor = 9;
840
- state = s_header_field_start;
841
- break;
842
- case '#':
843
- state = s_req_fragment_start;
844
- break;
845
1028
  default:
846
- goto error;
1029
+ parser->state = parse_url_char((enum state)parser->state, ch);
1030
+ if (parser->state == s_dead) {
1031
+ SET_ERRNO(HPE_INVALID_URL);
1032
+ goto error;
1033
+ }
847
1034
  }
848
- break;
849
- }
850
1035
 
851
- case s_req_query_string:
852
- {
853
- if (normal_url_char[(unsigned char)ch]) break;
854
-
855
- switch (ch) {
856
- case '?':
857
- /* allow extra '?' in query string */
858
- break;
859
- case ' ':
860
- CALLBACK(url);
861
- CALLBACK(query_string);
862
- state = s_req_http_start;
863
- break;
864
- case CR:
865
- CALLBACK(url);
866
- CALLBACK(query_string);
867
- parser->http_major = 0;
868
- parser->http_minor = 9;
869
- state = s_req_line_almost_done;
870
- break;
871
- case LF:
872
- CALLBACK(url);
873
- CALLBACK(query_string);
874
- parser->http_major = 0;
875
- parser->http_minor = 9;
876
- state = s_header_field_start;
877
- break;
878
- case '#':
879
- CALLBACK(query_string);
880
- state = s_req_fragment_start;
881
- break;
882
- default:
883
- goto error;
884
- }
885
1036
  break;
886
1037
  }
887
1038
 
1039
+ case s_req_host:
1040
+ case s_req_host_v6_end:
1041
+ case s_req_port:
1042
+ case s_req_path:
1043
+ case s_req_query_string_start:
1044
+ case s_req_query_string:
888
1045
  case s_req_fragment_start:
889
- {
890
- if (normal_url_char[(unsigned char)ch]) {
891
- MARK(fragment);
892
- state = s_req_fragment;
893
- break;
894
- }
895
-
896
- switch (ch) {
897
- case ' ':
898
- CALLBACK(url);
899
- state = s_req_http_start;
900
- break;
901
- case CR:
902
- CALLBACK(url);
903
- parser->http_major = 0;
904
- parser->http_minor = 9;
905
- state = s_req_line_almost_done;
906
- break;
907
- case LF:
908
- CALLBACK(url);
909
- parser->http_major = 0;
910
- parser->http_minor = 9;
911
- state = s_header_field_start;
912
- break;
913
- case '?':
914
- MARK(fragment);
915
- state = s_req_fragment;
916
- break;
917
- case '#':
918
- break;
919
- default:
920
- goto error;
921
- }
922
- break;
923
- }
924
-
925
1046
  case s_req_fragment:
926
1047
  {
927
- if (normal_url_char[(unsigned char)ch]) break;
928
-
929
1048
  switch (ch) {
930
1049
  case ' ':
931
- CALLBACK(url);
932
- CALLBACK(fragment);
933
- state = s_req_http_start;
1050
+ parser->state = s_req_http_start;
1051
+ CALLBACK_DATA(url);
934
1052
  break;
935
1053
  case CR:
936
- CALLBACK(url);
937
- CALLBACK(fragment);
938
- parser->http_major = 0;
939
- parser->http_minor = 9;
940
- state = s_req_line_almost_done;
941
- break;
942
1054
  case LF:
943
- CALLBACK(url);
944
- CALLBACK(fragment);
945
1055
  parser->http_major = 0;
946
1056
  parser->http_minor = 9;
947
- state = s_header_field_start;
948
- break;
949
- case '?':
950
- case '#':
1057
+ parser->state = (ch == CR) ?
1058
+ s_req_line_almost_done :
1059
+ s_header_field_start;
1060
+ CALLBACK_DATA(url);
951
1061
  break;
952
1062
  default:
953
- goto error;
1063
+ parser->state = parse_url_char((enum state)parser->state, ch);
1064
+ if (parser->state == s_dead) {
1065
+ SET_ERRNO(HPE_INVALID_URL);
1066
+ goto error;
1067
+ }
954
1068
  }
955
1069
  break;
956
1070
  }
@@ -958,140 +1072,170 @@ size_t http_parser_execute (http_parser *parser,
958
1072
  case s_req_http_start:
959
1073
  switch (ch) {
960
1074
  case 'H':
961
- state = s_req_http_H;
1075
+ parser->state = s_req_http_H;
962
1076
  break;
963
1077
  case ' ':
964
1078
  break;
965
1079
  default:
1080
+ SET_ERRNO(HPE_INVALID_CONSTANT);
966
1081
  goto error;
967
1082
  }
968
1083
  break;
969
1084
 
970
1085
  case s_req_http_H:
971
1086
  STRICT_CHECK(ch != 'T');
972
- state = s_req_http_HT;
1087
+ parser->state = s_req_http_HT;
973
1088
  break;
974
1089
 
975
1090
  case s_req_http_HT:
976
1091
  STRICT_CHECK(ch != 'T');
977
- state = s_req_http_HTT;
1092
+ parser->state = s_req_http_HTT;
978
1093
  break;
979
1094
 
980
1095
  case s_req_http_HTT:
981
1096
  STRICT_CHECK(ch != 'P');
982
- state = s_req_http_HTTP;
1097
+ parser->state = s_req_http_HTTP;
983
1098
  break;
984
1099
 
985
1100
  case s_req_http_HTTP:
986
1101
  STRICT_CHECK(ch != '/');
987
- state = s_req_first_http_major;
1102
+ parser->state = s_req_first_http_major;
988
1103
  break;
989
1104
 
990
1105
  /* first digit of major HTTP version */
991
1106
  case s_req_first_http_major:
992
- if (ch < '1' || ch > '9') goto error;
1107
+ if (ch < '1' || ch > '9') {
1108
+ SET_ERRNO(HPE_INVALID_VERSION);
1109
+ goto error;
1110
+ }
1111
+
993
1112
  parser->http_major = ch - '0';
994
- state = s_req_http_major;
1113
+ parser->state = s_req_http_major;
995
1114
  break;
996
1115
 
997
1116
  /* major HTTP version or dot */
998
1117
  case s_req_http_major:
999
1118
  {
1000
1119
  if (ch == '.') {
1001
- state = s_req_first_http_minor;
1120
+ parser->state = s_req_first_http_minor;
1002
1121
  break;
1003
1122
  }
1004
1123
 
1005
- if (ch < '0' || ch > '9') goto error;
1124
+ if (!IS_NUM(ch)) {
1125
+ SET_ERRNO(HPE_INVALID_VERSION);
1126
+ goto error;
1127
+ }
1006
1128
 
1007
1129
  parser->http_major *= 10;
1008
1130
  parser->http_major += ch - '0';
1009
1131
 
1010
- if (parser->http_major > 999) goto error;
1132
+ if (parser->http_major > 999) {
1133
+ SET_ERRNO(HPE_INVALID_VERSION);
1134
+ goto error;
1135
+ }
1136
+
1011
1137
  break;
1012
1138
  }
1013
1139
 
1014
1140
  /* first digit of minor HTTP version */
1015
1141
  case s_req_first_http_minor:
1016
- if (ch < '0' || ch > '9') goto error;
1142
+ if (!IS_NUM(ch)) {
1143
+ SET_ERRNO(HPE_INVALID_VERSION);
1144
+ goto error;
1145
+ }
1146
+
1017
1147
  parser->http_minor = ch - '0';
1018
- state = s_req_http_minor;
1148
+ parser->state = s_req_http_minor;
1019
1149
  break;
1020
1150
 
1021
1151
  /* minor HTTP version or end of request line */
1022
1152
  case s_req_http_minor:
1023
1153
  {
1024
1154
  if (ch == CR) {
1025
- state = s_req_line_almost_done;
1155
+ parser->state = s_req_line_almost_done;
1026
1156
  break;
1027
1157
  }
1028
1158
 
1029
1159
  if (ch == LF) {
1030
- state = s_header_field_start;
1160
+ parser->state = s_header_field_start;
1031
1161
  break;
1032
1162
  }
1033
1163
 
1034
1164
  /* XXX allow spaces after digit? */
1035
1165
 
1036
- if (ch < '0' || ch > '9') goto error;
1166
+ if (!IS_NUM(ch)) {
1167
+ SET_ERRNO(HPE_INVALID_VERSION);
1168
+ goto error;
1169
+ }
1037
1170
 
1038
1171
  parser->http_minor *= 10;
1039
1172
  parser->http_minor += ch - '0';
1040
1173
 
1041
- if (parser->http_minor > 999) goto error;
1174
+ if (parser->http_minor > 999) {
1175
+ SET_ERRNO(HPE_INVALID_VERSION);
1176
+ goto error;
1177
+ }
1178
+
1042
1179
  break;
1043
1180
  }
1044
1181
 
1045
1182
  /* end of request line */
1046
1183
  case s_req_line_almost_done:
1047
1184
  {
1048
- if (ch != LF) goto error;
1049
- state = s_header_field_start;
1185
+ if (ch != LF) {
1186
+ SET_ERRNO(HPE_LF_EXPECTED);
1187
+ goto error;
1188
+ }
1189
+
1190
+ parser->state = s_header_field_start;
1050
1191
  break;
1051
1192
  }
1052
1193
 
1053
1194
  case s_header_field_start:
1054
1195
  {
1055
1196
  if (ch == CR) {
1056
- state = s_headers_almost_done;
1197
+ parser->state = s_headers_almost_done;
1057
1198
  break;
1058
1199
  }
1059
1200
 
1060
1201
  if (ch == LF) {
1061
1202
  /* they might be just sending \n instead of \r\n so this would be
1062
1203
  * the second \n to denote the end of headers*/
1063
- state = s_headers_almost_done;
1064
- goto headers_almost_done;
1204
+ parser->state = s_headers_almost_done;
1205
+ goto reexecute_byte;
1065
1206
  }
1066
1207
 
1067
1208
  c = TOKEN(ch);
1068
1209
 
1069
- if (!c) goto error;
1210
+ if (!c) {
1211
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1212
+ goto error;
1213
+ }
1070
1214
 
1071
1215
  MARK(header_field);
1072
1216
 
1073
- index = 0;
1074
- state = s_header_field;
1217
+ parser->index = 0;
1218
+ parser->state = s_header_field;
1075
1219
 
1076
1220
  switch (c) {
1077
1221
  case 'c':
1078
- header_state = h_C;
1222
+ parser->header_state = h_C;
1079
1223
  break;
1080
1224
 
1081
1225
  case 'p':
1082
- header_state = h_matching_proxy_connection;
1226
+ parser->header_state = h_matching_proxy_connection;
1083
1227
  break;
1084
1228
 
1085
1229
  case 't':
1086
- header_state = h_matching_transfer_encoding;
1230
+ parser->header_state = h_matching_transfer_encoding;
1087
1231
  break;
1088
1232
 
1089
1233
  case 'u':
1090
- header_state = h_matching_upgrade;
1234
+ parser->header_state = h_matching_upgrade;
1091
1235
  break;
1092
1236
 
1093
1237
  default:
1094
- header_state = h_general;
1238
+ parser->header_state = h_general;
1095
1239
  break;
1096
1240
  }
1097
1241
  break;
@@ -1102,31 +1246,31 @@ size_t http_parser_execute (http_parser *parser,
1102
1246
  c = TOKEN(ch);
1103
1247
 
1104
1248
  if (c) {
1105
- switch (header_state) {
1249
+ switch (parser->header_state) {
1106
1250
  case h_general:
1107
1251
  break;
1108
1252
 
1109
1253
  case h_C:
1110
- index++;
1111
- header_state = (c == 'o' ? h_CO : h_general);
1254
+ parser->index++;
1255
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1112
1256
  break;
1113
1257
 
1114
1258
  case h_CO:
1115
- index++;
1116
- header_state = (c == 'n' ? h_CON : h_general);
1259
+ parser->index++;
1260
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1117
1261
  break;
1118
1262
 
1119
1263
  case h_CON:
1120
- index++;
1264
+ parser->index++;
1121
1265
  switch (c) {
1122
1266
  case 'n':
1123
- header_state = h_matching_connection;
1267
+ parser->header_state = h_matching_connection;
1124
1268
  break;
1125
1269
  case 't':
1126
- header_state = h_matching_content_length;
1270
+ parser->header_state = h_matching_content_length;
1127
1271
  break;
1128
1272
  default:
1129
- header_state = h_general;
1273
+ parser->header_state = h_general;
1130
1274
  break;
1131
1275
  }
1132
1276
  break;
@@ -1134,60 +1278,60 @@ size_t http_parser_execute (http_parser *parser,
1134
1278
  /* connection */
1135
1279
 
1136
1280
  case h_matching_connection:
1137
- index++;
1138
- if (index > sizeof(CONNECTION)-1
1139
- || c != CONNECTION[index]) {
1140
- header_state = h_general;
1141
- } else if (index == sizeof(CONNECTION)-2) {
1142
- header_state = h_connection;
1281
+ parser->index++;
1282
+ if (parser->index > sizeof(CONNECTION)-1
1283
+ || c != CONNECTION[parser->index]) {
1284
+ parser->header_state = h_general;
1285
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1286
+ parser->header_state = h_connection;
1143
1287
  }
1144
1288
  break;
1145
1289
 
1146
1290
  /* proxy-connection */
1147
1291
 
1148
1292
  case h_matching_proxy_connection:
1149
- index++;
1150
- if (index > sizeof(PROXY_CONNECTION)-1
1151
- || c != PROXY_CONNECTION[index]) {
1152
- header_state = h_general;
1153
- } else if (index == sizeof(PROXY_CONNECTION)-2) {
1154
- header_state = h_connection;
1293
+ parser->index++;
1294
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1295
+ || c != PROXY_CONNECTION[parser->index]) {
1296
+ parser->header_state = h_general;
1297
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1298
+ parser->header_state = h_connection;
1155
1299
  }
1156
1300
  break;
1157
1301
 
1158
1302
  /* content-length */
1159
1303
 
1160
1304
  case h_matching_content_length:
1161
- index++;
1162
- if (index > sizeof(CONTENT_LENGTH)-1
1163
- || c != CONTENT_LENGTH[index]) {
1164
- header_state = h_general;
1165
- } else if (index == sizeof(CONTENT_LENGTH)-2) {
1166
- header_state = h_content_length;
1305
+ parser->index++;
1306
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1307
+ || c != CONTENT_LENGTH[parser->index]) {
1308
+ parser->header_state = h_general;
1309
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1310
+ parser->header_state = h_content_length;
1167
1311
  }
1168
1312
  break;
1169
1313
 
1170
1314
  /* transfer-encoding */
1171
1315
 
1172
1316
  case h_matching_transfer_encoding:
1173
- index++;
1174
- if (index > sizeof(TRANSFER_ENCODING)-1
1175
- || c != TRANSFER_ENCODING[index]) {
1176
- header_state = h_general;
1177
- } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1178
- header_state = h_transfer_encoding;
1317
+ parser->index++;
1318
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1319
+ || c != TRANSFER_ENCODING[parser->index]) {
1320
+ parser->header_state = h_general;
1321
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1322
+ parser->header_state = h_transfer_encoding;
1179
1323
  }
1180
1324
  break;
1181
1325
 
1182
1326
  /* upgrade */
1183
1327
 
1184
1328
  case h_matching_upgrade:
1185
- index++;
1186
- if (index > sizeof(UPGRADE)-1
1187
- || c != UPGRADE[index]) {
1188
- header_state = h_general;
1189
- } else if (index == sizeof(UPGRADE)-2) {
1190
- header_state = h_upgrade;
1329
+ parser->index++;
1330
+ if (parser->index > sizeof(UPGRADE)-1
1331
+ || c != UPGRADE[parser->index]) {
1332
+ parser->header_state = h_general;
1333
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1334
+ parser->header_state = h_upgrade;
1191
1335
  }
1192
1336
  break;
1193
1337
 
@@ -1195,7 +1339,7 @@ size_t http_parser_execute (http_parser *parser,
1195
1339
  case h_content_length:
1196
1340
  case h_transfer_encoding:
1197
1341
  case h_upgrade:
1198
- if (ch != ' ') header_state = h_general;
1342
+ if (ch != ' ') parser->header_state = h_general;
1199
1343
  break;
1200
1344
 
1201
1345
  default:
@@ -1206,84 +1350,89 @@ size_t http_parser_execute (http_parser *parser,
1206
1350
  }
1207
1351
 
1208
1352
  if (ch == ':') {
1209
- CALLBACK(header_field);
1210
- state = s_header_value_start;
1353
+ parser->state = s_header_value_start;
1354
+ CALLBACK_DATA(header_field);
1211
1355
  break;
1212
1356
  }
1213
1357
 
1214
1358
  if (ch == CR) {
1215
- state = s_header_almost_done;
1216
- CALLBACK(header_field);
1359
+ parser->state = s_header_almost_done;
1360
+ CALLBACK_DATA(header_field);
1217
1361
  break;
1218
1362
  }
1219
1363
 
1220
1364
  if (ch == LF) {
1221
- CALLBACK(header_field);
1222
- state = s_header_field_start;
1365
+ parser->state = s_header_field_start;
1366
+ CALLBACK_DATA(header_field);
1223
1367
  break;
1224
1368
  }
1225
1369
 
1370
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1226
1371
  goto error;
1227
1372
  }
1228
1373
 
1229
1374
  case s_header_value_start:
1230
1375
  {
1231
- if (ch == ' ') break;
1376
+ if (ch == ' ' || ch == '\t') break;
1232
1377
 
1233
1378
  MARK(header_value);
1234
1379
 
1235
- state = s_header_value;
1236
- index = 0;
1237
-
1238
- c = LOWER(ch);
1380
+ parser->state = s_header_value;
1381
+ parser->index = 0;
1239
1382
 
1240
1383
  if (ch == CR) {
1241
- CALLBACK(header_value);
1242
- header_state = h_general;
1243
- state = s_header_almost_done;
1384
+ parser->header_state = h_general;
1385
+ parser->state = s_header_almost_done;
1386
+ CALLBACK_DATA(header_value);
1244
1387
  break;
1245
1388
  }
1246
1389
 
1247
1390
  if (ch == LF) {
1248
- CALLBACK(header_value);
1249
- state = s_header_field_start;
1391
+ parser->state = s_header_field_start;
1392
+ CALLBACK_DATA(header_value);
1250
1393
  break;
1251
1394
  }
1252
1395
 
1253
- switch (header_state) {
1396
+ c = LOWER(ch);
1397
+
1398
+ switch (parser->header_state) {
1254
1399
  case h_upgrade:
1255
1400
  parser->flags |= F_UPGRADE;
1256
- header_state = h_general;
1401
+ parser->header_state = h_general;
1257
1402
  break;
1258
1403
 
1259
1404
  case h_transfer_encoding:
1260
1405
  /* looking for 'Transfer-Encoding: chunked' */
1261
1406
  if ('c' == c) {
1262
- header_state = h_matching_transfer_encoding_chunked;
1407
+ parser->header_state = h_matching_transfer_encoding_chunked;
1263
1408
  } else {
1264
- header_state = h_general;
1409
+ parser->header_state = h_general;
1265
1410
  }
1266
1411
  break;
1267
1412
 
1268
1413
  case h_content_length:
1269
- if (ch < '0' || ch > '9') goto error;
1414
+ if (!IS_NUM(ch)) {
1415
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1416
+ goto error;
1417
+ }
1418
+
1270
1419
  parser->content_length = ch - '0';
1271
1420
  break;
1272
1421
 
1273
1422
  case h_connection:
1274
1423
  /* looking for 'Connection: keep-alive' */
1275
1424
  if (c == 'k') {
1276
- header_state = h_matching_connection_keep_alive;
1425
+ parser->header_state = h_matching_connection_keep_alive;
1277
1426
  /* looking for 'Connection: close' */
1278
1427
  } else if (c == 'c') {
1279
- header_state = h_matching_connection_close;
1428
+ parser->header_state = h_matching_connection_close;
1280
1429
  } else {
1281
- header_state = h_general;
1430
+ parser->header_state = h_general;
1282
1431
  }
1283
1432
  break;
1284
1433
 
1285
1434
  default:
1286
- header_state = h_general;
1435
+ parser->header_state = h_general;
1287
1436
  break;
1288
1437
  }
1289
1438
  break;
@@ -1291,20 +1440,22 @@ size_t http_parser_execute (http_parser *parser,
1291
1440
 
1292
1441
  case s_header_value:
1293
1442
  {
1294
- c = LOWER(ch);
1295
1443
 
1296
1444
  if (ch == CR) {
1297
- CALLBACK(header_value);
1298
- state = s_header_almost_done;
1445
+ parser->state = s_header_almost_done;
1446
+ CALLBACK_DATA(header_value);
1299
1447
  break;
1300
1448
  }
1301
1449
 
1302
1450
  if (ch == LF) {
1303
- CALLBACK(header_value);
1304
- goto header_almost_done;
1451
+ parser->state = s_header_almost_done;
1452
+ CALLBACK_DATA_NOADVANCE(header_value);
1453
+ goto reexecute_byte;
1305
1454
  }
1306
1455
 
1307
- switch (header_state) {
1456
+ c = LOWER(ch);
1457
+
1458
+ switch (parser->header_state) {
1308
1459
  case h_general:
1309
1460
  break;
1310
1461
 
@@ -1314,66 +1465,83 @@ size_t http_parser_execute (http_parser *parser,
1314
1465
  break;
1315
1466
 
1316
1467
  case h_content_length:
1468
+ {
1469
+ uint64_t t;
1470
+
1317
1471
  if (ch == ' ') break;
1318
- if (ch < '0' || ch > '9') goto error;
1319
- parser->content_length *= 10;
1320
- parser->content_length += ch - '0';
1472
+
1473
+ if (!IS_NUM(ch)) {
1474
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1475
+ goto error;
1476
+ }
1477
+
1478
+ t = parser->content_length;
1479
+ t *= 10;
1480
+ t += ch - '0';
1481
+
1482
+ /* Overflow? */
1483
+ if (t < parser->content_length || t == ULLONG_MAX) {
1484
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1485
+ goto error;
1486
+ }
1487
+
1488
+ parser->content_length = t;
1321
1489
  break;
1490
+ }
1322
1491
 
1323
1492
  /* Transfer-Encoding: chunked */
1324
1493
  case h_matching_transfer_encoding_chunked:
1325
- index++;
1326
- if (index > sizeof(CHUNKED)-1
1327
- || c != CHUNKED[index]) {
1328
- header_state = h_general;
1329
- } else if (index == sizeof(CHUNKED)-2) {
1330
- header_state = h_transfer_encoding_chunked;
1494
+ parser->index++;
1495
+ if (parser->index > sizeof(CHUNKED)-1
1496
+ || c != CHUNKED[parser->index]) {
1497
+ parser->header_state = h_general;
1498
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1499
+ parser->header_state = h_transfer_encoding_chunked;
1331
1500
  }
1332
1501
  break;
1333
1502
 
1334
1503
  /* looking for 'Connection: keep-alive' */
1335
1504
  case h_matching_connection_keep_alive:
1336
- index++;
1337
- if (index > sizeof(KEEP_ALIVE)-1
1338
- || c != KEEP_ALIVE[index]) {
1339
- header_state = h_general;
1340
- } else if (index == sizeof(KEEP_ALIVE)-2) {
1341
- header_state = h_connection_keep_alive;
1505
+ parser->index++;
1506
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1507
+ || c != KEEP_ALIVE[parser->index]) {
1508
+ parser->header_state = h_general;
1509
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1510
+ parser->header_state = h_connection_keep_alive;
1342
1511
  }
1343
1512
  break;
1344
1513
 
1345
1514
  /* looking for 'Connection: close' */
1346
1515
  case h_matching_connection_close:
1347
- index++;
1348
- if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1349
- header_state = h_general;
1350
- } else if (index == sizeof(CLOSE)-2) {
1351
- header_state = h_connection_close;
1516
+ parser->index++;
1517
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1518
+ parser->header_state = h_general;
1519
+ } else if (parser->index == sizeof(CLOSE)-2) {
1520
+ parser->header_state = h_connection_close;
1352
1521
  }
1353
1522
  break;
1354
1523
 
1355
1524
  case h_transfer_encoding_chunked:
1356
1525
  case h_connection_keep_alive:
1357
1526
  case h_connection_close:
1358
- if (ch != ' ') header_state = h_general;
1527
+ if (ch != ' ') parser->header_state = h_general;
1359
1528
  break;
1360
1529
 
1361
1530
  default:
1362
- state = s_header_value;
1363
- header_state = h_general;
1531
+ parser->state = s_header_value;
1532
+ parser->header_state = h_general;
1364
1533
  break;
1365
1534
  }
1366
1535
  break;
1367
1536
  }
1368
1537
 
1369
1538
  case s_header_almost_done:
1370
- header_almost_done:
1371
1539
  {
1372
1540
  STRICT_CHECK(ch != LF);
1373
1541
 
1374
- state = s_header_field_start;
1542
+ parser->state = s_header_value_lws;
1375
1543
 
1376
- switch (header_state) {
1544
+ switch (parser->header_state) {
1377
1545
  case h_connection_keep_alive:
1378
1546
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
1379
1547
  break;
@@ -1386,32 +1554,47 @@ size_t http_parser_execute (http_parser *parser,
1386
1554
  default:
1387
1555
  break;
1388
1556
  }
1557
+
1558
+ break;
1559
+ }
1560
+
1561
+ case s_header_value_lws:
1562
+ {
1563
+ if (ch == ' ' || ch == '\t')
1564
+ parser->state = s_header_value_start;
1565
+ else
1566
+ {
1567
+ parser->state = s_header_field_start;
1568
+ goto reexecute_byte;
1569
+ }
1389
1570
  break;
1390
1571
  }
1391
1572
 
1392
1573
  case s_headers_almost_done:
1393
- headers_almost_done:
1394
1574
  {
1395
1575
  STRICT_CHECK(ch != LF);
1396
1576
 
1397
1577
  if (parser->flags & F_TRAILING) {
1398
1578
  /* End of a chunked request */
1399
- CALLBACK2(message_complete);
1400
- state = NEW_MESSAGE();
1579
+ parser->state = NEW_MESSAGE();
1580
+ CALLBACK_NOTIFY(message_complete);
1401
1581
  break;
1402
1582
  }
1403
1583
 
1404
- nread = 0;
1584
+ parser->state = s_headers_done;
1405
1585
 
1406
- if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1407
- parser->upgrade = 1;
1408
- }
1586
+ /* Set this here so that on_headers_complete() callbacks can see it */
1587
+ parser->upgrade =
1588
+ (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1409
1589
 
1410
1590
  /* Here we call the headers_complete callback. This is somewhat
1411
1591
  * different than other callbacks because if the user returns 1, we
1412
1592
  * will interpret that as saying that this message has no body. This
1413
1593
  * is needed for the annoying case of receiving a response to a HEAD
1414
1594
  * request.
1595
+ *
1596
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1597
+ * we have to simulate it by handling a change in errno below.
1415
1598
  */
1416
1599
  if (settings->on_headers_complete) {
1417
1600
  switch (settings->on_headers_complete(parser)) {
@@ -1423,39 +1606,54 @@ size_t http_parser_execute (http_parser *parser,
1423
1606
  break;
1424
1607
 
1425
1608
  default:
1426
- parser->state = state;
1609
+ SET_ERRNO(HPE_CB_headers_complete);
1427
1610
  return p - data; /* Error */
1428
1611
  }
1429
1612
  }
1430
1613
 
1614
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1615
+ return p - data;
1616
+ }
1617
+
1618
+ goto reexecute_byte;
1619
+ }
1620
+
1621
+ case s_headers_done:
1622
+ {
1623
+ STRICT_CHECK(ch != LF);
1624
+
1625
+ parser->nread = 0;
1626
+
1431
1627
  /* Exit, the rest of the connect is in a different protocol. */
1432
1628
  if (parser->upgrade) {
1433
- CALLBACK2(message_complete);
1434
- return (p - data);
1629
+ parser->state = NEW_MESSAGE();
1630
+ CALLBACK_NOTIFY(message_complete);
1631
+ return (p - data) + 1;
1435
1632
  }
1436
1633
 
1437
1634
  if (parser->flags & F_SKIPBODY) {
1438
- CALLBACK2(message_complete);
1439
- state = NEW_MESSAGE();
1635
+ parser->state = NEW_MESSAGE();
1636
+ CALLBACK_NOTIFY(message_complete);
1440
1637
  } else if (parser->flags & F_CHUNKED) {
1441
1638
  /* chunked encoding - ignore Content-Length header */
1442
- state = s_chunk_size_start;
1639
+ parser->state = s_chunk_size_start;
1443
1640
  } else {
1444
1641
  if (parser->content_length == 0) {
1445
1642
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1446
- CALLBACK2(message_complete);
1447
- state = NEW_MESSAGE();
1448
- } else if (parser->content_length > 0) {
1643
+ parser->state = NEW_MESSAGE();
1644
+ CALLBACK_NOTIFY(message_complete);
1645
+ } else if (parser->content_length != ULLONG_MAX) {
1449
1646
  /* Content-Length header given and non-zero */
1450
- state = s_body_identity;
1647
+ parser->state = s_body_identity;
1451
1648
  } else {
1452
- if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1649
+ if (parser->type == HTTP_REQUEST ||
1650
+ !http_message_needs_eof(parser)) {
1453
1651
  /* Assume content-length 0 - read the next */
1454
- CALLBACK2(message_complete);
1455
- state = NEW_MESSAGE();
1652
+ parser->state = NEW_MESSAGE();
1653
+ CALLBACK_NOTIFY(message_complete);
1456
1654
  } else {
1457
1655
  /* Read body until EOF */
1458
- state = s_body_identity_eof;
1656
+ parser->state = s_body_identity_eof;
1459
1657
  }
1460
1658
  }
1461
1659
  }
@@ -1464,60 +1662,103 @@ size_t http_parser_execute (http_parser *parser,
1464
1662
  }
1465
1663
 
1466
1664
  case s_body_identity:
1467
- to_read = MIN(pe - p, (int64_t)parser->content_length);
1468
- if (to_read > 0) {
1469
- if (settings->on_body) settings->on_body(parser, p, to_read);
1470
- p += to_read - 1;
1471
- parser->content_length -= to_read;
1472
- if (parser->content_length == 0) {
1473
- CALLBACK2(message_complete);
1474
- state = NEW_MESSAGE();
1475
- }
1665
+ {
1666
+ uint64_t to_read = MIN(parser->content_length,
1667
+ (uint64_t) ((data + len) - p));
1668
+
1669
+ assert(parser->content_length != 0
1670
+ && parser->content_length != ULLONG_MAX);
1671
+
1672
+ /* The difference between advancing content_length and p is because
1673
+ * the latter will automatically advance on the next loop iteration.
1674
+ * Further, if content_length ends up at 0, we want to see the last
1675
+ * byte again for our message complete callback.
1676
+ */
1677
+ MARK(body);
1678
+ parser->content_length -= to_read;
1679
+ p += to_read - 1;
1680
+
1681
+ if (parser->content_length == 0) {
1682
+ parser->state = s_message_done;
1683
+
1684
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1685
+ *
1686
+ * The alternative to doing this is to wait for the next byte to
1687
+ * trigger the data callback, just as in every other case. The
1688
+ * problem with this is that this makes it difficult for the test
1689
+ * harness to distinguish between complete-on-EOF and
1690
+ * complete-on-length. It's not clear that this distinction is
1691
+ * important for applications, but let's keep it for now.
1692
+ */
1693
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1694
+ goto reexecute_byte;
1476
1695
  }
1696
+
1477
1697
  break;
1698
+ }
1478
1699
 
1479
1700
  /* read until EOF */
1480
1701
  case s_body_identity_eof:
1481
- to_read = pe - p;
1482
- if (to_read > 0) {
1483
- if (settings->on_body) settings->on_body(parser, p, to_read);
1484
- p += to_read - 1;
1485
- }
1702
+ MARK(body);
1703
+ p = data + len - 1;
1704
+
1705
+ break;
1706
+
1707
+ case s_message_done:
1708
+ parser->state = NEW_MESSAGE();
1709
+ CALLBACK_NOTIFY(message_complete);
1486
1710
  break;
1487
1711
 
1488
1712
  case s_chunk_size_start:
1489
1713
  {
1490
- assert(nread == 1);
1714
+ assert(parser->nread == 1);
1491
1715
  assert(parser->flags & F_CHUNKED);
1492
1716
 
1493
- c = unhex[(unsigned char)ch];
1494
- if (c == -1) goto error;
1495
- parser->content_length = c;
1496
- state = s_chunk_size;
1717
+ unhex_val = unhex[(unsigned char)ch];
1718
+ if (unhex_val == -1) {
1719
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1720
+ goto error;
1721
+ }
1722
+
1723
+ parser->content_length = unhex_val;
1724
+ parser->state = s_chunk_size;
1497
1725
  break;
1498
1726
  }
1499
1727
 
1500
1728
  case s_chunk_size:
1501
1729
  {
1730
+ uint64_t t;
1731
+
1502
1732
  assert(parser->flags & F_CHUNKED);
1503
1733
 
1504
1734
  if (ch == CR) {
1505
- state = s_chunk_size_almost_done;
1735
+ parser->state = s_chunk_size_almost_done;
1506
1736
  break;
1507
1737
  }
1508
1738
 
1509
- c = unhex[(unsigned char)ch];
1739
+ unhex_val = unhex[(unsigned char)ch];
1510
1740
 
1511
- if (c == -1) {
1741
+ if (unhex_val == -1) {
1512
1742
  if (ch == ';' || ch == ' ') {
1513
- state = s_chunk_parameters;
1743
+ parser->state = s_chunk_parameters;
1514
1744
  break;
1515
1745
  }
1746
+
1747
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1748
+ goto error;
1749
+ }
1750
+
1751
+ t = parser->content_length;
1752
+ t *= 16;
1753
+ t += unhex_val;
1754
+
1755
+ /* Overflow? */
1756
+ if (t < parser->content_length || t == ULLONG_MAX) {
1757
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1516
1758
  goto error;
1517
1759
  }
1518
1760
 
1519
- parser->content_length *= 16;
1520
- parser->content_length += c;
1761
+ parser->content_length = t;
1521
1762
  break;
1522
1763
  }
1523
1764
 
@@ -1526,7 +1767,7 @@ size_t http_parser_execute (http_parser *parser,
1526
1767
  assert(parser->flags & F_CHUNKED);
1527
1768
  /* just ignore this shit. TODO check for overflow */
1528
1769
  if (ch == CR) {
1529
- state = s_chunk_size_almost_done;
1770
+ parser->state = s_chunk_size_almost_done;
1530
1771
  break;
1531
1772
  }
1532
1773
  break;
@@ -1537,74 +1778,117 @@ size_t http_parser_execute (http_parser *parser,
1537
1778
  assert(parser->flags & F_CHUNKED);
1538
1779
  STRICT_CHECK(ch != LF);
1539
1780
 
1540
- nread = 0;
1781
+ parser->nread = 0;
1541
1782
 
1542
1783
  if (parser->content_length == 0) {
1543
1784
  parser->flags |= F_TRAILING;
1544
- state = s_header_field_start;
1785
+ parser->state = s_header_field_start;
1545
1786
  } else {
1546
- state = s_chunk_data;
1787
+ parser->state = s_chunk_data;
1547
1788
  }
1548
1789
  break;
1549
1790
  }
1550
1791
 
1551
1792
  case s_chunk_data:
1552
1793
  {
1553
- assert(parser->flags & F_CHUNKED);
1794
+ uint64_t to_read = MIN(parser->content_length,
1795
+ (uint64_t) ((data + len) - p));
1554
1796
 
1555
- to_read = MIN(pe - p, (int64_t)(parser->content_length));
1797
+ assert(parser->flags & F_CHUNKED);
1798
+ assert(parser->content_length != 0
1799
+ && parser->content_length != ULLONG_MAX);
1556
1800
 
1557
- if (to_read > 0) {
1558
- if (settings->on_body) settings->on_body(parser, p, to_read);
1559
- p += to_read - 1;
1560
- }
1801
+ /* See the explanation in s_body_identity for why the content
1802
+ * length and data pointers are managed this way.
1803
+ */
1804
+ MARK(body);
1805
+ parser->content_length -= to_read;
1806
+ p += to_read - 1;
1561
1807
 
1562
- if (to_read == parser->content_length) {
1563
- state = s_chunk_data_almost_done;
1808
+ if (parser->content_length == 0) {
1809
+ parser->state = s_chunk_data_almost_done;
1564
1810
  }
1565
1811
 
1566
- parser->content_length -= to_read;
1567
1812
  break;
1568
1813
  }
1569
1814
 
1570
1815
  case s_chunk_data_almost_done:
1571
1816
  assert(parser->flags & F_CHUNKED);
1817
+ assert(parser->content_length == 0);
1572
1818
  STRICT_CHECK(ch != CR);
1573
- state = s_chunk_data_done;
1819
+ parser->state = s_chunk_data_done;
1820
+ CALLBACK_DATA(body);
1574
1821
  break;
1575
1822
 
1576
1823
  case s_chunk_data_done:
1577
1824
  assert(parser->flags & F_CHUNKED);
1578
1825
  STRICT_CHECK(ch != LF);
1579
- state = s_chunk_size_start;
1826
+ parser->nread = 0;
1827
+ parser->state = s_chunk_size_start;
1580
1828
  break;
1581
1829
 
1582
1830
  default:
1583
1831
  assert(0 && "unhandled state");
1832
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1584
1833
  goto error;
1585
1834
  }
1586
1835
  }
1587
1836
 
1588
- CALLBACK_NOCLEAR(header_field);
1589
- CALLBACK_NOCLEAR(header_value);
1590
- CALLBACK_NOCLEAR(fragment);
1591
- CALLBACK_NOCLEAR(query_string);
1592
- CALLBACK_NOCLEAR(path);
1593
- CALLBACK_NOCLEAR(url);
1837
+ /* Run callbacks for any marks that we have leftover after we ran out of
1838
+ * bytes. There should be at most one of these set, so it's OK to invoke
1839
+ * them in series (unset marks will not result in callbacks).
1840
+ *
1841
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
1842
+ * overflowed 'data' and this allows us to correct for the off-by-one that
1843
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1844
+ * value that's in-bounds).
1845
+ */
1594
1846
 
1595
- parser->state = state;
1596
- parser->header_state = header_state;
1597
- parser->index = index;
1598
- parser->nread = nread;
1847
+ assert(((header_field_mark ? 1 : 0) +
1848
+ (header_value_mark ? 1 : 0) +
1849
+ (url_mark ? 1 : 0) +
1850
+ (body_mark ? 1 : 0)) <= 1);
1851
+
1852
+ CALLBACK_DATA_NOADVANCE(header_field);
1853
+ CALLBACK_DATA_NOADVANCE(header_value);
1854
+ CALLBACK_DATA_NOADVANCE(url);
1855
+ CALLBACK_DATA_NOADVANCE(body);
1599
1856
 
1600
1857
  return len;
1601
1858
 
1602
1859
  error:
1603
- parser->state = s_dead;
1860
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1861
+ SET_ERRNO(HPE_UNKNOWN);
1862
+ }
1863
+
1604
1864
  return (p - data);
1605
1865
  }
1606
1866
 
1607
1867
 
1868
+ /* Does the parser need to see an EOF to find the end of the message? */
1869
+ int
1870
+ http_message_needs_eof (http_parser *parser)
1871
+ {
1872
+ if (parser->type == HTTP_REQUEST) {
1873
+ return 0;
1874
+ }
1875
+
1876
+ /* See RFC 2616 section 4.4 */
1877
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1878
+ parser->status_code == 204 || /* No Content */
1879
+ parser->status_code == 304 || /* Not Modified */
1880
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1881
+ return 0;
1882
+ }
1883
+
1884
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1885
+ return 0;
1886
+ }
1887
+
1888
+ return 1;
1889
+ }
1890
+
1891
+
1608
1892
  int
1609
1893
  http_should_keep_alive (http_parser *parser)
1610
1894
  {
@@ -1612,17 +1896,15 @@ http_should_keep_alive (http_parser *parser)
1612
1896
  /* HTTP/1.1 */
1613
1897
  if (parser->flags & F_CONNECTION_CLOSE) {
1614
1898
  return 0;
1615
- } else {
1616
- return 1;
1617
1899
  }
1618
1900
  } else {
1619
1901
  /* HTTP/1.0 or earlier */
1620
- if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1621
- return 1;
1622
- } else {
1902
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1623
1903
  return 0;
1624
1904
  }
1625
1905
  }
1906
+
1907
+ return !http_message_needs_eof(parser);
1626
1908
  }
1627
1909
 
1628
1910
 
@@ -1635,10 +1917,142 @@ const char * http_method_str (enum http_method m)
1635
1917
  void
1636
1918
  http_parser_init (http_parser *parser, enum http_parser_type t)
1637
1919
  {
1920
+ void *data = parser->data; /* preserve application data */
1921
+ memset(parser, 0, sizeof(*parser));
1922
+ parser->data = data;
1638
1923
  parser->type = t;
1639
1924
  parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1640
- parser->nread = 0;
1641
- parser->upgrade = 0;
1642
- parser->flags = 0;
1643
- parser->method = 0;
1925
+ parser->http_errno = HPE_OK;
1926
+ }
1927
+
1928
+ const char *
1929
+ http_errno_name(enum http_errno err) {
1930
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1931
+ return http_strerror_tab[err].name;
1932
+ }
1933
+
1934
+ const char *
1935
+ http_errno_description(enum http_errno err) {
1936
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1937
+ return http_strerror_tab[err].description;
1938
+ }
1939
+
1940
+ int
1941
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
1942
+ struct http_parser_url *u)
1943
+ {
1944
+ enum state s;
1945
+ const char *p;
1946
+ enum http_parser_url_fields uf, old_uf;
1947
+
1948
+ u->port = u->field_set = 0;
1949
+ s = is_connect ? s_req_host_start : s_req_spaces_before_url;
1950
+ uf = old_uf = UF_MAX;
1951
+
1952
+ for (p = buf; p < buf + buflen; p++) {
1953
+ s = parse_url_char(s, *p);
1954
+
1955
+ /* Figure out the next field that we're operating on */
1956
+ switch (s) {
1957
+ case s_dead:
1958
+ return 1;
1959
+
1960
+ /* Skip delimiters */
1961
+ case s_req_schema_slash:
1962
+ case s_req_schema_slash_slash:
1963
+ case s_req_host_start:
1964
+ case s_req_host_v6_start:
1965
+ case s_req_host_v6_end:
1966
+ case s_req_port_start:
1967
+ case s_req_query_string_start:
1968
+ case s_req_fragment_start:
1969
+ continue;
1970
+
1971
+ case s_req_schema:
1972
+ uf = UF_SCHEMA;
1973
+ break;
1974
+
1975
+ case s_req_host:
1976
+ case s_req_host_v6:
1977
+ uf = UF_HOST;
1978
+ break;
1979
+
1980
+ case s_req_port:
1981
+ uf = UF_PORT;
1982
+ break;
1983
+
1984
+ case s_req_path:
1985
+ uf = UF_PATH;
1986
+ break;
1987
+
1988
+ case s_req_query_string:
1989
+ uf = UF_QUERY;
1990
+ break;
1991
+
1992
+ case s_req_fragment:
1993
+ uf = UF_FRAGMENT;
1994
+ break;
1995
+
1996
+ default:
1997
+ assert(!"Unexpected state");
1998
+ return 1;
1999
+ }
2000
+
2001
+ /* Nothing's changed; soldier on */
2002
+ if (uf == old_uf) {
2003
+ u->field_data[uf].len++;
2004
+ continue;
2005
+ }
2006
+
2007
+ u->field_data[uf].off = p - buf;
2008
+ u->field_data[uf].len = 1;
2009
+
2010
+ u->field_set |= (1 << uf);
2011
+ old_uf = uf;
2012
+ }
2013
+
2014
+ /* CONNECT requests can only contain "hostname:port" */
2015
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2016
+ return 1;
2017
+ }
2018
+
2019
+ /* Make sure we don't end somewhere unexpected */
2020
+ switch (s) {
2021
+ case s_req_host_v6_start:
2022
+ case s_req_host_v6:
2023
+ case s_req_host_v6_end:
2024
+ case s_req_host:
2025
+ case s_req_port_start:
2026
+ return 1;
2027
+ default:
2028
+ break;
2029
+ }
2030
+
2031
+ if (u->field_set & (1 << UF_PORT)) {
2032
+ /* Don't bother with endp; we've already validated the string */
2033
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2034
+
2035
+ /* Ports have a max value of 2^16 - 1 */
2036
+ if (v > 0xffff) {
2037
+ return 1;
2038
+ }
2039
+
2040
+ u->port = (uint16_t) v;
2041
+ }
2042
+
2043
+ return 0;
2044
+ }
2045
+
2046
+ void
2047
+ http_parser_pause(http_parser *parser, int paused) {
2048
+ /* Users should only be pausing/unpausing a parser that is not in an error
2049
+ * state. In non-debug builds, there's not much that we can do about this
2050
+ * other than ignore it.
2051
+ */
2052
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2053
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2054
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2055
+ } else {
2056
+ assert(0 && "Attempting to pause parser in error state");
2057
+ }
1644
2058
  }