RubyGems - http_parser.rb - Versions diffs - 0.5.3 → 0.6.0.beta.1 - Mend

http_parser.rb 0.5.3 → 0.6.0.beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

data/ext/ruby_http_parser/ruby_http_parser.c CHANGED

@@ -17,9 +17,6 @@ typedef struct ParserWrapper {
   ryah_http_parser parser;
   VALUE request_url;
-  VALUE request_path;
-  VALUE query_string;
-  VALUE fragment;
   VALUE headers;
@@ -49,9 +46,6 @@ void ParserWrapper_init(ParserWrapper *wrapper) {
   wrapper->parser.http_minor = 0;
   wrapper->request_url = Qnil;
-  wrapper->request_path = Qnil;
-  wrapper->query_string = Qnil;
-  wrapper->fragment = Qnil;
   wrapper->upgrade_data = Qnil;
@@ -66,9 +60,6 @@ void ParserWrapper_mark(void *data) {
   if(data) {
     ParserWrapper *wrapper = (ParserWrapper *) data;
     rb_gc_mark_maybe(wrapper->request_url);
-    rb_gc_mark_maybe(wrapper->request_path);
-    rb_gc_mark_maybe(wrapper->query_string);
-    rb_gc_mark_maybe(wrapper->fragment);
     rb_gc_mark_maybe(wrapper->upgrade_data);
     rb_gc_mark_maybe(wrapper->headers);
     rb_gc_mark_maybe(wrapper->on_message_begin);
@@ -111,9 +102,6 @@ int on_message_begin(ryah_http_parser *parser) {
   GET_WRAPPER(wrapper, parser);
   wrapper->request_url = rb_str_new2("");
-  wrapper->request_path = rb_str_new2("");
-  wrapper->query_string = rb_str_new2("");
-  wrapper->fragment = rb_str_new2("");
   wrapper->headers = rb_hash_new();
   wrapper->upgrade_data = rb_str_new2("");
@@ -139,24 +127,6 @@ int on_url(ryah_http_parser *parser, const char *at, size_t length) {
   return 0;
 }
-int on_path(ryah_http_parser *parser, const char *at, size_t length) {
-  GET_WRAPPER(wrapper, parser);
-  rb_str_cat(wrapper->request_path, at, length);
-  return 0;
-}
-int on_query_string(ryah_http_parser *parser, const char *at, size_t length) {
-  GET_WRAPPER(wrapper, parser);
-  rb_str_cat(wrapper->query_string, at, length);
-  return 0;
-}
-int on_fragment(ryah_http_parser *parser, const char *at, size_t length) {
-  GET_WRAPPER(wrapper, parser);
-  rb_str_cat(wrapper->fragment, at, length);
-  return 0;
-}
 int on_header_field(ryah_http_parser *parser, const char *at, size_t length) {
   GET_WRAPPER(wrapper, parser);
@@ -278,10 +248,7 @@ int on_message_complete(ryah_http_parser *parser) {
 static ryah_http_parser_settings settings = {
   .on_message_begin = on_message_begin,
-  .on_path = on_path,
-  .on_query_string = on_query_string,
   .on_url = on_url,
-  .on_fragment = on_fragment,
   .on_header_field = on_header_field,
   .on_header_value = on_header_value,
   .on_headers_complete = on_headers_complete,
@@ -318,6 +285,10 @@ VALUE ResponseParser_alloc(VALUE klass) {
   return Parser_alloc_by_type(klass, HTTP_RESPONSE);
 }
+VALUE Parser_strict_p(VALUE klass) {
+  return HTTP_PARSER_STRICT == 1 ? Qtrue : Qfalse;
+}
 VALUE Parser_initialize(int argc, VALUE *argv, VALUE self) {
   ParserWrapper *wrapper = NULL;
   DATA_GET(self, ParserWrapper, wrapper);
@@ -349,11 +320,14 @@ VALUE Parser_execute(VALUE self, VALUE data) {
   size_t nparsed = ryah_http_parser_execute(&wrapper->parser, &settings, ptr, len);
   if (wrapper->parser.upgrade) {
-    rb_str_cat(wrapper->upgrade_data, ptr + nparsed + 1, len - nparsed - 1);
+    if (RTEST(wrapper->stopped))
+      nparsed += 1;
+    rb_str_cat(wrapper->upgrade_data, ptr + nparsed, len - nparsed);
   } else if (nparsed != (size_t)len) {
     if (!RTEST(wrapper->stopped) && !RTEST(wrapper->completed))
-      rb_raise(eParserError, "Could not parse data entirely");
+      rb_raise(eParserError, "Could not parse data entirely (%zu != %zu)", nparsed, len);
     else
       nparsed += 1; // error states fail on the current character
   }
@@ -465,9 +439,6 @@ VALUE Parser_status_code(VALUE self) {
   }
 DEFINE_GETTER(request_url);
-DEFINE_GETTER(request_path);
-DEFINE_GETTER(query_string);
-DEFINE_GETTER(fragment);
 DEFINE_GETTER(headers);
 DEFINE_GETTER(upgrade_data);
 DEFINE_GETTER(header_value_type);
@@ -515,6 +486,7 @@ void Init_ruby_http_parser() {
   rb_define_alloc_func(cRequestParser, RequestParser_alloc);
   rb_define_alloc_func(cResponseParser, ResponseParser_alloc);
+  rb_define_singleton_method(cParser, "strict?", Parser_strict_p, 0);
   rb_define_method(cParser, "initialize", Parser_initialize, -1);
   rb_define_method(cParser, "on_message_begin=", Parser_set_on_message_begin, 1);
@@ -534,9 +506,6 @@ void Init_ruby_http_parser() {
   rb_define_method(cParser, "status_code", Parser_status_code, 0);
   rb_define_method(cParser, "request_url", Parser_request_url, 0);
-  rb_define_method(cParser, "request_path", Parser_request_path, 0);
-  rb_define_method(cParser, "query_string", Parser_query_string, 0);
-  rb_define_method(cParser, "fragment", Parser_fragment, 0);
   rb_define_method(cParser, "headers", Parser_headers, 0);
   rb_define_method(cParser, "upgrade_data", Parser_upgrade_data, 0);
   rb_define_method(cParser, "header_value_type", Parser_header_value_type, 0);

data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS ADDED

@@ -0,0 +1,32 @@
+# Authors ordered by first contribution.
+Ryan Dahl <ry@tinyclouds.org>
+Jeremy Hinegardner <jeremy@hinegardner.org>
+Sergey Shepelev <temotor@gmail.com>
+Joe Damato <ice799@gmail.com>
+tomika <tomika_nospam@freemail.hu>
+Phoenix Sol <phoenix@burninglabs.com>
+Cliff Frey <cliff@meraki.com>
+Ewen Cheslack-Postava <ewencp@cs.stanford.edu>
+Santiago Gala <sgala@apache.org>
+Tim Becker <tim.becker@syngenio.de>
+Jeff Terrace <jterrace@gmail.com>
+Ben Noordhuis <info@bnoordhuis.nl>
+Nathan Rajlich <nathan@tootallnate.net>
+Mark Nottingham <mnot@mnot.net>
+Aman Gupta <aman@tmm1.net>
+Tim Becker <tim.becker@kuriositaet.de>
+Sean Cunningham <sean.cunningham@mandiant.com>
+Peter Griess <pg@std.in>
+Salman Haq <salman.haq@asti-usa.com>
+Cliff Frey <clifffrey@gmail.com>
+Jon Kolb <jon@b0g.us>
+Fouad Mardini <f.mardini@gmail.com>
+Paul Querna <pquerna@apache.org>
+Felix Geisendörfer <felix@debuggable.com>
+koichik <koichik@improvement.jp>
+Andre Caron <andre.l.caron@gmail.com>
+Ivo Raisr <ivosh@ivosh.net>
+James McLaughlin <jamie@lacewing-project.org>
+David Gwynne <loki@animata.net>
+LE ROUX Thomas <thomas@procheo.fr>
+Randy Rizun <rrizun@ortivawireless.com>

data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT CHANGED

@@ -23,7 +23,11 @@ IN THE SOFTWARE.
 This code mainly based on code with the following license:
-Copyright Joyent, Inc. and other Node contributors. All rights reserved.
+http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright
+Igor Sysoev.
+Additional changes are licensed under the same terms as NGINX and
+copyright Joyent, Inc. and other Node contributors. All rights reserved.
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to

data/ext/ruby_http_parser/vendor/http-parser-java/README.md CHANGED

@@ -24,7 +24,7 @@ The parser extracts the following information from HTTP messages:
   * Response status code
   * Transfer-Encoding
   * HTTP version
-  * Request path, query string, fragment
+  * Request URL
   * Message body
 Building
@@ -49,3 +49,135 @@ Usage
   help or have suggestions, feel free to contact me at
   (tim.becker@kuriositaet.de).
+One `http_parser` object is used per TCP connection. Initialize the struct
+using `http_parser_init()` and set the callbacks. That might look something
+like this for a request parser:
+    http_parser_settings settings;
+    settings.on_url = my_url_callback;
+    settings.on_header_field = my_header_field_callback;
+    /* ... */
+    http_parser *parser = malloc(sizeof(http_parser));
+    http_parser_init(parser, HTTP_REQUEST);
+    parser->data = my_socket;
+When data is received on the socket execute the parser and check for errors.
+    size_t len = 80*1024, nparsed;
+    char buf[len];
+    ssize_t recved;
+    recved = recv(fd, buf, len, 0);
+    if (recved < 0) {
+      /* Handle error. */
+    }
+    /* Start up / continue the parser.
+     * Note we pass recved==0 to signal that EOF has been received.
+     */
+    nparsed = http_parser_execute(parser, &settings, buf, recved);
+    if (parser->upgrade) {
+      /* handle new protocol */
+    } else if (nparsed != recved) {
+      /* Handle error. Usually just close the connection. */
+    }
+HTTP needs to know where the end of the stream is. For example, sometimes
+servers send responses without Content-Length and expect the client to
+consume input (for the body) until EOF. To tell http_parser about EOF, give
+`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
+can still be encountered during an EOF, so one must still be prepared
+to receive them.
+Scalar valued message information such as `status_code`, `method`, and the
+HTTP version are stored in the parser structure. This data is only
+temporarily stored in `http_parser` and gets reset on each new message. If
+this information is needed later, copy it out of the structure during the
+`headers_complete` callback.
+The parser decodes the transfer-encoding for both requests and responses
+transparently. That is, a chunked encoding is decoded before being sent to
+the on_body callback.
+The Special Problem of Upgrade
+------------------------------
+HTTP supports upgrading the connection to a different protocol. An
+increasingly common example of this is the Web Socket protocol which sends
+a request like
+        GET /demo HTTP/1.1
+        Upgrade: WebSocket
+        Connection: Upgrade
+        Host: example.com
+        Origin: http://example.com
+        WebSocket-Protocol: sample
+followed by non-HTTP data.
+(See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
+information on the Web Socket protocol.)
+To support this, the parser will treat this as a normal HTTP message without a
+body, issuing both the on_headers_complete and on_message_complete callbacks.
+However, http_parser_execute() will stop parsing at the end of the headers and return.
+The user is expected to check if `parser->upgrade` has been set to 1 after
+`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
+offset by the return value of `http_parser_execute()`.
+Callbacks
+---------
+During the `http_parser_execute()` call, the callbacks set in
+`http_parser_settings` will be executed. The parser maintains state and
+never looks behind, so buffering the data is not necessary. If you need to
+save certain data for later usage, you can do that from the callbacks.
+There are two types of callbacks:
+* notification `typedef int (*http_cb) (http_parser*);`
+    Callbacks: on_message_begin, on_headers_complete, on_message_complete.
+* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
+    Callbacks: (requests only) on_url,
+               (common) on_header_field, on_header_value, on_body;
+Callbacks must return 0 on success. Returning a non-zero value indicates
+error to the parser, making it exit immediately.
+If you parse an HTTP message in chunks (i.e. `read()` the request line
+from the socket, parse, read half the headers, parse, etc.), your data callbacks
+may be called more than once. Http-parser guarantees that the data pointer is only
+valid for the lifetime of the callback. You can also `read()` into a heap-allocated
+buffer to avoid copying memory around if this fits your application.
+Reading headers may be a tricky task if you read/parse headers partially.
+Basically, you need to remember whether the last header callback was field or value
+and apply the following logic:
+    (on_header_field and on_header_value shortened to on_h_*)
+     ------------------------ ------------ --------------------------------------------
+    | State (prev. callback) | Callback   | Description/action                         |
+     ------------------------ ------------ --------------------------------------------
+    | nothing (first call)   | on_h_field | Allocate new buffer and copy callback data |
+    |                        |            | into it                                    |
+     ------------------------ ------------ --------------------------------------------
+    | value                  | on_h_field | New header started.                        |
+    |                        |            | Copy current name,value buffers to headers |
+    |                        |            | list and allocate new buffer for new name  |
+     ------------------------ ------------ --------------------------------------------
+    | field                  | on_h_field | Previous name continues. Reallocate name   |
+    |                        |            | buffer and append callback data to it      |
+     ------------------------ ------------ --------------------------------------------
+    | field                  | on_h_value | Value for current header started. Allocate |
+    |                        |            | new buffer and copy callback data to it    |
+     ------------------------ ------------ --------------------------------------------
+    | value                  | on_h_value | Value continues. Reallocate value buffer   |
+    |                        |            | and append callback data to it             |
+     ------------------------ ------------ --------------------------------------------

data/ext/ruby_http_parser/vendor/http-parser-java/TODO CHANGED

@@ -1,4 +1,10 @@
+decide how to handle errs per default:
+  - ry: "set state to dead", return `read`
+  - current: call on_error w/ details, if no on_error handler set,
+    throw Exception, else call on_error and behave like orig...
 some tests from test.c left to port
+  (scan ...)
 documentation
 hi level callback interface

data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c CHANGED

@@ -1,4 +1,7 @@
-/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
+/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
+ *
+ * Additional changes are licensed under the same terms as NGINX and
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
@@ -18,48 +21,94 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
-#include <http_parser.h>
+#include "http_parser.h"
 #include <assert.h>
 #include <stddef.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#ifndef ULLONG_MAX
+# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
+#endif
 #ifndef MIN
 # define MIN(a,b) ((a) < (b) ? (a) : (b))
 #endif
-#define CALLBACK2(FOR)                                               \
+#if HTTP_PARSER_DEBUG
+#define SET_ERRNO(e)                                                 \
 do {                                                                 \
-  if (settings->on_##FOR) {                                          \
-    if (0 != settings->on_##FOR(parser)) return (p - data);          \
-  }                                                                  \
+  parser->http_errno = (e);                                          \
+  parser->error_lineno = __LINE__;                                   \
 } while (0)
+#else
+#define SET_ERRNO(e)                                                 \
+do {                                                                 \
+  parser->http_errno = (e);                                          \
+} while(0)
+#endif
-#define MARK(FOR)                                                    \
+/* Run the notify callback FOR, returning ER if it fails */
+#define CALLBACK_NOTIFY_(FOR, ER)                                    \
 do {                                                                 \
-  FOR##_mark = p;                                                    \
+  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
+                                                                     \
+  if (settings->on_##FOR) {                                          \
+    if (0 != settings->on_##FOR(parser)) {                           \
+      SET_ERRNO(HPE_CB_##FOR);                                       \
+    }                                                                \
+                                                                     \
+    /* We either errored above or got paused; get out */             \
+    if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                       \
+      return (ER);                                                   \
+    }                                                                \
+  }                                                                  \
 } while (0)
-#define CALLBACK_NOCLEAR(FOR)                                        \
+/* Run the notify callback FOR and consume the current byte */
+#define CALLBACK_NOTIFY(FOR)            CALLBACK_NOTIFY_(FOR, p - data + 1)
+/* Run the notify callback FOR and don't consume the current byte */
+#define CALLBACK_NOTIFY_NOADVANCE(FOR)  CALLBACK_NOTIFY_(FOR, p - data)
+/* Run data callback FOR with LEN bytes, returning ER if it fails */
+#define CALLBACK_DATA_(FOR, LEN, ER)                                 \
 do {                                                                 \
+  assert(HTTP_PARSER_ERRNO(parser) == HPE_OK);                       \
+                                                                     \
   if (FOR##_mark) {                                                  \
     if (settings->on_##FOR) {                                        \
-      if (0 != settings->on_##FOR(parser,                            \
-                                 FOR##_mark,                         \
-                                 p - FOR##_mark))                    \
-      {                                                              \
-        return (p - data);                                           \
+      if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) {      \
+        SET_ERRNO(HPE_CB_##FOR);                                     \
+      }                                                              \
+                                                                     \
+      /* We either errored above or got paused; get out */           \
+      if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {                     \
+        return (ER);                                                 \
       }                                                              \
     }                                                                \
+    FOR##_mark = NULL;                                               \
   }                                                                  \
 } while (0)
+/* Run the data callback FOR and consume the current byte */
+#define CALLBACK_DATA(FOR)                                           \
+    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
+/* Run the data callback FOR and don't consume the current byte */
+#define CALLBACK_DATA_NOADVANCE(FOR)                                 \
+    CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
-#define CALLBACK(FOR)                                                \
+/* Set the mark FOR; non-destructive if mark is already set */
+#define MARK(FOR)                                                    \
 do {                                                                 \
-  CALLBACK_NOCLEAR(FOR);                                             \
-  FOR##_mark = NULL;                                                 \
+  if (!FOR##_mark) {                                                 \
+    FOR##_mark = p;                                                  \
+  }                                                                  \
 } while (0)
@@ -97,6 +146,8 @@ static const char *method_strings[] =
   , "NOTIFY"
   , "SUBSCRIBE"
   , "UNSUBSCRIBE"
+  , "PATCH"
+  , "PURGE"
   };
@@ -117,9 +168,9 @@ static const char tokens[256] = {
 /*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
         0,       0,       0,       0,       0,       0,       0,       0,
 /*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
-       ' ',      '!',     '"',     '#',     '$',     '%',     '&',    '\'',
+        0,      '!',      0,      '#',     '$',     '%',     '&',    '\'',
 /*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
-        0,       0,      '*',     '+',      0,      '-',     '.',     '/',
+        0,       0,      '*',     '+',      0,      '-',     '.',      0,
 /*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
        '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
 /*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
@@ -139,7 +190,7 @@ static const char tokens[256] = {
 /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
        'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
 /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
-       'x',     'y',     'z',      0,      '|',     '}',     '~',       0 };
+       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
 static const int8_t unhex[256] =
@@ -186,28 +237,7 @@ static const uint8_t normal_url_char[256] = {
 /* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
         1,       1,       1,       1,       1,       1,       1,       1,
 /* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
-        1,       1,       1,       1,       1,       1,       1,       0,
-/* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
-   encoded paths. This is out of spec, but clients generate this and most other
-   HTTP servers support it. We should, too. */
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1,
-        1,       1,       1,       1,       1,       1,       1,       1 };
+        1,       1,       1,       1,       1,       1,       1,       0, };
 enum state
@@ -236,7 +266,12 @@ enum state
   , s_req_schema
   , s_req_schema_slash
   , s_req_schema_slash_slash
+  , s_req_host_start
+  , s_req_host_v6_start
+  , s_req_host_v6
+  , s_req_host_v6_end
   , s_req_host
+  , s_req_port_start
   , s_req_port
   , s_req_path
   , s_req_query_string_start
@@ -258,6 +293,7 @@ enum state
   , s_header_field
   , s_header_value_start
   , s_header_value
+  , s_header_value_lws
   , s_header_almost_done
@@ -265,9 +301,11 @@ enum state
   , s_chunk_size
   , s_chunk_parameters
   , s_chunk_size_almost_done
   , s_headers_almost_done
-  /* Important: 's_headers_almost_done' must be the last 'header' state. All
+  , s_headers_done
+  /* Important: 's_headers_done' must be the last 'header' state. All
    * states beyond this must be 'body' states. It is used for overflow
    * checking. See the PARSING_HEADER() macro.
    */
@@ -278,10 +316,12 @@ enum state
   , s_body_identity
   , s_body_identity_eof
+  , s_message_done
   };
-#define PARSING_HEADER(state) (state <= s_headers_almost_done)
+#define PARSING_HEADER(state) (state <= s_headers_done)
 enum header_states
@@ -311,27 +351,39 @@ enum header_states
   };
-enum flags
-  { F_CHUNKED               = 1 << 0
-  , F_CONNECTION_KEEP_ALIVE = 1 << 1
-  , F_CONNECTION_CLOSE      = 1 << 2
-  , F_TRAILING              = 1 << 3
-  , F_UPGRADE               = 1 << 4
-  , F_SKIPBODY              = 1 << 5
-  };
+/* Macros for character classes; depends on strict-mode  */
+#define CR                  '\r'
+#define LF                  '\n'
+#define LOWER(c)            (unsigned char)(c | 0x20)
+#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
+#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
+#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
+#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
-#define CR '\r'
-#define LF '\n'
-#define LOWER(c) (unsigned char)(c | 0x20)
-#define TOKEN(c) tokens[(unsigned char)c]
+#if HTTP_PARSER_STRICT
+#define TOKEN(c)            (tokens[(unsigned char)c])
+#define IS_URL_CHAR(c)      (normal_url_char[(unsigned char) (c)])
+#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
+#else
+#define TOKEN(c)            ((c == ' ') ? ' ' : tokens[(unsigned char)c])
+#define IS_URL_CHAR(c)                                                         \
+  (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
+#define IS_HOST_CHAR(c)                                                        \
+  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
+#endif
 #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
 #if HTTP_PARSER_STRICT
-# define STRICT_CHECK(cond) if (cond) goto error
+# define STRICT_CHECK(cond)                                          \
+do {                                                                 \
+  if (cond) {                                                        \
+    SET_ERRNO(HPE_STRICT);                                           \
+    goto error;                                                      \
+  }                                                                  \
+} while (0)
 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
 #else
 # define STRICT_CHECK(cond)
@@ -339,24 +391,228 @@ enum flags
 #endif
+/* Map errno values to strings for human-readable output */
+#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
+static struct {
+  const char *name;
+  const char *description;
+} http_strerror_tab[] = {
+  HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
+};
+#undef HTTP_STRERROR_GEN
+int http_message_needs_eof(http_parser *parser);
+/* Our URL parser.
+ *
+ * This is designed to be shared by http_parser_execute() for URL validation,
+ * hence it has a state transition + byte-for-byte interface. In addition, it
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
+ * work of turning state transitions into URL components for its API.
+ *
+ * This function should only be invoked with non-space characters. It is
+ * assumed that the caller cares about (and can detect) the transition between
+ * URL and non-URL states by looking for these.
+ */
+static enum state
+parse_url_char(enum state s, const char ch)
+{
+  assert(!isspace(ch));
+  switch (s) {
+    case s_req_spaces_before_url:
+      /* Proxied requests are followed by scheme of an absolute URI (alpha).
+       * All methods except CONNECT are followed by '/' or '*'.
+       */
+      if (ch == '/' || ch == '*') {
+        return s_req_path;
+      }
+      if (IS_ALPHA(ch)) {
+        return s_req_schema;
+      }
+      break;
+    case s_req_schema:
+      if (IS_ALPHA(ch)) {
+        return s;
+      }
+      if (ch == ':') {
+        return s_req_schema_slash;
+      }
+      break;
+    case s_req_schema_slash:
+      if (ch == '/') {
+        return s_req_schema_slash_slash;
+      }
+      break;
+    case s_req_schema_slash_slash:
+      if (ch == '/') {
+        return s_req_host_start;
+      }
+      break;
+    case s_req_host_start:
+      if (ch == '[') {
+        return s_req_host_v6_start;
+      }
+      if (IS_HOST_CHAR(ch)) {
+        return s_req_host;
+      }
+      break;
+    case s_req_host:
+      if (IS_HOST_CHAR(ch)) {
+        return s_req_host;
+      }
+      /* FALLTHROUGH */
+    case s_req_host_v6_end:
+      switch (ch) {
+        case ':':
+          return s_req_port_start;
+        case '/':
+          return s_req_path;
+        case '?':
+          return s_req_query_string_start;
+      }
+      break;
+    case s_req_host_v6:
+      if (ch == ']') {
+        return s_req_host_v6_end;
+      }
+      /* FALLTHROUGH */
+    case s_req_host_v6_start:
+      if (IS_HEX(ch) || ch == ':') {
+        return s_req_host_v6;
+      }
+      break;
+    case s_req_port:
+      switch (ch) {
+        case '/':
+          return s_req_path;
+        case '?':
+          return s_req_query_string_start;
+      }
+      /* FALLTHROUGH */
+    case s_req_port_start:
+      if (IS_NUM(ch)) {
+        return s_req_port;
+      }
+      break;
+    case s_req_path:
+      if (IS_URL_CHAR(ch)) {
+        return s;
+      }
+      switch (ch) {
+        case '?':
+          return s_req_query_string_start;
+        case '#':
+          return s_req_fragment_start;
+      }
+      break;
+    case s_req_query_string_start:
+    case s_req_query_string:
+      if (IS_URL_CHAR(ch)) {
+        return s_req_query_string;
+      }
+      switch (ch) {
+        case '?':
+          /* allow extra '?' in query string */
+          return s_req_query_string;
+        case '#':
+          return s_req_fragment_start;
+      }
+      break;
+    case s_req_fragment_start:
+      if (IS_URL_CHAR(ch)) {
+        return s_req_fragment;
+      }
+      switch (ch) {
+        case '?':
+          return s_req_fragment;
+        case '#':
+          return s;
+      }
+      break;
+    case s_req_fragment:
+      if (IS_URL_CHAR(ch)) {
+        return s;
+      }
+      switch (ch) {
+        case '?':
+        case '#':
+          return s;
+      }
+      break;
+    default:
+      break;
+  }
+  /* We should never fall out of the switch above unless there's an error */
+  return s_dead;
+}
 size_t http_parser_execute (http_parser *parser,
                             const http_parser_settings *settings,
                             const char *data,
                             size_t len)
 {
   char c, ch;
-  const char *p = data, *pe;
-  int64_t to_read;
+  int8_t unhex_val;
+  const char *p = data;
+  const char *header_field_mark = 0;
+  const char *header_value_mark = 0;
+  const char *url_mark = 0;
+  const char *body_mark = 0;
-  enum state state = (enum state) parser->state;
-  enum header_states header_state = (enum header_states) parser->header_state;
-  uint64_t index = parser->index;
-  uint64_t nread = parser->nread;
+  /* We're in an error state. Don't bother doing anything. */
+  if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
+    return 0;
+  }
   if (len == 0) {
-    switch (state) {
+    switch (parser->state) {
       case s_body_identity_eof:
-        CALLBACK2(message_complete);
+        /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
+         * we got paused.
+         */
+        CALLBACK_NOTIFY_NOADVANCE(message_complete);
         return 0;
       case s_dead:
@@ -366,52 +622,59 @@ size_t http_parser_execute (http_parser *parser,
         return 0;
       default:
-        return 1; // error
+        SET_ERRNO(HPE_INVALID_EOF_STATE);
+        return 1;
     }
   }
-  /* technically we could combine all of these (except for url_mark) into one
-     variable, saving stack space, but it seems more clear to have them
-     separated. */
-  const char *header_field_mark = 0;
-  const char *header_value_mark = 0;
-  const char *fragment_mark = 0;
-  const char *query_string_mark = 0;
-  const char *path_mark = 0;
-  const char *url_mark = 0;
-  if (state == s_header_field)
+  if (parser->state == s_header_field)
     header_field_mark = data;
-  if (state == s_header_value)
+  if (parser->state == s_header_value)
     header_value_mark = data;
-  if (state == s_req_fragment)
-    fragment_mark = data;
-  if (state == s_req_query_string)
-    query_string_mark = data;
-  if (state == s_req_path)
-    path_mark = data;
-  if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
-      || state == s_req_schema_slash_slash || state == s_req_port
-      || state == s_req_query_string_start || state == s_req_query_string
-      || state == s_req_host
-      || state == s_req_fragment_start || state == s_req_fragment)
+  switch (parser->state) {
+  case s_req_path:
+  case s_req_schema:
+  case s_req_schema_slash:
+  case s_req_schema_slash_slash:
+  case s_req_host_start:
+  case s_req_host_v6_start:
+  case s_req_host_v6:
+  case s_req_host_v6_end:
+  case s_req_host:
+  case s_req_port_start:
+  case s_req_port:
+  case s_req_query_string_start:
+  case s_req_query_string:
+  case s_req_fragment_start:
+  case s_req_fragment:
     url_mark = data;
+    break;
+  }
-  for (p=data, pe=data+len; p != pe; p++) {
+  for (p=data; p != data + len; p++) {
     ch = *p;
-    if (PARSING_HEADER(state)) {
-      ++nread;
+    if (PARSING_HEADER(parser->state)) {
+      ++parser->nread;
       /* Buffer overflow attack */
-      if (nread > HTTP_MAX_HEADER_SIZE) goto error;
+      if (parser->nread > HTTP_MAX_HEADER_SIZE) {
+        SET_ERRNO(HPE_HEADER_OVERFLOW);
+        goto error;
+      }
     }
-    switch (state) {
+    reexecute_byte:
+    switch (parser->state) {
       case s_dead:
         /* this state is used after a 'Connection: close' message
          * the parser will error out if it reads another message
          */
+        if (ch == CR || ch == LF)
+          break;
+        SET_ERRNO(HPE_CLOSED_CONNECTION);
         goto error;
       case s_start_req_or_res:
@@ -419,42 +682,46 @@ size_t http_parser_execute (http_parser *parser,
         if (ch == CR || ch == LF)
           break;
         parser->flags = 0;
-        parser->content_length = -1;
+        parser->content_length = ULLONG_MAX;
-        CALLBACK2(message_begin);
+        if (ch == 'H') {
+          parser->state = s_res_or_resp_H;
-        if (ch == 'H')
-          state = s_res_or_resp_H;
-        else {
+          CALLBACK_NOTIFY(message_begin);
+        } else {
           parser->type = HTTP_REQUEST;
-          goto start_req_method_assign;
+          parser->state = s_start_req;
+          goto reexecute_byte;
         }
         break;
       }
       case s_res_or_resp_H:
         if (ch == 'T') {
           parser->type = HTTP_RESPONSE;
-          state = s_res_HT;
+          parser->state = s_res_HT;
         } else {
-          if (ch != 'E') goto error;
+          if (ch != 'E') {
+            SET_ERRNO(HPE_INVALID_CONSTANT);
+            goto error;
+          }
           parser->type = HTTP_REQUEST;
           parser->method = HTTP_HEAD;
-          index = 2;
-          state = s_req_method;
+          parser->index = 2;
+          parser->state = s_req_method;
         }
         break;
       case s_start_res:
       {
         parser->flags = 0;
-        parser->content_length = -1;
-        CALLBACK2(message_begin);
+        parser->content_length = ULLONG_MAX;
         switch (ch) {
           case 'H':
-            state = s_res_H;
+            parser->state = s_res_H;
             break;
           case CR:
@@ -462,105 +729,133 @@ size_t http_parser_execute (http_parser *parser,
             break;
           default:
+            SET_ERRNO(HPE_INVALID_CONSTANT);
             goto error;
         }
+        CALLBACK_NOTIFY(message_begin);
         break;
       }
       case s_res_H:
         STRICT_CHECK(ch != 'T');
-        state = s_res_HT;
+        parser->state = s_res_HT;
         break;
       case s_res_HT:
         STRICT_CHECK(ch != 'T');
-        state = s_res_HTT;
+        parser->state = s_res_HTT;
         break;
       case s_res_HTT:
         STRICT_CHECK(ch != 'P');
-        state = s_res_HTTP;
+        parser->state = s_res_HTTP;
         break;
       case s_res_HTTP:
         STRICT_CHECK(ch != '/');
-        state = s_res_first_http_major;
+        parser->state = s_res_first_http_major;
         break;
       case s_res_first_http_major:
-        if (ch < '1' || ch > '9') goto error;
+        if (ch < '0' || ch > '9') {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         parser->http_major = ch - '0';
-        state = s_res_http_major;
+        parser->state = s_res_http_major;
         break;
       /* major HTTP version or dot */
       case s_res_http_major:
       {
         if (ch == '.') {
-          state = s_res_first_http_minor;
+          parser->state = s_res_first_http_minor;
           break;
         }
-        if (ch < '0' || ch > '9') goto error;
+        if (!IS_NUM(ch)) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         parser->http_major *= 10;
         parser->http_major += ch - '0';
-        if (parser->http_major > 999) goto error;
+        if (parser->http_major > 999) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         break;
       }
       /* first digit of minor HTTP version */
       case s_res_first_http_minor:
-        if (ch < '0' || ch > '9') goto error;
+        if (!IS_NUM(ch)) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         parser->http_minor = ch - '0';
-        state = s_res_http_minor;
+        parser->state = s_res_http_minor;
         break;
       /* minor HTTP version or the space that precedes the status code */
       case s_res_http_minor:
       {
         if (ch == ' ') {
-          state = s_res_first_status_code;
+          parser->state = s_res_first_status_code;
           break;
         }
-        if (ch < '0' || ch > '9') goto error;
+        if (!IS_NUM(ch)) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         parser->http_minor *= 10;
         parser->http_minor += ch - '0';
-        if (parser->http_minor > 999) goto error;
+        if (parser->http_minor > 999) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         break;
       }
       case s_res_first_status_code:
       {
-        if (ch < '0' || ch > '9') {
+        if (!IS_NUM(ch)) {
           if (ch == ' ') {
             break;
           }
+          SET_ERRNO(HPE_INVALID_STATUS);
           goto error;
         }
         parser->status_code = ch - '0';
-        state = s_res_status_code;
+        parser->state = s_res_status_code;
         break;
       }
       case s_res_status_code:
       {
-        if (ch < '0' || ch > '9') {
+        if (!IS_NUM(ch)) {
           switch (ch) {
             case ' ':
-              state = s_res_status;
+              parser->state = s_res_status;
               break;
             case CR:
-              state = s_res_line_almost_done;
+              parser->state = s_res_line_almost_done;
               break;
             case LF:
-              state = s_header_field_start;
+              parser->state = s_header_field_start;
               break;
             default:
+              SET_ERRNO(HPE_INVALID_STATUS);
               goto error;
           }
           break;
@@ -569,7 +864,11 @@ size_t http_parser_execute (http_parser *parser,
         parser->status_code *= 10;
         parser->status_code += ch - '0';
-        if (parser->status_code > 999) goto error;
+        if (parser->status_code > 999) {
+          SET_ERRNO(HPE_INVALID_STATUS);
+          goto error;
+        }
         break;
       }
@@ -577,19 +876,19 @@ size_t http_parser_execute (http_parser *parser,
         /* the human readable status. e.g. "NOT FOUND"
          * we are not humans so just ignore this */
         if (ch == CR) {
-          state = s_res_line_almost_done;
+          parser->state = s_res_line_almost_done;
           break;
         }
         if (ch == LF) {
-          state = s_header_field_start;
+          parser->state = s_header_field_start;
           break;
         }
         break;
       case s_res_line_almost_done:
         STRICT_CHECK(ch != LF);
-        state = s_header_field_start;
+        parser->state = s_header_field_start;
         break;
       case s_start_req:
@@ -597,15 +896,15 @@ size_t http_parser_execute (http_parser *parser,
         if (ch == CR || ch == LF)
           break;
         parser->flags = 0;
-        parser->content_length = -1;
+        parser->content_length = ULLONG_MAX;
-        CALLBACK2(message_begin);
-        if (ch < 'A' || 'Z' < ch) goto error;
+        if (!IS_ALPHA(ch)) {
+          SET_ERRNO(HPE_INVALID_METHOD);
+          goto error;
+        }
-      start_req_method_assign:
         parser->method = (enum http_method) 0;
-        index = 1;
+        parser->index = 1;
         switch (ch) {
           case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
           case 'D': parser->method = HTTP_DELETE; break;
@@ -615,342 +914,157 @@ size_t http_parser_execute (http_parser *parser,
           case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
           case 'N': parser->method = HTTP_NOTIFY; break;
           case 'O': parser->method = HTTP_OPTIONS; break;
-          case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
+          case 'P': parser->method = HTTP_POST;
+            /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
+            break;
           case 'R': parser->method = HTTP_REPORT; break;
           case 'S': parser->method = HTTP_SUBSCRIBE; break;
           case 'T': parser->method = HTTP_TRACE; break;
           case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
-          default: goto error;
+          default:
+            SET_ERRNO(HPE_INVALID_METHOD);
+            goto error;
         }
-        state = s_req_method;
+        parser->state = s_req_method;
+        CALLBACK_NOTIFY(message_begin);
         break;
       }
       case s_req_method:
       {
-        if (ch == '\0')
+        const char *matcher;
+        if (ch == '\0') {
+          SET_ERRNO(HPE_INVALID_METHOD);
           goto error;
+        }
-        const char *matcher = method_strings[parser->method];
-        if (ch == ' ' && matcher[index] == '\0') {
-          state = s_req_spaces_before_url;
-        } else if (ch == matcher[index]) {
+        matcher = method_strings[parser->method];
+        if (ch == ' ' && matcher[parser->index] == '\0') {
+          parser->state = s_req_spaces_before_url;
+        } else if (ch == matcher[parser->index]) {
           ; /* nada */
         } else if (parser->method == HTTP_CONNECT) {
-          if (index == 1 && ch == 'H') {
+          if (parser->index == 1 && ch == 'H') {
             parser->method = HTTP_CHECKOUT;
-          } else if (index == 2  && ch == 'P') {
+          } else if (parser->index == 2  && ch == 'P') {
             parser->method = HTTP_COPY;
+          } else {
+            goto error;
           }
         } else if (parser->method == HTTP_MKCOL) {
-          if (index == 1 && ch == 'O') {
+          if (parser->index == 1 && ch == 'O') {
             parser->method = HTTP_MOVE;
-          } else if (index == 1 && ch == 'E') {
+          } else if (parser->index == 1 && ch == 'E') {
             parser->method = HTTP_MERGE;
-          } else if (index == 1 && ch == '-') {
+          } else if (parser->index == 1 && ch == '-') {
             parser->method = HTTP_MSEARCH;
-          } else if (index == 2 && ch == 'A') {
+          } else if (parser->index == 2 && ch == 'A') {
             parser->method = HTTP_MKACTIVITY;
+          } else {
+            goto error;
           }
-        } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
-          parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
-        } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
-          parser->method = HTTP_PUT;
-        } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
-          parser->method = HTTP_UNSUBSCRIBE;
-        } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
+        } else if (parser->index == 1 && parser->method == HTTP_POST) {
+          if (ch == 'R') {
+            parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
+          } else if (ch == 'U') {
+            parser->method = HTTP_PUT; /* or HTTP_PURGE */
+          } else if (ch == 'A') {
+            parser->method = HTTP_PATCH;
+          } else {
+            goto error;
+          }
+        } else if (parser->index == 2) {
+          if (parser->method == HTTP_PUT) {
+            if (ch == 'R') parser->method = HTTP_PURGE;
+          } else if (parser->method == HTTP_UNLOCK) {
+            if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
+          }
+        } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
           parser->method = HTTP_PROPPATCH;
         } else {
+          SET_ERRNO(HPE_INVALID_METHOD);
           goto error;
         }
-        ++index;
+        ++parser->index;
         break;
       }
       case s_req_spaces_before_url:
       {
         if (ch == ' ') break;
-        if (ch == '/' || ch == '*') {
-          MARK(url);
-          MARK(path);
-          state = s_req_path;
-          break;
+        MARK(url);
+        if (parser->method == HTTP_CONNECT) {
+          parser->state = s_req_host_start;
         }
-        c = LOWER(ch);
-        if (c >= 'a' && c <= 'z') {
-          MARK(url);
-          state = s_req_schema;
-          break;
+        parser->state = parse_url_char((enum state)parser->state, ch);
+        if (parser->state == s_dead) {
+          SET_ERRNO(HPE_INVALID_URL);
+          goto error;
         }
-        goto error;
+        break;
       }
       case s_req_schema:
-      {
-        c = LOWER(ch);
-        if (c >= 'a' && c <= 'z') break;
-        if (ch == ':') {
-          state = s_req_schema_slash;
-          break;
-        } else if (ch == '.') {
-          state = s_req_host;
-          break;
-        } else if ('0' <= ch && ch <= '9') {
-          state = s_req_host;
-          break;
-        }
-        goto error;
-      }
       case s_req_schema_slash:
-        STRICT_CHECK(ch != '/');
-        state = s_req_schema_slash_slash;
-        break;
       case s_req_schema_slash_slash:
-        STRICT_CHECK(ch != '/');
-        state = s_req_host;
-        break;
-      case s_req_host:
-      {
-        c = LOWER(ch);
-        if (c >= 'a' && c <= 'z') break;
-        if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
-        switch (ch) {
-          case ':':
-            state = s_req_port;
-            break;
-          case '/':
-            MARK(path);
-            state = s_req_path;
-            break;
-          case ' ':
-            /* The request line looks like:
-             *   "GET http://foo.bar.com HTTP/1.1"
-             * That is, there is no path.
-             */
-            CALLBACK(url);
-            state = s_req_http_start;
-            break;
-          case '?':
-            state = s_req_query_string_start;
-            break;
-          default:
-            goto error;
-        }
-        break;
-      }
-      case s_req_port:
+      case s_req_host_start:
+      case s_req_host_v6_start:
+      case s_req_host_v6:
+      case s_req_port_start:
       {
-        if (ch >= '0' && ch <= '9') break;
         switch (ch) {
-          case '/':
-            MARK(path);
-            state = s_req_path;
-            break;
+          /* No whitespace allowed here */
           case ' ':
-            /* The request line looks like:
-             *   "GET http://foo.bar.com:1234 HTTP/1.1"
-             * That is, there is no path.
-             */
-            CALLBACK(url);
-            state = s_req_http_start;
-            break;
-          case '?':
-            state = s_req_query_string_start;
-            break;
-          default:
-            goto error;
-        }
-        break;
-      }
-      case s_req_path:
-      {
-        if (normal_url_char[(unsigned char)ch]) break;
-        switch (ch) {
-          case ' ':
-            CALLBACK(url);
-            CALLBACK(path);
-            state = s_req_http_start;
-            break;
           case CR:
-            CALLBACK(url);
-            CALLBACK(path);
-            parser->http_major = 0;
-            parser->http_minor = 9;
-            state = s_req_line_almost_done;
-            break;
           case LF:
-            CALLBACK(url);
-            CALLBACK(path);
-            parser->http_major = 0;
-            parser->http_minor = 9;
-            state = s_header_field_start;
-            break;
-          case '?':
-            CALLBACK(path);
-            state = s_req_query_string_start;
-            break;
-          case '#':
-            CALLBACK(path);
-            state = s_req_fragment_start;
-            break;
-          default:
+            SET_ERRNO(HPE_INVALID_URL);
             goto error;
-        }
-        break;
-      }
-      case s_req_query_string_start:
-      {
-        if (normal_url_char[(unsigned char)ch]) {
-          MARK(query_string);
-          state = s_req_query_string;
-          break;
-        }
-        switch (ch) {
-          case '?':
-            break; /* XXX ignore extra '?' ... is this right? */
-          case ' ':
-            CALLBACK(url);
-            state = s_req_http_start;
-            break;
-          case CR:
-            CALLBACK(url);
-            parser->http_major = 0;
-            parser->http_minor = 9;
-            state = s_req_line_almost_done;
-            break;
-          case LF:
-            CALLBACK(url);
-            parser->http_major = 0;
-            parser->http_minor = 9;
-            state = s_header_field_start;
-            break;
-          case '#':
-            state = s_req_fragment_start;
-            break;
           default:
-            goto error;
+            parser->state = parse_url_char((enum state)parser->state, ch);
+            if (parser->state == s_dead) {
+              SET_ERRNO(HPE_INVALID_URL);
+              goto error;
+            }
         }
-        break;
-      }
-      case s_req_query_string:
-      {
-        if (normal_url_char[(unsigned char)ch]) break;
-        switch (ch) {
-          case '?':
-            /* allow extra '?' in query string */
-            break;
-          case ' ':
-            CALLBACK(url);
-            CALLBACK(query_string);
-            state = s_req_http_start;
-            break;
-          case CR:
-            CALLBACK(url);
-            CALLBACK(query_string);
-            parser->http_major = 0;
-            parser->http_minor = 9;
-            state = s_req_line_almost_done;
-            break;
-          case LF:
-            CALLBACK(url);
-            CALLBACK(query_string);
-            parser->http_major = 0;
-            parser->http_minor = 9;
-            state = s_header_field_start;
-            break;
-          case '#':
-            CALLBACK(query_string);
-            state = s_req_fragment_start;
-            break;
-          default:
-            goto error;
-        }
         break;
       }
+      case s_req_host:
+      case s_req_host_v6_end:
+      case s_req_port:
+      case s_req_path:
+      case s_req_query_string_start:
+      case s_req_query_string:
       case s_req_fragment_start:
-      {
-        if (normal_url_char[(unsigned char)ch]) {
-          MARK(fragment);
-          state = s_req_fragment;
-          break;
-        }
-        switch (ch) {
-          case ' ':
-            CALLBACK(url);
-            state = s_req_http_start;
-            break;
-          case CR:
-            CALLBACK(url);
-            parser->http_major = 0;
-            parser->http_minor = 9;
-            state = s_req_line_almost_done;
-            break;
-          case LF:
-            CALLBACK(url);
-            parser->http_major = 0;
-            parser->http_minor = 9;
-            state = s_header_field_start;
-            break;
-          case '?':
-            MARK(fragment);
-            state = s_req_fragment;
-            break;
-          case '#':
-            break;
-          default:
-            goto error;
-        }
-        break;
-      }
       case s_req_fragment:
       {
-        if (normal_url_char[(unsigned char)ch]) break;
         switch (ch) {
           case ' ':
-            CALLBACK(url);
-            CALLBACK(fragment);
-            state = s_req_http_start;
+            parser->state = s_req_http_start;
+            CALLBACK_DATA(url);
             break;
           case CR:
-            CALLBACK(url);
-            CALLBACK(fragment);
-            parser->http_major = 0;
-            parser->http_minor = 9;
-            state = s_req_line_almost_done;
-            break;
           case LF:
-            CALLBACK(url);
-            CALLBACK(fragment);
             parser->http_major = 0;
             parser->http_minor = 9;
-            state = s_header_field_start;
-            break;
-          case '?':
-          case '#':
+            parser->state = (ch == CR) ?
+              s_req_line_almost_done :
+              s_header_field_start;
+            CALLBACK_DATA(url);
             break;
           default:
-            goto error;
+            parser->state = parse_url_char((enum state)parser->state, ch);
+            if (parser->state == s_dead) {
+              SET_ERRNO(HPE_INVALID_URL);
+              goto error;
+            }
         }
         break;
       }
@@ -958,140 +1072,170 @@ size_t http_parser_execute (http_parser *parser,
       case s_req_http_start:
         switch (ch) {
           case 'H':
-            state = s_req_http_H;
+            parser->state = s_req_http_H;
             break;
           case ' ':
             break;
           default:
+            SET_ERRNO(HPE_INVALID_CONSTANT);
             goto error;
         }
         break;
       case s_req_http_H:
         STRICT_CHECK(ch != 'T');
-        state = s_req_http_HT;
+        parser->state = s_req_http_HT;
         break;
       case s_req_http_HT:
         STRICT_CHECK(ch != 'T');
-        state = s_req_http_HTT;
+        parser->state = s_req_http_HTT;
         break;
       case s_req_http_HTT:
         STRICT_CHECK(ch != 'P');
-        state = s_req_http_HTTP;
+        parser->state = s_req_http_HTTP;
         break;
       case s_req_http_HTTP:
         STRICT_CHECK(ch != '/');
-        state = s_req_first_http_major;
+        parser->state = s_req_first_http_major;
         break;
       /* first digit of major HTTP version */
       case s_req_first_http_major:
-        if (ch < '1' || ch > '9') goto error;
+        if (ch < '1' || ch > '9') {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         parser->http_major = ch - '0';
-        state = s_req_http_major;
+        parser->state = s_req_http_major;
         break;
       /* major HTTP version or dot */
       case s_req_http_major:
       {
         if (ch == '.') {
-          state = s_req_first_http_minor;
+          parser->state = s_req_first_http_minor;
           break;
         }
-        if (ch < '0' || ch > '9') goto error;
+        if (!IS_NUM(ch)) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         parser->http_major *= 10;
         parser->http_major += ch - '0';
-        if (parser->http_major > 999) goto error;
+        if (parser->http_major > 999) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         break;
       }
       /* first digit of minor HTTP version */
       case s_req_first_http_minor:
-        if (ch < '0' || ch > '9') goto error;
+        if (!IS_NUM(ch)) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         parser->http_minor = ch - '0';
-        state = s_req_http_minor;
+        parser->state = s_req_http_minor;
         break;
       /* minor HTTP version or end of request line */
       case s_req_http_minor:
       {
         if (ch == CR) {
-          state = s_req_line_almost_done;
+          parser->state = s_req_line_almost_done;
           break;
         }
         if (ch == LF) {
-          state = s_header_field_start;
+          parser->state = s_header_field_start;
           break;
         }
         /* XXX allow spaces after digit? */
-        if (ch < '0' || ch > '9') goto error;
+        if (!IS_NUM(ch)) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         parser->http_minor *= 10;
         parser->http_minor += ch - '0';
-        if (parser->http_minor > 999) goto error;
+        if (parser->http_minor > 999) {
+          SET_ERRNO(HPE_INVALID_VERSION);
+          goto error;
+        }
         break;
       }
       /* end of request line */
       case s_req_line_almost_done:
       {
-        if (ch != LF) goto error;
-        state = s_header_field_start;
+        if (ch != LF) {
+          SET_ERRNO(HPE_LF_EXPECTED);
+          goto error;
+        }
+        parser->state = s_header_field_start;
         break;
       }
       case s_header_field_start:
       {
         if (ch == CR) {
-          state = s_headers_almost_done;
+          parser->state = s_headers_almost_done;
           break;
         }
         if (ch == LF) {
           /* they might be sending just \n instead of \r\n, so this would be
            * the second \n, which denotes the end of the headers */
-          state = s_headers_almost_done;
-          goto headers_almost_done;
+          parser->state = s_headers_almost_done;
+          goto reexecute_byte;
         }
         c = TOKEN(ch);
-        if (!c) goto error;
+        if (!c) {
+          SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
+          goto error;
+        }
         MARK(header_field);
-        index = 0;
-        state = s_header_field;
+        parser->index = 0;
+        parser->state = s_header_field;
         switch (c) {
           case 'c':
-            header_state = h_C;
+            parser->header_state = h_C;
             break;
           case 'p':
-            header_state = h_matching_proxy_connection;
+            parser->header_state = h_matching_proxy_connection;
             break;
           case 't':
-            header_state = h_matching_transfer_encoding;
+            parser->header_state = h_matching_transfer_encoding;
             break;
           case 'u':
-            header_state = h_matching_upgrade;
+            parser->header_state = h_matching_upgrade;
             break;
           default:
-            header_state = h_general;
+            parser->header_state = h_general;
             break;
         }
         break;
@@ -1102,31 +1246,31 @@ size_t http_parser_execute (http_parser *parser,
         c = TOKEN(ch);
         if (c) {
-          switch (header_state) {
+          switch (parser->header_state) {
             case h_general:
               break;
             case h_C:
-              index++;
-              header_state = (c == 'o' ? h_CO : h_general);
+              parser->index++;
+              parser->header_state = (c == 'o' ? h_CO : h_general);
               break;
             case h_CO:
-              index++;
-              header_state = (c == 'n' ? h_CON : h_general);
+              parser->index++;
+              parser->header_state = (c == 'n' ? h_CON : h_general);
               break;
             case h_CON:
-              index++;
+              parser->index++;
               switch (c) {
                 case 'n':
-                  header_state = h_matching_connection;
+                  parser->header_state = h_matching_connection;
                   break;
                 case 't':
-                  header_state = h_matching_content_length;
+                  parser->header_state = h_matching_content_length;
                   break;
                 default:
-                  header_state = h_general;
+                  parser->header_state = h_general;
                   break;
               }
               break;
@@ -1134,60 +1278,60 @@ size_t http_parser_execute (http_parser *parser,
             /* connection */
             case h_matching_connection:
-              index++;
-              if (index > sizeof(CONNECTION)-1
-                  || c != CONNECTION[index]) {
-                header_state = h_general;
-              } else if (index == sizeof(CONNECTION)-2) {
-                header_state = h_connection;
+              parser->index++;
+              if (parser->index > sizeof(CONNECTION)-1
+                  || c != CONNECTION[parser->index]) {
+                parser->header_state = h_general;
+              } else if (parser->index == sizeof(CONNECTION)-2) {
+                parser->header_state = h_connection;
               }
               break;
             /* proxy-connection */
             case h_matching_proxy_connection:
-              index++;
-              if (index > sizeof(PROXY_CONNECTION)-1
-                  || c != PROXY_CONNECTION[index]) {
-                header_state = h_general;
-              } else if (index == sizeof(PROXY_CONNECTION)-2) {
-                header_state = h_connection;
+              parser->index++;
+              if (parser->index > sizeof(PROXY_CONNECTION)-1
+                  || c != PROXY_CONNECTION[parser->index]) {
+                parser->header_state = h_general;
+              } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
+                parser->header_state = h_connection;
               }
               break;
             /* content-length */
             case h_matching_content_length:
-              index++;
-              if (index > sizeof(CONTENT_LENGTH)-1
-                  || c != CONTENT_LENGTH[index]) {
-                header_state = h_general;
-              } else if (index == sizeof(CONTENT_LENGTH)-2) {
-                header_state = h_content_length;
+              parser->index++;
+              if (parser->index > sizeof(CONTENT_LENGTH)-1
+                  || c != CONTENT_LENGTH[parser->index]) {
+                parser->header_state = h_general;
+              } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
+                parser->header_state = h_content_length;
               }
               break;
             /* transfer-encoding */
             case h_matching_transfer_encoding:
-              index++;
-              if (index > sizeof(TRANSFER_ENCODING)-1
-                  || c != TRANSFER_ENCODING[index]) {
-                header_state = h_general;
-              } else if (index == sizeof(TRANSFER_ENCODING)-2) {
-                header_state = h_transfer_encoding;
+              parser->index++;
+              if (parser->index > sizeof(TRANSFER_ENCODING)-1
+                  || c != TRANSFER_ENCODING[parser->index]) {
+                parser->header_state = h_general;
+              } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
+                parser->header_state = h_transfer_encoding;
               }
               break;
             /* upgrade */
             case h_matching_upgrade:
-              index++;
-              if (index > sizeof(UPGRADE)-1
-                  || c != UPGRADE[index]) {
-                header_state = h_general;
-              } else if (index == sizeof(UPGRADE)-2) {
-                header_state = h_upgrade;
+              parser->index++;
+              if (parser->index > sizeof(UPGRADE)-1
+                  || c != UPGRADE[parser->index]) {
+                parser->header_state = h_general;
+              } else if (parser->index == sizeof(UPGRADE)-2) {
+                parser->header_state = h_upgrade;
               }
               break;
@@ -1195,7 +1339,7 @@ size_t http_parser_execute (http_parser *parser,
             case h_content_length:
             case h_transfer_encoding:
             case h_upgrade:
-              if (ch != ' ') header_state = h_general;
+              if (ch != ' ') parser->header_state = h_general;
               break;
             default:
@@ -1206,84 +1350,89 @@ size_t http_parser_execute (http_parser *parser,
         }
         if (ch == ':') {
-          CALLBACK(header_field);
-          state = s_header_value_start;
+          parser->state = s_header_value_start;
+          CALLBACK_DATA(header_field);
           break;
         }
         if (ch == CR) {
-          state = s_header_almost_done;
-          CALLBACK(header_field);
+          parser->state = s_header_almost_done;
+          CALLBACK_DATA(header_field);
           break;
         }
         if (ch == LF) {
-          CALLBACK(header_field);
-          state = s_header_field_start;
+          parser->state = s_header_field_start;
+          CALLBACK_DATA(header_field);
           break;
         }
+        SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
         goto error;
       }
       case s_header_value_start:
       {
-        if (ch == ' ') break;
+        if (ch == ' ' || ch == '\t') break;
         MARK(header_value);
-        state = s_header_value;
-        index = 0;
-        c = LOWER(ch);
+        parser->state = s_header_value;
+        parser->index = 0;
         if (ch == CR) {
-          CALLBACK(header_value);
-          header_state = h_general;
-          state = s_header_almost_done;
+          parser->header_state = h_general;
+          parser->state = s_header_almost_done;
+          CALLBACK_DATA(header_value);
           break;
         }
         if (ch == LF) {
-          CALLBACK(header_value);
-          state = s_header_field_start;
+          parser->state = s_header_field_start;
+          CALLBACK_DATA(header_value);
           break;
         }
-        switch (header_state) {
+        c = LOWER(ch);
+        switch (parser->header_state) {
           case h_upgrade:
             parser->flags |= F_UPGRADE;
-            header_state = h_general;
+            parser->header_state = h_general;
             break;
           case h_transfer_encoding:
             /* looking for 'Transfer-Encoding: chunked' */
             if ('c' == c) {
-              header_state = h_matching_transfer_encoding_chunked;
+              parser->header_state = h_matching_transfer_encoding_chunked;
             } else {
-              header_state = h_general;
+              parser->header_state = h_general;
             }
             break;
           case h_content_length:
-            if (ch < '0' || ch > '9') goto error;
+            if (!IS_NUM(ch)) {
+              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
+              goto error;
+            }
             parser->content_length = ch - '0';
             break;
           case h_connection:
             /* looking for 'Connection: keep-alive' */
             if (c == 'k') {
-              header_state = h_matching_connection_keep_alive;
+              parser->header_state = h_matching_connection_keep_alive;
             /* looking for 'Connection: close' */
             } else if (c == 'c') {
-              header_state = h_matching_connection_close;
+              parser->header_state = h_matching_connection_close;
             } else {
-              header_state = h_general;
+              parser->header_state = h_general;
             }
             break;
           default:
-            header_state = h_general;
+            parser->header_state = h_general;
             break;
         }
         break;
@@ -1291,20 +1440,22 @@ size_t http_parser_execute (http_parser *parser,
       case s_header_value:
       {
-        c = LOWER(ch);
         if (ch == CR) {
-          CALLBACK(header_value);
-          state = s_header_almost_done;
+          parser->state = s_header_almost_done;
+          CALLBACK_DATA(header_value);
           break;
         }
         if (ch == LF) {
-          CALLBACK(header_value);
-          goto header_almost_done;
+          parser->state = s_header_almost_done;
+          CALLBACK_DATA_NOADVANCE(header_value);
+          goto reexecute_byte;
         }
-        switch (header_state) {
+        c = LOWER(ch);
+        switch (parser->header_state) {
           case h_general:
             break;
@@ -1314,66 +1465,83 @@ size_t http_parser_execute (http_parser *parser,
             break;
           case h_content_length:
+          {
+            uint64_t t;
             if (ch == ' ') break;
-            if (ch < '0' || ch > '9') goto error;
-            parser->content_length *= 10;
-            parser->content_length += ch - '0';
+            if (!IS_NUM(ch)) {
+              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
+              goto error;
+            }
+            t = parser->content_length;
+            t *= 10;
+            t += ch - '0';
+            /* Overflow? */
+            if (t < parser->content_length || t == ULLONG_MAX) {
+              SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
+              goto error;
+            }
+            parser->content_length = t;
             break;
+          }
           /* Transfer-Encoding: chunked */
           case h_matching_transfer_encoding_chunked:
-            index++;
-            if (index > sizeof(CHUNKED)-1
-                || c != CHUNKED[index]) {
-              header_state = h_general;
-            } else if (index == sizeof(CHUNKED)-2) {
-              header_state = h_transfer_encoding_chunked;
+            parser->index++;
+            if (parser->index > sizeof(CHUNKED)-1
+                || c != CHUNKED[parser->index]) {
+              parser->header_state = h_general;
+            } else if (parser->index == sizeof(CHUNKED)-2) {
+              parser->header_state = h_transfer_encoding_chunked;
             }
             break;
           /* looking for 'Connection: keep-alive' */
           case h_matching_connection_keep_alive:
-            index++;
-            if (index > sizeof(KEEP_ALIVE)-1
-                || c != KEEP_ALIVE[index]) {
-              header_state = h_general;
-            } else if (index == sizeof(KEEP_ALIVE)-2) {
-              header_state = h_connection_keep_alive;
+            parser->index++;
+            if (parser->index > sizeof(KEEP_ALIVE)-1
+                || c != KEEP_ALIVE[parser->index]) {
+              parser->header_state = h_general;
+            } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
+              parser->header_state = h_connection_keep_alive;
             }
             break;
           /* looking for 'Connection: close' */
           case h_matching_connection_close:
-            index++;
-            if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
-              header_state = h_general;
-            } else if (index == sizeof(CLOSE)-2) {
-              header_state = h_connection_close;
+            parser->index++;
+            if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
+              parser->header_state = h_general;
+            } else if (parser->index == sizeof(CLOSE)-2) {
+              parser->header_state = h_connection_close;
             }
             break;
           case h_transfer_encoding_chunked:
           case h_connection_keep_alive:
           case h_connection_close:
-            if (ch != ' ') header_state = h_general;
+            if (ch != ' ') parser->header_state = h_general;
             break;
           default:
-            state = s_header_value;
-            header_state = h_general;
+            parser->state = s_header_value;
+            parser->header_state = h_general;
             break;
         }
         break;
       }
       case s_header_almost_done:
-      header_almost_done:
       {
         STRICT_CHECK(ch != LF);
-        state = s_header_field_start;
+        parser->state = s_header_value_lws;
-        switch (header_state) {
+        switch (parser->header_state) {
           case h_connection_keep_alive:
             parser->flags |= F_CONNECTION_KEEP_ALIVE;
             break;
@@ -1386,32 +1554,47 @@ size_t http_parser_execute (http_parser *parser,
           default:
             break;
         }
+        break;
+      }
+      case s_header_value_lws:
+      {
+        if (ch == ' ' || ch == '\t')
+          parser->state = s_header_value_start;
+        else
+        {
+          parser->state = s_header_field_start;
+          goto reexecute_byte;
+        }
         break;
       }
       case s_headers_almost_done:
-      headers_almost_done:
       {
         STRICT_CHECK(ch != LF);
         if (parser->flags & F_TRAILING) {
           /* End of a chunked request */
-          CALLBACK2(message_complete);
-          state = NEW_MESSAGE();
+          parser->state = NEW_MESSAGE();
+          CALLBACK_NOTIFY(message_complete);
           break;
         }
-        nread = 0;
+        parser->state = s_headers_done;
-        if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
-          parser->upgrade = 1;
-        }
+        /* Set this here so that on_headers_complete() callbacks can see it */
+        parser->upgrade =
+          (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
         /* Here we call the headers_complete callback. This is somewhat
          * different than other callbacks because if the user returns 1, we
          * will interpret that as saying that this message has no body. This
          * is needed for the annoying case of receiving a response to a HEAD
          * request.
+         *
+         * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
+         * we have to simulate it by handling a change in errno below.
          */
         if (settings->on_headers_complete) {
           switch (settings->on_headers_complete(parser)) {
@@ -1423,39 +1606,54 @@ size_t http_parser_execute (http_parser *parser,
               break;
             default:
-              parser->state = state;
+              SET_ERRNO(HPE_CB_headers_complete);
               return p - data; /* Error */
           }
         }
+        if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
+          return p - data;
+        }
+        goto reexecute_byte;
+      }
+      case s_headers_done:
+      {
+        STRICT_CHECK(ch != LF);
+        parser->nread = 0;
         /* Exit, the rest of the connect is in a different protocol. */
         if (parser->upgrade) {
-          CALLBACK2(message_complete);
-          return (p - data);
+          parser->state = NEW_MESSAGE();
+          CALLBACK_NOTIFY(message_complete);
+          return (p - data) + 1;
         }
         if (parser->flags & F_SKIPBODY) {
-          CALLBACK2(message_complete);
-          state = NEW_MESSAGE();
+          parser->state = NEW_MESSAGE();
+          CALLBACK_NOTIFY(message_complete);
         } else if (parser->flags & F_CHUNKED) {
           /* chunked encoding - ignore Content-Length header */
-          state = s_chunk_size_start;
+          parser->state = s_chunk_size_start;
         } else {
           if (parser->content_length == 0) {
             /* Content-Length header given but zero: Content-Length: 0\r\n */
-            CALLBACK2(message_complete);
-            state = NEW_MESSAGE();
-          } else if (parser->content_length > 0) {
+            parser->state = NEW_MESSAGE();
+            CALLBACK_NOTIFY(message_complete);
+          } else if (parser->content_length != ULLONG_MAX) {
             /* Content-Length header given and non-zero */
-            state = s_body_identity;
+            parser->state = s_body_identity;
           } else {
-            if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
+            if (parser->type == HTTP_REQUEST ||
+                !http_message_needs_eof(parser)) {
               /* Assume content-length 0 - read the next */
-              CALLBACK2(message_complete);
-              state = NEW_MESSAGE();
+              parser->state = NEW_MESSAGE();
+              CALLBACK_NOTIFY(message_complete);
             } else {
               /* Read body until EOF */
-              state = s_body_identity_eof;
+              parser->state = s_body_identity_eof;
             }
           }
         }
@@ -1464,60 +1662,103 @@ size_t http_parser_execute (http_parser *parser,
       }
       case s_body_identity:
-        to_read = MIN(pe - p, (int64_t)parser->content_length);
-        if (to_read > 0) {
-          if (settings->on_body) settings->on_body(parser, p, to_read);
-          p += to_read - 1;
-          parser->content_length -= to_read;
-          if (parser->content_length == 0) {
-            CALLBACK2(message_complete);
-            state = NEW_MESSAGE();
-          }
+      {
+        uint64_t to_read = MIN(parser->content_length,
+                               (uint64_t) ((data + len) - p));
+        assert(parser->content_length != 0
+            && parser->content_length != ULLONG_MAX);
+        /* The difference between advancing content_length and p is because
+         * the latter will automatically advance on the next loop iteration.
+         * Further, if content_length ends up at 0, we want to see the last
+         * byte again for our message complete callback.
+         */
+        MARK(body);
+        parser->content_length -= to_read;
+        p += to_read - 1;
+        if (parser->content_length == 0) {
+          parser->state = s_message_done;
+          /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
+           *
+           * The alternative to doing this is to wait for the next byte to
+           * trigger the data callback, just as in every other case. The
+           * problem with this is that this makes it difficult for the test
+           * harness to distinguish between complete-on-EOF and
+           * complete-on-length. It's not clear that this distinction is
+           * important for applications, but let's keep it for now.
+           */
+          CALLBACK_DATA_(body, p - body_mark + 1, p - data);
+          goto reexecute_byte;
         }
         break;
+      }
       /* read until EOF */
       case s_body_identity_eof:
-        to_read = pe - p;
-        if (to_read > 0) {
-          if (settings->on_body) settings->on_body(parser, p, to_read);
-          p += to_read - 1;
-        }
+        MARK(body);
+        p = data + len - 1;
+        break;
+      case s_message_done:
+        parser->state = NEW_MESSAGE();
+        CALLBACK_NOTIFY(message_complete);
         break;
       case s_chunk_size_start:
       {
-        assert(nread == 1);
+        assert(parser->nread == 1);
         assert(parser->flags & F_CHUNKED);
-        c = unhex[(unsigned char)ch];
-        if (c == -1) goto error;
-        parser->content_length = c;
-        state = s_chunk_size;
+        unhex_val = unhex[(unsigned char)ch];
+        if (unhex_val == -1) {
+          SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
+          goto error;
+        }
+        parser->content_length = unhex_val;
+        parser->state = s_chunk_size;
         break;
       }
       case s_chunk_size:
       {
+        uint64_t t;
         assert(parser->flags & F_CHUNKED);
         if (ch == CR) {
-          state = s_chunk_size_almost_done;
+          parser->state = s_chunk_size_almost_done;
           break;
         }
-        c = unhex[(unsigned char)ch];
+        unhex_val = unhex[(unsigned char)ch];
-        if (c == -1) {
+        if (unhex_val == -1) {
           if (ch == ';' || ch == ' ') {
-            state = s_chunk_parameters;
+            parser->state = s_chunk_parameters;
             break;
           }
+          SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
+          goto error;
+        }
+        t = parser->content_length;
+        t *= 16;
+        t += unhex_val;
+        /* Overflow? */
+        if (t < parser->content_length || t == ULLONG_MAX) {
+          SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
           goto error;
         }
-        parser->content_length *= 16;
-        parser->content_length += c;
+        parser->content_length = t;
         break;
       }
@@ -1526,7 +1767,7 @@ size_t http_parser_execute (http_parser *parser,
         assert(parser->flags & F_CHUNKED);
         /* just ignore this shit. TODO check for overflow */
         if (ch == CR) {
-          state = s_chunk_size_almost_done;
+          parser->state = s_chunk_size_almost_done;
           break;
         }
         break;
@@ -1537,74 +1778,117 @@ size_t http_parser_execute (http_parser *parser,
         assert(parser->flags & F_CHUNKED);
         STRICT_CHECK(ch != LF);
-        nread = 0;
+        parser->nread = 0;
         if (parser->content_length == 0) {
           parser->flags |= F_TRAILING;
-          state = s_header_field_start;
+          parser->state = s_header_field_start;
         } else {
-          state = s_chunk_data;
+          parser->state = s_chunk_data;
         }
         break;
       }
       case s_chunk_data:
       {
-        assert(parser->flags & F_CHUNKED);
+        uint64_t to_read = MIN(parser->content_length,
+                               (uint64_t) ((data + len) - p));
-        to_read = MIN(pe - p, (int64_t)(parser->content_length));
+        assert(parser->flags & F_CHUNKED);
+        assert(parser->content_length != 0
+            && parser->content_length != ULLONG_MAX);
-        if (to_read > 0) {
-          if (settings->on_body) settings->on_body(parser, p, to_read);
-          p += to_read - 1;
-        }
+        /* See the explanation in s_body_identity for why the content
+         * length and data pointers are managed this way.
+         */
+        MARK(body);
+        parser->content_length -= to_read;
+        p += to_read - 1;
-        if (to_read == parser->content_length) {
-          state = s_chunk_data_almost_done;
+        if (parser->content_length == 0) {
+          parser->state = s_chunk_data_almost_done;
         }
-        parser->content_length -= to_read;
         break;
       }
       case s_chunk_data_almost_done:
         assert(parser->flags & F_CHUNKED);
+        assert(parser->content_length == 0);
         STRICT_CHECK(ch != CR);
-        state = s_chunk_data_done;
+        parser->state = s_chunk_data_done;
+        CALLBACK_DATA(body);
         break;
       case s_chunk_data_done:
         assert(parser->flags & F_CHUNKED);
         STRICT_CHECK(ch != LF);
-        state = s_chunk_size_start;
+        parser->nread = 0;
+        parser->state = s_chunk_size_start;
         break;
       default:
         assert(0 && "unhandled state");
+        SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
         goto error;
     }
   }
-  CALLBACK_NOCLEAR(header_field);
-  CALLBACK_NOCLEAR(header_value);
-  CALLBACK_NOCLEAR(fragment);
-  CALLBACK_NOCLEAR(query_string);
-  CALLBACK_NOCLEAR(path);
-  CALLBACK_NOCLEAR(url);
+  /* Run callbacks for any marks that we have leftover after we ran out of
+   * bytes. There should be at most one of these set, so it's OK to invoke
+   * them in series (unset marks will not result in callbacks).
+   *
+   * We use the NOADVANCE() variety of callbacks here because 'p' has already
+   * overflowed 'data' and this allows us to correct for the off-by-one that
+   * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
+   * value that's in-bounds).
+   */
-  parser->state = state;
-  parser->header_state = header_state;
-  parser->index = index;
-  parser->nread = nread;
+  assert(((header_field_mark ? 1 : 0) +
+          (header_value_mark ? 1 : 0) +
+          (url_mark ? 1 : 0)  +
+          (body_mark ? 1 : 0)) <= 1);
+  CALLBACK_DATA_NOADVANCE(header_field);
+  CALLBACK_DATA_NOADVANCE(header_value);
+  CALLBACK_DATA_NOADVANCE(url);
+  CALLBACK_DATA_NOADVANCE(body);
   return len;
 error:
-  parser->state = s_dead;
+  if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
+    SET_ERRNO(HPE_UNKNOWN);
+  }
   return (p - data);
 }
+/* Does the parser need to see an EOF to find the end of the message?
+ *
+ * Returns 0 when the message end can be found without EOF: the parser is
+ * handling a request, the response status is 1xx/204/304, F_SKIPBODY is
+ * set, F_CHUNKED is set, or content_length differs from ULLONG_MAX.
+ * Returns 1 in every other case.
+ */
+int
+http_message_needs_eof (http_parser *parser)
+{
+  if (parser->type == HTTP_REQUEST) {
+    return 0;
+  }
+  /* See RFC 2616 section 4.4 */
+  if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
+      parser->status_code == 204 ||     /* No Content */
+      parser->status_code == 304 ||     /* Not Modified */
+      parser->flags & F_SKIPBODY) {     /* response to a HEAD request */
+    return 0;
+  }
+  if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { /* ULLONG_MAX looks like the "no Content-Length seen" sentinel -- TODO confirm */
+    return 0;
+  }
+  return 1; /* none of the checks above applied */
+}
 int
 http_should_keep_alive (http_parser *parser)
 {
@@ -1612,17 +1896,15 @@ http_should_keep_alive (http_parser *parser)
     /* HTTP/1.1 */
     if (parser->flags & F_CONNECTION_CLOSE) {
       return 0;
-    } else {
-      return 1;
     }
   } else {
     /* HTTP/1.0 or earlier */
-    if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
-      return 1;
-    } else {
+    if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
       return 0;
     }
   }
+  return !http_message_needs_eof(parser);
 }
@@ -1635,10 +1917,142 @@ const char * http_method_str (enum http_method m)
 void
 http_parser_init (http_parser *parser, enum http_parser_type t)
 { /* (re)initialize *parser for messages of type t */
+  void *data = parser->data; /* preserve application data across the memset below */
+  memset(parser, 0, sizeof(*parser)); /* zero every field of *parser */
+  parser->data = data; /* restore the saved application data */
   parser->type = t;
   parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res)); /* start state is chosen by parser type */
-  parser->nread = 0;
-  parser->upgrade = 0;
-  parser->flags = 0;
-  parser->method = 0;
+  parser->http_errno = HPE_OK;
+}
+const char *
+http_errno_name(enum http_errno err) { /* returns the .name of the http_strerror_tab entry at index err */
+  assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); /* err must index inside the table */
+  return http_strerror_tab[err].name;
+}
+const char *
+http_errno_description(enum http_errno err) { /* returns the .description of the http_strerror_tab entry at index err */
+  assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); /* err must index inside the table */
+  return http_strerror_tab[err].description;
+}
+int /* 0 on success, 1 if the URL is rejected */
+http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
+                      struct http_parser_url *u)
+{
+  enum state s;
+  const char *p;
+  enum http_parser_url_fields uf, old_uf;
+  u->port = u->field_set = 0;
+  s = is_connect ? s_req_host_start : s_req_spaces_before_url; /* CONNECT targets start directly at the host */
+  uf = old_uf = UF_MAX; /* initial value; none of the cases below assign UF_MAX */
+  for (p = buf; p < buf + buflen; p++) {
+    s = parse_url_char(s, *p);
+    /* Figure out the next field that we're operating on */
+    switch (s) {
+      case s_dead:
+        return 1; /* s_dead is treated as a parse failure */
+      /* Skip delimiters */
+      case s_req_schema_slash:
+      case s_req_schema_slash_slash:
+      case s_req_host_start:
+      case s_req_host_v6_start:
+      case s_req_host_v6_end:
+      case s_req_port_start:
+      case s_req_query_string_start:
+      case s_req_fragment_start:
+        continue;
+      case s_req_schema:
+        uf = UF_SCHEMA;
+        break;
+      case s_req_host:
+      case s_req_host_v6:
+        uf = UF_HOST;
+        break;
+      case s_req_port:
+        uf = UF_PORT;
+        break;
+      case s_req_path:
+        uf = UF_PATH;
+        break;
+      case s_req_query_string:
+        uf = UF_QUERY;
+        break;
+      case s_req_fragment:
+        uf = UF_FRAGMENT;
+        break;
+      default:
+        assert(!"Unexpected state");
+        return 1;
+    }
+    /* Same field as the previous byte: just extend its length */
+    if (uf == old_uf) {
+      u->field_data[uf].len++;
+      continue;
+    }
+    u->field_data[uf].off = p - buf; /* a new field starts at this byte */
+    u->field_data[uf].len = 1;
+    u->field_set |= (1 << uf); /* mark the field as present */
+    old_uf = uf;
+  }
+  /* CONNECT requests can only contain "hostname:port" */
+  if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
+    return 1;
+  }
+  /* Make sure we don't end somewhere unexpected */
+  switch (s) {
+  case s_req_host_v6_start:
+  case s_req_host_v6:
+  case s_req_host_v6_end:
+  case s_req_host:
+  case s_req_port_start:
+    return 1;
+  default:
+    break;
+  }
+  if (u->field_set & (1 << UF_PORT)) {
+    /* Don't bother with endp; we've already validated the string.
+     * NOTE(review): strtoul is not bounded by buflen and keeps reading
+     * while it sees digits; when the port is the last thing in buf this
+     * relies on a non-digit (e.g. NUL) following the buffer -- confirm
+     * that callers guarantee it.
+     */
+    unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
+    /* Ports must fit in 16 bits (at most 0xffff) */
+    if (v > 0xffff) {
+      return 1;
+    }
+    u->port = (uint16_t) v;
+  }
+  return 0;
+}
+void
+http_parser_pause(http_parser *parser, int paused) {
+  /* Set the parser's errno to HPE_PAUSED (paused != 0) or back to HPE_OK
+   * (paused == 0).
+   *
+   * Users should only be pausing/unpausing a parser that is not in an error
+   * state. In non-debug builds, there's not much that we can do about this
+   * other than ignore it.
+   */
+  if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
+      HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
+    SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
+  } else {
+    assert(0 && "Attempting to pause parser in error state"); /* any other errno is left untouched */
+  }
 }