http_parser.rb 0.5.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/linux.yml +23 -0
  3. data/.github/workflows/windows.yml +23 -0
  4. data/.gitignore +5 -4
  5. data/.gitmodules +4 -4
  6. data/Gemfile +1 -1
  7. data/README.md +52 -47
  8. data/Rakefile +1 -0
  9. data/bench/standalone.rb +23 -0
  10. data/bench/thin.rb +1 -0
  11. data/ext/ruby_http_parser/extconf.rb +1 -1
  12. data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +139 -83
  13. data/ext/ruby_http_parser/ruby_http_parser.c +40 -41
  14. data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
  15. data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +5 -1
  16. data/ext/ruby_http_parser/vendor/http-parser-java/README.md +133 -1
  17. data/ext/ruby_http_parser/vendor/http-parser-java/TODO +6 -0
  18. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +1202 -671
  19. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
  20. data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +172 -51
  21. data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
  22. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
  23. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +8 -3
  24. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
  25. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +35 -102
  26. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +6 -6
  27. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +775 -682
  28. data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +8 -4
  29. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +70 -20
  30. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
  31. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +1 -1
  32. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +1 -0
  33. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +2 -1
  34. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +1 -0
  35. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +6 -17
  36. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +1 -0
  37. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +1 -0
  38. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +1 -0
  39. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
  40. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +80 -9
  41. data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +2 -1
  42. data/ext/ruby_http_parser/vendor/http-parser-java/test.c +1637 -280
  43. data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +230 -71
  44. data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +68 -0
  45. data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +1 -1
  46. data/ext/ruby_http_parser/vendor/http-parser/README.md +113 -38
  47. data/ext/ruby_http_parser/vendor/http-parser/bench.c +128 -0
  48. data/ext/ruby_http_parser/vendor/http-parser/contrib/parsertrace.c +157 -0
  49. data/ext/ruby_http_parser/vendor/http-parser/contrib/url_parser.c +47 -0
  50. data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +1576 -780
  51. data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +111 -0
  52. data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +308 -58
  53. data/ext/ruby_http_parser/vendor/http-parser/test.c +2964 -460
  54. data/http_parser.rb.gemspec +14 -7
  55. data/spec/parser_spec.rb +196 -102
  56. data/spec/support/requests.json +236 -24
  57. data/spec/support/responses.json +202 -36
  58. data/tasks/compile.rake +2 -2
  59. data/tasks/fixtures.rake +8 -2
  60. data/tasks/spec.rake +1 -1
  61. metadata +141 -134
  62. data/Gemfile.lock +0 -32
  63. data/ext/ruby_http_parser/vendor/http-parser-java/compile +0 -1
  64. data/ext/ruby_http_parser/vendor/http-parser-java/test_permutations +0 -1
  65. data/ext/ruby_http_parser/vendor/http-parser-java/test_unit +0 -1
  66. data/ext/ruby_http_parser/vendor/http-parser-java/test_utf8 +0 -1
  67. data/ext/ruby_http_parser/vendor/http-parser/CONTRIBUTIONS +0 -4
@@ -16,10 +16,8 @@
16
16
  typedef struct ParserWrapper {
17
17
  ryah_http_parser parser;
18
18
 
19
+ VALUE status;
19
20
  VALUE request_url;
20
- VALUE request_path;
21
- VALUE query_string;
22
- VALUE fragment;
23
21
 
24
22
  VALUE headers;
25
23
 
@@ -48,10 +46,8 @@ void ParserWrapper_init(ParserWrapper *wrapper) {
48
46
  wrapper->parser.http_major = 0;
49
47
  wrapper->parser.http_minor = 0;
50
48
 
49
+ wrapper->status = Qnil;
51
50
  wrapper->request_url = Qnil;
52
- wrapper->request_path = Qnil;
53
- wrapper->query_string = Qnil;
54
- wrapper->fragment = Qnil;
55
51
 
56
52
  wrapper->upgrade_data = Qnil;
57
53
 
@@ -65,10 +61,8 @@ void ParserWrapper_init(ParserWrapper *wrapper) {
65
61
  void ParserWrapper_mark(void *data) {
66
62
  if(data) {
67
63
  ParserWrapper *wrapper = (ParserWrapper *) data;
64
+ rb_gc_mark_maybe(wrapper->status);
68
65
  rb_gc_mark_maybe(wrapper->request_url);
69
- rb_gc_mark_maybe(wrapper->request_path);
70
- rb_gc_mark_maybe(wrapper->query_string);
71
- rb_gc_mark_maybe(wrapper->fragment);
72
66
  rb_gc_mark_maybe(wrapper->upgrade_data);
73
67
  rb_gc_mark_maybe(wrapper->headers);
74
68
  rb_gc_mark_maybe(wrapper->on_message_begin);
@@ -100,6 +94,7 @@ static ID Ion_body;
100
94
  static ID Ion_message_complete;
101
95
 
102
96
  static VALUE Sstop;
97
+ static VALUE Sreset;
103
98
  static VALUE Sarrays;
104
99
  static VALUE Sstrings;
105
100
  static VALUE Smixed;
@@ -109,10 +104,8 @@ static VALUE Smixed;
109
104
  int on_message_begin(ryah_http_parser *parser) {
110
105
  GET_WRAPPER(wrapper, parser);
111
106
 
107
+ wrapper->status = rb_str_new2("");
112
108
  wrapper->request_url = rb_str_new2("");
113
- wrapper->request_path = rb_str_new2("");
114
- wrapper->query_string = rb_str_new2("");
115
- wrapper->fragment = rb_str_new2("");
116
109
  wrapper->headers = rb_hash_new();
117
110
  wrapper->upgrade_data = rb_str_new2("");
118
111
 
@@ -132,27 +125,28 @@ int on_message_begin(ryah_http_parser *parser) {
132
125
  }
133
126
  }
134
127
 
135
- int on_url(ryah_http_parser *parser, const char *at, size_t length) {
128
+ int on_status(ryah_http_parser *parser, const char *at, size_t length) {
136
129
  GET_WRAPPER(wrapper, parser);
137
- rb_str_cat(wrapper->request_url, at, length);
138
- return 0;
139
- }
140
130
 
141
- int on_path(ryah_http_parser *parser, const char *at, size_t length) {
142
- GET_WRAPPER(wrapper, parser);
143
- rb_str_cat(wrapper->request_path, at, length);
144
- return 0;
145
- }
146
-
147
- int on_query_string(ryah_http_parser *parser, const char *at, size_t length) {
148
- GET_WRAPPER(wrapper, parser);
149
- rb_str_cat(wrapper->query_string, at, length);
131
+ if (at && length) {
132
+ if (wrapper->status == Qnil) {
133
+ wrapper->status = rb_str_new(at, length);
134
+ } else {
135
+ rb_str_cat(wrapper->status, at, length);
136
+ }
137
+ }
150
138
  return 0;
151
139
  }
152
140
 
153
- int on_fragment(ryah_http_parser *parser, const char *at, size_t length) {
141
+ int on_url(ryah_http_parser *parser, const char *at, size_t length) {
154
142
  GET_WRAPPER(wrapper, parser);
155
- rb_str_cat(wrapper->fragment, at, length);
143
+ if (at && length) {
144
+ if (wrapper->request_url == Qnil) {
145
+ wrapper->request_url = rb_str_new(at, length);
146
+ } else {
147
+ rb_str_cat(wrapper->request_url, at, length);
148
+ }
149
+ }
156
150
  return 0;
157
151
  }
158
152
 
@@ -165,7 +159,6 @@ int on_header_field(ryah_http_parser *parser, const char *at, size_t length) {
165
159
  } else {
166
160
  rb_str_cat(wrapper->curr_field_name, at, length);
167
161
  }
168
-
169
162
  return 0;
170
163
  }
171
164
 
@@ -229,6 +222,8 @@ int on_headers_complete(ryah_http_parser *parser) {
229
222
  if (ret == Sstop) {
230
223
  wrapper->stopped = Qtrue;
231
224
  return -1;
225
+ } else if (ret == Sreset){
226
+ return 1;
232
227
  } else {
233
228
  return 0;
234
229
  }
@@ -275,10 +270,8 @@ int on_message_complete(ryah_http_parser *parser) {
275
270
 
276
271
  static ryah_http_parser_settings settings = {
277
272
  .on_message_begin = on_message_begin,
278
- .on_path = on_path,
279
- .on_query_string = on_query_string,
273
+ .on_status = on_status,
280
274
  .on_url = on_url,
281
- .on_fragment = on_fragment,
282
275
  .on_header_field = on_header_field,
283
276
  .on_header_value = on_header_value,
284
277
  .on_headers_complete = on_headers_complete,
@@ -315,6 +308,10 @@ VALUE ResponseParser_alloc(VALUE klass) {
315
308
  return Parser_alloc_by_type(klass, HTTP_RESPONSE);
316
309
  }
317
310
 
311
+ VALUE Parser_strict_p(VALUE klass) {
312
+ return HTTP_PARSER_STRICT == 1 ? Qtrue : Qfalse;
313
+ }
314
+
318
315
  VALUE Parser_initialize(int argc, VALUE *argv, VALUE self) {
319
316
  ParserWrapper *wrapper = NULL;
320
317
  DATA_GET(self, ParserWrapper, wrapper);
@@ -346,11 +343,15 @@ VALUE Parser_execute(VALUE self, VALUE data) {
346
343
  size_t nparsed = ryah_http_parser_execute(&wrapper->parser, &settings, ptr, len);
347
344
 
348
345
  if (wrapper->parser.upgrade) {
349
- rb_str_cat(wrapper->upgrade_data, ptr + nparsed + 1, len - nparsed - 1);
346
+ if (RTEST(wrapper->stopped) && !RTEST(wrapper->completed))
347
+ nparsed += 1;
350
348
 
351
- } else if (nparsed != len) {
349
+ if (nparsed < len)
350
+ rb_str_cat(wrapper->upgrade_data, ptr + nparsed, len - nparsed);
351
+
352
+ } else if (nparsed != (size_t)len) {
352
353
  if (!RTEST(wrapper->stopped) && !RTEST(wrapper->completed))
353
- rb_raise(eParserError, "Could not parse data entirely");
354
+ rb_raise(eParserError, "Could not parse data entirely (%zu != %zu)", nparsed, len);
354
355
  else
355
356
  nparsed += 1; // error states fail on the current character
356
357
  }
@@ -401,7 +402,7 @@ VALUE Parser_upgrade_p(VALUE self) {
401
402
  ParserWrapper *wrapper = NULL;
402
403
  DATA_GET(self, ParserWrapper, wrapper);
403
404
 
404
- return wrapper->parser.upgrade == 1 ? Qtrue : Qfalse;
405
+ return wrapper->parser.upgrade ? Qtrue : Qfalse;
405
406
  }
406
407
 
407
408
  VALUE Parser_http_version(VALUE self) {
@@ -461,10 +462,8 @@ VALUE Parser_status_code(VALUE self) {
461
462
  return wrapper->name; \
462
463
  }
463
464
 
465
+ DEFINE_GETTER(status);
464
466
  DEFINE_GETTER(request_url);
465
- DEFINE_GETTER(request_path);
466
- DEFINE_GETTER(query_string);
467
- DEFINE_GETTER(fragment);
468
467
  DEFINE_GETTER(headers);
469
468
  DEFINE_GETTER(upgrade_data);
470
469
  DEFINE_GETTER(header_value_type);
@@ -502,6 +501,7 @@ void Init_ruby_http_parser() {
502
501
  Ion_body = rb_intern("on_body");
503
502
  Ion_message_complete = rb_intern("on_message_complete");
504
503
  Sstop = ID2SYM(rb_intern("stop"));
504
+ Sreset = ID2SYM(rb_intern("reset"));
505
505
 
506
506
  Sarrays = ID2SYM(rb_intern("arrays"));
507
507
  Sstrings = ID2SYM(rb_intern("strings"));
@@ -511,6 +511,7 @@ void Init_ruby_http_parser() {
511
511
  rb_define_alloc_func(cRequestParser, RequestParser_alloc);
512
512
  rb_define_alloc_func(cResponseParser, ResponseParser_alloc);
513
513
 
514
+ rb_define_singleton_method(cParser, "strict?", Parser_strict_p, 0);
514
515
  rb_define_method(cParser, "initialize", Parser_initialize, -1);
515
516
 
516
517
  rb_define_method(cParser, "on_message_begin=", Parser_set_on_message_begin, 1);
@@ -529,10 +530,8 @@ void Init_ruby_http_parser() {
529
530
  rb_define_method(cParser, "http_method", Parser_http_method, 0);
530
531
  rb_define_method(cParser, "status_code", Parser_status_code, 0);
531
532
 
533
+ rb_define_method(cParser, "status", Parser_status, 0);
532
534
  rb_define_method(cParser, "request_url", Parser_request_url, 0);
533
- rb_define_method(cParser, "request_path", Parser_request_path, 0);
534
- rb_define_method(cParser, "query_string", Parser_query_string, 0);
535
- rb_define_method(cParser, "fragment", Parser_fragment, 0);
536
535
  rb_define_method(cParser, "headers", Parser_headers, 0);
537
536
  rb_define_method(cParser, "upgrade_data", Parser_upgrade_data, 0);
538
537
  rb_define_method(cParser, "header_value_type", Parser_header_value_type, 0);
@@ -0,0 +1,32 @@
1
+ # Authors ordered by first contribution.
2
+ Ryan Dahl <ry@tinyclouds.org>
3
+ Jeremy Hinegardner <jeremy@hinegardner.org>
4
+ Sergey Shepelev <temotor@gmail.com>
5
+ Joe Damato <ice799@gmail.com>
6
+ tomika <tomika_nospam@freemail.hu>
7
+ Phoenix Sol <phoenix@burninglabs.com>
8
+ Cliff Frey <cliff@meraki.com>
9
+ Ewen Cheslack-Postava <ewencp@cs.stanford.edu>
10
+ Santiago Gala <sgala@apache.org>
11
+ Tim Becker <tim.becker@syngenio.de>
12
+ Jeff Terrace <jterrace@gmail.com>
13
+ Ben Noordhuis <info@bnoordhuis.nl>
14
+ Nathan Rajlich <nathan@tootallnate.net>
15
+ Mark Nottingham <mnot@mnot.net>
16
+ Aman Gupta <aman@tmm1.net>
17
+ Tim Becker <tim.becker@kuriositaet.de>
18
+ Sean Cunningham <sean.cunningham@mandiant.com>
19
+ Peter Griess <pg@std.in>
20
+ Salman Haq <salman.haq@asti-usa.com>
21
+ Cliff Frey <clifffrey@gmail.com>
22
+ Jon Kolb <jon@b0g.us>
23
+ Fouad Mardini <f.mardini@gmail.com>
24
+ Paul Querna <pquerna@apache.org>
25
+ Felix Geisendörfer <felix@debuggable.com>
26
+ koichik <koichik@improvement.jp>
27
+ Andre Caron <andre.l.caron@gmail.com>
28
+ Ivo Raisr <ivosh@ivosh.net>
29
+ James McLaughlin <jamie@lacewing-project.org>
30
+ David Gwynne <loki@animata.net>
31
+ LE ROUX Thomas <thomas@procheo.fr>
32
+ Randy Rizun <rrizun@ortivawireless.com>
@@ -23,7 +23,11 @@ IN THE SOFTWARE.
23
23
  This code mainly based on code with the following license:
24
24
 
25
25
 
26
- Copyright Joyent, Inc. and other Node contributors. All rights reserved.
26
+ http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright
27
+ Igor Sysoev.
28
+
29
+ Additional changes are licensed under the same terms as NGINX and
30
+ copyright Joyent, Inc. and other Node contributors. All rights reserved.
27
31
 
28
32
  Permission is hereby granted, free of charge, to any person obtaining a copy
29
33
  of this software and associated documentation files (the "Software"), to
@@ -24,7 +24,7 @@ The parser extracts the following information from HTTP messages:
24
24
  * Response status code
25
25
  * Transfer-Encoding
26
26
  * HTTP version
27
- * Request path, query string, fragment
27
+ * Request URL
28
28
  * Message body
29
29
 
30
30
  Building
@@ -49,3 +49,135 @@ Usage
49
49
  help or have suggestions, feel free to contact me at
50
50
  (tim.becker@kuriositaet.de).
51
51
 
52
+
53
+ One `http_parser` object is used per TCP connection. Initialize the struct
54
+ using `http_parser_init()` and set the callbacks. That might look something
55
+ like this for a request parser:
56
+
57
+ http_parser_settings settings;
58
+ settings.on_path = my_path_callback;
59
+ settings.on_header_field = my_header_field_callback;
60
+ /* ... */
61
+
62
+ http_parser *parser = malloc(sizeof(http_parser));
63
+ http_parser_init(parser, HTTP_REQUEST);
64
+ parser->data = my_socket;
65
+
66
+ When data is received on the socket execute the parser and check for errors.
67
+
68
+ size_t len = 80*1024, nparsed;
69
+ char buf[len];
70
+ ssize_t recved;
71
+
72
+ recved = recv(fd, buf, len, 0);
73
+
74
+ if (recved < 0) {
75
+ /* Handle error. */
76
+ }
77
+
78
+ /* Start up / continue the parser.
79
+ * Note we pass recved==0 to signal that EOF has been received.
80
+ */
81
+ nparsed = http_parser_execute(parser, &settings, buf, recved);
82
+
83
+ if (parser->upgrade) {
84
+ /* handle new protocol */
85
+ } else if (nparsed != recved) {
86
+ /* Handle error. Usually just close the connection. */
87
+ }
88
+
89
+ HTTP needs to know where the end of the stream is. For example, sometimes
90
+ servers send responses without Content-Length and expect the client to
91
+ consume input (for the body) until EOF. To tell http_parser about EOF, give
92
+ `0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
93
+ can still be encountered during an EOF, so one must still be prepared
94
+ to receive them.
95
+
96
+ Scalar valued message information such as `status_code`, `method`, and the
97
+ HTTP version are stored in the parser structure. This data is only
98
+ temporarily stored in `http_parser` and gets reset on each new message. If
99
+ this information is needed later, copy it out of the structure during the
100
+ `headers_complete` callback.
101
+
102
+ The parser decodes the transfer-encoding for both requests and responses
103
+ transparently. That is, a chunked encoding is decoded before being sent to
104
+ the on_body callback.
105
+
106
+
107
+ The Special Problem of Upgrade
108
+ ------------------------------
109
+
110
+ HTTP supports upgrading the connection to a different protocol. An
111
+ increasingly common example of this is the Web Socket protocol which sends
112
+ a request like
113
+
114
+ GET /demo HTTP/1.1
115
+ Upgrade: WebSocket
116
+ Connection: Upgrade
117
+ Host: example.com
118
+ Origin: http://example.com
119
+ WebSocket-Protocol: sample
120
+
121
+ followed by non-HTTP data.
122
+
123
+ (See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
124
+ information on the Web Socket protocol.)
125
+
126
+ To support this, the parser will treat this as a normal HTTP message without a
127
+ body, issuing both on_headers_complete and on_message_complete callbacks. However,
128
+ http_parser_execute() will stop parsing at the end of the headers and return.
129
+
130
+ The user is expected to check if `parser->upgrade` has been set to 1 after
131
+ `http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
132
+ offset by the return value of `http_parser_execute()`.
133
+
134
+
135
+ Callbacks
136
+ ---------
137
+
138
+ During the `http_parser_execute()` call, the callbacks set in
139
+ `http_parser_settings` will be executed. The parser maintains state and
140
+ never looks behind, so buffering the data is not necessary. If you need to
141
+ save certain data for later usage, you can do that from the callbacks.
142
+
143
+ There are two types of callbacks:
144
+
145
+ * notification `typedef int (*http_cb) (http_parser*);`
146
+ Callbacks: on_message_begin, on_headers_complete, on_message_complete.
147
+ * data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
148
+ Callbacks: (requests only) on_uri,
149
+ (common) on_header_field, on_header_value, on_body;
150
+
151
+ Callbacks must return 0 on success. Returning a non-zero value indicates
152
+ error to the parser, making it exit immediately.
153
+
154
+ In case you parse HTTP message in chunks (i.e. `read()` request line
155
+ from socket, parse, read half headers, parse, etc) your data callbacks
156
+ may be called more than once. Http-parser guarantees that data pointer is only
157
+ valid for the lifetime of callback. You can also `read()` into a heap allocated
158
+ buffer to avoid copying memory around if this fits your application.
159
+
160
+ Reading headers may be a tricky task if you read/parse headers partially.
161
+ Basically, you need to remember whether last header callback was field or value
162
+ and apply following logic:
163
+
164
+ (on_header_field and on_header_value shortened to on_h_*)
165
+ ------------------------ ------------ --------------------------------------------
166
+ | State (prev. callback) | Callback | Description/action |
167
+ ------------------------ ------------ --------------------------------------------
168
+ | nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
169
+ | | | into it |
170
+ ------------------------ ------------ --------------------------------------------
171
+ | value | on_h_field | New header started. |
172
+ | | | Copy current name,value buffers to headers |
173
+ | | | list and allocate new buffer for new name |
174
+ ------------------------ ------------ --------------------------------------------
175
+ | field | on_h_field | Previous name continues. Reallocate name |
176
+ | | | buffer and append callback data to it |
177
+ ------------------------ ------------ --------------------------------------------
178
+ | field | on_h_value | Value for current header started. Allocate |
179
+ | | | new buffer and copy callback data to it |
180
+ ------------------------ ------------ --------------------------------------------
181
+ | value | on_h_value | Value continues. Reallocate value buffer |
182
+ | | | and append callback data to it |
183
+ ------------------------ ------------ --------------------------------------------
@@ -1,4 +1,10 @@
1
+ decide how to handle errs per default:
2
+ - ry: "set state to dead", return `read`
3
+ - current: call on_error w/ details, if no on_error handler set,
4
+ throw Exception, else call on_error and behave like orig...
5
+
1
6
  some tests from test.c left to port
7
+ (scan ...)
2
8
  documentation
3
9
 
4
10
  hi level callback interface
@@ -1,4 +1,7 @@
1
- /* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
2
5
  *
3
6
  * Permission is hereby granted, free of charge, to any person obtaining a copy
4
7
  * of this software and associated documentation files (the "Software"), to
@@ -18,48 +21,99 @@
18
21
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
22
  * IN THE SOFTWARE.
20
23
  */
21
- #include <http_parser.h>
24
+ #include "http_parser.h"
22
25
  #include <assert.h>
23
26
  #include <stddef.h>
27
+ #include <ctype.h>
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <limits.h>
24
31
 
32
+ #ifndef ULLONG_MAX
33
+ # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
+ #endif
25
35
 
26
36
  #ifndef MIN
27
37
  # define MIN(a,b) ((a) < (b) ? (a) : (b))
28
38
  #endif
29
39
 
40
+ #ifndef ARRAY_SIZE
41
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42
+ #endif
43
+
44
+ #ifndef BIT_AT
45
+ # define BIT_AT(a, i) \
46
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47
+ (1 << ((unsigned int) (i) & 7))))
48
+ #endif
49
+
50
+ #ifndef ELEM_AT
51
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52
+ #endif
53
+
54
+ #define SET_ERRNO(e) \
55
+ do { \
56
+ parser->http_errno = (e); \
57
+ } while(0)
58
+
30
59
 
31
- #define CALLBACK2(FOR) \
60
+ /* Run the notify callback FOR, returning ER if it fails */
61
+ #define CALLBACK_NOTIFY_(FOR, ER) \
32
62
  do { \
63
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
64
+ \
33
65
  if (settings->on_##FOR) { \
34
- if (0 != settings->on_##FOR(parser)) return (p - data); \
66
+ if (0 != settings->on_##FOR(parser)) { \
67
+ SET_ERRNO(HPE_CB_##FOR); \
68
+ } \
69
+ \
70
+ /* We either errored above or got paused; get out */ \
71
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
72
+ return (ER); \
73
+ } \
35
74
  } \
36
75
  } while (0)
37
76
 
77
+ /* Run the notify callback FOR and consume the current byte */
78
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
38
79
 
39
- #define MARK(FOR) \
40
- do { \
41
- FOR##_mark = p; \
42
- } while (0)
80
+ /* Run the notify callback FOR and don't consume the current byte */
81
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
43
82
 
44
- #define CALLBACK_NOCLEAR(FOR) \
83
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
84
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
45
85
  do { \
86
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
87
+ \
46
88
  if (FOR##_mark) { \
47
89
  if (settings->on_##FOR) { \
48
- if (0 != settings->on_##FOR(parser, \
49
- FOR##_mark, \
50
- p - FOR##_mark)) \
51
- { \
52
- return (p - data); \
90
+ if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
91
+ SET_ERRNO(HPE_CB_##FOR); \
92
+ } \
93
+ \
94
+ /* We either errored above or got paused; get out */ \
95
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
96
+ return (ER); \
53
97
  } \
54
98
  } \
99
+ FOR##_mark = NULL; \
55
100
  } \
56
101
  } while (0)
57
102
 
103
+ /* Run the data callback FOR and consume the current byte */
104
+ #define CALLBACK_DATA(FOR) \
105
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
58
106
 
59
- #define CALLBACK(FOR) \
107
+ /* Run the data callback FOR and don't consume the current byte */
108
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
109
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
110
+
111
+ /* Set the mark FOR; non-destructive if mark is already set */
112
+ #define MARK(FOR) \
60
113
  do { \
61
- CALLBACK_NOCLEAR(FOR); \
62
- FOR##_mark = NULL; \
114
+ if (!FOR##_mark) { \
115
+ FOR##_mark = p; \
116
+ } \
63
117
  } while (0)
64
118
 
65
119
 
@@ -74,29 +128,10 @@ do { \
74
128
 
75
129
 
76
130
  static const char *method_strings[] =
77
- { "DELETE"
78
- , "GET"
79
- , "HEAD"
80
- , "POST"
81
- , "PUT"
82
- , "CONNECT"
83
- , "OPTIONS"
84
- , "TRACE"
85
- , "COPY"
86
- , "LOCK"
87
- , "MKCOL"
88
- , "MOVE"
89
- , "PROPFIND"
90
- , "PROPPATCH"
91
- , "UNLOCK"
92
- , "REPORT"
93
- , "MKACTIVITY"
94
- , "CHECKOUT"
95
- , "MERGE"
96
- , "M-SEARCH"
97
- , "NOTIFY"
98
- , "SUBSCRIBE"
99
- , "UNSUBSCRIBE"
131
+ {
132
+ #define XX(num, name, string) #string,
133
+ HTTP_METHOD_MAP(XX)
134
+ #undef XX
100
135
  };
101
136
 
102
137
 
@@ -117,9 +152,9 @@ static const char tokens[256] = {
117
152
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
118
153
  0, 0, 0, 0, 0, 0, 0, 0,
119
154
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
120
- ' ', '!', '"', '#', '$', '%', '&', '\'',
155
+ 0, '!', 0, '#', '$', '%', '&', '\'',
121
156
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
122
- 0, 0, '*', '+', 0, '-', '.', '/',
157
+ 0, 0, '*', '+', 0, '-', '.', 0,
123
158
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
124
159
  '0', '1', '2', '3', '4', '5', '6', '7',
125
160
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
@@ -139,7 +174,7 @@ static const char tokens[256] = {
139
174
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
140
175
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
141
176
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
142
- 'x', 'y', 'z', 0, '|', '}', '~', 0 };
177
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
143
178
 
144
179
 
145
180
  static const int8_t unhex[256] =
@@ -154,61 +189,48 @@ static const int8_t unhex[256] =
154
189
  };
155
190
 
156
191
 
157
- static const uint8_t normal_url_char[256] = {
192
+ #if HTTP_PARSER_STRICT
193
+ # define T(v) 0
194
+ #else
195
+ # define T(v) v
196
+ #endif
197
+
198
+
199
+ static const uint8_t normal_url_char[32] = {
158
200
  /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
159
- 0, 0, 0, 0, 0, 0, 0, 0,
201
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
160
202
  /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
161
- 0, 0, 0, 0, 0, 0, 0, 0,
203
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
162
204
  /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
163
- 0, 0, 0, 0, 0, 0, 0, 0,
205
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
164
206
  /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
165
- 0, 0, 0, 0, 0, 0, 0, 0,
207
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
166
208
  /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
167
- 0, 1, 1, 0, 1, 1, 1, 1,
209
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
168
210
  /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
169
- 1, 1, 1, 1, 1, 1, 1, 1,
211
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
170
212
  /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
171
- 1, 1, 1, 1, 1, 1, 1, 1,
213
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
172
214
  /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
173
- 1, 1, 1, 1, 1, 1, 1, 0,
215
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
174
216
  /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
175
- 1, 1, 1, 1, 1, 1, 1, 1,
217
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
176
218
  /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
177
- 1, 1, 1, 1, 1, 1, 1, 1,
219
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
178
220
  /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
179
- 1, 1, 1, 1, 1, 1, 1, 1,
221
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
180
222
  /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
181
- 1, 1, 1, 1, 1, 1, 1, 1,
223
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
182
224
  /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
183
- 1, 1, 1, 1, 1, 1, 1, 1,
225
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
184
226
  /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
185
- 1, 1, 1, 1, 1, 1, 1, 1,
227
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
186
228
  /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
187
- 1, 1, 1, 1, 1, 1, 1, 1,
229
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
188
230
  /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
189
- 1, 1, 1, 1, 1, 1, 1, 0,
190
-
191
- /* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
192
- encoded paths. This is out of spec, but clients generate this and most other
193
- HTTP servers support it. We should, too. */
194
-
195
- 1, 1, 1, 1, 1, 1, 1, 1,
196
- 1, 1, 1, 1, 1, 1, 1, 1,
197
- 1, 1, 1, 1, 1, 1, 1, 1,
198
- 1, 1, 1, 1, 1, 1, 1, 1,
199
- 1, 1, 1, 1, 1, 1, 1, 1,
200
- 1, 1, 1, 1, 1, 1, 1, 1,
201
- 1, 1, 1, 1, 1, 1, 1, 1,
202
- 1, 1, 1, 1, 1, 1, 1, 1,
203
- 1, 1, 1, 1, 1, 1, 1, 1,
204
- 1, 1, 1, 1, 1, 1, 1, 1,
205
- 1, 1, 1, 1, 1, 1, 1, 1,
206
- 1, 1, 1, 1, 1, 1, 1, 1,
207
- 1, 1, 1, 1, 1, 1, 1, 1,
208
- 1, 1, 1, 1, 1, 1, 1, 1,
209
- 1, 1, 1, 1, 1, 1, 1, 1,
210
- 1, 1, 1, 1, 1, 1, 1, 1 };
231
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
211
232
 
233
+ #undef T
212
234
 
213
235
  enum state
214
236
  { s_dead = 1 /* important that this is > 0 */
@@ -236,8 +258,9 @@ enum state
236
258
  , s_req_schema
237
259
  , s_req_schema_slash
238
260
  , s_req_schema_slash_slash
239
- , s_req_host
240
- , s_req_port
261
+ , s_req_server_start
262
+ , s_req_server
263
+ , s_req_server_with_at
241
264
  , s_req_path
242
265
  , s_req_query_string_start
243
266
  , s_req_query_string
@@ -258,6 +281,7 @@ enum state
258
281
  , s_header_field
259
282
  , s_header_value_start
260
283
  , s_header_value
284
+ , s_header_value_lws
261
285
 
262
286
  , s_header_almost_done
263
287
 
@@ -265,9 +289,11 @@ enum state
265
289
  , s_chunk_size
266
290
  , s_chunk_parameters
267
291
  , s_chunk_size_almost_done
268
-
292
+
269
293
  , s_headers_almost_done
270
- /* Important: 's_headers_almost_done' must be the last 'header' state. All
294
+ , s_headers_done
295
+
296
+ /* Important: 's_headers_done' must be the last 'header' state. All
271
297
  * states beyond this must be 'body' states. It is used for overflow
272
298
  * checking. See the PARSING_HEADER() macro.
273
299
  */
@@ -278,10 +304,12 @@ enum state
278
304
 
279
305
  , s_body_identity
280
306
  , s_body_identity_eof
307
+
308
+ , s_message_done
281
309
  };
282
310
 
283
311
 
284
- #define PARSING_HEADER(state) (state <= s_headers_almost_done)
312
+ #define PARSING_HEADER(state) (state <= s_headers_done)
285
313
 
286
314
 
287
315
  enum header_states
@@ -310,28 +338,59 @@ enum header_states
310
338
  , h_connection_close
311
339
  };
312
340
 
341
+ enum http_host_state
342
+ {
343
+ s_http_host_dead = 1
344
+ , s_http_userinfo_start
345
+ , s_http_userinfo
346
+ , s_http_host_start
347
+ , s_http_host_v6_start
348
+ , s_http_host
349
+ , s_http_host_v6
350
+ , s_http_host_v6_end
351
+ , s_http_host_port_start
352
+ , s_http_host_port
353
+ };
354
+
355
+ /* Macros for character classes; these depend on strict mode */
356
+ #define CR '\r'
357
+ #define LF '\n'
358
+ #define LOWER(c) (unsigned char)(c | 0x20)
359
+ #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
360
+ #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
361
+ #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
362
+ #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
363
+ #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
364
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
365
+ (c) == ')')
366
+ #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
367
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
368
+ (c) == '$' || (c) == ',')
313
369
 
314
- enum flags
315
- { F_CHUNKED = 1 << 0
316
- , F_CONNECTION_KEEP_ALIVE = 1 << 1
317
- , F_CONNECTION_CLOSE = 1 << 2
318
- , F_TRAILING = 1 << 3
319
- , F_UPGRADE = 1 << 4
320
- , F_SKIPBODY = 1 << 5
321
- };
322
-
323
-
324
- #define CR '\r'
325
- #define LF '\n'
326
- #define LOWER(c) (unsigned char)(c | 0x20)
327
- #define TOKEN(c) tokens[(unsigned char)c]
370
+ #if HTTP_PARSER_STRICT
371
+ #define TOKEN(c) (tokens[(unsigned char)c])
372
+ #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
373
+ #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
374
+ #else
375
+ #define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
376
+ #define IS_URL_CHAR(c) \
377
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
378
+ #define IS_HOST_CHAR(c) \
379
+ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
380
+ #endif
328
381
 
329
382
 
330
383
  #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
331
384
 
332
385
 
333
386
  #if HTTP_PARSER_STRICT
334
- # define STRICT_CHECK(cond) if (cond) goto error
387
+ # define STRICT_CHECK(cond) \
388
+ do { \
389
+ if (cond) { \
390
+ SET_ERRNO(HPE_STRICT); \
391
+ goto error; \
392
+ } \
393
+ } while (0)
335
394
  # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
336
395
  #else
337
396
  # define STRICT_CHECK(cond)
@@ -339,24 +398,202 @@ enum flags
339
398
  #endif
340
399
 
341
400
 
401
+ /* Map errno values to strings for human-readable output */
402
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
403
+ static struct {
404
+ const char *name;
405
+ const char *description;
406
+ } http_strerror_tab[] = {
407
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
408
+ };
409
+ #undef HTTP_STRERROR_GEN
410
+
411
+ int http_message_needs_eof(const http_parser *parser);
412
+
413
+ /* Our URL parser.
414
+ *
415
+ * This is designed to be shared by http_parser_execute() for URL validation,
416
+ * hence it has a state transition + byte-for-byte interface. In addition, it
417
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
418
+ * work of turning state transitions into URL components for its API.
419
+ *
420
+ * This function should only be invoked with non-space characters. It is
421
+ * assumed that the caller cares about (and can detect) the transition between
422
+ * URL and non-URL states by looking for these.
423
+ */
424
+ static enum state
425
+ parse_url_char(enum state s, const char ch)
426
+ {
427
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
428
+ return s_dead;
429
+ }
430
+
431
+ #if HTTP_PARSER_STRICT
432
+ if (ch == '\t' || ch == '\f') {
433
+ return s_dead;
434
+ }
435
+ #endif
436
+
437
+ switch (s) {
438
+ case s_req_spaces_before_url:
439
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
440
+ * All methods except CONNECT are followed by '/' or '*'.
441
+ */
442
+
443
+ if (ch == '/' || ch == '*') {
444
+ return s_req_path;
445
+ }
446
+
447
+ if (IS_ALPHA(ch)) {
448
+ return s_req_schema;
449
+ }
450
+
451
+ break;
452
+
453
+ case s_req_schema:
454
+ if (IS_ALPHA(ch)) {
455
+ return s;
456
+ }
457
+
458
+ if (ch == ':') {
459
+ return s_req_schema_slash;
460
+ }
461
+
462
+ break;
463
+
464
+ case s_req_schema_slash:
465
+ if (ch == '/') {
466
+ return s_req_schema_slash_slash;
467
+ }
468
+
469
+ break;
470
+
471
+ case s_req_schema_slash_slash:
472
+ if (ch == '/') {
473
+ return s_req_server_start;
474
+ }
475
+
476
+ break;
477
+
478
+ case s_req_server_with_at:
479
+ if (ch == '@') {
480
+ return s_dead;
481
+ }
482
+
483
+ /* FALLTHROUGH */
484
+ case s_req_server_start:
485
+ case s_req_server:
486
+ if (ch == '/') {
487
+ return s_req_path;
488
+ }
489
+
490
+ if (ch == '?') {
491
+ return s_req_query_string_start;
492
+ }
493
+
494
+ if (ch == '@') {
495
+ return s_req_server_with_at;
496
+ }
497
+
498
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
499
+ return s_req_server;
500
+ }
501
+
502
+ break;
503
+
504
+ case s_req_path:
505
+ if (IS_URL_CHAR(ch)) {
506
+ return s;
507
+ }
508
+
509
+ switch (ch) {
510
+ case '?':
511
+ return s_req_query_string_start;
512
+
513
+ case '#':
514
+ return s_req_fragment_start;
515
+ }
516
+
517
+ break;
518
+
519
+ case s_req_query_string_start:
520
+ case s_req_query_string:
521
+ if (IS_URL_CHAR(ch)) {
522
+ return s_req_query_string;
523
+ }
524
+
525
+ switch (ch) {
526
+ case '?':
527
+ /* allow extra '?' in query string */
528
+ return s_req_query_string;
529
+
530
+ case '#':
531
+ return s_req_fragment_start;
532
+ }
533
+
534
+ break;
535
+
536
+ case s_req_fragment_start:
537
+ if (IS_URL_CHAR(ch)) {
538
+ return s_req_fragment;
539
+ }
540
+
541
+ switch (ch) {
542
+ case '?':
543
+ return s_req_fragment;
544
+
545
+ case '#':
546
+ return s;
547
+ }
548
+
549
+ break;
550
+
551
+ case s_req_fragment:
552
+ if (IS_URL_CHAR(ch)) {
553
+ return s;
554
+ }
555
+
556
+ switch (ch) {
557
+ case '?':
558
+ case '#':
559
+ return s;
560
+ }
561
+
562
+ break;
563
+
564
+ default:
565
+ break;
566
+ }
567
+
568
+ /* We should never fall out of the switch above unless there's an error */
569
+ return s_dead;
570
+ }
571
+
342
572
  size_t http_parser_execute (http_parser *parser,
343
573
  const http_parser_settings *settings,
344
574
  const char *data,
345
575
  size_t len)
346
576
  {
347
577
  char c, ch;
348
- const char *p = data, *pe;
349
- int64_t to_read;
578
+ int8_t unhex_val;
579
+ const char *p = data;
580
+ const char *header_field_mark = 0;
581
+ const char *header_value_mark = 0;
582
+ const char *url_mark = 0;
583
+ const char *body_mark = 0;
350
584
 
351
- enum state state = (enum state) parser->state;
352
- enum header_states header_state = (enum header_states) parser->header_state;
353
- uint64_t index = parser->index;
354
- uint64_t nread = parser->nread;
585
+ /* We're in an error state. Don't bother doing anything. */
586
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
587
+ return 0;
588
+ }
355
589
 
356
590
  if (len == 0) {
357
- switch (state) {
591
+ switch (parser->state) {
358
592
  case s_body_identity_eof:
359
- CALLBACK2(message_complete);
593
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
594
+ * we got paused.
595
+ */
596
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
360
597
  return 0;
361
598
 
362
599
  case s_dead:
@@ -366,52 +603,55 @@ size_t http_parser_execute (http_parser *parser,
366
603
  return 0;
367
604
 
368
605
  default:
369
- return 1; // error
606
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
607
+ return 1;
370
608
  }
371
609
  }
372
610
 
373
- /* technically we could combine all of these (except for url_mark) into one
374
- variable, saving stack space, but it seems more clear to have them
375
- separated. */
376
- const char *header_field_mark = 0;
377
- const char *header_value_mark = 0;
378
- const char *fragment_mark = 0;
379
- const char *query_string_mark = 0;
380
- const char *path_mark = 0;
381
- const char *url_mark = 0;
382
611
 
383
- if (state == s_header_field)
612
+ if (parser->state == s_header_field)
384
613
  header_field_mark = data;
385
- if (state == s_header_value)
614
+ if (parser->state == s_header_value)
386
615
  header_value_mark = data;
387
- if (state == s_req_fragment)
388
- fragment_mark = data;
389
- if (state == s_req_query_string)
390
- query_string_mark = data;
391
- if (state == s_req_path)
392
- path_mark = data;
393
- if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
394
- || state == s_req_schema_slash_slash || state == s_req_port
395
- || state == s_req_query_string_start || state == s_req_query_string
396
- || state == s_req_host
397
- || state == s_req_fragment_start || state == s_req_fragment)
616
+ switch (parser->state) {
617
+ case s_req_path:
618
+ case s_req_schema:
619
+ case s_req_schema_slash:
620
+ case s_req_schema_slash_slash:
621
+ case s_req_server_start:
622
+ case s_req_server:
623
+ case s_req_server_with_at:
624
+ case s_req_query_string_start:
625
+ case s_req_query_string:
626
+ case s_req_fragment_start:
627
+ case s_req_fragment:
398
628
  url_mark = data;
629
+ break;
630
+ }
399
631
 
400
- for (p=data, pe=data+len; p != pe; p++) {
632
+ for (p=data; p != data + len; p++) {
401
633
  ch = *p;
402
634
 
403
- if (PARSING_HEADER(state)) {
404
- ++nread;
635
+ if (PARSING_HEADER(parser->state)) {
636
+ ++parser->nread;
405
637
  /* Buffer overflow attack */
406
- if (nread > HTTP_MAX_HEADER_SIZE) goto error;
638
+ if (parser->nread > HTTP_MAX_HEADER_SIZE) {
639
+ SET_ERRNO(HPE_HEADER_OVERFLOW);
640
+ goto error;
641
+ }
407
642
  }
408
643
 
409
- switch (state) {
644
+ reexecute_byte:
645
+ switch (parser->state) {
410
646
 
411
647
  case s_dead:
412
648
  /* this state is used after a 'Connection: close' message
413
649
  * the parser will error out if it reads another message
414
650
  */
651
+ if (ch == CR || ch == LF)
652
+ break;
653
+
654
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
415
655
  goto error;
416
656
 
417
657
  case s_start_req_or_res:
@@ -419,42 +659,46 @@ size_t http_parser_execute (http_parser *parser,
419
659
  if (ch == CR || ch == LF)
420
660
  break;
421
661
  parser->flags = 0;
422
- parser->content_length = -1;
662
+ parser->content_length = ULLONG_MAX;
423
663
 
424
- CALLBACK2(message_begin);
664
+ if (ch == 'H') {
665
+ parser->state = s_res_or_resp_H;
425
666
 
426
- if (ch == 'H')
427
- state = s_res_or_resp_H;
428
- else {
667
+ CALLBACK_NOTIFY(message_begin);
668
+ } else {
429
669
  parser->type = HTTP_REQUEST;
430
- goto start_req_method_assign;
670
+ parser->state = s_start_req;
671
+ goto reexecute_byte;
431
672
  }
673
+
432
674
  break;
433
675
  }
434
676
 
435
677
  case s_res_or_resp_H:
436
678
  if (ch == 'T') {
437
679
  parser->type = HTTP_RESPONSE;
438
- state = s_res_HT;
680
+ parser->state = s_res_HT;
439
681
  } else {
440
- if (ch != 'E') goto error;
682
+ if (ch != 'E') {
683
+ SET_ERRNO(HPE_INVALID_CONSTANT);
684
+ goto error;
685
+ }
686
+
441
687
  parser->type = HTTP_REQUEST;
442
688
  parser->method = HTTP_HEAD;
443
- index = 2;
444
- state = s_req_method;
689
+ parser->index = 2;
690
+ parser->state = s_req_method;
445
691
  }
446
692
  break;
447
693
 
448
694
  case s_start_res:
449
695
  {
450
696
  parser->flags = 0;
451
- parser->content_length = -1;
452
-
453
- CALLBACK2(message_begin);
697
+ parser->content_length = ULLONG_MAX;
454
698
 
455
699
  switch (ch) {
456
700
  case 'H':
457
- state = s_res_H;
701
+ parser->state = s_res_H;
458
702
  break;
459
703
 
460
704
  case CR:
@@ -462,105 +706,133 @@ size_t http_parser_execute (http_parser *parser,
462
706
  break;
463
707
 
464
708
  default:
709
+ SET_ERRNO(HPE_INVALID_CONSTANT);
465
710
  goto error;
466
711
  }
712
+
713
+ CALLBACK_NOTIFY(message_begin);
467
714
  break;
468
715
  }
469
716
 
470
717
  case s_res_H:
471
718
  STRICT_CHECK(ch != 'T');
472
- state = s_res_HT;
719
+ parser->state = s_res_HT;
473
720
  break;
474
721
 
475
722
  case s_res_HT:
476
723
  STRICT_CHECK(ch != 'T');
477
- state = s_res_HTT;
724
+ parser->state = s_res_HTT;
478
725
  break;
479
726
 
480
727
  case s_res_HTT:
481
728
  STRICT_CHECK(ch != 'P');
482
- state = s_res_HTTP;
729
+ parser->state = s_res_HTTP;
483
730
  break;
484
731
 
485
732
  case s_res_HTTP:
486
733
  STRICT_CHECK(ch != '/');
487
- state = s_res_first_http_major;
734
+ parser->state = s_res_first_http_major;
488
735
  break;
489
736
 
490
737
  case s_res_first_http_major:
491
- if (ch < '1' || ch > '9') goto error;
738
+ if (ch < '0' || ch > '9') {
739
+ SET_ERRNO(HPE_INVALID_VERSION);
740
+ goto error;
741
+ }
742
+
492
743
  parser->http_major = ch - '0';
493
- state = s_res_http_major;
744
+ parser->state = s_res_http_major;
494
745
  break;
495
746
 
496
747
  /* major HTTP version or dot */
497
748
  case s_res_http_major:
498
749
  {
499
750
  if (ch == '.') {
500
- state = s_res_first_http_minor;
751
+ parser->state = s_res_first_http_minor;
501
752
  break;
502
753
  }
503
754
 
504
- if (ch < '0' || ch > '9') goto error;
755
+ if (!IS_NUM(ch)) {
756
+ SET_ERRNO(HPE_INVALID_VERSION);
757
+ goto error;
758
+ }
505
759
 
506
760
  parser->http_major *= 10;
507
761
  parser->http_major += ch - '0';
508
762
 
509
- if (parser->http_major > 999) goto error;
763
+ if (parser->http_major > 999) {
764
+ SET_ERRNO(HPE_INVALID_VERSION);
765
+ goto error;
766
+ }
767
+
510
768
  break;
511
769
  }
512
770
 
513
771
  /* first digit of minor HTTP version */
514
772
  case s_res_first_http_minor:
515
- if (ch < '0' || ch > '9') goto error;
773
+ if (!IS_NUM(ch)) {
774
+ SET_ERRNO(HPE_INVALID_VERSION);
775
+ goto error;
776
+ }
777
+
516
778
  parser->http_minor = ch - '0';
517
- state = s_res_http_minor;
779
+ parser->state = s_res_http_minor;
518
780
  break;
519
781
 
520
782
  /* minor HTTP version or end of request line */
521
783
  case s_res_http_minor:
522
784
  {
523
785
  if (ch == ' ') {
524
- state = s_res_first_status_code;
786
+ parser->state = s_res_first_status_code;
525
787
  break;
526
788
  }
527
789
 
528
- if (ch < '0' || ch > '9') goto error;
790
+ if (!IS_NUM(ch)) {
791
+ SET_ERRNO(HPE_INVALID_VERSION);
792
+ goto error;
793
+ }
529
794
 
530
795
  parser->http_minor *= 10;
531
796
  parser->http_minor += ch - '0';
532
797
 
533
- if (parser->http_minor > 999) goto error;
798
+ if (parser->http_minor > 999) {
799
+ SET_ERRNO(HPE_INVALID_VERSION);
800
+ goto error;
801
+ }
802
+
534
803
  break;
535
804
  }
536
805
 
537
806
  case s_res_first_status_code:
538
807
  {
539
- if (ch < '0' || ch > '9') {
808
+ if (!IS_NUM(ch)) {
540
809
  if (ch == ' ') {
541
810
  break;
542
811
  }
812
+
813
+ SET_ERRNO(HPE_INVALID_STATUS);
543
814
  goto error;
544
815
  }
545
816
  parser->status_code = ch - '0';
546
- state = s_res_status_code;
817
+ parser->state = s_res_status_code;
547
818
  break;
548
819
  }
549
820
 
550
821
  case s_res_status_code:
551
822
  {
552
- if (ch < '0' || ch > '9') {
823
+ if (!IS_NUM(ch)) {
553
824
  switch (ch) {
554
825
  case ' ':
555
- state = s_res_status;
826
+ parser->state = s_res_status;
556
827
  break;
557
828
  case CR:
558
- state = s_res_line_almost_done;
829
+ parser->state = s_res_line_almost_done;
559
830
  break;
560
831
  case LF:
561
- state = s_header_field_start;
832
+ parser->state = s_header_field_start;
562
833
  break;
563
834
  default:
835
+ SET_ERRNO(HPE_INVALID_STATUS);
564
836
  goto error;
565
837
  }
566
838
  break;
@@ -569,7 +841,11 @@ size_t http_parser_execute (http_parser *parser,
569
841
  parser->status_code *= 10;
570
842
  parser->status_code += ch - '0';
571
843
 
572
- if (parser->status_code > 999) goto error;
844
+ if (parser->status_code > 999) {
845
+ SET_ERRNO(HPE_INVALID_STATUS);
846
+ goto error;
847
+ }
848
+
573
849
  break;
574
850
  }
575
851
 
@@ -577,19 +853,20 @@ size_t http_parser_execute (http_parser *parser,
577
853
  /* the human readable status. e.g. "NOT FOUND"
578
854
  * we are not humans so just ignore this */
579
855
  if (ch == CR) {
580
- state = s_res_line_almost_done;
856
+ parser->state = s_res_line_almost_done;
581
857
  break;
582
858
  }
583
859
 
584
860
  if (ch == LF) {
585
- state = s_header_field_start;
861
+ parser->state = s_header_field_start;
586
862
  break;
587
863
  }
588
864
  break;
589
865
 
590
866
  case s_res_line_almost_done:
591
867
  STRICT_CHECK(ch != LF);
592
- state = s_header_field_start;
868
+ parser->state = s_header_field_start;
869
+ CALLBACK_NOTIFY(status_complete);
593
870
  break;
594
871
 
595
872
  case s_start_req:
@@ -597,15 +874,15 @@ size_t http_parser_execute (http_parser *parser,
597
874
  if (ch == CR || ch == LF)
598
875
  break;
599
876
  parser->flags = 0;
600
- parser->content_length = -1;
601
-
602
- CALLBACK2(message_begin);
877
+ parser->content_length = ULLONG_MAX;
603
878
 
604
- if (ch < 'A' || 'Z' < ch) goto error;
879
+ if (!IS_ALPHA(ch)) {
880
+ SET_ERRNO(HPE_INVALID_METHOD);
881
+ goto error;
882
+ }
605
883
 
606
- start_req_method_assign:
607
884
  parser->method = (enum http_method) 0;
608
- index = 1;
885
+ parser->index = 1;
609
886
  switch (ch) {
610
887
  case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
611
888
  case 'D': parser->method = HTTP_DELETE; break;
@@ -615,483 +892,330 @@ size_t http_parser_execute (http_parser *parser,
615
892
  case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
616
893
  case 'N': parser->method = HTTP_NOTIFY; break;
617
894
  case 'O': parser->method = HTTP_OPTIONS; break;
618
- case 'P': parser->method = HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
895
+ case 'P': parser->method = HTTP_POST;
896
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
897
+ break;
619
898
  case 'R': parser->method = HTTP_REPORT; break;
620
- case 'S': parser->method = HTTP_SUBSCRIBE; break;
899
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
621
900
  case 'T': parser->method = HTTP_TRACE; break;
622
901
  case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
623
- default: goto error;
902
+ default:
903
+ SET_ERRNO(HPE_INVALID_METHOD);
904
+ goto error;
624
905
  }
625
- state = s_req_method;
906
+ parser->state = s_req_method;
907
+
908
+ CALLBACK_NOTIFY(message_begin);
909
+
626
910
  break;
627
911
  }
628
912
 
629
913
  case s_req_method:
630
914
  {
631
- if (ch == '\0')
915
+ const char *matcher;
916
+ if (ch == '\0') {
917
+ SET_ERRNO(HPE_INVALID_METHOD);
632
918
  goto error;
919
+ }
633
920
 
634
- const char *matcher = method_strings[parser->method];
635
- if (ch == ' ' && matcher[index] == '\0') {
636
- state = s_req_spaces_before_url;
637
- } else if (ch == matcher[index]) {
921
+ matcher = method_strings[parser->method];
922
+ if (ch == ' ' && matcher[parser->index] == '\0') {
923
+ parser->state = s_req_spaces_before_url;
924
+ } else if (ch == matcher[parser->index]) {
638
925
  ; /* nada */
639
926
  } else if (parser->method == HTTP_CONNECT) {
640
- if (index == 1 && ch == 'H') {
927
+ if (parser->index == 1 && ch == 'H') {
641
928
  parser->method = HTTP_CHECKOUT;
642
- } else if (index == 2 && ch == 'P') {
929
+ } else if (parser->index == 2 && ch == 'P') {
643
930
  parser->method = HTTP_COPY;
931
+ } else {
932
+ goto error;
644
933
  }
645
934
  } else if (parser->method == HTTP_MKCOL) {
646
- if (index == 1 && ch == 'O') {
935
+ if (parser->index == 1 && ch == 'O') {
647
936
  parser->method = HTTP_MOVE;
648
- } else if (index == 1 && ch == 'E') {
937
+ } else if (parser->index == 1 && ch == 'E') {
649
938
  parser->method = HTTP_MERGE;
650
- } else if (index == 1 && ch == '-') {
939
+ } else if (parser->index == 1 && ch == '-') {
651
940
  parser->method = HTTP_MSEARCH;
652
- } else if (index == 2 && ch == 'A') {
941
+ } else if (parser->index == 2 && ch == 'A') {
653
942
  parser->method = HTTP_MKACTIVITY;
943
+ } else {
944
+ goto error;
945
+ }
946
+ } else if (parser->method == HTTP_SUBSCRIBE) {
947
+ if (parser->index == 1 && ch == 'E') {
948
+ parser->method = HTTP_SEARCH;
949
+ } else {
950
+ goto error;
951
+ }
952
+ } else if (parser->index == 1 && parser->method == HTTP_POST) {
953
+ if (ch == 'R') {
954
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
955
+ } else if (ch == 'U') {
956
+ parser->method = HTTP_PUT; /* or HTTP_PURGE */
957
+ } else if (ch == 'A') {
958
+ parser->method = HTTP_PATCH;
959
+ } else {
960
+ goto error;
654
961
  }
655
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'R') {
656
- parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
657
- } else if (index == 1 && parser->method == HTTP_POST && ch == 'U') {
658
- parser->method = HTTP_PUT;
659
- } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') {
660
- parser->method = HTTP_UNSUBSCRIBE;
661
- } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
962
+ } else if (parser->index == 2) {
963
+ if (parser->method == HTTP_PUT) {
964
+ if (ch == 'R') parser->method = HTTP_PURGE;
965
+ } else if (parser->method == HTTP_UNLOCK) {
966
+ if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
967
+ }
968
+ } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
662
969
  parser->method = HTTP_PROPPATCH;
663
970
  } else {
971
+ SET_ERRNO(HPE_INVALID_METHOD);
664
972
  goto error;
665
973
  }
666
974
 
667
- ++index;
975
+ ++parser->index;
668
976
  break;
669
977
  }
978
+
670
979
  case s_req_spaces_before_url:
671
980
  {
672
981
  if (ch == ' ') break;
673
982
 
674
- if (ch == '/' || ch == '*') {
675
- MARK(url);
676
- MARK(path);
677
- state = s_req_path;
678
- break;
983
+ MARK(url);
984
+ if (parser->method == HTTP_CONNECT) {
985
+ parser->state = s_req_server_start;
679
986
  }
680
987
 
681
- c = LOWER(ch);
682
-
683
- if (c >= 'a' && c <= 'z') {
684
- MARK(url);
685
- state = s_req_schema;
686
- break;
988
+ parser->state = parse_url_char((enum state)parser->state, ch);
989
+ if (parser->state == s_dead) {
990
+ SET_ERRNO(HPE_INVALID_URL);
991
+ goto error;
687
992
  }
688
993
 
689
- goto error;
994
+ break;
690
995
  }
691
996
 
692
997
  case s_req_schema:
693
- {
694
- c = LOWER(ch);
695
-
696
- if (c >= 'a' && c <= 'z') break;
697
-
698
- if (ch == ':') {
699
- state = s_req_schema_slash;
700
- break;
701
- } else if (ch == '.') {
702
- state = s_req_host;
703
- break;
704
- } else if ('0' <= ch && ch <= '9') {
705
- state = s_req_host;
706
- break;
707
- }
708
-
709
- goto error;
710
- }
711
-
712
998
  case s_req_schema_slash:
713
- STRICT_CHECK(ch != '/');
714
- state = s_req_schema_slash_slash;
715
- break;
716
-
717
999
  case s_req_schema_slash_slash:
718
- STRICT_CHECK(ch != '/');
719
- state = s_req_host;
720
- break;
721
-
722
- case s_req_host:
1000
+ case s_req_server_start:
723
1001
  {
724
- c = LOWER(ch);
725
- if (c >= 'a' && c <= 'z') break;
726
- if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
727
1002
  switch (ch) {
728
- case ':':
729
- state = s_req_port;
730
- break;
731
- case '/':
732
- MARK(path);
733
- state = s_req_path;
734
- break;
1003
+ /* No whitespace allowed here */
735
1004
  case ' ':
736
- /* The request line looks like:
737
- * "GET http://foo.bar.com HTTP/1.1"
738
- * That is, there is no path.
739
- */
740
- CALLBACK(url);
741
- state = s_req_http_start;
742
- break;
743
- case '?':
744
- state = s_req_query_string_start;
745
- break;
746
- default:
1005
+ case CR:
1006
+ case LF:
1007
+ SET_ERRNO(HPE_INVALID_URL);
747
1008
  goto error;
748
- }
749
- break;
750
- }
751
-
752
- case s_req_port:
753
- {
754
- if (ch >= '0' && ch <= '9') break;
755
- switch (ch) {
756
- case '/':
757
- MARK(path);
758
- state = s_req_path;
759
- break;
760
- case ' ':
761
- /* The request line looks like:
762
- * "GET http://foo.bar.com:1234 HTTP/1.1"
763
- * That is, there is no path.
764
- */
765
- CALLBACK(url);
766
- state = s_req_http_start;
767
- break;
768
- case '?':
769
- state = s_req_query_string_start;
770
- break;
771
1009
  default:
772
- goto error;
1010
+ parser->state = parse_url_char((enum state)parser->state, ch);
1011
+ if (parser->state == s_dead) {
1012
+ SET_ERRNO(HPE_INVALID_URL);
1013
+ goto error;
1014
+ }
773
1015
  }
1016
+
774
1017
  break;
775
1018
  }
776
1019
 
1020
+ case s_req_server:
1021
+ case s_req_server_with_at:
777
1022
  case s_req_path:
1023
+ case s_req_query_string_start:
1024
+ case s_req_query_string:
1025
+ case s_req_fragment_start:
1026
+ case s_req_fragment:
778
1027
  {
779
- if (normal_url_char[(unsigned char)ch]) break;
780
-
781
1028
  switch (ch) {
782
1029
  case ' ':
783
- CALLBACK(url);
784
- CALLBACK(path);
785
- state = s_req_http_start;
1030
+ parser->state = s_req_http_start;
1031
+ CALLBACK_DATA(url);
786
1032
  break;
787
1033
  case CR:
788
- CALLBACK(url);
789
- CALLBACK(path);
790
- parser->http_major = 0;
791
- parser->http_minor = 9;
792
- state = s_req_line_almost_done;
793
- break;
794
1034
  case LF:
795
- CALLBACK(url);
796
- CALLBACK(path);
797
1035
  parser->http_major = 0;
798
1036
  parser->http_minor = 9;
799
- state = s_header_field_start;
800
- break;
801
- case '?':
802
- CALLBACK(path);
803
- state = s_req_query_string_start;
804
- break;
805
- case '#':
806
- CALLBACK(path);
807
- state = s_req_fragment_start;
1037
+ parser->state = (ch == CR) ?
1038
+ s_req_line_almost_done :
1039
+ s_header_field_start;
1040
+ CALLBACK_DATA(url);
808
1041
  break;
809
1042
  default:
810
- goto error;
1043
+ parser->state = parse_url_char((enum state)parser->state, ch);
1044
+ if (parser->state == s_dead) {
1045
+ SET_ERRNO(HPE_INVALID_URL);
1046
+ goto error;
1047
+ }
811
1048
  }
812
1049
  break;
813
1050
  }
814
1051
 
815
- case s_req_query_string_start:
816
- {
817
- if (normal_url_char[(unsigned char)ch]) {
818
- MARK(query_string);
819
- state = s_req_query_string;
820
- break;
821
- }
822
-
1052
+ case s_req_http_start:
823
1053
  switch (ch) {
824
- case '?':
825
- break; /* XXX ignore extra '?' ... is this right? */
826
- case ' ':
827
- CALLBACK(url);
828
- state = s_req_http_start;
829
- break;
830
- case CR:
831
- CALLBACK(url);
832
- parser->http_major = 0;
833
- parser->http_minor = 9;
834
- state = s_req_line_almost_done;
835
- break;
836
- case LF:
837
- CALLBACK(url);
838
- parser->http_major = 0;
839
- parser->http_minor = 9;
840
- state = s_header_field_start;
1054
+ case 'H':
1055
+ parser->state = s_req_http_H;
841
1056
  break;
842
- case '#':
843
- state = s_req_fragment_start;
1057
+ case ' ':
844
1058
  break;
845
1059
  default:
1060
+ SET_ERRNO(HPE_INVALID_CONSTANT);
846
1061
  goto error;
847
1062
  }
848
1063
  break;
849
- }
850
1064
 
851
- case s_req_query_string:
852
- {
853
- if (normal_url_char[(unsigned char)ch]) break;
854
-
855
- switch (ch) {
856
- case '?':
857
- /* allow extra '?' in query string */
858
- break;
859
- case ' ':
860
- CALLBACK(url);
861
- CALLBACK(query_string);
862
- state = s_req_http_start;
863
- break;
864
- case CR:
865
- CALLBACK(url);
866
- CALLBACK(query_string);
867
- parser->http_major = 0;
868
- parser->http_minor = 9;
869
- state = s_req_line_almost_done;
870
- break;
871
- case LF:
872
- CALLBACK(url);
873
- CALLBACK(query_string);
874
- parser->http_major = 0;
875
- parser->http_minor = 9;
876
- state = s_header_field_start;
877
- break;
878
- case '#':
879
- CALLBACK(query_string);
880
- state = s_req_fragment_start;
881
- break;
882
- default:
883
- goto error;
884
- }
885
- break;
886
- }
887
-
888
- case s_req_fragment_start:
889
- {
890
- if (normal_url_char[(unsigned char)ch]) {
891
- MARK(fragment);
892
- state = s_req_fragment;
893
- break;
894
- }
895
-
896
- switch (ch) {
897
- case ' ':
898
- CALLBACK(url);
899
- state = s_req_http_start;
900
- break;
901
- case CR:
902
- CALLBACK(url);
903
- parser->http_major = 0;
904
- parser->http_minor = 9;
905
- state = s_req_line_almost_done;
906
- break;
907
- case LF:
908
- CALLBACK(url);
909
- parser->http_major = 0;
910
- parser->http_minor = 9;
911
- state = s_header_field_start;
912
- break;
913
- case '?':
914
- MARK(fragment);
915
- state = s_req_fragment;
916
- break;
917
- case '#':
918
- break;
919
- default:
920
- goto error;
921
- }
922
- break;
923
- }
924
-
925
- case s_req_fragment:
926
- {
927
- if (normal_url_char[(unsigned char)ch]) break;
928
-
929
- switch (ch) {
930
- case ' ':
931
- CALLBACK(url);
932
- CALLBACK(fragment);
933
- state = s_req_http_start;
934
- break;
935
- case CR:
936
- CALLBACK(url);
937
- CALLBACK(fragment);
938
- parser->http_major = 0;
939
- parser->http_minor = 9;
940
- state = s_req_line_almost_done;
941
- break;
942
- case LF:
943
- CALLBACK(url);
944
- CALLBACK(fragment);
945
- parser->http_major = 0;
946
- parser->http_minor = 9;
947
- state = s_header_field_start;
948
- break;
949
- case '?':
950
- case '#':
951
- break;
952
- default:
953
- goto error;
954
- }
955
- break;
956
- }
957
-
958
- case s_req_http_start:
959
- switch (ch) {
960
- case 'H':
961
- state = s_req_http_H;
962
- break;
963
- case ' ':
964
- break;
965
- default:
966
- goto error;
967
- }
968
- break;
969
-
970
- case s_req_http_H:
971
- STRICT_CHECK(ch != 'T');
972
- state = s_req_http_HT;
973
- break;
1065
+ case s_req_http_H:
1066
+ STRICT_CHECK(ch != 'T');
1067
+ parser->state = s_req_http_HT;
1068
+ break;
974
1069
 
975
1070
  case s_req_http_HT:
976
1071
  STRICT_CHECK(ch != 'T');
977
- state = s_req_http_HTT;
1072
+ parser->state = s_req_http_HTT;
978
1073
  break;
979
1074
 
980
1075
  case s_req_http_HTT:
981
1076
  STRICT_CHECK(ch != 'P');
982
- state = s_req_http_HTTP;
1077
+ parser->state = s_req_http_HTTP;
983
1078
  break;
984
1079
 
985
1080
  case s_req_http_HTTP:
986
1081
  STRICT_CHECK(ch != '/');
987
- state = s_req_first_http_major;
1082
+ parser->state = s_req_first_http_major;
988
1083
  break;
989
1084
 
990
1085
  /* first digit of major HTTP version */
991
1086
  case s_req_first_http_major:
992
- if (ch < '1' || ch > '9') goto error;
1087
+ if (ch < '1' || ch > '9') {
1088
+ SET_ERRNO(HPE_INVALID_VERSION);
1089
+ goto error;
1090
+ }
1091
+
993
1092
  parser->http_major = ch - '0';
994
- state = s_req_http_major;
1093
+ parser->state = s_req_http_major;
995
1094
  break;
996
1095
 
997
1096
  /* major HTTP version or dot */
998
1097
  case s_req_http_major:
999
1098
  {
1000
1099
  if (ch == '.') {
1001
- state = s_req_first_http_minor;
1100
+ parser->state = s_req_first_http_minor;
1002
1101
  break;
1003
1102
  }
1004
1103
 
1005
- if (ch < '0' || ch > '9') goto error;
1104
+ if (!IS_NUM(ch)) {
1105
+ SET_ERRNO(HPE_INVALID_VERSION);
1106
+ goto error;
1107
+ }
1006
1108
 
1007
1109
  parser->http_major *= 10;
1008
1110
  parser->http_major += ch - '0';
1009
1111
 
1010
- if (parser->http_major > 999) goto error;
1112
+ if (parser->http_major > 999) {
1113
+ SET_ERRNO(HPE_INVALID_VERSION);
1114
+ goto error;
1115
+ }
1116
+
1011
1117
  break;
1012
1118
  }
1013
1119
 
1014
1120
  /* first digit of minor HTTP version */
1015
1121
  case s_req_first_http_minor:
1016
- if (ch < '0' || ch > '9') goto error;
1122
+ if (!IS_NUM(ch)) {
1123
+ SET_ERRNO(HPE_INVALID_VERSION);
1124
+ goto error;
1125
+ }
1126
+
1017
1127
  parser->http_minor = ch - '0';
1018
- state = s_req_http_minor;
1128
+ parser->state = s_req_http_minor;
1019
1129
  break;
1020
1130
 
1021
1131
  /* minor HTTP version or end of request line */
1022
1132
  case s_req_http_minor:
1023
1133
  {
1024
1134
  if (ch == CR) {
1025
- state = s_req_line_almost_done;
1135
+ parser->state = s_req_line_almost_done;
1026
1136
  break;
1027
1137
  }
1028
1138
 
1029
1139
  if (ch == LF) {
1030
- state = s_header_field_start;
1140
+ parser->state = s_header_field_start;
1031
1141
  break;
1032
1142
  }
1033
1143
 
1034
1144
  /* XXX allow spaces after digit? */
1035
1145
 
1036
- if (ch < '0' || ch > '9') goto error;
1146
+ if (!IS_NUM(ch)) {
1147
+ SET_ERRNO(HPE_INVALID_VERSION);
1148
+ goto error;
1149
+ }
1037
1150
 
1038
1151
  parser->http_minor *= 10;
1039
1152
  parser->http_minor += ch - '0';
1040
1153
 
1041
- if (parser->http_minor > 999) goto error;
1154
+ if (parser->http_minor > 999) {
1155
+ SET_ERRNO(HPE_INVALID_VERSION);
1156
+ goto error;
1157
+ }
1158
+
1042
1159
  break;
1043
1160
  }
1044
1161
 
1045
1162
  /* end of request line */
1046
1163
  case s_req_line_almost_done:
1047
1164
  {
1048
- if (ch != LF) goto error;
1049
- state = s_header_field_start;
1165
+ if (ch != LF) {
1166
+ SET_ERRNO(HPE_LF_EXPECTED);
1167
+ goto error;
1168
+ }
1169
+
1170
+ parser->state = s_header_field_start;
1050
1171
  break;
1051
1172
  }
1052
1173
 
1053
1174
  case s_header_field_start:
1054
1175
  {
1055
1176
  if (ch == CR) {
1056
- state = s_headers_almost_done;
1177
+ parser->state = s_headers_almost_done;
1057
1178
  break;
1058
1179
  }
1059
1180
 
1060
1181
  if (ch == LF) {
1061
1182
  /* they might be just sending \n instead of \r\n so this would be
1062
1183
  * the second \n to denote the end of headers*/
1063
- state = s_headers_almost_done;
1064
- goto headers_almost_done;
1184
+ parser->state = s_headers_almost_done;
1185
+ goto reexecute_byte;
1065
1186
  }
1066
1187
 
1067
1188
  c = TOKEN(ch);
1068
1189
 
1069
- if (!c) goto error;
1190
+ if (!c) {
1191
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1192
+ goto error;
1193
+ }
1070
1194
 
1071
1195
  MARK(header_field);
1072
1196
 
1073
- index = 0;
1074
- state = s_header_field;
1197
+ parser->index = 0;
1198
+ parser->state = s_header_field;
1075
1199
 
1076
1200
  switch (c) {
1077
1201
  case 'c':
1078
- header_state = h_C;
1202
+ parser->header_state = h_C;
1079
1203
  break;
1080
1204
 
1081
1205
  case 'p':
1082
- header_state = h_matching_proxy_connection;
1206
+ parser->header_state = h_matching_proxy_connection;
1083
1207
  break;
1084
1208
 
1085
1209
  case 't':
1086
- header_state = h_matching_transfer_encoding;
1210
+ parser->header_state = h_matching_transfer_encoding;
1087
1211
  break;
1088
1212
 
1089
1213
  case 'u':
1090
- header_state = h_matching_upgrade;
1214
+ parser->header_state = h_matching_upgrade;
1091
1215
  break;
1092
1216
 
1093
1217
  default:
1094
- header_state = h_general;
1218
+ parser->header_state = h_general;
1095
1219
  break;
1096
1220
  }
1097
1221
  break;
@@ -1102,31 +1226,31 @@ size_t http_parser_execute (http_parser *parser,
1102
1226
  c = TOKEN(ch);
1103
1227
 
1104
1228
  if (c) {
1105
- switch (header_state) {
1229
+ switch (parser->header_state) {
1106
1230
  case h_general:
1107
1231
  break;
1108
1232
 
1109
1233
  case h_C:
1110
- index++;
1111
- header_state = (c == 'o' ? h_CO : h_general);
1234
+ parser->index++;
1235
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1112
1236
  break;
1113
1237
 
1114
1238
  case h_CO:
1115
- index++;
1116
- header_state = (c == 'n' ? h_CON : h_general);
1239
+ parser->index++;
1240
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1117
1241
  break;
1118
1242
 
1119
1243
  case h_CON:
1120
- index++;
1244
+ parser->index++;
1121
1245
  switch (c) {
1122
1246
  case 'n':
1123
- header_state = h_matching_connection;
1247
+ parser->header_state = h_matching_connection;
1124
1248
  break;
1125
1249
  case 't':
1126
- header_state = h_matching_content_length;
1250
+ parser->header_state = h_matching_content_length;
1127
1251
  break;
1128
1252
  default:
1129
- header_state = h_general;
1253
+ parser->header_state = h_general;
1130
1254
  break;
1131
1255
  }
1132
1256
  break;
@@ -1134,60 +1258,60 @@ size_t http_parser_execute (http_parser *parser,
1134
1258
  /* connection */
1135
1259
 
1136
1260
  case h_matching_connection:
1137
- index++;
1138
- if (index > sizeof(CONNECTION)-1
1139
- || c != CONNECTION[index]) {
1140
- header_state = h_general;
1141
- } else if (index == sizeof(CONNECTION)-2) {
1142
- header_state = h_connection;
1261
+ parser->index++;
1262
+ if (parser->index > sizeof(CONNECTION)-1
1263
+ || c != CONNECTION[parser->index]) {
1264
+ parser->header_state = h_general;
1265
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1266
+ parser->header_state = h_connection;
1143
1267
  }
1144
1268
  break;
1145
1269
 
1146
1270
  /* proxy-connection */
1147
1271
 
1148
1272
  case h_matching_proxy_connection:
1149
- index++;
1150
- if (index > sizeof(PROXY_CONNECTION)-1
1151
- || c != PROXY_CONNECTION[index]) {
1152
- header_state = h_general;
1153
- } else if (index == sizeof(PROXY_CONNECTION)-2) {
1154
- header_state = h_connection;
1273
+ parser->index++;
1274
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1275
+ || c != PROXY_CONNECTION[parser->index]) {
1276
+ parser->header_state = h_general;
1277
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1278
+ parser->header_state = h_connection;
1155
1279
  }
1156
1280
  break;
1157
1281
 
1158
1282
  /* content-length */
1159
1283
 
1160
1284
  case h_matching_content_length:
1161
- index++;
1162
- if (index > sizeof(CONTENT_LENGTH)-1
1163
- || c != CONTENT_LENGTH[index]) {
1164
- header_state = h_general;
1165
- } else if (index == sizeof(CONTENT_LENGTH)-2) {
1166
- header_state = h_content_length;
1285
+ parser->index++;
1286
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1287
+ || c != CONTENT_LENGTH[parser->index]) {
1288
+ parser->header_state = h_general;
1289
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1290
+ parser->header_state = h_content_length;
1167
1291
  }
1168
1292
  break;
1169
1293
 
1170
1294
  /* transfer-encoding */
1171
1295
 
1172
1296
  case h_matching_transfer_encoding:
1173
- index++;
1174
- if (index > sizeof(TRANSFER_ENCODING)-1
1175
- || c != TRANSFER_ENCODING[index]) {
1176
- header_state = h_general;
1177
- } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1178
- header_state = h_transfer_encoding;
1297
+ parser->index++;
1298
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1299
+ || c != TRANSFER_ENCODING[parser->index]) {
1300
+ parser->header_state = h_general;
1301
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1302
+ parser->header_state = h_transfer_encoding;
1179
1303
  }
1180
1304
  break;
1181
1305
 
1182
1306
  /* upgrade */
1183
1307
 
1184
1308
  case h_matching_upgrade:
1185
- index++;
1186
- if (index > sizeof(UPGRADE)-1
1187
- || c != UPGRADE[index]) {
1188
- header_state = h_general;
1189
- } else if (index == sizeof(UPGRADE)-2) {
1190
- header_state = h_upgrade;
1309
+ parser->index++;
1310
+ if (parser->index > sizeof(UPGRADE)-1
1311
+ || c != UPGRADE[parser->index]) {
1312
+ parser->header_state = h_general;
1313
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1314
+ parser->header_state = h_upgrade;
1191
1315
  }
1192
1316
  break;
1193
1317
 
@@ -1195,7 +1319,7 @@ size_t http_parser_execute (http_parser *parser,
1195
1319
  case h_content_length:
1196
1320
  case h_transfer_encoding:
1197
1321
  case h_upgrade:
1198
- if (ch != ' ') header_state = h_general;
1322
+ if (ch != ' ') parser->header_state = h_general;
1199
1323
  break;
1200
1324
 
1201
1325
  default:
@@ -1206,84 +1330,89 @@ size_t http_parser_execute (http_parser *parser,
1206
1330
  }
1207
1331
 
1208
1332
  if (ch == ':') {
1209
- CALLBACK(header_field);
1210
- state = s_header_value_start;
1333
+ parser->state = s_header_value_start;
1334
+ CALLBACK_DATA(header_field);
1211
1335
  break;
1212
1336
  }
1213
1337
 
1214
1338
  if (ch == CR) {
1215
- state = s_header_almost_done;
1216
- CALLBACK(header_field);
1339
+ parser->state = s_header_almost_done;
1340
+ CALLBACK_DATA(header_field);
1217
1341
  break;
1218
1342
  }
1219
1343
 
1220
1344
  if (ch == LF) {
1221
- CALLBACK(header_field);
1222
- state = s_header_field_start;
1345
+ parser->state = s_header_field_start;
1346
+ CALLBACK_DATA(header_field);
1223
1347
  break;
1224
1348
  }
1225
1349
 
1350
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1226
1351
  goto error;
1227
1352
  }
1228
1353
 
1229
1354
  case s_header_value_start:
1230
1355
  {
1231
- if (ch == ' ') break;
1356
+ if (ch == ' ' || ch == '\t') break;
1232
1357
 
1233
1358
  MARK(header_value);
1234
1359
 
1235
- state = s_header_value;
1236
- index = 0;
1237
-
1238
- c = LOWER(ch);
1360
+ parser->state = s_header_value;
1361
+ parser->index = 0;
1239
1362
 
1240
1363
  if (ch == CR) {
1241
- CALLBACK(header_value);
1242
- header_state = h_general;
1243
- state = s_header_almost_done;
1364
+ parser->header_state = h_general;
1365
+ parser->state = s_header_almost_done;
1366
+ CALLBACK_DATA(header_value);
1244
1367
  break;
1245
1368
  }
1246
1369
 
1247
1370
  if (ch == LF) {
1248
- CALLBACK(header_value);
1249
- state = s_header_field_start;
1371
+ parser->state = s_header_field_start;
1372
+ CALLBACK_DATA(header_value);
1250
1373
  break;
1251
1374
  }
1252
1375
 
1253
- switch (header_state) {
1376
+ c = LOWER(ch);
1377
+
1378
+ switch (parser->header_state) {
1254
1379
  case h_upgrade:
1255
1380
  parser->flags |= F_UPGRADE;
1256
- header_state = h_general;
1381
+ parser->header_state = h_general;
1257
1382
  break;
1258
1383
 
1259
1384
  case h_transfer_encoding:
1260
1385
  /* looking for 'Transfer-Encoding: chunked' */
1261
1386
  if ('c' == c) {
1262
- header_state = h_matching_transfer_encoding_chunked;
1387
+ parser->header_state = h_matching_transfer_encoding_chunked;
1263
1388
  } else {
1264
- header_state = h_general;
1389
+ parser->header_state = h_general;
1265
1390
  }
1266
1391
  break;
1267
1392
 
1268
1393
  case h_content_length:
1269
- if (ch < '0' || ch > '9') goto error;
1394
+ if (!IS_NUM(ch)) {
1395
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1396
+ goto error;
1397
+ }
1398
+
1270
1399
  parser->content_length = ch - '0';
1271
1400
  break;
1272
1401
 
1273
1402
  case h_connection:
1274
1403
  /* looking for 'Connection: keep-alive' */
1275
1404
  if (c == 'k') {
1276
- header_state = h_matching_connection_keep_alive;
1405
+ parser->header_state = h_matching_connection_keep_alive;
1277
1406
  /* looking for 'Connection: close' */
1278
1407
  } else if (c == 'c') {
1279
- header_state = h_matching_connection_close;
1408
+ parser->header_state = h_matching_connection_close;
1280
1409
  } else {
1281
- header_state = h_general;
1410
+ parser->header_state = h_general;
1282
1411
  }
1283
1412
  break;
1284
1413
 
1285
1414
  default:
1286
- header_state = h_general;
1415
+ parser->header_state = h_general;
1287
1416
  break;
1288
1417
  }
1289
1418
  break;
@@ -1291,20 +1420,22 @@ size_t http_parser_execute (http_parser *parser,
1291
1420
 
1292
1421
  case s_header_value:
1293
1422
  {
1294
- c = LOWER(ch);
1295
1423
 
1296
1424
  if (ch == CR) {
1297
- CALLBACK(header_value);
1298
- state = s_header_almost_done;
1425
+ parser->state = s_header_almost_done;
1426
+ CALLBACK_DATA(header_value);
1299
1427
  break;
1300
1428
  }
1301
1429
 
1302
1430
  if (ch == LF) {
1303
- CALLBACK(header_value);
1304
- goto header_almost_done;
1431
+ parser->state = s_header_almost_done;
1432
+ CALLBACK_DATA_NOADVANCE(header_value);
1433
+ goto reexecute_byte;
1305
1434
  }
1306
1435
 
1307
- switch (header_state) {
1436
+ c = LOWER(ch);
1437
+
1438
+ switch (parser->header_state) {
1308
1439
  case h_general:
1309
1440
  break;
1310
1441
 
@@ -1314,66 +1445,83 @@ size_t http_parser_execute (http_parser *parser,
1314
1445
  break;
1315
1446
 
1316
1447
  case h_content_length:
1448
+ {
1449
+ uint64_t t;
1450
+
1317
1451
  if (ch == ' ') break;
1318
- if (ch < '0' || ch > '9') goto error;
1319
- parser->content_length *= 10;
1320
- parser->content_length += ch - '0';
1452
+
1453
+ if (!IS_NUM(ch)) {
1454
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1455
+ goto error;
1456
+ }
1457
+
1458
+ t = parser->content_length;
1459
+ t *= 10;
1460
+ t += ch - '0';
1461
+
1462
+ /* Overflow? */
1463
+ if (t < parser->content_length || t == ULLONG_MAX) {
1464
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1465
+ goto error;
1466
+ }
1467
+
1468
+ parser->content_length = t;
1321
1469
  break;
1470
+ }
1322
1471
 
1323
1472
  /* Transfer-Encoding: chunked */
1324
1473
  case h_matching_transfer_encoding_chunked:
1325
- index++;
1326
- if (index > sizeof(CHUNKED)-1
1327
- || c != CHUNKED[index]) {
1328
- header_state = h_general;
1329
- } else if (index == sizeof(CHUNKED)-2) {
1330
- header_state = h_transfer_encoding_chunked;
1474
+ parser->index++;
1475
+ if (parser->index > sizeof(CHUNKED)-1
1476
+ || c != CHUNKED[parser->index]) {
1477
+ parser->header_state = h_general;
1478
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1479
+ parser->header_state = h_transfer_encoding_chunked;
1331
1480
  }
1332
1481
  break;
1333
1482
 
1334
1483
  /* looking for 'Connection: keep-alive' */
1335
1484
  case h_matching_connection_keep_alive:
1336
- index++;
1337
- if (index > sizeof(KEEP_ALIVE)-1
1338
- || c != KEEP_ALIVE[index]) {
1339
- header_state = h_general;
1340
- } else if (index == sizeof(KEEP_ALIVE)-2) {
1341
- header_state = h_connection_keep_alive;
1485
+ parser->index++;
1486
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1487
+ || c != KEEP_ALIVE[parser->index]) {
1488
+ parser->header_state = h_general;
1489
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1490
+ parser->header_state = h_connection_keep_alive;
1342
1491
  }
1343
1492
  break;
1344
1493
 
1345
1494
  /* looking for 'Connection: close' */
1346
1495
  case h_matching_connection_close:
1347
- index++;
1348
- if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1349
- header_state = h_general;
1350
- } else if (index == sizeof(CLOSE)-2) {
1351
- header_state = h_connection_close;
1496
+ parser->index++;
1497
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1498
+ parser->header_state = h_general;
1499
+ } else if (parser->index == sizeof(CLOSE)-2) {
1500
+ parser->header_state = h_connection_close;
1352
1501
  }
1353
1502
  break;
1354
1503
 
1355
1504
  case h_transfer_encoding_chunked:
1356
1505
  case h_connection_keep_alive:
1357
1506
  case h_connection_close:
1358
- if (ch != ' ') header_state = h_general;
1507
+ if (ch != ' ') parser->header_state = h_general;
1359
1508
  break;
1360
1509
 
1361
1510
  default:
1362
- state = s_header_value;
1363
- header_state = h_general;
1511
+ parser->state = s_header_value;
1512
+ parser->header_state = h_general;
1364
1513
  break;
1365
1514
  }
1366
1515
  break;
1367
1516
  }
1368
1517
 
1369
1518
  case s_header_almost_done:
1370
- header_almost_done:
1371
1519
  {
1372
1520
  STRICT_CHECK(ch != LF);
1373
1521
 
1374
- state = s_header_field_start;
1522
+ parser->state = s_header_value_lws;
1375
1523
 
1376
- switch (header_state) {
1524
+ switch (parser->header_state) {
1377
1525
  case h_connection_keep_alive:
1378
1526
  parser->flags |= F_CONNECTION_KEEP_ALIVE;
1379
1527
  break;
@@ -1386,32 +1534,47 @@ size_t http_parser_execute (http_parser *parser,
1386
1534
  default:
1387
1535
  break;
1388
1536
  }
1537
+
1538
+ break;
1539
+ }
1540
+
1541
+ case s_header_value_lws:
1542
+ {
1543
+ if (ch == ' ' || ch == '\t')
1544
+ parser->state = s_header_value_start;
1545
+ else
1546
+ {
1547
+ parser->state = s_header_field_start;
1548
+ goto reexecute_byte;
1549
+ }
1389
1550
  break;
1390
1551
  }
1391
1552
 
1392
1553
  case s_headers_almost_done:
1393
- headers_almost_done:
1394
1554
  {
1395
1555
  STRICT_CHECK(ch != LF);
1396
1556
 
1397
1557
  if (parser->flags & F_TRAILING) {
1398
1558
  /* End of a chunked request */
1399
- CALLBACK2(message_complete);
1400
- state = NEW_MESSAGE();
1559
+ parser->state = NEW_MESSAGE();
1560
+ CALLBACK_NOTIFY(message_complete);
1401
1561
  break;
1402
1562
  }
1403
1563
 
1404
- nread = 0;
1564
+ parser->state = s_headers_done;
1405
1565
 
1406
- if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) {
1407
- parser->upgrade = 1;
1408
- }
1566
+ /* Set this here so that on_headers_complete() callbacks can see it */
1567
+ parser->upgrade =
1568
+ (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1409
1569
 
1410
1570
  /* Here we call the headers_complete callback. This is somewhat
1411
1571
  * different than other callbacks because if the user returns 1, we
1412
1572
  * will interpret that as saying that this message has no body. This
1413
1573
  * is needed for the annoying case of receiving a response to a HEAD
1414
1574
  * request.
1575
+ *
1576
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1577
+ * we have to simulate it by handling a change in errno below.
1415
1578
  */
1416
1579
  if (settings->on_headers_complete) {
1417
1580
  switch (settings->on_headers_complete(parser)) {
@@ -1423,39 +1586,54 @@ size_t http_parser_execute (http_parser *parser,
1423
1586
  break;
1424
1587
 
1425
1588
  default:
1426
- parser->state = state;
1589
+ SET_ERRNO(HPE_CB_headers_complete);
1427
1590
  return p - data; /* Error */
1428
1591
  }
1429
1592
  }
1430
1593
 
1594
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1595
+ return p - data;
1596
+ }
1597
+
1598
+ goto reexecute_byte;
1599
+ }
1600
+
1601
+ case s_headers_done:
1602
+ {
1603
+ STRICT_CHECK(ch != LF);
1604
+
1605
+ parser->nread = 0;
1606
+
1431
1607
  /* Exit, the rest of the connect is in a different protocol. */
1432
1608
  if (parser->upgrade) {
1433
- CALLBACK2(message_complete);
1434
- return (p - data);
1609
+ parser->state = NEW_MESSAGE();
1610
+ CALLBACK_NOTIFY(message_complete);
1611
+ return (p - data) + 1;
1435
1612
  }
1436
1613
 
1437
1614
  if (parser->flags & F_SKIPBODY) {
1438
- CALLBACK2(message_complete);
1439
- state = NEW_MESSAGE();
1615
+ parser->state = NEW_MESSAGE();
1616
+ CALLBACK_NOTIFY(message_complete);
1440
1617
  } else if (parser->flags & F_CHUNKED) {
1441
1618
  /* chunked encoding - ignore Content-Length header */
1442
- state = s_chunk_size_start;
1619
+ parser->state = s_chunk_size_start;
1443
1620
  } else {
1444
1621
  if (parser->content_length == 0) {
1445
1622
  /* Content-Length header given but zero: Content-Length: 0\r\n */
1446
- CALLBACK2(message_complete);
1447
- state = NEW_MESSAGE();
1448
- } else if (parser->content_length > 0) {
1623
+ parser->state = NEW_MESSAGE();
1624
+ CALLBACK_NOTIFY(message_complete);
1625
+ } else if (parser->content_length != ULLONG_MAX) {
1449
1626
  /* Content-Length header given and non-zero */
1450
- state = s_body_identity;
1627
+ parser->state = s_body_identity;
1451
1628
  } else {
1452
- if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) {
1629
+ if (parser->type == HTTP_REQUEST ||
1630
+ !http_message_needs_eof(parser)) {
1453
1631
  /* Assume content-length 0 - read the next */
1454
- CALLBACK2(message_complete);
1455
- state = NEW_MESSAGE();
1632
+ parser->state = NEW_MESSAGE();
1633
+ CALLBACK_NOTIFY(message_complete);
1456
1634
  } else {
1457
1635
  /* Read body until EOF */
1458
- state = s_body_identity_eof;
1636
+ parser->state = s_body_identity_eof;
1459
1637
  }
1460
1638
  }
1461
1639
  }
@@ -1464,60 +1642,103 @@ size_t http_parser_execute (http_parser *parser,
1464
1642
  }
1465
1643
 
1466
1644
  case s_body_identity:
1467
- to_read = MIN(pe - p, (int64_t)parser->content_length);
1468
- if (to_read > 0) {
1469
- if (settings->on_body) settings->on_body(parser, p, to_read);
1470
- p += to_read - 1;
1471
- parser->content_length -= to_read;
1472
- if (parser->content_length == 0) {
1473
- CALLBACK2(message_complete);
1474
- state = NEW_MESSAGE();
1475
- }
1645
+ {
1646
+ uint64_t to_read = MIN(parser->content_length,
1647
+ (uint64_t) ((data + len) - p));
1648
+
1649
+ assert(parser->content_length != 0
1650
+ && parser->content_length != ULLONG_MAX);
1651
+
1652
+ /* The difference between advancing content_length and p is because
1653
+ * the latter will automaticaly advance on the next loop iteration.
1654
+ * Further, if content_length ends up at 0, we want to see the last
1655
+ * byte again for our message complete callback.
1656
+ */
1657
+ MARK(body);
1658
+ parser->content_length -= to_read;
1659
+ p += to_read - 1;
1660
+
1661
+ if (parser->content_length == 0) {
1662
+ parser->state = s_message_done;
1663
+
1664
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1665
+ *
1666
+ * The alternative to doing this is to wait for the next byte to
1667
+ * trigger the data callback, just as in every other case. The
1668
+ * problem with this is that this makes it difficult for the test
1669
+ * harness to distinguish between complete-on-EOF and
1670
+ * complete-on-length. It's not clear that this distinction is
1671
+ * important for applications, but let's keep it for now.
1672
+ */
1673
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1674
+ goto reexecute_byte;
1476
1675
  }
1676
+
1477
1677
  break;
1678
+ }
1478
1679
 
1479
1680
  /* read until EOF */
1480
1681
  case s_body_identity_eof:
1481
- to_read = pe - p;
1482
- if (to_read > 0) {
1483
- if (settings->on_body) settings->on_body(parser, p, to_read);
1484
- p += to_read - 1;
1485
- }
1682
+ MARK(body);
1683
+ p = data + len - 1;
1684
+
1685
+ break;
1686
+
1687
+ case s_message_done:
1688
+ parser->state = NEW_MESSAGE();
1689
+ CALLBACK_NOTIFY(message_complete);
1486
1690
  break;
1487
1691
 
1488
1692
  case s_chunk_size_start:
1489
1693
  {
1490
- assert(nread == 1);
1694
+ assert(parser->nread == 1);
1491
1695
  assert(parser->flags & F_CHUNKED);
1492
1696
 
1493
- c = unhex[(unsigned char)ch];
1494
- if (c == -1) goto error;
1495
- parser->content_length = c;
1496
- state = s_chunk_size;
1697
+ unhex_val = unhex[(unsigned char)ch];
1698
+ if (unhex_val == -1) {
1699
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1700
+ goto error;
1701
+ }
1702
+
1703
+ parser->content_length = unhex_val;
1704
+ parser->state = s_chunk_size;
1497
1705
  break;
1498
1706
  }
1499
1707
 
1500
1708
  case s_chunk_size:
1501
1709
  {
1710
+ uint64_t t;
1711
+
1502
1712
  assert(parser->flags & F_CHUNKED);
1503
1713
 
1504
1714
  if (ch == CR) {
1505
- state = s_chunk_size_almost_done;
1715
+ parser->state = s_chunk_size_almost_done;
1506
1716
  break;
1507
1717
  }
1508
1718
 
1509
- c = unhex[(unsigned char)ch];
1719
+ unhex_val = unhex[(unsigned char)ch];
1510
1720
 
1511
- if (c == -1) {
1721
+ if (unhex_val == -1) {
1512
1722
  if (ch == ';' || ch == ' ') {
1513
- state = s_chunk_parameters;
1723
+ parser->state = s_chunk_parameters;
1514
1724
  break;
1515
1725
  }
1726
+
1727
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1516
1728
  goto error;
1517
1729
  }
1518
1730
 
1519
- parser->content_length *= 16;
1520
- parser->content_length += c;
1731
+ t = parser->content_length;
1732
+ t *= 16;
1733
+ t += unhex_val;
1734
+
1735
+ /* Overflow? */
1736
+ if (t < parser->content_length || t == ULLONG_MAX) {
1737
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1738
+ goto error;
1739
+ }
1740
+
1741
+ parser->content_length = t;
1521
1742
  break;
1522
1743
  }
1523
1744
 
@@ -1526,7 +1747,7 @@ size_t http_parser_execute (http_parser *parser,
1526
1747
  assert(parser->flags & F_CHUNKED);
1527
1748
  /* just ignore this shit. TODO check for overflow */
1528
1749
  if (ch == CR) {
1529
- state = s_chunk_size_almost_done;
1750
+ parser->state = s_chunk_size_almost_done;
1530
1751
  break;
1531
1752
  }
1532
1753
  break;
@@ -1537,108 +1758,418 @@ size_t http_parser_execute (http_parser *parser,
1537
1758
  assert(parser->flags & F_CHUNKED);
1538
1759
  STRICT_CHECK(ch != LF);
1539
1760
 
1540
- nread = 0;
1761
+ parser->nread = 0;
1541
1762
 
1542
1763
  if (parser->content_length == 0) {
1543
1764
  parser->flags |= F_TRAILING;
1544
- state = s_header_field_start;
1765
+ parser->state = s_header_field_start;
1545
1766
  } else {
1546
- state = s_chunk_data;
1767
+ parser->state = s_chunk_data;
1547
1768
  }
1548
1769
  break;
1549
1770
  }
1550
1771
 
1551
1772
  case s_chunk_data:
1552
1773
  {
1553
- assert(parser->flags & F_CHUNKED);
1774
+ uint64_t to_read = MIN(parser->content_length,
1775
+ (uint64_t) ((data + len) - p));
1554
1776
 
1555
- to_read = MIN(pe - p, (int64_t)(parser->content_length));
1777
+ assert(parser->flags & F_CHUNKED);
1778
+ assert(parser->content_length != 0
1779
+ && parser->content_length != ULLONG_MAX);
1556
1780
 
1557
- if (to_read > 0) {
1558
- if (settings->on_body) settings->on_body(parser, p, to_read);
1559
- p += to_read - 1;
1560
- }
1781
+ /* See the explanation in s_body_identity for why the content
1782
+ * length and data pointers are managed this way.
1783
+ */
1784
+ MARK(body);
1785
+ parser->content_length -= to_read;
1786
+ p += to_read - 1;
1561
1787
 
1562
- if (to_read == parser->content_length) {
1563
- state = s_chunk_data_almost_done;
1788
+ if (parser->content_length == 0) {
1789
+ parser->state = s_chunk_data_almost_done;
1564
1790
  }
1565
1791
 
1566
- parser->content_length -= to_read;
1567
1792
  break;
1568
1793
  }
1569
1794
 
1570
1795
  case s_chunk_data_almost_done:
1571
1796
  assert(parser->flags & F_CHUNKED);
1797
+ assert(parser->content_length == 0);
1572
1798
  STRICT_CHECK(ch != CR);
1573
- state = s_chunk_data_done;
1799
+ parser->state = s_chunk_data_done;
1800
+ CALLBACK_DATA(body);
1574
1801
  break;
1575
1802
 
1576
1803
  case s_chunk_data_done:
1577
1804
  assert(parser->flags & F_CHUNKED);
1578
1805
  STRICT_CHECK(ch != LF);
1579
- state = s_chunk_size_start;
1806
+ parser->nread = 0;
1807
+ parser->state = s_chunk_size_start;
1580
1808
  break;
1581
1809
 
1582
1810
  default:
1583
1811
  assert(0 && "unhandled state");
1812
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1584
1813
  goto error;
1585
1814
  }
1586
1815
  }
1587
1816
 
1588
- CALLBACK_NOCLEAR(header_field);
1589
- CALLBACK_NOCLEAR(header_value);
1590
- CALLBACK_NOCLEAR(fragment);
1591
- CALLBACK_NOCLEAR(query_string);
1592
- CALLBACK_NOCLEAR(path);
1593
- CALLBACK_NOCLEAR(url);
1817
+ /* Run callbacks for any marks that we have leftover after we ran our of
1818
+ * bytes. There should be at most one of these set, so it's OK to invoke
1819
+ * them in series (unset marks will not result in callbacks).
1820
+ *
1821
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
1822
+ * overflowed 'data' and this allows us to correct for the off-by-one that
1823
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1824
+ * value that's in-bounds).
1825
+ */
1826
+
1827
+ assert(((header_field_mark ? 1 : 0) +
1828
+ (header_value_mark ? 1 : 0) +
1829
+ (url_mark ? 1 : 0) +
1830
+ (body_mark ? 1 : 0)) <= 1);
1594
1831
 
1595
- parser->state = state;
1596
- parser->header_state = header_state;
1597
- parser->index = index;
1598
- parser->nread = nread;
1832
+ CALLBACK_DATA_NOADVANCE(header_field);
1833
+ CALLBACK_DATA_NOADVANCE(header_value);
1834
+ CALLBACK_DATA_NOADVANCE(url);
1835
+ CALLBACK_DATA_NOADVANCE(body);
1599
1836
 
1600
1837
  return len;
1601
1838
 
1602
1839
  error:
1603
- parser->state = s_dead;
1840
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1841
+ SET_ERRNO(HPE_UNKNOWN);
1842
+ }
1843
+
1604
1844
  return (p - data);
1605
1845
  }
1606
1846
 
1607
1847
 
1848
+ /* Does the parser need to see an EOF to find the end of the message? */
1608
1849
  int
1609
- http_should_keep_alive (http_parser *parser)
1850
+ http_message_needs_eof (const http_parser *parser)
1851
+ {
1852
+ if (parser->type == HTTP_REQUEST) {
1853
+ return 0;
1854
+ }
1855
+
1856
+ /* See RFC 2616 section 4.4 */
1857
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1858
+ parser->status_code == 204 || /* No Content */
1859
+ parser->status_code == 304 || /* Not Modified */
1860
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1861
+ return 0;
1862
+ }
1863
+
1864
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1865
+ return 0;
1866
+ }
1867
+
1868
+ return 1;
1869
+ }
1870
+
1871
+
1872
+ int
1873
+ http_should_keep_alive (const http_parser *parser)
1610
1874
  {
1611
1875
  if (parser->http_major > 0 && parser->http_minor > 0) {
1612
1876
  /* HTTP/1.1 */
1613
1877
  if (parser->flags & F_CONNECTION_CLOSE) {
1614
1878
  return 0;
1615
- } else {
1616
- return 1;
1617
1879
  }
1618
1880
  } else {
1619
1881
  /* HTTP/1.0 or earlier */
1620
- if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1621
- return 1;
1622
- } else {
1882
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1623
1883
  return 0;
1624
1884
  }
1625
1885
  }
1886
+
1887
+ return !http_message_needs_eof(parser);
1626
1888
  }
1627
1889
 
1628
1890
 
1629
- const char * http_method_str (enum http_method m)
1891
+ const char *
1892
+ http_method_str (enum http_method m)
1630
1893
  {
1631
- return method_strings[m];
1894
+ return ELEM_AT(method_strings, m, "<unknown>");
1632
1895
  }
1633
1896
 
1634
1897
 
1635
1898
  void
1636
1899
  http_parser_init (http_parser *parser, enum http_parser_type t)
1637
1900
  {
1901
+ void *data = parser->data; /* preserve application data */
1902
+ memset(parser, 0, sizeof(*parser));
1903
+ parser->data = data;
1638
1904
  parser->type = t;
1639
1905
  parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1640
- parser->nread = 0;
1641
- parser->upgrade = 0;
1642
- parser->flags = 0;
1643
- parser->method = 0;
1906
+ parser->http_errno = HPE_OK;
1907
+ }
1908
+
1909
+ const char *
1910
+ http_errno_name(enum http_errno err) {
1911
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1912
+ return http_strerror_tab[err].name;
1913
+ }
1914
+
1915
+ const char *
1916
+ http_errno_description(enum http_errno err) {
1917
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1918
+ return http_strerror_tab[err].description;
1919
+ }
1920
+
1921
+ static enum http_host_state
1922
+ http_parse_host_char(enum http_host_state s, const char ch) {
1923
+ switch(s) {
1924
+ case s_http_userinfo:
1925
+ case s_http_userinfo_start:
1926
+ if (ch == '@') {
1927
+ return s_http_host_start;
1928
+ }
1929
+
1930
+ if (IS_USERINFO_CHAR(ch)) {
1931
+ return s_http_userinfo;
1932
+ }
1933
+ break;
1934
+
1935
+ case s_http_host_start:
1936
+ if (ch == '[') {
1937
+ return s_http_host_v6_start;
1938
+ }
1939
+
1940
+ if (IS_HOST_CHAR(ch)) {
1941
+ return s_http_host;
1942
+ }
1943
+
1944
+ break;
1945
+
1946
+ case s_http_host:
1947
+ if (IS_HOST_CHAR(ch)) {
1948
+ return s_http_host;
1949
+ }
1950
+
1951
+ /* FALLTHROUGH */
1952
+ case s_http_host_v6_end:
1953
+ if (ch == ':') {
1954
+ return s_http_host_port_start;
1955
+ }
1956
+
1957
+ break;
1958
+
1959
+ case s_http_host_v6:
1960
+ if (ch == ']') {
1961
+ return s_http_host_v6_end;
1962
+ }
1963
+
1964
+ /* FALLTHROUGH */
1965
+ case s_http_host_v6_start:
1966
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
1967
+ return s_http_host_v6;
1968
+ }
1969
+
1970
+ break;
1971
+
1972
+ case s_http_host_port:
1973
+ case s_http_host_port_start:
1974
+ if (IS_NUM(ch)) {
1975
+ return s_http_host_port;
1976
+ }
1977
+
1978
+ break;
1979
+
1980
+ default:
1981
+ break;
1982
+ }
1983
+ return s_http_host_dead;
1984
+ }
1985
+
1986
+ static int
1987
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
1988
+ enum http_host_state s;
1989
+
1990
+ const char *p;
1991
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
1992
+
1993
+ u->field_data[UF_HOST].len = 0;
1994
+
1995
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
1996
+
1997
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
1998
+ enum http_host_state new_s = http_parse_host_char(s, *p);
1999
+
2000
+ if (new_s == s_http_host_dead) {
2001
+ return 1;
2002
+ }
2003
+
2004
+ switch(new_s) {
2005
+ case s_http_host:
2006
+ if (s != s_http_host) {
2007
+ u->field_data[UF_HOST].off = p - buf;
2008
+ }
2009
+ u->field_data[UF_HOST].len++;
2010
+ break;
2011
+
2012
+ case s_http_host_v6:
2013
+ if (s != s_http_host_v6) {
2014
+ u->field_data[UF_HOST].off = p - buf;
2015
+ }
2016
+ u->field_data[UF_HOST].len++;
2017
+ break;
2018
+
2019
+ case s_http_host_port:
2020
+ if (s != s_http_host_port) {
2021
+ u->field_data[UF_PORT].off = p - buf;
2022
+ u->field_data[UF_PORT].len = 0;
2023
+ u->field_set |= (1 << UF_PORT);
2024
+ }
2025
+ u->field_data[UF_PORT].len++;
2026
+ break;
2027
+
2028
+ case s_http_userinfo:
2029
+ if (s != s_http_userinfo) {
2030
+ u->field_data[UF_USERINFO].off = p - buf ;
2031
+ u->field_data[UF_USERINFO].len = 0;
2032
+ u->field_set |= (1 << UF_USERINFO);
2033
+ }
2034
+ u->field_data[UF_USERINFO].len++;
2035
+ break;
2036
+
2037
+ default:
2038
+ break;
2039
+ }
2040
+ s = new_s;
2041
+ }
2042
+
2043
+ /* Make sure we don't end somewhere unexpected */
2044
+ switch (s) {
2045
+ case s_http_host_start:
2046
+ case s_http_host_v6_start:
2047
+ case s_http_host_v6:
2048
+ case s_http_host_port_start:
2049
+ case s_http_userinfo:
2050
+ case s_http_userinfo_start:
2051
+ return 1;
2052
+ default:
2053
+ break;
2054
+ }
2055
+
2056
+ return 0;
2057
+ }
2058
+
2059
+ int
2060
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2061
+ struct http_parser_url *u)
2062
+ {
2063
+ enum state s;
2064
+ const char *p;
2065
+ enum http_parser_url_fields uf, old_uf;
2066
+ int found_at = 0;
2067
+
2068
+ u->port = u->field_set = 0;
2069
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2070
+ uf = old_uf = UF_MAX;
2071
+
2072
+ for (p = buf; p < buf + buflen; p++) {
2073
+ s = parse_url_char(s, *p);
2074
+
2075
+ /* Figure out the next field that we're operating on */
2076
+ switch (s) {
2077
+ case s_dead:
2078
+ return 1;
2079
+
2080
+ /* Skip delimeters */
2081
+ case s_req_schema_slash:
2082
+ case s_req_schema_slash_slash:
2083
+ case s_req_server_start:
2084
+ case s_req_query_string_start:
2085
+ case s_req_fragment_start:
2086
+ continue;
2087
+
2088
+ case s_req_schema:
2089
+ uf = UF_SCHEMA;
2090
+ break;
2091
+
2092
+ case s_req_server_with_at:
2093
+ found_at = 1;
2094
+
2095
+ /* FALLTHROUGH */
2096
+ case s_req_server:
2097
+ uf = UF_HOST;
2098
+ break;
2099
+
2100
+ case s_req_path:
2101
+ uf = UF_PATH;
2102
+ break;
2103
+
2104
+ case s_req_query_string:
2105
+ uf = UF_QUERY;
2106
+ break;
2107
+
2108
+ case s_req_fragment:
2109
+ uf = UF_FRAGMENT;
2110
+ break;
2111
+
2112
+ default:
2113
+ assert(!"Unexpected state");
2114
+ return 1;
2115
+ }
2116
+
2117
+ /* Nothing's changed; soldier on */
2118
+ if (uf == old_uf) {
2119
+ u->field_data[uf].len++;
2120
+ continue;
2121
+ }
2122
+
2123
+ u->field_data[uf].off = p - buf;
2124
+ u->field_data[uf].len = 1;
2125
+
2126
+ u->field_set |= (1 << uf);
2127
+ old_uf = uf;
2128
+ }
2129
+
2130
+ /* host must be present if there is a schema */
2131
+ /* parsing http:///toto will fail */
2132
+ if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2133
+ if (http_parse_host(buf, u, found_at) != 0) {
2134
+ return 1;
2135
+ }
2136
+ }
2137
+
2138
+ /* CONNECT requests can only contain "hostname:port" */
2139
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2140
+ return 1;
2141
+ }
2142
+
2143
+ if (u->field_set & (1 << UF_PORT)) {
2144
+ /* Don't bother with endp; we've already validated the string */
2145
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2146
+
2147
+ /* Ports have a max value of 2^16 */
2148
+ if (v > 0xffff) {
2149
+ return 1;
2150
+ }
2151
+
2152
+ u->port = (uint16_t) v;
2153
+ }
2154
+
2155
+ return 0;
2156
+ }
2157
+
2158
+ void
2159
+ http_parser_pause(http_parser *parser, int paused) {
2160
+ /* Users should only be pausing/unpausing a parser that is not in an error
2161
+ * state. In non-debug builds, there's not much that we can do about this
2162
+ * other than ignore it.
2163
+ */
2164
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2165
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2166
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2167
+ } else {
2168
+ assert(0 && "Attempting to pause parser in error state");
2169
+ }
2170
+ }
2171
+
2172
+ int
2173
+ http_body_is_final(const struct http_parser *parser) {
2174
+ return parser->state == s_message_done;
1644
2175
  }