http_parser.rb 0.5.3 → 0.6.0.beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitmodules +3 -3
- data/Gemfile +1 -1
- data/Gemfile.lock +9 -2
- data/README.md +50 -45
- data/bench/standalone.rb +23 -0
- data/bench/thin.rb +1 -0
- data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +66 -58
- data/ext/ruby_http_parser/ruby_http_parser.c +10 -41
- data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +5 -1
- data/ext/ruby_http_parser/vendor/http-parser-java/README.md +133 -1
- data/ext/ruby_http_parser/vendor/http-parser-java/TODO +6 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +1029 -615
- data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +177 -43
- data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPHeadersCompleteCallback.java +13 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +4 -1
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +2 -2
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +6 -6
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPHeadersCompleteCallback.java +12 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +715 -637
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +1 -1
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +71 -21
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +1 -1
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +1 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +2 -1
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +1 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +6 -17
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +1 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +1 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +1 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +80 -9
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +2 -1
- data/ext/ruby_http_parser/vendor/http-parser-java/test.c +1141 -210
- data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +230 -71
- data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +32 -0
- data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +5 -1
- data/ext/ruby_http_parser/vendor/http-parser/README.md +9 -2
- data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +1029 -615
- data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +79 -0
- data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +145 -16
- data/ext/ruby_http_parser/vendor/http-parser/test.c +1065 -141
- data/http_parser.rb.gemspec +3 -1
- data/spec/parser_spec.rb +41 -17
- data/spec/support/requests.json +236 -24
- data/spec/support/responses.json +182 -36
- data/tasks/compile.rake +2 -2
- data/tasks/fixtures.rake +7 -1
- metadata +57 -19
- data/ext/ruby_http_parser/vendor/http-parser-java/compile +0 -1
- data/ext/ruby_http_parser/vendor/http-parser-java/test_permutations +0 -1
- data/ext/ruby_http_parser/vendor/http-parser-java/test_unit +0 -1
- data/ext/ruby_http_parser/vendor/http-parser-java/test_utf8 +0 -1
@@ -17,9 +17,6 @@ typedef struct ParserWrapper {
|
|
17
17
|
ryah_http_parser parser;
|
18
18
|
|
19
19
|
VALUE request_url;
|
20
|
-
VALUE request_path;
|
21
|
-
VALUE query_string;
|
22
|
-
VALUE fragment;
|
23
20
|
|
24
21
|
VALUE headers;
|
25
22
|
|
@@ -49,9 +46,6 @@ void ParserWrapper_init(ParserWrapper *wrapper) {
|
|
49
46
|
wrapper->parser.http_minor = 0;
|
50
47
|
|
51
48
|
wrapper->request_url = Qnil;
|
52
|
-
wrapper->request_path = Qnil;
|
53
|
-
wrapper->query_string = Qnil;
|
54
|
-
wrapper->fragment = Qnil;
|
55
49
|
|
56
50
|
wrapper->upgrade_data = Qnil;
|
57
51
|
|
@@ -66,9 +60,6 @@ void ParserWrapper_mark(void *data) {
|
|
66
60
|
if(data) {
|
67
61
|
ParserWrapper *wrapper = (ParserWrapper *) data;
|
68
62
|
rb_gc_mark_maybe(wrapper->request_url);
|
69
|
-
rb_gc_mark_maybe(wrapper->request_path);
|
70
|
-
rb_gc_mark_maybe(wrapper->query_string);
|
71
|
-
rb_gc_mark_maybe(wrapper->fragment);
|
72
63
|
rb_gc_mark_maybe(wrapper->upgrade_data);
|
73
64
|
rb_gc_mark_maybe(wrapper->headers);
|
74
65
|
rb_gc_mark_maybe(wrapper->on_message_begin);
|
@@ -111,9 +102,6 @@ int on_message_begin(ryah_http_parser *parser) {
|
|
111
102
|
GET_WRAPPER(wrapper, parser);
|
112
103
|
|
113
104
|
wrapper->request_url = rb_str_new2("");
|
114
|
-
wrapper->request_path = rb_str_new2("");
|
115
|
-
wrapper->query_string = rb_str_new2("");
|
116
|
-
wrapper->fragment = rb_str_new2("");
|
117
105
|
wrapper->headers = rb_hash_new();
|
118
106
|
wrapper->upgrade_data = rb_str_new2("");
|
119
107
|
|
@@ -139,24 +127,6 @@ int on_url(ryah_http_parser *parser, const char *at, size_t length) {
|
|
139
127
|
return 0;
|
140
128
|
}
|
141
129
|
|
142
|
-
int on_path(ryah_http_parser *parser, const char *at, size_t length) {
|
143
|
-
GET_WRAPPER(wrapper, parser);
|
144
|
-
rb_str_cat(wrapper->request_path, at, length);
|
145
|
-
return 0;
|
146
|
-
}
|
147
|
-
|
148
|
-
int on_query_string(ryah_http_parser *parser, const char *at, size_t length) {
|
149
|
-
GET_WRAPPER(wrapper, parser);
|
150
|
-
rb_str_cat(wrapper->query_string, at, length);
|
151
|
-
return 0;
|
152
|
-
}
|
153
|
-
|
154
|
-
int on_fragment(ryah_http_parser *parser, const char *at, size_t length) {
|
155
|
-
GET_WRAPPER(wrapper, parser);
|
156
|
-
rb_str_cat(wrapper->fragment, at, length);
|
157
|
-
return 0;
|
158
|
-
}
|
159
|
-
|
160
130
|
int on_header_field(ryah_http_parser *parser, const char *at, size_t length) {
|
161
131
|
GET_WRAPPER(wrapper, parser);
|
162
132
|
|
@@ -278,10 +248,7 @@ int on_message_complete(ryah_http_parser *parser) {
|
|
278
248
|
|
279
249
|
static ryah_http_parser_settings settings = {
|
280
250
|
.on_message_begin = on_message_begin,
|
281
|
-
.on_path = on_path,
|
282
|
-
.on_query_string = on_query_string,
|
283
251
|
.on_url = on_url,
|
284
|
-
.on_fragment = on_fragment,
|
285
252
|
.on_header_field = on_header_field,
|
286
253
|
.on_header_value = on_header_value,
|
287
254
|
.on_headers_complete = on_headers_complete,
|
@@ -318,6 +285,10 @@ VALUE ResponseParser_alloc(VALUE klass) {
|
|
318
285
|
return Parser_alloc_by_type(klass, HTTP_RESPONSE);
|
319
286
|
}
|
320
287
|
|
288
|
+
VALUE Parser_strict_p(VALUE klass) {
|
289
|
+
return HTTP_PARSER_STRICT == 1 ? Qtrue : Qfalse;
|
290
|
+
}
|
291
|
+
|
321
292
|
VALUE Parser_initialize(int argc, VALUE *argv, VALUE self) {
|
322
293
|
ParserWrapper *wrapper = NULL;
|
323
294
|
DATA_GET(self, ParserWrapper, wrapper);
|
@@ -349,11 +320,14 @@ VALUE Parser_execute(VALUE self, VALUE data) {
|
|
349
320
|
size_t nparsed = ryah_http_parser_execute(&wrapper->parser, &settings, ptr, len);
|
350
321
|
|
351
322
|
if (wrapper->parser.upgrade) {
|
352
|
-
|
323
|
+
if (RTEST(wrapper->stopped))
|
324
|
+
nparsed += 1;
|
325
|
+
|
326
|
+
rb_str_cat(wrapper->upgrade_data, ptr + nparsed, len - nparsed);
|
353
327
|
|
354
328
|
} else if (nparsed != (size_t)len) {
|
355
329
|
if (!RTEST(wrapper->stopped) && !RTEST(wrapper->completed))
|
356
|
-
rb_raise(eParserError, "Could not parse data entirely");
|
330
|
+
rb_raise(eParserError, "Could not parse data entirely (%zu != %zu)", nparsed, len);
|
357
331
|
else
|
358
332
|
nparsed += 1; // error states fail on the current character
|
359
333
|
}
|
@@ -465,9 +439,6 @@ VALUE Parser_status_code(VALUE self) {
|
|
465
439
|
}
|
466
440
|
|
467
441
|
DEFINE_GETTER(request_url);
|
468
|
-
DEFINE_GETTER(request_path);
|
469
|
-
DEFINE_GETTER(query_string);
|
470
|
-
DEFINE_GETTER(fragment);
|
471
442
|
DEFINE_GETTER(headers);
|
472
443
|
DEFINE_GETTER(upgrade_data);
|
473
444
|
DEFINE_GETTER(header_value_type);
|
@@ -515,6 +486,7 @@ void Init_ruby_http_parser() {
|
|
515
486
|
rb_define_alloc_func(cRequestParser, RequestParser_alloc);
|
516
487
|
rb_define_alloc_func(cResponseParser, ResponseParser_alloc);
|
517
488
|
|
489
|
+
rb_define_singleton_method(cParser, "strict?", Parser_strict_p, 0);
|
518
490
|
rb_define_method(cParser, "initialize", Parser_initialize, -1);
|
519
491
|
|
520
492
|
rb_define_method(cParser, "on_message_begin=", Parser_set_on_message_begin, 1);
|
@@ -534,9 +506,6 @@ void Init_ruby_http_parser() {
|
|
534
506
|
rb_define_method(cParser, "status_code", Parser_status_code, 0);
|
535
507
|
|
536
508
|
rb_define_method(cParser, "request_url", Parser_request_url, 0);
|
537
|
-
rb_define_method(cParser, "request_path", Parser_request_path, 0);
|
538
|
-
rb_define_method(cParser, "query_string", Parser_query_string, 0);
|
539
|
-
rb_define_method(cParser, "fragment", Parser_fragment, 0);
|
540
509
|
rb_define_method(cParser, "headers", Parser_headers, 0);
|
541
510
|
rb_define_method(cParser, "upgrade_data", Parser_upgrade_data, 0);
|
542
511
|
rb_define_method(cParser, "header_value_type", Parser_header_value_type, 0);
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Authors ordered by first contribution.
|
2
|
+
Ryan Dahl <ry@tinyclouds.org>
|
3
|
+
Jeremy Hinegardner <jeremy@hinegardner.org>
|
4
|
+
Sergey Shepelev <temotor@gmail.com>
|
5
|
+
Joe Damato <ice799@gmail.com>
|
6
|
+
tomika <tomika_nospam@freemail.hu>
|
7
|
+
Phoenix Sol <phoenix@burninglabs.com>
|
8
|
+
Cliff Frey <cliff@meraki.com>
|
9
|
+
Ewen Cheslack-Postava <ewencp@cs.stanford.edu>
|
10
|
+
Santiago Gala <sgala@apache.org>
|
11
|
+
Tim Becker <tim.becker@syngenio.de>
|
12
|
+
Jeff Terrace <jterrace@gmail.com>
|
13
|
+
Ben Noordhuis <info@bnoordhuis.nl>
|
14
|
+
Nathan Rajlich <nathan@tootallnate.net>
|
15
|
+
Mark Nottingham <mnot@mnot.net>
|
16
|
+
Aman Gupta <aman@tmm1.net>
|
17
|
+
Tim Becker <tim.becker@kuriositaet.de>
|
18
|
+
Sean Cunningham <sean.cunningham@mandiant.com>
|
19
|
+
Peter Griess <pg@std.in>
|
20
|
+
Salman Haq <salman.haq@asti-usa.com>
|
21
|
+
Cliff Frey <clifffrey@gmail.com>
|
22
|
+
Jon Kolb <jon@b0g.us>
|
23
|
+
Fouad Mardini <f.mardini@gmail.com>
|
24
|
+
Paul Querna <pquerna@apache.org>
|
25
|
+
Felix Geisendörfer <felix@debuggable.com>
|
26
|
+
koichik <koichik@improvement.jp>
|
27
|
+
Andre Caron <andre.l.caron@gmail.com>
|
28
|
+
Ivo Raisr <ivosh@ivosh.net>
|
29
|
+
James McLaughlin <jamie@lacewing-project.org>
|
30
|
+
David Gwynne <loki@animata.net>
|
31
|
+
LE ROUX Thomas <thomas@procheo.fr>
|
32
|
+
Randy Rizun <rrizun@ortivawireless.com>
|
@@ -23,7 +23,11 @@ IN THE SOFTWARE.
|
|
23
23
|
This code mainly based on code with the following license:
|
24
24
|
|
25
25
|
|
26
|
-
|
26
|
+
http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright
|
27
|
+
Igor Sysoev.
|
28
|
+
|
29
|
+
Additional changes are licensed under the same terms as NGINX and
|
30
|
+
copyright Joyent, Inc. and other Node contributors. All rights reserved.
|
27
31
|
|
28
32
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
29
33
|
of this software and associated documentation files (the "Software"), to
|
@@ -24,7 +24,7 @@ The parser extracts the following information from HTTP messages:
|
|
24
24
|
* Response status code
|
25
25
|
* Transfer-Encoding
|
26
26
|
* HTTP version
|
27
|
-
* Request
|
27
|
+
* Request URL
|
28
28
|
* Message body
|
29
29
|
|
30
30
|
Building
|
@@ -49,3 +49,135 @@ Usage
|
|
49
49
|
help or have suggestions, feel free to contact me at
|
50
50
|
(tim.becker@kuriositaet.de).
|
51
51
|
|
52
|
+
|
53
|
+
One `http_parser` object is used per TCP connection. Initialize the struct
|
54
|
+
using `http_parser_init()` and set the callbacks. That might look something
|
55
|
+
like this for a request parser:
|
56
|
+
|
57
|
+
http_parser_settings settings;
|
58
|
+
settings.on_url = my_url_callback;
|
59
|
+
settings.on_header_field = my_header_field_callback;
|
60
|
+
/* ... */
|
61
|
+
|
62
|
+
http_parser *parser = malloc(sizeof(http_parser));
|
63
|
+
http_parser_init(parser, HTTP_REQUEST);
|
64
|
+
parser->data = my_socket;
|
65
|
+
|
66
|
+
When data is received on the socket execute the parser and check for errors.
|
67
|
+
|
68
|
+
size_t len = 80*1024, nparsed;
|
69
|
+
char buf[len];
|
70
|
+
ssize_t recved;
|
71
|
+
|
72
|
+
recved = recv(fd, buf, len, 0);
|
73
|
+
|
74
|
+
if (recved < 0) {
|
75
|
+
/* Handle error. */
|
76
|
+
}
|
77
|
+
|
78
|
+
/* Start up / continue the parser.
|
79
|
+
* Note we pass recved==0 to signal that EOF has been received.
|
80
|
+
*/
|
81
|
+
nparsed = http_parser_execute(parser, &settings, buf, recved);
|
82
|
+
|
83
|
+
if (parser->upgrade) {
|
84
|
+
/* handle new protocol */
|
85
|
+
} else if (nparsed != recved) {
|
86
|
+
/* Handle error. Usually just close the connection. */
|
87
|
+
}
|
88
|
+
|
89
|
+
HTTP needs to know where the end of the stream is. For example, sometimes
|
90
|
+
servers send responses without Content-Length and expect the client to
|
91
|
+
consume input (for the body) until EOF. To tell http_parser about EOF, give
|
92
|
+
`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
|
93
|
+
can still be encountered during an EOF, so one must still be prepared
|
94
|
+
to receive them.
|
95
|
+
|
96
|
+
Scalar valued message information such as `status_code`, `method`, and the
|
97
|
+
HTTP version are stored in the parser structure. This data is only
|
98
|
+
temporarily stored in `http_parser` and gets reset on each new message. If
|
99
|
+
this information is needed later, copy it out of the structure during the
|
100
|
+
`headers_complete` callback.
|
101
|
+
|
102
|
+
The parser decodes the transfer-encoding for both requests and responses
|
103
|
+
transparently. That is, a chunked encoding is decoded before being sent to
|
104
|
+
the on_body callback.
|
105
|
+
|
106
|
+
|
107
|
+
The Special Problem of Upgrade
|
108
|
+
------------------------------
|
109
|
+
|
110
|
+
HTTP supports upgrading the connection to a different protocol. An
|
111
|
+
increasingly common example of this is the Web Socket protocol which sends
|
112
|
+
a request like
|
113
|
+
|
114
|
+
GET /demo HTTP/1.1
|
115
|
+
Upgrade: WebSocket
|
116
|
+
Connection: Upgrade
|
117
|
+
Host: example.com
|
118
|
+
Origin: http://example.com
|
119
|
+
WebSocket-Protocol: sample
|
120
|
+
|
121
|
+
followed by non-HTTP data.
|
122
|
+
|
123
|
+
(See http://tools.ietf.org/html/draft-hixie-thewebsocketprotocol-75 for more
|
124
|
+
information on the Web Socket protocol.)
|
125
|
+
|
126
|
+
To support this, the parser will treat this as a normal HTTP message without a
|
127
|
+
body, issuing both on_headers_complete and on_message_complete callbacks. However,
|
128
|
+
http_parser_execute() will stop parsing at the end of the headers and return.
|
129
|
+
|
130
|
+
The user is expected to check if `parser->upgrade` has been set to 1 after
|
131
|
+
`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
|
132
|
+
offset by the return value of `http_parser_execute()`.
|
133
|
+
|
134
|
+
|
135
|
+
Callbacks
|
136
|
+
---------
|
137
|
+
|
138
|
+
During the `http_parser_execute()` call, the callbacks set in
|
139
|
+
`http_parser_settings` will be executed. The parser maintains state and
|
140
|
+
never looks behind, so buffering the data is not necessary. If you need to
|
141
|
+
save certain data for later usage, you can do that from the callbacks.
|
142
|
+
|
143
|
+
There are two types of callbacks:
|
144
|
+
|
145
|
+
* notification `typedef int (*http_cb) (http_parser*);`
|
146
|
+
Callbacks: on_message_begin, on_headers_complete, on_message_complete.
|
147
|
+
* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
|
148
|
+
Callbacks: (requests only) on_url,
|
149
|
+
(common) on_header_field, on_header_value, on_body;
|
150
|
+
|
151
|
+
Callbacks must return 0 on success. Returning a non-zero value indicates
|
152
|
+
error to the parser, making it exit immediately.
|
153
|
+
|
154
|
+
In case you parse HTTP message in chunks (i.e. `read()` request line
|
155
|
+
from socket, parse, read half headers, parse, etc) your data callbacks
|
156
|
+
may be called more than once. Http-parser guarantees that data pointer is only
|
157
|
+
valid for the lifetime of callback. You can also `read()` into a heap allocated
|
158
|
+
buffer to avoid copying memory around if this fits your application.
|
159
|
+
|
160
|
+
Reading headers may be a tricky task if you read/parse headers partially.
|
161
|
+
Basically, you need to remember whether last header callback was field or value
|
162
|
+
and apply following logic:
|
163
|
+
|
164
|
+
(on_header_field and on_header_value shortened to on_h_*)
|
165
|
+
------------------------ ------------ --------------------------------------------
|
166
|
+
| State (prev. callback) | Callback | Description/action |
|
167
|
+
------------------------ ------------ --------------------------------------------
|
168
|
+
| nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
|
169
|
+
| | | into it |
|
170
|
+
------------------------ ------------ --------------------------------------------
|
171
|
+
| value | on_h_field | New header started. |
|
172
|
+
| | | Copy current name,value buffers to headers |
|
173
|
+
| | | list and allocate new buffer for new name |
|
174
|
+
------------------------ ------------ --------------------------------------------
|
175
|
+
| field | on_h_field | Previous name continues. Reallocate name |
|
176
|
+
| | | buffer and append callback data to it |
|
177
|
+
------------------------ ------------ --------------------------------------------
|
178
|
+
| field | on_h_value | Value for current header started. Allocate |
|
179
|
+
| | | new buffer and copy callback data to it |
|
180
|
+
------------------------ ------------ --------------------------------------------
|
181
|
+
| value | on_h_value | Value continues. Reallocate value buffer |
|
182
|
+
| | | and append callback data to it |
|
183
|
+
------------------------ ------------ --------------------------------------------
|
@@ -1,4 +1,10 @@
|
|
1
|
+
decide how to handle errs per default:
|
2
|
+
- ry: "set state to dead", return `read`
|
3
|
+
- current: call on_error w/ details, if no on_error handler set,
|
4
|
+
throw Exception, else call on_error and behave like orig...
|
5
|
+
|
1
6
|
some tests from test.c left to port
|
7
|
+
(scan ...)
|
2
8
|
documentation
|
3
9
|
|
4
10
|
hi level callback interface
|
@@ -1,4 +1,7 @@
|
|
1
|
-
/*
|
1
|
+
/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
|
2
|
+
*
|
3
|
+
* Additional changes are licensed under the same terms as NGINX and
|
4
|
+
* copyright Joyent, Inc. and other Node contributors. All rights reserved.
|
2
5
|
*
|
3
6
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
7
|
* of this software and associated documentation files (the "Software"), to
|
@@ -18,48 +21,94 @@
|
|
18
21
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
19
22
|
* IN THE SOFTWARE.
|
20
23
|
*/
|
21
|
-
#include
|
24
|
+
#include "http_parser.h"
|
22
25
|
#include <assert.h>
|
23
26
|
#include <stddef.h>
|
27
|
+
#include <ctype.h>
|
28
|
+
#include <stdlib.h>
|
29
|
+
#include <string.h>
|
30
|
+
#include <limits.h>
|
24
31
|
|
32
|
+
#ifndef ULLONG_MAX
|
33
|
+
# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
|
34
|
+
#endif
|
25
35
|
|
26
36
|
#ifndef MIN
|
27
37
|
# define MIN(a,b) ((a) < (b) ? (a) : (b))
|
28
38
|
#endif
|
29
39
|
|
30
40
|
|
31
|
-
#
|
41
|
+
#if HTTP_PARSER_DEBUG
|
42
|
+
#define SET_ERRNO(e) \
|
32
43
|
do { \
|
33
|
-
|
34
|
-
|
35
|
-
} \
|
44
|
+
parser->http_errno = (e); \
|
45
|
+
parser->error_lineno = __LINE__; \
|
36
46
|
} while (0)
|
47
|
+
#else
|
48
|
+
#define SET_ERRNO(e) \
|
49
|
+
do { \
|
50
|
+
parser->http_errno = (e); \
|
51
|
+
} while(0)
|
52
|
+
#endif
|
37
53
|
|
38
54
|
|
39
|
-
|
55
|
+
/* Run the notify callback FOR, returning ER if it fails */
|
56
|
+
#define CALLBACK_NOTIFY_(FOR, ER) \
|
40
57
|
do { \
|
41
|
-
|
58
|
+
assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
|
59
|
+
\
|
60
|
+
if (settings->on_##FOR) { \
|
61
|
+
if (0 != settings->on_##FOR(parser)) { \
|
62
|
+
SET_ERRNO(HPE_CB_##FOR); \
|
63
|
+
} \
|
64
|
+
\
|
65
|
+
/* We either errored above or got paused; get out */ \
|
66
|
+
if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
|
67
|
+
return (ER); \
|
68
|
+
} \
|
69
|
+
} \
|
42
70
|
} while (0)
|
43
71
|
|
44
|
-
|
72
|
+
/* Run the notify callback FOR and consume the current byte */
|
73
|
+
#define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
|
74
|
+
|
75
|
+
/* Run the notify callback FOR and don't consume the current byte */
|
76
|
+
#define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
|
77
|
+
|
78
|
+
/* Run data callback FOR with LEN bytes, returning ER if it fails */
|
79
|
+
#define CALLBACK_DATA_(FOR, LEN, ER) \
|
45
80
|
do { \
|
81
|
+
assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
|
82
|
+
\
|
46
83
|
if (FOR##_mark) { \
|
47
84
|
if (settings->on_##FOR) { \
|
48
|
-
if (0 != settings->on_##FOR(parser,
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
85
|
+
if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
|
86
|
+
SET_ERRNO(HPE_CB_##FOR); \
|
87
|
+
} \
|
88
|
+
\
|
89
|
+
/* We either errored above or got paused; get out */ \
|
90
|
+
if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
|
91
|
+
return (ER); \
|
53
92
|
} \
|
54
93
|
} \
|
94
|
+
FOR##_mark = NULL; \
|
55
95
|
} \
|
56
96
|
} while (0)
|
97
|
+
|
98
|
+
/* Run the data callback FOR and consume the current byte */
|
99
|
+
#define CALLBACK_DATA(FOR) \
|
100
|
+
CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
|
57
101
|
|
102
|
+
/* Run the data callback FOR and don't consume the current byte */
|
103
|
+
#define CALLBACK_DATA_NOADVANCE(FOR) \
|
104
|
+
CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
|
58
105
|
|
59
|
-
|
106
|
+
/* Set the mark FOR; non-destructive if mark is already set */
|
107
|
+
#define MARK(FOR) \
|
60
108
|
do { \
|
61
|
-
|
62
|
-
|
109
|
+
if (!FOR##_mark) { \
|
110
|
+
FOR##_mark = p; \
|
111
|
+
} \
|
63
112
|
} while (0)
|
64
113
|
|
65
114
|
|
@@ -97,6 +146,8 @@ static const char *method_strings[] =
|
|
97
146
|
, "NOTIFY"
|
98
147
|
, "SUBSCRIBE"
|
99
148
|
, "UNSUBSCRIBE"
|
149
|
+
, "PATCH"
|
150
|
+
, "PURGE"
|
100
151
|
};
|
101
152
|
|
102
153
|
|
@@ -117,9 +168,9 @@ static const char tokens[256] = {
|
|
117
168
|
/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
|
118
169
|
0, 0, 0, 0, 0, 0, 0, 0,
|
119
170
|
/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
|
120
|
-
|
171
|
+
0, '!', 0, '#', '$', '%', '&', '\'',
|
121
172
|
/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
|
122
|
-
0, 0, '*', '+', 0, '-', '.',
|
173
|
+
0, 0, '*', '+', 0, '-', '.', 0,
|
123
174
|
/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
|
124
175
|
'0', '1', '2', '3', '4', '5', '6', '7',
|
125
176
|
/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
|
@@ -139,7 +190,7 @@ static const char tokens[256] = {
|
|
139
190
|
/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
|
140
191
|
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
|
141
192
|
/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
|
142
|
-
'x', 'y', 'z', 0, '|',
|
193
|
+
'x', 'y', 'z', 0, '|', 0, '~', 0 };
|
143
194
|
|
144
195
|
|
145
196
|
static const int8_t unhex[256] =
|
@@ -186,28 +237,7 @@ static const uint8_t normal_url_char[256] = {
|
|
186
237
|
/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
|
187
238
|
1, 1, 1, 1, 1, 1, 1, 1,
|
188
239
|
/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
|
189
|
-
1, 1, 1, 1, 1, 1, 1, 0,
|
190
|
-
|
191
|
-
/* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
|
192
|
-
encoded paths. This is out of spec, but clients generate this and most other
|
193
|
-
HTTP servers support it. We should, too. */
|
194
|
-
|
195
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
196
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
197
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
198
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
199
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
200
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
201
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
202
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
203
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
204
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
205
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
206
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
207
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
208
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
209
|
-
1, 1, 1, 1, 1, 1, 1, 1,
|
210
|
-
1, 1, 1, 1, 1, 1, 1, 1 };
|
240
|
+
1, 1, 1, 1, 1, 1, 1, 0, };
|
211
241
|
|
212
242
|
|
213
243
|
enum state
|
@@ -236,7 +266,12 @@ enum state
|
|
236
266
|
, s_req_schema
|
237
267
|
, s_req_schema_slash
|
238
268
|
, s_req_schema_slash_slash
|
269
|
+
, s_req_host_start
|
270
|
+
, s_req_host_v6_start
|
271
|
+
, s_req_host_v6
|
272
|
+
, s_req_host_v6_end
|
239
273
|
, s_req_host
|
274
|
+
, s_req_port_start
|
240
275
|
, s_req_port
|
241
276
|
, s_req_path
|
242
277
|
, s_req_query_string_start
|
@@ -258,6 +293,7 @@ enum state
|
|
258
293
|
, s_header_field
|
259
294
|
, s_header_value_start
|
260
295
|
, s_header_value
|
296
|
+
, s_header_value_lws
|
261
297
|
|
262
298
|
, s_header_almost_done
|
263
299
|
|
@@ -265,9 +301,11 @@ enum state
|
|
265
301
|
, s_chunk_size
|
266
302
|
, s_chunk_parameters
|
267
303
|
, s_chunk_size_almost_done
|
268
|
-
|
304
|
+
|
269
305
|
, s_headers_almost_done
|
270
|
-
|
306
|
+
, s_headers_done
|
307
|
+
|
308
|
+
/* Important: 's_headers_done' must be the last 'header' state. All
|
271
309
|
* states beyond this must be 'body' states. It is used for overflow
|
272
310
|
* checking. See the PARSING_HEADER() macro.
|
273
311
|
*/
|
@@ -278,10 +316,12 @@ enum state
|
|
278
316
|
|
279
317
|
, s_body_identity
|
280
318
|
, s_body_identity_eof
|
319
|
+
|
320
|
+
, s_message_done
|
281
321
|
};
|
282
322
|
|
283
323
|
|
284
|
-
#define PARSING_HEADER(state) (state <=
|
324
|
+
#define PARSING_HEADER(state) ((state) <= s_headers_done) /* non-zero while `state` is a header-phase state; argument parenthesized so compound expressions compare as a whole */
|
285
325
|
|
286
326
|
|
287
327
|
enum header_states
|
@@ -311,27 +351,39 @@ enum header_states
|
|
311
351
|
};
|
312
352
|
|
313
353
|
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
354
|
+
/* Macros for character classes; depends on strict-mode */
|
355
|
+
#define CR '\r'
|
356
|
+
#define LF '\n'
|
357
|
+
#define LOWER(c) ((unsigned char)((c) | 0x20)) /* sets bit 0x20 (ASCII upper -> lower); non-letters are altered too, so callers range-check the result */
|
358
|
+
#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
|
359
|
+
#define IS_NUM(c) ((c) >= '0' && (c) <= '9')
|
360
|
+
#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
|
361
|
+
#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
|
323
362
|
|
324
|
-
#
|
325
|
-
#define
|
326
|
-
#define
|
327
|
-
#define
|
363
|
+
#if HTTP_PARSER_STRICT
|
364
|
+
#define TOKEN(c) (tokens[(unsigned char)(c)]) /* strict mode: table lookup only; cast applies to the whole argument so the index stays within 0..255 */
|
365
|
+
#define IS_URL_CHAR(c) (normal_url_char[(unsigned char) (c)])
|
366
|
+
#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
|
367
|
+
#else
|
368
|
+
#define TOKEN(c) (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)]) /* lenient mode: also accepts ' ' as a token char; argument parenthesized (note: still evaluated twice) */
|
369
|
+
#define IS_URL_CHAR(c) \
|
370
|
+
(normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
|
371
|
+
#define IS_HOST_CHAR(c) \
|
372
|
+
(IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
|
373
|
+
#endif
|
328
374
|
|
329
375
|
|
330
376
|
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
|
331
377
|
|
332
378
|
|
333
379
|
#if HTTP_PARSER_STRICT
|
334
|
-
# define STRICT_CHECK(cond)
|
380
|
+
# define STRICT_CHECK(cond) \
|
381
|
+
do { \
|
382
|
+
if (cond) { \
|
383
|
+
SET_ERRNO(HPE_STRICT); \
|
384
|
+
goto error; \
|
385
|
+
} \
|
386
|
+
} while (0)
|
335
387
|
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
|
336
388
|
#else
|
337
389
|
# define STRICT_CHECK(cond)
|
@@ -339,24 +391,228 @@ enum flags
|
|
339
391
|
#endif
|
340
392
|
|
341
393
|
|
394
|
+
/* Map errno values to strings for human-readable output */
|
395
|
+
#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
|
396
|
+
static struct {
|
397
|
+
const char *name;
|
398
|
+
const char *description;
|
399
|
+
} http_strerror_tab[] = {
|
400
|
+
HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
|
401
|
+
};
|
402
|
+
#undef HTTP_STRERROR_GEN
|
403
|
+
|
404
|
+
int http_message_needs_eof(http_parser *parser);
|
405
|
+
|
406
|
+
/* Our URL parser.
|
407
|
+
*
|
408
|
+
* This is designed to be shared by http_parser_execute() for URL validation,
|
409
|
+
* hence it has a state transition + byte-for-byte interface. In addition, it
|
410
|
+
* is meant to be embedded in http_parser_parse_url(), which does the dirty
|
411
|
+
* work of turning state transitions into URL components for its API.
|
412
|
+
*
|
413
|
+
* This function should only be invoked with non-space characters. It is
|
414
|
+
* assumed that the caller cares about (and can detect) the transition between
|
415
|
+
* URL and non-URL states by looking for these.
|
416
|
+
*/
|
417
|
+
static enum state
|
418
|
+
parse_url_char(enum state s, const char ch)
|
419
|
+
{
|
420
|
+
assert(!isspace(ch));
|
421
|
+
|
422
|
+
switch (s) {
|
423
|
+
case s_req_spaces_before_url:
|
424
|
+
/* Proxied requests are followed by scheme of an absolute URI (alpha).
|
425
|
+
* All methods except CONNECT are followed by '/' or '*'.
|
426
|
+
*/
|
427
|
+
|
428
|
+
if (ch == '/' || ch == '*') {
|
429
|
+
return s_req_path;
|
430
|
+
}
|
431
|
+
|
432
|
+
if (IS_ALPHA(ch)) {
|
433
|
+
return s_req_schema;
|
434
|
+
}
|
435
|
+
|
436
|
+
break;
|
437
|
+
|
438
|
+
case s_req_schema:
|
439
|
+
if (IS_ALPHA(ch)) {
|
440
|
+
return s;
|
441
|
+
}
|
442
|
+
|
443
|
+
if (ch == ':') {
|
444
|
+
return s_req_schema_slash;
|
445
|
+
}
|
446
|
+
|
447
|
+
break;
|
448
|
+
|
449
|
+
case s_req_schema_slash:
|
450
|
+
if (ch == '/') {
|
451
|
+
return s_req_schema_slash_slash;
|
452
|
+
}
|
453
|
+
|
454
|
+
break;
|
455
|
+
|
456
|
+
case s_req_schema_slash_slash:
|
457
|
+
if (ch == '/') {
|
458
|
+
return s_req_host_start;
|
459
|
+
}
|
460
|
+
|
461
|
+
break;
|
462
|
+
|
463
|
+
case s_req_host_start:
|
464
|
+
if (ch == '[') {
|
465
|
+
return s_req_host_v6_start;
|
466
|
+
}
|
467
|
+
|
468
|
+
if (IS_HOST_CHAR(ch)) {
|
469
|
+
return s_req_host;
|
470
|
+
}
|
471
|
+
|
472
|
+
break;
|
473
|
+
|
474
|
+
case s_req_host:
|
475
|
+
if (IS_HOST_CHAR(ch)) {
|
476
|
+
return s_req_host;
|
477
|
+
}
|
478
|
+
|
479
|
+
/* FALLTHROUGH */
|
480
|
+
case s_req_host_v6_end:
|
481
|
+
switch (ch) {
|
482
|
+
case ':':
|
483
|
+
return s_req_port_start;
|
484
|
+
|
485
|
+
case '/':
|
486
|
+
return s_req_path;
|
487
|
+
|
488
|
+
case '?':
|
489
|
+
return s_req_query_string_start;
|
490
|
+
}
|
491
|
+
|
492
|
+
break;
|
493
|
+
|
494
|
+
case s_req_host_v6:
|
495
|
+
if (ch == ']') {
|
496
|
+
return s_req_host_v6_end;
|
497
|
+
}
|
498
|
+
|
499
|
+
/* FALLTHROUGH */
|
500
|
+
case s_req_host_v6_start:
|
501
|
+
if (IS_HEX(ch) || ch == ':') {
|
502
|
+
return s_req_host_v6;
|
503
|
+
}
|
504
|
+
break;
|
505
|
+
|
506
|
+
case s_req_port:
|
507
|
+
switch (ch) {
|
508
|
+
case '/':
|
509
|
+
return s_req_path;
|
510
|
+
|
511
|
+
case '?':
|
512
|
+
return s_req_query_string_start;
|
513
|
+
}
|
514
|
+
|
515
|
+
/* FALLTHROUGH */
|
516
|
+
case s_req_port_start:
|
517
|
+
if (IS_NUM(ch)) {
|
518
|
+
return s_req_port;
|
519
|
+
}
|
520
|
+
|
521
|
+
break;
|
522
|
+
|
523
|
+
case s_req_path:
|
524
|
+
if (IS_URL_CHAR(ch)) {
|
525
|
+
return s;
|
526
|
+
}
|
527
|
+
|
528
|
+
switch (ch) {
|
529
|
+
case '?':
|
530
|
+
return s_req_query_string_start;
|
531
|
+
|
532
|
+
case '#':
|
533
|
+
return s_req_fragment_start;
|
534
|
+
}
|
535
|
+
|
536
|
+
break;
|
537
|
+
|
538
|
+
case s_req_query_string_start:
|
539
|
+
case s_req_query_string:
|
540
|
+
if (IS_URL_CHAR(ch)) {
|
541
|
+
return s_req_query_string;
|
542
|
+
}
|
543
|
+
|
544
|
+
switch (ch) {
|
545
|
+
case '?':
|
546
|
+
/* allow extra '?' in query string */
|
547
|
+
return s_req_query_string;
|
548
|
+
|
549
|
+
case '#':
|
550
|
+
return s_req_fragment_start;
|
551
|
+
}
|
552
|
+
|
553
|
+
break;
|
554
|
+
|
555
|
+
case s_req_fragment_start:
|
556
|
+
if (IS_URL_CHAR(ch)) {
|
557
|
+
return s_req_fragment;
|
558
|
+
}
|
559
|
+
|
560
|
+
switch (ch) {
|
561
|
+
case '?':
|
562
|
+
return s_req_fragment;
|
563
|
+
|
564
|
+
case '#':
|
565
|
+
return s;
|
566
|
+
}
|
567
|
+
|
568
|
+
break;
|
569
|
+
|
570
|
+
case s_req_fragment:
|
571
|
+
if (IS_URL_CHAR(ch)) {
|
572
|
+
return s;
|
573
|
+
}
|
574
|
+
|
575
|
+
switch (ch) {
|
576
|
+
case '?':
|
577
|
+
case '#':
|
578
|
+
return s;
|
579
|
+
}
|
580
|
+
|
581
|
+
break;
|
582
|
+
|
583
|
+
default:
|
584
|
+
break;
|
585
|
+
}
|
586
|
+
|
587
|
+
/* We should never fall out of the switch above unless there's an error */
|
588
|
+
return s_dead;
|
589
|
+
}
|
590
|
+
|
342
591
|
size_t http_parser_execute (http_parser *parser,
|
343
592
|
const http_parser_settings *settings,
|
344
593
|
const char *data,
|
345
594
|
size_t len)
|
346
595
|
{
|
347
596
|
char c, ch;
|
348
|
-
|
349
|
-
|
597
|
+
int8_t unhex_val;
|
598
|
+
const char *p = data;
|
599
|
+
const char *header_field_mark = 0;
|
600
|
+
const char *header_value_mark = 0;
|
601
|
+
const char *url_mark = 0;
|
602
|
+
const char *body_mark = 0;
|
350
603
|
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
604
|
+
/* We're in an error state. Don't bother doing anything. */
|
605
|
+
if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
|
606
|
+
return 0;
|
607
|
+
}
|
355
608
|
|
356
609
|
if (len == 0) {
|
357
|
-
switch (state) {
|
610
|
+
switch (parser->state) {
|
358
611
|
case s_body_identity_eof:
|
359
|
-
|
612
|
+
/* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
|
613
|
+
* we got paused.
|
614
|
+
*/
|
615
|
+
CALLBACK_NOTIFY_NOADVANCE(message_complete);
|
360
616
|
return 0;
|
361
617
|
|
362
618
|
case s_dead:
|
@@ -366,52 +622,59 @@ size_t http_parser_execute (http_parser *parser,
|
|
366
622
|
return 0;
|
367
623
|
|
368
624
|
default:
|
369
|
-
|
625
|
+
SET_ERRNO(HPE_INVALID_EOF_STATE);
|
626
|
+
return 1;
|
370
627
|
}
|
371
628
|
}
|
372
629
|
|
373
|
-
/* technically we could combine all of these (except for url_mark) into one
|
374
|
-
variable, saving stack space, but it seems more clear to have them
|
375
|
-
separated. */
|
376
|
-
const char *header_field_mark = 0;
|
377
|
-
const char *header_value_mark = 0;
|
378
|
-
const char *fragment_mark = 0;
|
379
|
-
const char *query_string_mark = 0;
|
380
|
-
const char *path_mark = 0;
|
381
|
-
const char *url_mark = 0;
|
382
630
|
|
383
|
-
if (state == s_header_field)
|
631
|
+
if (parser->state == s_header_field)
|
384
632
|
header_field_mark = data;
|
385
|
-
if (state == s_header_value)
|
633
|
+
if (parser->state == s_header_value)
|
386
634
|
header_value_mark = data;
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
635
|
+
switch (parser->state) {
|
636
|
+
case s_req_path:
|
637
|
+
case s_req_schema:
|
638
|
+
case s_req_schema_slash:
|
639
|
+
case s_req_schema_slash_slash:
|
640
|
+
case s_req_host_start:
|
641
|
+
case s_req_host_v6_start:
|
642
|
+
case s_req_host_v6:
|
643
|
+
case s_req_host_v6_end:
|
644
|
+
case s_req_host:
|
645
|
+
case s_req_port_start:
|
646
|
+
case s_req_port:
|
647
|
+
case s_req_query_string_start:
|
648
|
+
case s_req_query_string:
|
649
|
+
case s_req_fragment_start:
|
650
|
+
case s_req_fragment:
|
398
651
|
url_mark = data;
|
652
|
+
break;
|
653
|
+
}
|
399
654
|
|
400
|
-
for (p=data
|
655
|
+
for (p=data; p != data + len; p++) {
|
401
656
|
ch = *p;
|
402
657
|
|
403
|
-
if (PARSING_HEADER(state)) {
|
404
|
-
++nread;
|
658
|
+
if (PARSING_HEADER(parser->state)) {
|
659
|
+
++parser->nread;
|
405
660
|
/* Buffer overflow attack */
|
406
|
-
if (nread > HTTP_MAX_HEADER_SIZE)
|
661
|
+
if (parser->nread > HTTP_MAX_HEADER_SIZE) {
|
662
|
+
SET_ERRNO(HPE_HEADER_OVERFLOW);
|
663
|
+
goto error;
|
664
|
+
}
|
407
665
|
}
|
408
666
|
|
409
|
-
|
667
|
+
reexecute_byte:
|
668
|
+
switch (parser->state) {
|
410
669
|
|
411
670
|
case s_dead:
|
412
671
|
/* this state is used after a 'Connection: close' message
|
413
672
|
* the parser will error out if it reads another message
|
414
673
|
*/
|
674
|
+
if (ch == CR || ch == LF)
|
675
|
+
break;
|
676
|
+
|
677
|
+
SET_ERRNO(HPE_CLOSED_CONNECTION);
|
415
678
|
goto error;
|
416
679
|
|
417
680
|
case s_start_req_or_res:
|
@@ -419,42 +682,46 @@ size_t http_parser_execute (http_parser *parser,
|
|
419
682
|
if (ch == CR || ch == LF)
|
420
683
|
break;
|
421
684
|
parser->flags = 0;
|
422
|
-
parser->content_length =
|
685
|
+
parser->content_length = ULLONG_MAX;
|
423
686
|
|
424
|
-
|
687
|
+
if (ch == 'H') {
|
688
|
+
parser->state = s_res_or_resp_H;
|
425
689
|
|
426
|
-
|
427
|
-
|
428
|
-
else {
|
690
|
+
CALLBACK_NOTIFY(message_begin);
|
691
|
+
} else {
|
429
692
|
parser->type = HTTP_REQUEST;
|
430
|
-
|
693
|
+
parser->state = s_start_req;
|
694
|
+
goto reexecute_byte;
|
431
695
|
}
|
696
|
+
|
432
697
|
break;
|
433
698
|
}
|
434
699
|
|
435
700
|
case s_res_or_resp_H:
|
436
701
|
if (ch == 'T') {
|
437
702
|
parser->type = HTTP_RESPONSE;
|
438
|
-
state = s_res_HT;
|
703
|
+
parser->state = s_res_HT;
|
439
704
|
} else {
|
440
|
-
if (ch != 'E')
|
705
|
+
if (ch != 'E') {
|
706
|
+
SET_ERRNO(HPE_INVALID_CONSTANT);
|
707
|
+
goto error;
|
708
|
+
}
|
709
|
+
|
441
710
|
parser->type = HTTP_REQUEST;
|
442
711
|
parser->method = HTTP_HEAD;
|
443
|
-
index = 2;
|
444
|
-
state = s_req_method;
|
712
|
+
parser->index = 2;
|
713
|
+
parser->state = s_req_method;
|
445
714
|
}
|
446
715
|
break;
|
447
716
|
|
448
717
|
case s_start_res:
|
449
718
|
{
|
450
719
|
parser->flags = 0;
|
451
|
-
parser->content_length =
|
452
|
-
|
453
|
-
CALLBACK2(message_begin);
|
720
|
+
parser->content_length = ULLONG_MAX;
|
454
721
|
|
455
722
|
switch (ch) {
|
456
723
|
case 'H':
|
457
|
-
state = s_res_H;
|
724
|
+
parser->state = s_res_H;
|
458
725
|
break;
|
459
726
|
|
460
727
|
case CR:
|
@@ -462,105 +729,133 @@ size_t http_parser_execute (http_parser *parser,
|
|
462
729
|
break;
|
463
730
|
|
464
731
|
default:
|
732
|
+
SET_ERRNO(HPE_INVALID_CONSTANT);
|
465
733
|
goto error;
|
466
734
|
}
|
735
|
+
|
736
|
+
CALLBACK_NOTIFY(message_begin);
|
467
737
|
break;
|
468
738
|
}
|
469
739
|
|
470
740
|
case s_res_H:
|
471
741
|
STRICT_CHECK(ch != 'T');
|
472
|
-
state = s_res_HT;
|
742
|
+
parser->state = s_res_HT;
|
473
743
|
break;
|
474
744
|
|
475
745
|
case s_res_HT:
|
476
746
|
STRICT_CHECK(ch != 'T');
|
477
|
-
state = s_res_HTT;
|
747
|
+
parser->state = s_res_HTT;
|
478
748
|
break;
|
479
749
|
|
480
750
|
case s_res_HTT:
|
481
751
|
STRICT_CHECK(ch != 'P');
|
482
|
-
state = s_res_HTTP;
|
752
|
+
parser->state = s_res_HTTP;
|
483
753
|
break;
|
484
754
|
|
485
755
|
case s_res_HTTP:
|
486
756
|
STRICT_CHECK(ch != '/');
|
487
|
-
state = s_res_first_http_major;
|
757
|
+
parser->state = s_res_first_http_major;
|
488
758
|
break;
|
489
759
|
|
490
760
|
case s_res_first_http_major:
|
491
|
-
if (ch < '
|
761
|
+
if (ch < '0' || ch > '9') {
|
762
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
763
|
+
goto error;
|
764
|
+
}
|
765
|
+
|
492
766
|
parser->http_major = ch - '0';
|
493
|
-
state = s_res_http_major;
|
767
|
+
parser->state = s_res_http_major;
|
494
768
|
break;
|
495
769
|
|
496
770
|
/* major HTTP version or dot */
|
497
771
|
case s_res_http_major:
|
498
772
|
{
|
499
773
|
if (ch == '.') {
|
500
|
-
state = s_res_first_http_minor;
|
774
|
+
parser->state = s_res_first_http_minor;
|
501
775
|
break;
|
502
776
|
}
|
503
777
|
|
504
|
-
if (ch
|
778
|
+
if (!IS_NUM(ch)) {
|
779
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
780
|
+
goto error;
|
781
|
+
}
|
505
782
|
|
506
783
|
parser->http_major *= 10;
|
507
784
|
parser->http_major += ch - '0';
|
508
785
|
|
509
|
-
if (parser->http_major > 999)
|
786
|
+
if (parser->http_major > 999) {
|
787
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
788
|
+
goto error;
|
789
|
+
}
|
790
|
+
|
510
791
|
break;
|
511
792
|
}
|
512
793
|
|
513
794
|
/* first digit of minor HTTP version */
|
514
795
|
case s_res_first_http_minor:
|
515
|
-
if (ch
|
796
|
+
if (!IS_NUM(ch)) {
|
797
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
798
|
+
goto error;
|
799
|
+
}
|
800
|
+
|
516
801
|
parser->http_minor = ch - '0';
|
517
|
-
state = s_res_http_minor;
|
802
|
+
parser->state = s_res_http_minor;
|
518
803
|
break;
|
519
804
|
|
520
805
|
/* minor HTTP version or end of request line */
|
521
806
|
case s_res_http_minor:
|
522
807
|
{
|
523
808
|
if (ch == ' ') {
|
524
|
-
state = s_res_first_status_code;
|
809
|
+
parser->state = s_res_first_status_code;
|
525
810
|
break;
|
526
811
|
}
|
527
812
|
|
528
|
-
if (ch
|
813
|
+
if (!IS_NUM(ch)) {
|
814
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
815
|
+
goto error;
|
816
|
+
}
|
529
817
|
|
530
818
|
parser->http_minor *= 10;
|
531
819
|
parser->http_minor += ch - '0';
|
532
820
|
|
533
|
-
if (parser->http_minor > 999)
|
821
|
+
if (parser->http_minor > 999) {
|
822
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
823
|
+
goto error;
|
824
|
+
}
|
825
|
+
|
534
826
|
break;
|
535
827
|
}
|
536
828
|
|
537
829
|
case s_res_first_status_code:
|
538
830
|
{
|
539
|
-
if (ch
|
831
|
+
if (!IS_NUM(ch)) {
|
540
832
|
if (ch == ' ') {
|
541
833
|
break;
|
542
834
|
}
|
835
|
+
|
836
|
+
SET_ERRNO(HPE_INVALID_STATUS);
|
543
837
|
goto error;
|
544
838
|
}
|
545
839
|
parser->status_code = ch - '0';
|
546
|
-
state = s_res_status_code;
|
840
|
+
parser->state = s_res_status_code;
|
547
841
|
break;
|
548
842
|
}
|
549
843
|
|
550
844
|
case s_res_status_code:
|
551
845
|
{
|
552
|
-
if (ch
|
846
|
+
if (!IS_NUM(ch)) {
|
553
847
|
switch (ch) {
|
554
848
|
case ' ':
|
555
|
-
state = s_res_status;
|
849
|
+
parser->state = s_res_status;
|
556
850
|
break;
|
557
851
|
case CR:
|
558
|
-
state = s_res_line_almost_done;
|
852
|
+
parser->state = s_res_line_almost_done;
|
559
853
|
break;
|
560
854
|
case LF:
|
561
|
-
state = s_header_field_start;
|
855
|
+
parser->state = s_header_field_start;
|
562
856
|
break;
|
563
857
|
default:
|
858
|
+
SET_ERRNO(HPE_INVALID_STATUS);
|
564
859
|
goto error;
|
565
860
|
}
|
566
861
|
break;
|
@@ -569,7 +864,11 @@ size_t http_parser_execute (http_parser *parser,
|
|
569
864
|
parser->status_code *= 10;
|
570
865
|
parser->status_code += ch - '0';
|
571
866
|
|
572
|
-
if (parser->status_code > 999)
|
867
|
+
if (parser->status_code > 999) {
|
868
|
+
SET_ERRNO(HPE_INVALID_STATUS);
|
869
|
+
goto error;
|
870
|
+
}
|
871
|
+
|
573
872
|
break;
|
574
873
|
}
|
575
874
|
|
@@ -577,19 +876,19 @@ size_t http_parser_execute (http_parser *parser,
|
|
577
876
|
/* the human readable status. e.g. "NOT FOUND"
|
578
877
|
* we are not humans so just ignore this */
|
579
878
|
if (ch == CR) {
|
580
|
-
state = s_res_line_almost_done;
|
879
|
+
parser->state = s_res_line_almost_done;
|
581
880
|
break;
|
582
881
|
}
|
583
882
|
|
584
883
|
if (ch == LF) {
|
585
|
-
state = s_header_field_start;
|
884
|
+
parser->state = s_header_field_start;
|
586
885
|
break;
|
587
886
|
}
|
588
887
|
break;
|
589
888
|
|
590
889
|
case s_res_line_almost_done:
|
591
890
|
STRICT_CHECK(ch != LF);
|
592
|
-
state = s_header_field_start;
|
891
|
+
parser->state = s_header_field_start;
|
593
892
|
break;
|
594
893
|
|
595
894
|
case s_start_req:
|
@@ -597,15 +896,15 @@ size_t http_parser_execute (http_parser *parser,
|
|
597
896
|
if (ch == CR || ch == LF)
|
598
897
|
break;
|
599
898
|
parser->flags = 0;
|
600
|
-
parser->content_length =
|
899
|
+
parser->content_length = ULLONG_MAX;
|
601
900
|
|
602
|
-
|
603
|
-
|
604
|
-
|
901
|
+
if (!IS_ALPHA(ch)) {
|
902
|
+
SET_ERRNO(HPE_INVALID_METHOD);
|
903
|
+
goto error;
|
904
|
+
}
|
605
905
|
|
606
|
-
start_req_method_assign:
|
607
906
|
parser->method = (enum http_method) 0;
|
608
|
-
index = 1;
|
907
|
+
parser->index = 1;
|
609
908
|
switch (ch) {
|
610
909
|
case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
|
611
910
|
case 'D': parser->method = HTTP_DELETE; break;
|
@@ -615,342 +914,157 @@ size_t http_parser_execute (http_parser *parser,
|
|
615
914
|
case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
|
616
915
|
case 'N': parser->method = HTTP_NOTIFY; break;
|
617
916
|
case 'O': parser->method = HTTP_OPTIONS; break;
|
618
|
-
case 'P': parser->method = HTTP_POST;
|
917
|
+
case 'P': parser->method = HTTP_POST;
|
918
|
+
/* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
|
919
|
+
break;
|
619
920
|
case 'R': parser->method = HTTP_REPORT; break;
|
620
921
|
case 'S': parser->method = HTTP_SUBSCRIBE; break;
|
621
922
|
case 'T': parser->method = HTTP_TRACE; break;
|
622
923
|
case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
|
623
|
-
default:
|
924
|
+
default:
|
925
|
+
SET_ERRNO(HPE_INVALID_METHOD);
|
926
|
+
goto error;
|
624
927
|
}
|
625
|
-
state = s_req_method;
|
928
|
+
parser->state = s_req_method;
|
929
|
+
|
930
|
+
CALLBACK_NOTIFY(message_begin);
|
931
|
+
|
626
932
|
break;
|
627
933
|
}
|
628
934
|
|
629
935
|
case s_req_method:
|
630
936
|
{
|
631
|
-
|
937
|
+
const char *matcher;
|
938
|
+
if (ch == '\0') {
|
939
|
+
SET_ERRNO(HPE_INVALID_METHOD);
|
632
940
|
goto error;
|
941
|
+
}
|
633
942
|
|
634
|
-
|
635
|
-
if (ch == ' ' && matcher[index] == '\0') {
|
636
|
-
state = s_req_spaces_before_url;
|
637
|
-
} else if (ch == matcher[index]) {
|
943
|
+
matcher = method_strings[parser->method];
|
944
|
+
if (ch == ' ' && matcher[parser->index] == '\0') {
|
945
|
+
parser->state = s_req_spaces_before_url;
|
946
|
+
} else if (ch == matcher[parser->index]) {
|
638
947
|
; /* nada */
|
639
948
|
} else if (parser->method == HTTP_CONNECT) {
|
640
|
-
if (index == 1 && ch == 'H') {
|
949
|
+
if (parser->index == 1 && ch == 'H') {
|
641
950
|
parser->method = HTTP_CHECKOUT;
|
642
|
-
} else if (index == 2 && ch == 'P') {
|
951
|
+
} else if (parser->index == 2 && ch == 'P') {
|
643
952
|
parser->method = HTTP_COPY;
|
953
|
+
} else {
|
954
|
+
goto error;
|
644
955
|
}
|
645
956
|
} else if (parser->method == HTTP_MKCOL) {
|
646
|
-
if (index == 1 && ch == 'O') {
|
957
|
+
if (parser->index == 1 && ch == 'O') {
|
647
958
|
parser->method = HTTP_MOVE;
|
648
|
-
} else if (index == 1 && ch == 'E') {
|
959
|
+
} else if (parser->index == 1 && ch == 'E') {
|
649
960
|
parser->method = HTTP_MERGE;
|
650
|
-
} else if (index == 1 && ch == '-') {
|
961
|
+
} else if (parser->index == 1 && ch == '-') {
|
651
962
|
parser->method = HTTP_MSEARCH;
|
652
|
-
} else if (index == 2 && ch == 'A') {
|
963
|
+
} else if (parser->index == 2 && ch == 'A') {
|
653
964
|
parser->method = HTTP_MKACTIVITY;
|
965
|
+
} else {
|
966
|
+
goto error;
|
654
967
|
}
|
655
|
-
} else if (index == 1 && parser->method == HTTP_POST
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
968
|
+
} else if (parser->index == 1 && parser->method == HTTP_POST) {
|
969
|
+
if (ch == 'R') {
|
970
|
+
parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
|
971
|
+
} else if (ch == 'U') {
|
972
|
+
parser->method = HTTP_PUT; /* or HTTP_PURGE */
|
973
|
+
} else if (ch == 'A') {
|
974
|
+
parser->method = HTTP_PATCH;
|
975
|
+
} else {
|
976
|
+
goto error;
|
977
|
+
}
|
978
|
+
} else if (parser->index == 2) {
|
979
|
+
if (parser->method == HTTP_PUT) {
|
980
|
+
if (ch == 'R') parser->method = HTTP_PURGE;
|
981
|
+
} else if (parser->method == HTTP_UNLOCK) {
|
982
|
+
if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
|
983
|
+
}
|
984
|
+
} else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
|
662
985
|
parser->method = HTTP_PROPPATCH;
|
663
986
|
} else {
|
987
|
+
SET_ERRNO(HPE_INVALID_METHOD);
|
664
988
|
goto error;
|
665
989
|
}
|
666
990
|
|
667
|
-
++index;
|
991
|
+
++parser->index;
|
668
992
|
break;
|
669
993
|
}
|
994
|
+
|
670
995
|
case s_req_spaces_before_url:
|
671
996
|
{
|
672
997
|
if (ch == ' ') break;
|
673
998
|
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
state = s_req_path;
|
678
|
-
break;
|
999
|
+
MARK(url);
|
1000
|
+
if (parser->method == HTTP_CONNECT) {
|
1001
|
+
parser->state = s_req_host_start;
|
679
1002
|
}
|
680
1003
|
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
state = s_req_schema;
|
686
|
-
break;
|
1004
|
+
parser->state = parse_url_char((enum state)parser->state, ch);
|
1005
|
+
if (parser->state == s_dead) {
|
1006
|
+
SET_ERRNO(HPE_INVALID_URL);
|
1007
|
+
goto error;
|
687
1008
|
}
|
688
1009
|
|
689
|
-
|
1010
|
+
break;
|
690
1011
|
}
|
691
1012
|
|
692
1013
|
case s_req_schema:
|
693
|
-
{
|
694
|
-
c = LOWER(ch);
|
695
|
-
|
696
|
-
if (c >= 'a' && c <= 'z') break;
|
697
|
-
|
698
|
-
if (ch == ':') {
|
699
|
-
state = s_req_schema_slash;
|
700
|
-
break;
|
701
|
-
} else if (ch == '.') {
|
702
|
-
state = s_req_host;
|
703
|
-
break;
|
704
|
-
} else if ('0' <= ch && ch <= '9') {
|
705
|
-
state = s_req_host;
|
706
|
-
break;
|
707
|
-
}
|
708
|
-
|
709
|
-
goto error;
|
710
|
-
}
|
711
|
-
|
712
1014
|
case s_req_schema_slash:
|
713
|
-
STRICT_CHECK(ch != '/');
|
714
|
-
state = s_req_schema_slash_slash;
|
715
|
-
break;
|
716
|
-
|
717
1015
|
case s_req_schema_slash_slash:
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
case s_req_host:
|
723
|
-
{
|
724
|
-
c = LOWER(ch);
|
725
|
-
if (c >= 'a' && c <= 'z') break;
|
726
|
-
if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
|
727
|
-
switch (ch) {
|
728
|
-
case ':':
|
729
|
-
state = s_req_port;
|
730
|
-
break;
|
731
|
-
case '/':
|
732
|
-
MARK(path);
|
733
|
-
state = s_req_path;
|
734
|
-
break;
|
735
|
-
case ' ':
|
736
|
-
/* The request line looks like:
|
737
|
-
* "GET http://foo.bar.com HTTP/1.1"
|
738
|
-
* That is, there is no path.
|
739
|
-
*/
|
740
|
-
CALLBACK(url);
|
741
|
-
state = s_req_http_start;
|
742
|
-
break;
|
743
|
-
case '?':
|
744
|
-
state = s_req_query_string_start;
|
745
|
-
break;
|
746
|
-
default:
|
747
|
-
goto error;
|
748
|
-
}
|
749
|
-
break;
|
750
|
-
}
|
751
|
-
|
752
|
-
case s_req_port:
|
1016
|
+
case s_req_host_start:
|
1017
|
+
case s_req_host_v6_start:
|
1018
|
+
case s_req_host_v6:
|
1019
|
+
case s_req_port_start:
|
753
1020
|
{
|
754
|
-
if (ch >= '0' && ch <= '9') break;
|
755
1021
|
switch (ch) {
|
756
|
-
|
757
|
-
MARK(path);
|
758
|
-
state = s_req_path;
|
759
|
-
break;
|
1022
|
+
/* No whitespace allowed here */
|
760
1023
|
case ' ':
|
761
|
-
/* The request line looks like:
|
762
|
-
* "GET http://foo.bar.com:1234 HTTP/1.1"
|
763
|
-
* That is, there is no path.
|
764
|
-
*/
|
765
|
-
CALLBACK(url);
|
766
|
-
state = s_req_http_start;
|
767
|
-
break;
|
768
|
-
case '?':
|
769
|
-
state = s_req_query_string_start;
|
770
|
-
break;
|
771
|
-
default:
|
772
|
-
goto error;
|
773
|
-
}
|
774
|
-
break;
|
775
|
-
}
|
776
|
-
|
777
|
-
case s_req_path:
|
778
|
-
{
|
779
|
-
if (normal_url_char[(unsigned char)ch]) break;
|
780
|
-
|
781
|
-
switch (ch) {
|
782
|
-
case ' ':
|
783
|
-
CALLBACK(url);
|
784
|
-
CALLBACK(path);
|
785
|
-
state = s_req_http_start;
|
786
|
-
break;
|
787
1024
|
case CR:
|
788
|
-
CALLBACK(url);
|
789
|
-
CALLBACK(path);
|
790
|
-
parser->http_major = 0;
|
791
|
-
parser->http_minor = 9;
|
792
|
-
state = s_req_line_almost_done;
|
793
|
-
break;
|
794
1025
|
case LF:
|
795
|
-
|
796
|
-
CALLBACK(path);
|
797
|
-
parser->http_major = 0;
|
798
|
-
parser->http_minor = 9;
|
799
|
-
state = s_header_field_start;
|
800
|
-
break;
|
801
|
-
case '?':
|
802
|
-
CALLBACK(path);
|
803
|
-
state = s_req_query_string_start;
|
804
|
-
break;
|
805
|
-
case '#':
|
806
|
-
CALLBACK(path);
|
807
|
-
state = s_req_fragment_start;
|
808
|
-
break;
|
809
|
-
default:
|
1026
|
+
SET_ERRNO(HPE_INVALID_URL);
|
810
1027
|
goto error;
|
811
|
-
}
|
812
|
-
break;
|
813
|
-
}
|
814
|
-
|
815
|
-
case s_req_query_string_start:
|
816
|
-
{
|
817
|
-
if (normal_url_char[(unsigned char)ch]) {
|
818
|
-
MARK(query_string);
|
819
|
-
state = s_req_query_string;
|
820
|
-
break;
|
821
|
-
}
|
822
|
-
|
823
|
-
switch (ch) {
|
824
|
-
case '?':
|
825
|
-
break; /* XXX ignore extra '?' ... is this right? */
|
826
|
-
case ' ':
|
827
|
-
CALLBACK(url);
|
828
|
-
state = s_req_http_start;
|
829
|
-
break;
|
830
|
-
case CR:
|
831
|
-
CALLBACK(url);
|
832
|
-
parser->http_major = 0;
|
833
|
-
parser->http_minor = 9;
|
834
|
-
state = s_req_line_almost_done;
|
835
|
-
break;
|
836
|
-
case LF:
|
837
|
-
CALLBACK(url);
|
838
|
-
parser->http_major = 0;
|
839
|
-
parser->http_minor = 9;
|
840
|
-
state = s_header_field_start;
|
841
|
-
break;
|
842
|
-
case '#':
|
843
|
-
state = s_req_fragment_start;
|
844
|
-
break;
|
845
1028
|
default:
|
846
|
-
|
1029
|
+
parser->state = parse_url_char((enum state)parser->state, ch);
|
1030
|
+
if (parser->state == s_dead) {
|
1031
|
+
SET_ERRNO(HPE_INVALID_URL);
|
1032
|
+
goto error;
|
1033
|
+
}
|
847
1034
|
}
|
848
|
-
break;
|
849
|
-
}
|
850
1035
|
|
851
|
-
case s_req_query_string:
|
852
|
-
{
|
853
|
-
if (normal_url_char[(unsigned char)ch]) break;
|
854
|
-
|
855
|
-
switch (ch) {
|
856
|
-
case '?':
|
857
|
-
/* allow extra '?' in query string */
|
858
|
-
break;
|
859
|
-
case ' ':
|
860
|
-
CALLBACK(url);
|
861
|
-
CALLBACK(query_string);
|
862
|
-
state = s_req_http_start;
|
863
|
-
break;
|
864
|
-
case CR:
|
865
|
-
CALLBACK(url);
|
866
|
-
CALLBACK(query_string);
|
867
|
-
parser->http_major = 0;
|
868
|
-
parser->http_minor = 9;
|
869
|
-
state = s_req_line_almost_done;
|
870
|
-
break;
|
871
|
-
case LF:
|
872
|
-
CALLBACK(url);
|
873
|
-
CALLBACK(query_string);
|
874
|
-
parser->http_major = 0;
|
875
|
-
parser->http_minor = 9;
|
876
|
-
state = s_header_field_start;
|
877
|
-
break;
|
878
|
-
case '#':
|
879
|
-
CALLBACK(query_string);
|
880
|
-
state = s_req_fragment_start;
|
881
|
-
break;
|
882
|
-
default:
|
883
|
-
goto error;
|
884
|
-
}
|
885
1036
|
break;
|
886
1037
|
}
|
887
1038
|
|
1039
|
+
case s_req_host:
|
1040
|
+
case s_req_host_v6_end:
|
1041
|
+
case s_req_port:
|
1042
|
+
case s_req_path:
|
1043
|
+
case s_req_query_string_start:
|
1044
|
+
case s_req_query_string:
|
888
1045
|
case s_req_fragment_start:
|
889
|
-
{
|
890
|
-
if (normal_url_char[(unsigned char)ch]) {
|
891
|
-
MARK(fragment);
|
892
|
-
state = s_req_fragment;
|
893
|
-
break;
|
894
|
-
}
|
895
|
-
|
896
|
-
switch (ch) {
|
897
|
-
case ' ':
|
898
|
-
CALLBACK(url);
|
899
|
-
state = s_req_http_start;
|
900
|
-
break;
|
901
|
-
case CR:
|
902
|
-
CALLBACK(url);
|
903
|
-
parser->http_major = 0;
|
904
|
-
parser->http_minor = 9;
|
905
|
-
state = s_req_line_almost_done;
|
906
|
-
break;
|
907
|
-
case LF:
|
908
|
-
CALLBACK(url);
|
909
|
-
parser->http_major = 0;
|
910
|
-
parser->http_minor = 9;
|
911
|
-
state = s_header_field_start;
|
912
|
-
break;
|
913
|
-
case '?':
|
914
|
-
MARK(fragment);
|
915
|
-
state = s_req_fragment;
|
916
|
-
break;
|
917
|
-
case '#':
|
918
|
-
break;
|
919
|
-
default:
|
920
|
-
goto error;
|
921
|
-
}
|
922
|
-
break;
|
923
|
-
}
|
924
|
-
|
925
1046
|
case s_req_fragment:
|
926
1047
|
{
|
927
|
-
if (normal_url_char[(unsigned char)ch]) break;
|
928
|
-
|
929
1048
|
switch (ch) {
|
930
1049
|
case ' ':
|
931
|
-
|
932
|
-
|
933
|
-
state = s_req_http_start;
|
1050
|
+
parser->state = s_req_http_start;
|
1051
|
+
CALLBACK_DATA(url);
|
934
1052
|
break;
|
935
1053
|
case CR:
|
936
|
-
CALLBACK(url);
|
937
|
-
CALLBACK(fragment);
|
938
|
-
parser->http_major = 0;
|
939
|
-
parser->http_minor = 9;
|
940
|
-
state = s_req_line_almost_done;
|
941
|
-
break;
|
942
1054
|
case LF:
|
943
|
-
CALLBACK(url);
|
944
|
-
CALLBACK(fragment);
|
945
1055
|
parser->http_major = 0;
|
946
1056
|
parser->http_minor = 9;
|
947
|
-
state =
|
948
|
-
|
949
|
-
|
950
|
-
|
1057
|
+
parser->state = (ch == CR) ?
|
1058
|
+
s_req_line_almost_done :
|
1059
|
+
s_header_field_start;
|
1060
|
+
CALLBACK_DATA(url);
|
951
1061
|
break;
|
952
1062
|
default:
|
953
|
-
|
1063
|
+
parser->state = parse_url_char((enum state)parser->state, ch);
|
1064
|
+
if (parser->state == s_dead) {
|
1065
|
+
SET_ERRNO(HPE_INVALID_URL);
|
1066
|
+
goto error;
|
1067
|
+
}
|
954
1068
|
}
|
955
1069
|
break;
|
956
1070
|
}
|
@@ -958,140 +1072,170 @@ size_t http_parser_execute (http_parser *parser,
|
|
958
1072
|
case s_req_http_start:
|
959
1073
|
switch (ch) {
|
960
1074
|
case 'H':
|
961
|
-
state = s_req_http_H;
|
1075
|
+
parser->state = s_req_http_H;
|
962
1076
|
break;
|
963
1077
|
case ' ':
|
964
1078
|
break;
|
965
1079
|
default:
|
1080
|
+
SET_ERRNO(HPE_INVALID_CONSTANT);
|
966
1081
|
goto error;
|
967
1082
|
}
|
968
1083
|
break;
|
969
1084
|
|
970
1085
|
case s_req_http_H:
|
971
1086
|
STRICT_CHECK(ch != 'T');
|
972
|
-
state = s_req_http_HT;
|
1087
|
+
parser->state = s_req_http_HT;
|
973
1088
|
break;
|
974
1089
|
|
975
1090
|
case s_req_http_HT:
|
976
1091
|
STRICT_CHECK(ch != 'T');
|
977
|
-
state = s_req_http_HTT;
|
1092
|
+
parser->state = s_req_http_HTT;
|
978
1093
|
break;
|
979
1094
|
|
980
1095
|
case s_req_http_HTT:
|
981
1096
|
STRICT_CHECK(ch != 'P');
|
982
|
-
state = s_req_http_HTTP;
|
1097
|
+
parser->state = s_req_http_HTTP;
|
983
1098
|
break;
|
984
1099
|
|
985
1100
|
case s_req_http_HTTP:
|
986
1101
|
STRICT_CHECK(ch != '/');
|
987
|
-
state = s_req_first_http_major;
|
1102
|
+
parser->state = s_req_first_http_major;
|
988
1103
|
break;
|
989
1104
|
|
990
1105
|
/* first digit of major HTTP version */
|
991
1106
|
case s_req_first_http_major:
|
992
|
-
if (ch < '1' || ch > '9')
|
1107
|
+
if (ch < '1' || ch > '9') {
|
1108
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
1109
|
+
goto error;
|
1110
|
+
}
|
1111
|
+
|
993
1112
|
parser->http_major = ch - '0';
|
994
|
-
state = s_req_http_major;
|
1113
|
+
parser->state = s_req_http_major;
|
995
1114
|
break;
|
996
1115
|
|
997
1116
|
/* major HTTP version or dot */
|
998
1117
|
case s_req_http_major:
|
999
1118
|
{
|
1000
1119
|
if (ch == '.') {
|
1001
|
-
state = s_req_first_http_minor;
|
1120
|
+
parser->state = s_req_first_http_minor;
|
1002
1121
|
break;
|
1003
1122
|
}
|
1004
1123
|
|
1005
|
-
if (ch
|
1124
|
+
if (!IS_NUM(ch)) {
|
1125
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
1126
|
+
goto error;
|
1127
|
+
}
|
1006
1128
|
|
1007
1129
|
parser->http_major *= 10;
|
1008
1130
|
parser->http_major += ch - '0';
|
1009
1131
|
|
1010
|
-
if (parser->http_major > 999)
|
1132
|
+
if (parser->http_major > 999) {
|
1133
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
1134
|
+
goto error;
|
1135
|
+
}
|
1136
|
+
|
1011
1137
|
break;
|
1012
1138
|
}
|
1013
1139
|
|
1014
1140
|
/* first digit of minor HTTP version */
|
1015
1141
|
case s_req_first_http_minor:
|
1016
|
-
if (ch
|
1142
|
+
if (!IS_NUM(ch)) {
|
1143
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
1144
|
+
goto error;
|
1145
|
+
}
|
1146
|
+
|
1017
1147
|
parser->http_minor = ch - '0';
|
1018
|
-
state = s_req_http_minor;
|
1148
|
+
parser->state = s_req_http_minor;
|
1019
1149
|
break;
|
1020
1150
|
|
1021
1151
|
/* minor HTTP version or end of request line */
|
1022
1152
|
case s_req_http_minor:
|
1023
1153
|
{
|
1024
1154
|
if (ch == CR) {
|
1025
|
-
state = s_req_line_almost_done;
|
1155
|
+
parser->state = s_req_line_almost_done;
|
1026
1156
|
break;
|
1027
1157
|
}
|
1028
1158
|
|
1029
1159
|
if (ch == LF) {
|
1030
|
-
state = s_header_field_start;
|
1160
|
+
parser->state = s_header_field_start;
|
1031
1161
|
break;
|
1032
1162
|
}
|
1033
1163
|
|
1034
1164
|
/* XXX allow spaces after digit? */
|
1035
1165
|
|
1036
|
-
if (ch
|
1166
|
+
if (!IS_NUM(ch)) {
|
1167
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
1168
|
+
goto error;
|
1169
|
+
}
|
1037
1170
|
|
1038
1171
|
parser->http_minor *= 10;
|
1039
1172
|
parser->http_minor += ch - '0';
|
1040
1173
|
|
1041
|
-
if (parser->http_minor > 999)
|
1174
|
+
if (parser->http_minor > 999) {
|
1175
|
+
SET_ERRNO(HPE_INVALID_VERSION);
|
1176
|
+
goto error;
|
1177
|
+
}
|
1178
|
+
|
1042
1179
|
break;
|
1043
1180
|
}
|
1044
1181
|
|
1045
1182
|
/* end of request line */
|
1046
1183
|
case s_req_line_almost_done:
|
1047
1184
|
{
|
1048
|
-
if (ch != LF)
|
1049
|
-
|
1185
|
+
if (ch != LF) {
|
1186
|
+
SET_ERRNO(HPE_LF_EXPECTED);
|
1187
|
+
goto error;
|
1188
|
+
}
|
1189
|
+
|
1190
|
+
parser->state = s_header_field_start;
|
1050
1191
|
break;
|
1051
1192
|
}
|
1052
1193
|
|
1053
1194
|
case s_header_field_start:
|
1054
1195
|
{
|
1055
1196
|
if (ch == CR) {
|
1056
|
-
state = s_headers_almost_done;
|
1197
|
+
parser->state = s_headers_almost_done;
|
1057
1198
|
break;
|
1058
1199
|
}
|
1059
1200
|
|
1060
1201
|
if (ch == LF) {
|
1061
1202
|
/* they might be just sending \n instead of \r\n so this would be
|
1062
1203
|
* the second \n to denote the end of headers*/
|
1063
|
-
state = s_headers_almost_done;
|
1064
|
-
goto
|
1204
|
+
parser->state = s_headers_almost_done;
|
1205
|
+
goto reexecute_byte;
|
1065
1206
|
}
|
1066
1207
|
|
1067
1208
|
c = TOKEN(ch);
|
1068
1209
|
|
1069
|
-
if (!c)
|
1210
|
+
if (!c) {
|
1211
|
+
SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
|
1212
|
+
goto error;
|
1213
|
+
}
|
1070
1214
|
|
1071
1215
|
MARK(header_field);
|
1072
1216
|
|
1073
|
-
index = 0;
|
1074
|
-
state = s_header_field;
|
1217
|
+
parser->index = 0;
|
1218
|
+
parser->state = s_header_field;
|
1075
1219
|
|
1076
1220
|
switch (c) {
|
1077
1221
|
case 'c':
|
1078
|
-
header_state = h_C;
|
1222
|
+
parser->header_state = h_C;
|
1079
1223
|
break;
|
1080
1224
|
|
1081
1225
|
case 'p':
|
1082
|
-
header_state = h_matching_proxy_connection;
|
1226
|
+
parser->header_state = h_matching_proxy_connection;
|
1083
1227
|
break;
|
1084
1228
|
|
1085
1229
|
case 't':
|
1086
|
-
header_state = h_matching_transfer_encoding;
|
1230
|
+
parser->header_state = h_matching_transfer_encoding;
|
1087
1231
|
break;
|
1088
1232
|
|
1089
1233
|
case 'u':
|
1090
|
-
header_state = h_matching_upgrade;
|
1234
|
+
parser->header_state = h_matching_upgrade;
|
1091
1235
|
break;
|
1092
1236
|
|
1093
1237
|
default:
|
1094
|
-
header_state = h_general;
|
1238
|
+
parser->header_state = h_general;
|
1095
1239
|
break;
|
1096
1240
|
}
|
1097
1241
|
break;
|
@@ -1102,31 +1246,31 @@ size_t http_parser_execute (http_parser *parser,
|
|
1102
1246
|
c = TOKEN(ch);
|
1103
1247
|
|
1104
1248
|
if (c) {
|
1105
|
-
switch (header_state) {
|
1249
|
+
switch (parser->header_state) {
|
1106
1250
|
case h_general:
|
1107
1251
|
break;
|
1108
1252
|
|
1109
1253
|
case h_C:
|
1110
|
-
index++;
|
1111
|
-
header_state = (c == 'o' ? h_CO : h_general);
|
1254
|
+
parser->index++;
|
1255
|
+
parser->header_state = (c == 'o' ? h_CO : h_general);
|
1112
1256
|
break;
|
1113
1257
|
|
1114
1258
|
case h_CO:
|
1115
|
-
index++;
|
1116
|
-
header_state = (c == 'n' ? h_CON : h_general);
|
1259
|
+
parser->index++;
|
1260
|
+
parser->header_state = (c == 'n' ? h_CON : h_general);
|
1117
1261
|
break;
|
1118
1262
|
|
1119
1263
|
case h_CON:
|
1120
|
-
index++;
|
1264
|
+
parser->index++;
|
1121
1265
|
switch (c) {
|
1122
1266
|
case 'n':
|
1123
|
-
header_state = h_matching_connection;
|
1267
|
+
parser->header_state = h_matching_connection;
|
1124
1268
|
break;
|
1125
1269
|
case 't':
|
1126
|
-
header_state = h_matching_content_length;
|
1270
|
+
parser->header_state = h_matching_content_length;
|
1127
1271
|
break;
|
1128
1272
|
default:
|
1129
|
-
header_state = h_general;
|
1273
|
+
parser->header_state = h_general;
|
1130
1274
|
break;
|
1131
1275
|
}
|
1132
1276
|
break;
|
@@ -1134,60 +1278,60 @@ size_t http_parser_execute (http_parser *parser,
|
|
1134
1278
|
/* connection */
|
1135
1279
|
|
1136
1280
|
case h_matching_connection:
|
1137
|
-
index++;
|
1138
|
-
if (index > sizeof(CONNECTION)-1
|
1139
|
-
|| c != CONNECTION[index]) {
|
1140
|
-
header_state = h_general;
|
1141
|
-
} else if (index == sizeof(CONNECTION)-2) {
|
1142
|
-
header_state = h_connection;
|
1281
|
+
parser->index++;
|
1282
|
+
if (parser->index > sizeof(CONNECTION)-1
|
1283
|
+
|| c != CONNECTION[parser->index]) {
|
1284
|
+
parser->header_state = h_general;
|
1285
|
+
} else if (parser->index == sizeof(CONNECTION)-2) {
|
1286
|
+
parser->header_state = h_connection;
|
1143
1287
|
}
|
1144
1288
|
break;
|
1145
1289
|
|
1146
1290
|
/* proxy-connection */
|
1147
1291
|
|
1148
1292
|
case h_matching_proxy_connection:
|
1149
|
-
index++;
|
1150
|
-
if (index > sizeof(PROXY_CONNECTION)-1
|
1151
|
-
|| c != PROXY_CONNECTION[index]) {
|
1152
|
-
header_state = h_general;
|
1153
|
-
} else if (index == sizeof(PROXY_CONNECTION)-2) {
|
1154
|
-
header_state = h_connection;
|
1293
|
+
parser->index++;
|
1294
|
+
if (parser->index > sizeof(PROXY_CONNECTION)-1
|
1295
|
+
|| c != PROXY_CONNECTION[parser->index]) {
|
1296
|
+
parser->header_state = h_general;
|
1297
|
+
} else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
|
1298
|
+
parser->header_state = h_connection;
|
1155
1299
|
}
|
1156
1300
|
break;
|
1157
1301
|
|
1158
1302
|
/* content-length */
|
1159
1303
|
|
1160
1304
|
case h_matching_content_length:
|
1161
|
-
index++;
|
1162
|
-
if (index > sizeof(CONTENT_LENGTH)-1
|
1163
|
-
|| c != CONTENT_LENGTH[index]) {
|
1164
|
-
header_state = h_general;
|
1165
|
-
} else if (index == sizeof(CONTENT_LENGTH)-2) {
|
1166
|
-
header_state = h_content_length;
|
1305
|
+
parser->index++;
|
1306
|
+
if (parser->index > sizeof(CONTENT_LENGTH)-1
|
1307
|
+
|| c != CONTENT_LENGTH[parser->index]) {
|
1308
|
+
parser->header_state = h_general;
|
1309
|
+
} else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
|
1310
|
+
parser->header_state = h_content_length;
|
1167
1311
|
}
|
1168
1312
|
break;
|
1169
1313
|
|
1170
1314
|
/* transfer-encoding */
|
1171
1315
|
|
1172
1316
|
case h_matching_transfer_encoding:
|
1173
|
-
index++;
|
1174
|
-
if (index > sizeof(TRANSFER_ENCODING)-1
|
1175
|
-
|| c != TRANSFER_ENCODING[index]) {
|
1176
|
-
header_state = h_general;
|
1177
|
-
} else if (index == sizeof(TRANSFER_ENCODING)-2) {
|
1178
|
-
header_state = h_transfer_encoding;
|
1317
|
+
parser->index++;
|
1318
|
+
if (parser->index > sizeof(TRANSFER_ENCODING)-1
|
1319
|
+
|| c != TRANSFER_ENCODING[parser->index]) {
|
1320
|
+
parser->header_state = h_general;
|
1321
|
+
} else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
|
1322
|
+
parser->header_state = h_transfer_encoding;
|
1179
1323
|
}
|
1180
1324
|
break;
|
1181
1325
|
|
1182
1326
|
/* upgrade */
|
1183
1327
|
|
1184
1328
|
case h_matching_upgrade:
|
1185
|
-
index++;
|
1186
|
-
if (index > sizeof(UPGRADE)-1
|
1187
|
-
|| c != UPGRADE[index]) {
|
1188
|
-
header_state = h_general;
|
1189
|
-
} else if (index == sizeof(UPGRADE)-2) {
|
1190
|
-
header_state = h_upgrade;
|
1329
|
+
parser->index++;
|
1330
|
+
if (parser->index > sizeof(UPGRADE)-1
|
1331
|
+
|| c != UPGRADE[parser->index]) {
|
1332
|
+
parser->header_state = h_general;
|
1333
|
+
} else if (parser->index == sizeof(UPGRADE)-2) {
|
1334
|
+
parser->header_state = h_upgrade;
|
1191
1335
|
}
|
1192
1336
|
break;
|
1193
1337
|
|
@@ -1195,7 +1339,7 @@ size_t http_parser_execute (http_parser *parser,
|
|
1195
1339
|
case h_content_length:
|
1196
1340
|
case h_transfer_encoding:
|
1197
1341
|
case h_upgrade:
|
1198
|
-
if (ch != ' ') header_state = h_general;
|
1342
|
+
if (ch != ' ') parser->header_state = h_general;
|
1199
1343
|
break;
|
1200
1344
|
|
1201
1345
|
default:
|
@@ -1206,84 +1350,89 @@ size_t http_parser_execute (http_parser *parser,
|
|
1206
1350
|
}
|
1207
1351
|
|
1208
1352
|
if (ch == ':') {
|
1209
|
-
|
1210
|
-
|
1353
|
+
parser->state = s_header_value_start;
|
1354
|
+
CALLBACK_DATA(header_field);
|
1211
1355
|
break;
|
1212
1356
|
}
|
1213
1357
|
|
1214
1358
|
if (ch == CR) {
|
1215
|
-
state = s_header_almost_done;
|
1216
|
-
|
1359
|
+
parser->state = s_header_almost_done;
|
1360
|
+
CALLBACK_DATA(header_field);
|
1217
1361
|
break;
|
1218
1362
|
}
|
1219
1363
|
|
1220
1364
|
if (ch == LF) {
|
1221
|
-
|
1222
|
-
|
1365
|
+
parser->state = s_header_field_start;
|
1366
|
+
CALLBACK_DATA(header_field);
|
1223
1367
|
break;
|
1224
1368
|
}
|
1225
1369
|
|
1370
|
+
SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
|
1226
1371
|
goto error;
|
1227
1372
|
}
|
1228
1373
|
|
1229
1374
|
case s_header_value_start:
|
1230
1375
|
{
|
1231
|
-
if (ch == ' ') break;
|
1376
|
+
if (ch == ' ' || ch == '\t') break;
|
1232
1377
|
|
1233
1378
|
MARK(header_value);
|
1234
1379
|
|
1235
|
-
state = s_header_value;
|
1236
|
-
index = 0;
|
1237
|
-
|
1238
|
-
c = LOWER(ch);
|
1380
|
+
parser->state = s_header_value;
|
1381
|
+
parser->index = 0;
|
1239
1382
|
|
1240
1383
|
if (ch == CR) {
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1384
|
+
parser->header_state = h_general;
|
1385
|
+
parser->state = s_header_almost_done;
|
1386
|
+
CALLBACK_DATA(header_value);
|
1244
1387
|
break;
|
1245
1388
|
}
|
1246
1389
|
|
1247
1390
|
if (ch == LF) {
|
1248
|
-
|
1249
|
-
|
1391
|
+
parser->state = s_header_field_start;
|
1392
|
+
CALLBACK_DATA(header_value);
|
1250
1393
|
break;
|
1251
1394
|
}
|
1252
1395
|
|
1253
|
-
|
1396
|
+
c = LOWER(ch);
|
1397
|
+
|
1398
|
+
switch (parser->header_state) {
|
1254
1399
|
case h_upgrade:
|
1255
1400
|
parser->flags |= F_UPGRADE;
|
1256
|
-
header_state = h_general;
|
1401
|
+
parser->header_state = h_general;
|
1257
1402
|
break;
|
1258
1403
|
|
1259
1404
|
case h_transfer_encoding:
|
1260
1405
|
/* looking for 'Transfer-Encoding: chunked' */
|
1261
1406
|
if ('c' == c) {
|
1262
|
-
header_state = h_matching_transfer_encoding_chunked;
|
1407
|
+
parser->header_state = h_matching_transfer_encoding_chunked;
|
1263
1408
|
} else {
|
1264
|
-
header_state = h_general;
|
1409
|
+
parser->header_state = h_general;
|
1265
1410
|
}
|
1266
1411
|
break;
|
1267
1412
|
|
1268
1413
|
case h_content_length:
|
1269
|
-
if (ch
|
1414
|
+
if (!IS_NUM(ch)) {
|
1415
|
+
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
|
1416
|
+
goto error;
|
1417
|
+
}
|
1418
|
+
|
1270
1419
|
parser->content_length = ch - '0';
|
1271
1420
|
break;
|
1272
1421
|
|
1273
1422
|
case h_connection:
|
1274
1423
|
/* looking for 'Connection: keep-alive' */
|
1275
1424
|
if (c == 'k') {
|
1276
|
-
header_state = h_matching_connection_keep_alive;
|
1425
|
+
parser->header_state = h_matching_connection_keep_alive;
|
1277
1426
|
/* looking for 'Connection: close' */
|
1278
1427
|
} else if (c == 'c') {
|
1279
|
-
header_state = h_matching_connection_close;
|
1428
|
+
parser->header_state = h_matching_connection_close;
|
1280
1429
|
} else {
|
1281
|
-
header_state = h_general;
|
1430
|
+
parser->header_state = h_general;
|
1282
1431
|
}
|
1283
1432
|
break;
|
1284
1433
|
|
1285
1434
|
default:
|
1286
|
-
header_state = h_general;
|
1435
|
+
parser->header_state = h_general;
|
1287
1436
|
break;
|
1288
1437
|
}
|
1289
1438
|
break;
|
@@ -1291,20 +1440,22 @@ size_t http_parser_execute (http_parser *parser,
|
|
1291
1440
|
|
1292
1441
|
case s_header_value:
|
1293
1442
|
{
|
1294
|
-
c = LOWER(ch);
|
1295
1443
|
|
1296
1444
|
if (ch == CR) {
|
1297
|
-
|
1298
|
-
|
1445
|
+
parser->state = s_header_almost_done;
|
1446
|
+
CALLBACK_DATA(header_value);
|
1299
1447
|
break;
|
1300
1448
|
}
|
1301
1449
|
|
1302
1450
|
if (ch == LF) {
|
1303
|
-
|
1304
|
-
|
1451
|
+
parser->state = s_header_almost_done;
|
1452
|
+
CALLBACK_DATA_NOADVANCE(header_value);
|
1453
|
+
goto reexecute_byte;
|
1305
1454
|
}
|
1306
1455
|
|
1307
|
-
|
1456
|
+
c = LOWER(ch);
|
1457
|
+
|
1458
|
+
switch (parser->header_state) {
|
1308
1459
|
case h_general:
|
1309
1460
|
break;
|
1310
1461
|
|
@@ -1314,66 +1465,83 @@ size_t http_parser_execute (http_parser *parser,
|
|
1314
1465
|
break;
|
1315
1466
|
|
1316
1467
|
case h_content_length:
|
1468
|
+
{
|
1469
|
+
uint64_t t;
|
1470
|
+
|
1317
1471
|
if (ch == ' ') break;
|
1318
|
-
|
1319
|
-
|
1320
|
-
|
1472
|
+
|
1473
|
+
if (!IS_NUM(ch)) {
|
1474
|
+
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
|
1475
|
+
goto error;
|
1476
|
+
}
|
1477
|
+
|
1478
|
+
t = parser->content_length;
|
1479
|
+
t *= 10;
|
1480
|
+
t += ch - '0';
|
1481
|
+
|
1482
|
+
/* Overflow? */
|
1483
|
+
if (t < parser->content_length || t == ULLONG_MAX) {
|
1484
|
+
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
|
1485
|
+
goto error;
|
1486
|
+
}
|
1487
|
+
|
1488
|
+
parser->content_length = t;
|
1321
1489
|
break;
|
1490
|
+
}
|
1322
1491
|
|
1323
1492
|
/* Transfer-Encoding: chunked */
|
1324
1493
|
case h_matching_transfer_encoding_chunked:
|
1325
|
-
index++;
|
1326
|
-
if (index > sizeof(CHUNKED)-1
|
1327
|
-
|| c != CHUNKED[index]) {
|
1328
|
-
header_state = h_general;
|
1329
|
-
} else if (index == sizeof(CHUNKED)-2) {
|
1330
|
-
header_state = h_transfer_encoding_chunked;
|
1494
|
+
parser->index++;
|
1495
|
+
if (parser->index > sizeof(CHUNKED)-1
|
1496
|
+
|| c != CHUNKED[parser->index]) {
|
1497
|
+
parser->header_state = h_general;
|
1498
|
+
} else if (parser->index == sizeof(CHUNKED)-2) {
|
1499
|
+
parser->header_state = h_transfer_encoding_chunked;
|
1331
1500
|
}
|
1332
1501
|
break;
|
1333
1502
|
|
1334
1503
|
/* looking for 'Connection: keep-alive' */
|
1335
1504
|
case h_matching_connection_keep_alive:
|
1336
|
-
index++;
|
1337
|
-
if (index > sizeof(KEEP_ALIVE)-1
|
1338
|
-
|| c != KEEP_ALIVE[index]) {
|
1339
|
-
header_state = h_general;
|
1340
|
-
} else if (index == sizeof(KEEP_ALIVE)-2) {
|
1341
|
-
header_state = h_connection_keep_alive;
|
1505
|
+
parser->index++;
|
1506
|
+
if (parser->index > sizeof(KEEP_ALIVE)-1
|
1507
|
+
|| c != KEEP_ALIVE[parser->index]) {
|
1508
|
+
parser->header_state = h_general;
|
1509
|
+
} else if (parser->index == sizeof(KEEP_ALIVE)-2) {
|
1510
|
+
parser->header_state = h_connection_keep_alive;
|
1342
1511
|
}
|
1343
1512
|
break;
|
1344
1513
|
|
1345
1514
|
/* looking for 'Connection: close' */
|
1346
1515
|
case h_matching_connection_close:
|
1347
|
-
index++;
|
1348
|
-
if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
|
1349
|
-
header_state = h_general;
|
1350
|
-
} else if (index == sizeof(CLOSE)-2) {
|
1351
|
-
header_state = h_connection_close;
|
1516
|
+
parser->index++;
|
1517
|
+
if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
|
1518
|
+
parser->header_state = h_general;
|
1519
|
+
} else if (parser->index == sizeof(CLOSE)-2) {
|
1520
|
+
parser->header_state = h_connection_close;
|
1352
1521
|
}
|
1353
1522
|
break;
|
1354
1523
|
|
1355
1524
|
case h_transfer_encoding_chunked:
|
1356
1525
|
case h_connection_keep_alive:
|
1357
1526
|
case h_connection_close:
|
1358
|
-
if (ch != ' ') header_state = h_general;
|
1527
|
+
if (ch != ' ') parser->header_state = h_general;
|
1359
1528
|
break;
|
1360
1529
|
|
1361
1530
|
default:
|
1362
|
-
state = s_header_value;
|
1363
|
-
header_state = h_general;
|
1531
|
+
parser->state = s_header_value;
|
1532
|
+
parser->header_state = h_general;
|
1364
1533
|
break;
|
1365
1534
|
}
|
1366
1535
|
break;
|
1367
1536
|
}
|
1368
1537
|
|
1369
1538
|
case s_header_almost_done:
|
1370
|
-
header_almost_done:
|
1371
1539
|
{
|
1372
1540
|
STRICT_CHECK(ch != LF);
|
1373
1541
|
|
1374
|
-
state =
|
1542
|
+
parser->state = s_header_value_lws;
|
1375
1543
|
|
1376
|
-
switch (header_state) {
|
1544
|
+
switch (parser->header_state) {
|
1377
1545
|
case h_connection_keep_alive:
|
1378
1546
|
parser->flags |= F_CONNECTION_KEEP_ALIVE;
|
1379
1547
|
break;
|
@@ -1386,32 +1554,47 @@ size_t http_parser_execute (http_parser *parser,
|
|
1386
1554
|
default:
|
1387
1555
|
break;
|
1388
1556
|
}
|
1557
|
+
|
1558
|
+
break;
|
1559
|
+
}
|
1560
|
+
|
1561
|
+
case s_header_value_lws:
|
1562
|
+
{
|
1563
|
+
if (ch == ' ' || ch == '\t')
|
1564
|
+
parser->state = s_header_value_start;
|
1565
|
+
else
|
1566
|
+
{
|
1567
|
+
parser->state = s_header_field_start;
|
1568
|
+
goto reexecute_byte;
|
1569
|
+
}
|
1389
1570
|
break;
|
1390
1571
|
}
|
1391
1572
|
|
1392
1573
|
case s_headers_almost_done:
|
1393
|
-
headers_almost_done:
|
1394
1574
|
{
|
1395
1575
|
STRICT_CHECK(ch != LF);
|
1396
1576
|
|
1397
1577
|
if (parser->flags & F_TRAILING) {
|
1398
1578
|
/* End of a chunked request */
|
1399
|
-
|
1400
|
-
|
1579
|
+
parser->state = NEW_MESSAGE();
|
1580
|
+
CALLBACK_NOTIFY(message_complete);
|
1401
1581
|
break;
|
1402
1582
|
}
|
1403
1583
|
|
1404
|
-
|
1584
|
+
parser->state = s_headers_done;
|
1405
1585
|
|
1406
|
-
|
1407
|
-
|
1408
|
-
|
1586
|
+
/* Set this here so that on_headers_complete() callbacks can see it */
|
1587
|
+
parser->upgrade =
|
1588
|
+
(parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
|
1409
1589
|
|
1410
1590
|
/* Here we call the headers_complete callback. This is somewhat
|
1411
1591
|
* different than other callbacks because if the user returns 1, we
|
1412
1592
|
* will interpret that as saying that this message has no body. This
|
1413
1593
|
* is needed for the annoying case of recieving a response to a HEAD
|
1414
1594
|
* request.
|
1595
|
+
*
|
1596
|
+
* We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
|
1597
|
+
* we have to simulate it by handling a change in errno below.
|
1415
1598
|
*/
|
1416
1599
|
if (settings->on_headers_complete) {
|
1417
1600
|
switch (settings->on_headers_complete(parser)) {
|
@@ -1423,39 +1606,54 @@ size_t http_parser_execute (http_parser *parser,
|
|
1423
1606
|
break;
|
1424
1607
|
|
1425
1608
|
default:
|
1426
|
-
|
1609
|
+
SET_ERRNO(HPE_CB_headers_complete);
|
1427
1610
|
return p - data; /* Error */
|
1428
1611
|
}
|
1429
1612
|
}
|
1430
1613
|
|
1614
|
+
if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
|
1615
|
+
return p - data;
|
1616
|
+
}
|
1617
|
+
|
1618
|
+
goto reexecute_byte;
|
1619
|
+
}
|
1620
|
+
|
1621
|
+
case s_headers_done:
|
1622
|
+
{
|
1623
|
+
STRICT_CHECK(ch != LF);
|
1624
|
+
|
1625
|
+
parser->nread = 0;
|
1626
|
+
|
1431
1627
|
/* Exit, the rest of the connect is in a different protocol. */
|
1432
1628
|
if (parser->upgrade) {
|
1433
|
-
|
1434
|
-
|
1629
|
+
parser->state = NEW_MESSAGE();
|
1630
|
+
CALLBACK_NOTIFY(message_complete);
|
1631
|
+
return (p - data) + 1;
|
1435
1632
|
}
|
1436
1633
|
|
1437
1634
|
if (parser->flags & F_SKIPBODY) {
|
1438
|
-
|
1439
|
-
|
1635
|
+
parser->state = NEW_MESSAGE();
|
1636
|
+
CALLBACK_NOTIFY(message_complete);
|
1440
1637
|
} else if (parser->flags & F_CHUNKED) {
|
1441
1638
|
/* chunked encoding - ignore Content-Length header */
|
1442
|
-
state = s_chunk_size_start;
|
1639
|
+
parser->state = s_chunk_size_start;
|
1443
1640
|
} else {
|
1444
1641
|
if (parser->content_length == 0) {
|
1445
1642
|
/* Content-Length header given but zero: Content-Length: 0\r\n */
|
1446
|
-
|
1447
|
-
|
1448
|
-
} else if (parser->content_length
|
1643
|
+
parser->state = NEW_MESSAGE();
|
1644
|
+
CALLBACK_NOTIFY(message_complete);
|
1645
|
+
} else if (parser->content_length != ULLONG_MAX) {
|
1449
1646
|
/* Content-Length header given and non-zero */
|
1450
|
-
state = s_body_identity;
|
1647
|
+
parser->state = s_body_identity;
|
1451
1648
|
} else {
|
1452
|
-
if (parser->type == HTTP_REQUEST ||
|
1649
|
+
if (parser->type == HTTP_REQUEST ||
|
1650
|
+
!http_message_needs_eof(parser)) {
|
1453
1651
|
/* Assume content-length 0 - read the next */
|
1454
|
-
|
1455
|
-
|
1652
|
+
parser->state = NEW_MESSAGE();
|
1653
|
+
CALLBACK_NOTIFY(message_complete);
|
1456
1654
|
} else {
|
1457
1655
|
/* Read body until EOF */
|
1458
|
-
state = s_body_identity_eof;
|
1656
|
+
parser->state = s_body_identity_eof;
|
1459
1657
|
}
|
1460
1658
|
}
|
1461
1659
|
}
|
@@ -1464,60 +1662,103 @@ size_t http_parser_execute (http_parser *parser,
|
|
1464
1662
|
}
|
1465
1663
|
|
1466
1664
|
case s_body_identity:
|
1467
|
-
|
1468
|
-
|
1469
|
-
|
1470
|
-
|
1471
|
-
|
1472
|
-
|
1473
|
-
|
1474
|
-
|
1475
|
-
|
1665
|
+
{
|
1666
|
+
uint64_t to_read = MIN(parser->content_length,
|
1667
|
+
(uint64_t) ((data + len) - p));
|
1668
|
+
|
1669
|
+
assert(parser->content_length != 0
|
1670
|
+
&& parser->content_length != ULLONG_MAX);
|
1671
|
+
|
1672
|
+
/* The difference between advancing content_length and p is because
|
1673
|
+
* the latter will automaticaly advance on the next loop iteration.
|
1674
|
+
* Further, if content_length ends up at 0, we want to see the last
|
1675
|
+
* byte again for our message complete callback.
|
1676
|
+
*/
|
1677
|
+
MARK(body);
|
1678
|
+
parser->content_length -= to_read;
|
1679
|
+
p += to_read - 1;
|
1680
|
+
|
1681
|
+
if (parser->content_length == 0) {
|
1682
|
+
parser->state = s_message_done;
|
1683
|
+
|
1684
|
+
/* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
|
1685
|
+
*
|
1686
|
+
* The alternative to doing this is to wait for the next byte to
|
1687
|
+
* trigger the data callback, just as in every other case. The
|
1688
|
+
* problem with this is that this makes it difficult for the test
|
1689
|
+
* harness to distinguish between complete-on-EOF and
|
1690
|
+
* complete-on-length. It's not clear that this distinction is
|
1691
|
+
* important for applications, but let's keep it for now.
|
1692
|
+
*/
|
1693
|
+
CALLBACK_DATA_(body, p - body_mark + 1, p - data);
|
1694
|
+
goto reexecute_byte;
|
1476
1695
|
}
|
1696
|
+
|
1477
1697
|
break;
|
1698
|
+
}
|
1478
1699
|
|
1479
1700
|
/* read until EOF */
|
1480
1701
|
case s_body_identity_eof:
|
1481
|
-
|
1482
|
-
|
1483
|
-
|
1484
|
-
|
1485
|
-
|
1702
|
+
MARK(body);
|
1703
|
+
p = data + len - 1;
|
1704
|
+
|
1705
|
+
break;
|
1706
|
+
|
1707
|
+
case s_message_done:
|
1708
|
+
parser->state = NEW_MESSAGE();
|
1709
|
+
CALLBACK_NOTIFY(message_complete);
|
1486
1710
|
break;
|
1487
1711
|
|
1488
1712
|
case s_chunk_size_start:
|
1489
1713
|
{
|
1490
|
-
assert(nread == 1);
|
1714
|
+
assert(parser->nread == 1);
|
1491
1715
|
assert(parser->flags & F_CHUNKED);
|
1492
1716
|
|
1493
|
-
|
1494
|
-
if (
|
1495
|
-
|
1496
|
-
|
1717
|
+
unhex_val = unhex[(unsigned char)ch];
|
1718
|
+
if (unhex_val == -1) {
|
1719
|
+
SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
|
1720
|
+
goto error;
|
1721
|
+
}
|
1722
|
+
|
1723
|
+
parser->content_length = unhex_val;
|
1724
|
+
parser->state = s_chunk_size;
|
1497
1725
|
break;
|
1498
1726
|
}
|
1499
1727
|
|
1500
1728
|
case s_chunk_size:
|
1501
1729
|
{
|
1730
|
+
uint64_t t;
|
1731
|
+
|
1502
1732
|
assert(parser->flags & F_CHUNKED);
|
1503
1733
|
|
1504
1734
|
if (ch == CR) {
|
1505
|
-
state = s_chunk_size_almost_done;
|
1735
|
+
parser->state = s_chunk_size_almost_done;
|
1506
1736
|
break;
|
1507
1737
|
}
|
1508
1738
|
|
1509
|
-
|
1739
|
+
unhex_val = unhex[(unsigned char)ch];
|
1510
1740
|
|
1511
|
-
if (
|
1741
|
+
if (unhex_val == -1) {
|
1512
1742
|
if (ch == ';' || ch == ' ') {
|
1513
|
-
state = s_chunk_parameters;
|
1743
|
+
parser->state = s_chunk_parameters;
|
1514
1744
|
break;
|
1515
1745
|
}
|
1746
|
+
|
1747
|
+
SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
|
1748
|
+
goto error;
|
1749
|
+
}
|
1750
|
+
|
1751
|
+
t = parser->content_length;
|
1752
|
+
t *= 16;
|
1753
|
+
t += unhex_val;
|
1754
|
+
|
1755
|
+
/* Overflow? */
|
1756
|
+
if (t < parser->content_length || t == ULLONG_MAX) {
|
1757
|
+
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
|
1516
1758
|
goto error;
|
1517
1759
|
}
|
1518
1760
|
|
1519
|
-
parser->content_length
|
1520
|
-
parser->content_length += c;
|
1761
|
+
parser->content_length = t;
|
1521
1762
|
break;
|
1522
1763
|
}
|
1523
1764
|
|
@@ -1526,7 +1767,7 @@ size_t http_parser_execute (http_parser *parser,
|
|
1526
1767
|
assert(parser->flags & F_CHUNKED);
|
1527
1768
|
/* just ignore this shit. TODO check for overflow */
|
1528
1769
|
if (ch == CR) {
|
1529
|
-
state = s_chunk_size_almost_done;
|
1770
|
+
parser->state = s_chunk_size_almost_done;
|
1530
1771
|
break;
|
1531
1772
|
}
|
1532
1773
|
break;
|
@@ -1537,74 +1778,117 @@ size_t http_parser_execute (http_parser *parser,
|
|
1537
1778
|
assert(parser->flags & F_CHUNKED);
|
1538
1779
|
STRICT_CHECK(ch != LF);
|
1539
1780
|
|
1540
|
-
nread = 0;
|
1781
|
+
parser->nread = 0;
|
1541
1782
|
|
1542
1783
|
if (parser->content_length == 0) {
|
1543
1784
|
parser->flags |= F_TRAILING;
|
1544
|
-
state = s_header_field_start;
|
1785
|
+
parser->state = s_header_field_start;
|
1545
1786
|
} else {
|
1546
|
-
state = s_chunk_data;
|
1787
|
+
parser->state = s_chunk_data;
|
1547
1788
|
}
|
1548
1789
|
break;
|
1549
1790
|
}
|
1550
1791
|
|
1551
1792
|
case s_chunk_data:
|
1552
1793
|
{
|
1553
|
-
|
1794
|
+
uint64_t to_read = MIN(parser->content_length,
|
1795
|
+
(uint64_t) ((data + len) - p));
|
1554
1796
|
|
1555
|
-
|
1797
|
+
assert(parser->flags & F_CHUNKED);
|
1798
|
+
assert(parser->content_length != 0
|
1799
|
+
&& parser->content_length != ULLONG_MAX);
|
1556
1800
|
|
1557
|
-
|
1558
|
-
|
1559
|
-
|
1560
|
-
|
1801
|
+
/* See the explanation in s_body_identity for why the content
|
1802
|
+
* length and data pointers are managed this way.
|
1803
|
+
*/
|
1804
|
+
MARK(body);
|
1805
|
+
parser->content_length -= to_read;
|
1806
|
+
p += to_read - 1;
|
1561
1807
|
|
1562
|
-
if (
|
1563
|
-
state = s_chunk_data_almost_done;
|
1808
|
+
if (parser->content_length == 0) {
|
1809
|
+
parser->state = s_chunk_data_almost_done;
|
1564
1810
|
}
|
1565
1811
|
|
1566
|
-
parser->content_length -= to_read;
|
1567
1812
|
break;
|
1568
1813
|
}
|
1569
1814
|
|
1570
1815
|
case s_chunk_data_almost_done:
|
1571
1816
|
assert(parser->flags & F_CHUNKED);
|
1817
|
+
assert(parser->content_length == 0);
|
1572
1818
|
STRICT_CHECK(ch != CR);
|
1573
|
-
state = s_chunk_data_done;
|
1819
|
+
parser->state = s_chunk_data_done;
|
1820
|
+
CALLBACK_DATA(body);
|
1574
1821
|
break;
|
1575
1822
|
|
1576
1823
|
case s_chunk_data_done:
|
1577
1824
|
assert(parser->flags & F_CHUNKED);
|
1578
1825
|
STRICT_CHECK(ch != LF);
|
1579
|
-
|
1826
|
+
parser->nread = 0;
|
1827
|
+
parser->state = s_chunk_size_start;
|
1580
1828
|
break;
|
1581
1829
|
|
1582
1830
|
default:
|
1583
1831
|
assert(0 && "unhandled state");
|
1832
|
+
SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
|
1584
1833
|
goto error;
|
1585
1834
|
}
|
1586
1835
|
}
|
1587
1836
|
|
1588
|
-
|
1589
|
-
|
1590
|
-
|
1591
|
-
|
1592
|
-
|
1593
|
-
|
1837
|
+
/* Run callbacks for any marks that we have leftover after we ran our of
|
1838
|
+
* bytes. There should be at most one of these set, so it's OK to invoke
|
1839
|
+
* them in series (unset marks will not result in callbacks).
|
1840
|
+
*
|
1841
|
+
* We use the NOADVANCE() variety of callbacks here because 'p' has already
|
1842
|
+
* overflowed 'data' and this allows us to correct for the off-by-one that
|
1843
|
+
* we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
|
1844
|
+
* value that's in-bounds).
|
1845
|
+
*/
|
1594
1846
|
|
1595
|
-
|
1596
|
-
|
1597
|
-
|
1598
|
-
|
1847
|
+
assert(((header_field_mark ? 1 : 0) +
|
1848
|
+
(header_value_mark ? 1 : 0) +
|
1849
|
+
(url_mark ? 1 : 0) +
|
1850
|
+
(body_mark ? 1 : 0)) <= 1);
|
1851
|
+
|
1852
|
+
CALLBACK_DATA_NOADVANCE(header_field);
|
1853
|
+
CALLBACK_DATA_NOADVANCE(header_value);
|
1854
|
+
CALLBACK_DATA_NOADVANCE(url);
|
1855
|
+
CALLBACK_DATA_NOADVANCE(body);
|
1599
1856
|
|
1600
1857
|
return len;
|
1601
1858
|
|
1602
1859
|
error:
|
1603
|
-
parser
|
1860
|
+
if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
|
1861
|
+
SET_ERRNO(HPE_UNKNOWN);
|
1862
|
+
}
|
1863
|
+
|
1604
1864
|
return (p - data);
|
1605
1865
|
}
|
1606
1866
|
|
1607
1867
|
|
1868
|
+
/* Does the parser need to see an EOF to find the end of the message? */
|
1869
|
+
int
|
1870
|
+
http_message_needs_eof (http_parser *parser)
|
1871
|
+
{
|
1872
|
+
if (parser->type == HTTP_REQUEST) {
|
1873
|
+
return 0;
|
1874
|
+
}
|
1875
|
+
|
1876
|
+
/* See RFC 2616 section 4.4 */
|
1877
|
+
if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
|
1878
|
+
parser->status_code == 204 || /* No Content */
|
1879
|
+
parser->status_code == 304 || /* Not Modified */
|
1880
|
+
parser->flags & F_SKIPBODY) { /* response to a HEAD request */
|
1881
|
+
return 0;
|
1882
|
+
}
|
1883
|
+
|
1884
|
+
if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
|
1885
|
+
return 0;
|
1886
|
+
}
|
1887
|
+
|
1888
|
+
return 1;
|
1889
|
+
}
|
1890
|
+
|
1891
|
+
|
1608
1892
|
int
|
1609
1893
|
http_should_keep_alive (http_parser *parser)
|
1610
1894
|
{
|
@@ -1612,17 +1896,15 @@ http_should_keep_alive (http_parser *parser)
|
|
1612
1896
|
/* HTTP/1.1 */
|
1613
1897
|
if (parser->flags & F_CONNECTION_CLOSE) {
|
1614
1898
|
return 0;
|
1615
|
-
} else {
|
1616
|
-
return 1;
|
1617
1899
|
}
|
1618
1900
|
} else {
|
1619
1901
|
/* HTTP/1.0 or earlier */
|
1620
|
-
if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
|
1621
|
-
return 1;
|
1622
|
-
} else {
|
1902
|
+
if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
|
1623
1903
|
return 0;
|
1624
1904
|
}
|
1625
1905
|
}
|
1906
|
+
|
1907
|
+
return !http_message_needs_eof(parser);
|
1626
1908
|
}
|
1627
1909
|
|
1628
1910
|
|
@@ -1635,10 +1917,142 @@ const char * http_method_str (enum http_method m)
|
|
1635
1917
|
void
|
1636
1918
|
http_parser_init (http_parser *parser, enum http_parser_type t)
|
1637
1919
|
{
|
1920
|
+
void *data = parser->data; /* preserve application data */
|
1921
|
+
memset(parser, 0, sizeof(*parser));
|
1922
|
+
parser->data = data;
|
1638
1923
|
parser->type = t;
|
1639
1924
|
parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
|
1640
|
-
parser->
|
1641
|
-
|
1642
|
-
|
1643
|
-
|
1925
|
+
parser->http_errno = HPE_OK;
|
1926
|
+
}
|
1927
|
+
|
1928
|
+
const char *
|
1929
|
+
http_errno_name(enum http_errno err) {
|
1930
|
+
assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
|
1931
|
+
return http_strerror_tab[err].name;
|
1932
|
+
}
|
1933
|
+
|
1934
|
+
const char *
|
1935
|
+
http_errno_description(enum http_errno err) {
|
1936
|
+
assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
|
1937
|
+
return http_strerror_tab[err].description;
|
1938
|
+
}
|
1939
|
+
|
1940
|
+
int
|
1941
|
+
http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
|
1942
|
+
struct http_parser_url *u)
|
1943
|
+
{
|
1944
|
+
enum state s;
|
1945
|
+
const char *p;
|
1946
|
+
enum http_parser_url_fields uf, old_uf;
|
1947
|
+
|
1948
|
+
u->port = u->field_set = 0;
|
1949
|
+
s = is_connect ? s_req_host_start : s_req_spaces_before_url;
|
1950
|
+
uf = old_uf = UF_MAX;
|
1951
|
+
|
1952
|
+
for (p = buf; p < buf + buflen; p++) {
|
1953
|
+
s = parse_url_char(s, *p);
|
1954
|
+
|
1955
|
+
/* Figure out the next field that we're operating on */
|
1956
|
+
switch (s) {
|
1957
|
+
case s_dead:
|
1958
|
+
return 1;
|
1959
|
+
|
1960
|
+
/* Skip delimeters */
|
1961
|
+
case s_req_schema_slash:
|
1962
|
+
case s_req_schema_slash_slash:
|
1963
|
+
case s_req_host_start:
|
1964
|
+
case s_req_host_v6_start:
|
1965
|
+
case s_req_host_v6_end:
|
1966
|
+
case s_req_port_start:
|
1967
|
+
case s_req_query_string_start:
|
1968
|
+
case s_req_fragment_start:
|
1969
|
+
continue;
|
1970
|
+
|
1971
|
+
case s_req_schema:
|
1972
|
+
uf = UF_SCHEMA;
|
1973
|
+
break;
|
1974
|
+
|
1975
|
+
case s_req_host:
|
1976
|
+
case s_req_host_v6:
|
1977
|
+
uf = UF_HOST;
|
1978
|
+
break;
|
1979
|
+
|
1980
|
+
case s_req_port:
|
1981
|
+
uf = UF_PORT;
|
1982
|
+
break;
|
1983
|
+
|
1984
|
+
case s_req_path:
|
1985
|
+
uf = UF_PATH;
|
1986
|
+
break;
|
1987
|
+
|
1988
|
+
case s_req_query_string:
|
1989
|
+
uf = UF_QUERY;
|
1990
|
+
break;
|
1991
|
+
|
1992
|
+
case s_req_fragment:
|
1993
|
+
uf = UF_FRAGMENT;
|
1994
|
+
break;
|
1995
|
+
|
1996
|
+
default:
|
1997
|
+
assert(!"Unexpected state");
|
1998
|
+
return 1;
|
1999
|
+
}
|
2000
|
+
|
2001
|
+
/* Nothing's changed; soldier on */
|
2002
|
+
if (uf == old_uf) {
|
2003
|
+
u->field_data[uf].len++;
|
2004
|
+
continue;
|
2005
|
+
}
|
2006
|
+
|
2007
|
+
u->field_data[uf].off = p - buf;
|
2008
|
+
u->field_data[uf].len = 1;
|
2009
|
+
|
2010
|
+
u->field_set |= (1 << uf);
|
2011
|
+
old_uf = uf;
|
2012
|
+
}
|
2013
|
+
|
2014
|
+
/* CONNECT requests can only contain "hostname:port" */
|
2015
|
+
if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
|
2016
|
+
return 1;
|
2017
|
+
}
|
2018
|
+
|
2019
|
+
/* Make sure we don't end somewhere unexpected */
|
2020
|
+
switch (s) {
|
2021
|
+
case s_req_host_v6_start:
|
2022
|
+
case s_req_host_v6:
|
2023
|
+
case s_req_host_v6_end:
|
2024
|
+
case s_req_host:
|
2025
|
+
case s_req_port_start:
|
2026
|
+
return 1;
|
2027
|
+
default:
|
2028
|
+
break;
|
2029
|
+
}
|
2030
|
+
|
2031
|
+
if (u->field_set & (1 << UF_PORT)) {
|
2032
|
+
/* Don't bother with endp; we've already validated the string */
|
2033
|
+
unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
|
2034
|
+
|
2035
|
+
/* Ports have a max value of 2^16 */
|
2036
|
+
if (v > 0xffff) {
|
2037
|
+
return 1;
|
2038
|
+
}
|
2039
|
+
|
2040
|
+
u->port = (uint16_t) v;
|
2041
|
+
}
|
2042
|
+
|
2043
|
+
return 0;
|
2044
|
+
}
|
2045
|
+
|
2046
|
+
void
|
2047
|
+
http_parser_pause(http_parser *parser, int paused) {
|
2048
|
+
/* Users should only be pausing/unpausing a parser that is not in an error
|
2049
|
+
* state. In non-debug builds, there's not much that we can do about this
|
2050
|
+
* other than ignore it.
|
2051
|
+
*/
|
2052
|
+
if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
|
2053
|
+
HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
|
2054
|
+
SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
|
2055
|
+
} else {
|
2056
|
+
assert(0 && "Attempting to pause parser in error state");
|
2057
|
+
}
|
1644
2058
|
}
|