midori_http_parser 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.gitmodules +6 -0
- data/.travis.yml +33 -0
- data/Gemfile +2 -0
- data/LICENSE-MIT +20 -0
- data/README.md +90 -0
- data/Rakefile +6 -0
- data/bench/standalone.rb +23 -0
- data/bench/thin.rb +58 -0
- data/ext/ruby_http_parser/.gitignore +1 -0
- data/ext/ruby_http_parser/RubyHttpParserService.java +18 -0
- data/ext/ruby_http_parser/ext_help.h +18 -0
- data/ext/ruby_http_parser/extconf.rb +24 -0
- data/ext/ruby_http_parser/org/ruby_http_parser/RubyHttpParser.java +495 -0
- data/ext/ruby_http_parser/ruby_http_parser.c +516 -0
- data/ext/ruby_http_parser/vendor/.gitkeep +0 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/AUTHORS +32 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/LICENSE-MIT +48 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/README.md +183 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/TODO +28 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/build.xml +74 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.c +2175 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.gyp +79 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/http_parser.h +304 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/Http-parser.java.iml +22 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/FieldData.java +41 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPCallback.java +8 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPDataCallback.java +34 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPErrorCallback.java +12 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPException.java +9 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPMethod.java +113 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParser.java +36 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/HTTPParserUrl.java +76 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserSettings.java +256 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/ParserType.java +13 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/Util.java +111 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPCallback.java +5 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPDataCallback.java +25 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPErrorCallback.java +7 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/HTTPParser.java +2171 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/impl/http_parser/lolevel/ParserSettings.java +83 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Message.java +374 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/ParseUrl.java +51 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Requests.java +69 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Responses.java +52 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Test.java +16 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestHeaderOverflowError.java +48 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestLoaderNG.java +212 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/TestNoOverflowLongBody.java +62 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/UnitTest.java +117 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Upgrade.java +27 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Url.java +127 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/Util.java +236 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/src/test/http_parser/lolevel/WrongContentLength.java +59 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/test.c +3425 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/tests.dumped +845 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/tests.utf8 +17 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/tools/byte_constants.rb +6 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/tools/const_char.rb +13 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/tools/lowcase.rb +15 -0
- data/ext/ruby_http_parser/vendor/http-parser-java/tools/parse_tests.rb +33 -0
- data/ext/ruby_http_parser/vendor/http-parser/AUTHORS +68 -0
- data/ext/ruby_http_parser/vendor/http-parser/LICENSE-MIT +23 -0
- data/ext/ruby_http_parser/vendor/http-parser/README.md +246 -0
- data/ext/ruby_http_parser/vendor/http-parser/bench.c +111 -0
- data/ext/ruby_http_parser/vendor/http-parser/contrib/parsertrace.c +160 -0
- data/ext/ruby_http_parser/vendor/http-parser/contrib/url_parser.c +47 -0
- data/ext/ruby_http_parser/vendor/http-parser/http_parser.c +2470 -0
- data/ext/ruby_http_parser/vendor/http-parser/http_parser.gyp +111 -0
- data/ext/ruby_http_parser/vendor/http-parser/http_parser.h +432 -0
- data/ext/ruby_http_parser/vendor/http-parser/test.c +4226 -0
- data/ext/ruby_http_parser/vendor/http-parser/test_fast +0 -0
- data/ext/ruby_http_parser/vendor/http-parser/test_g +0 -0
- data/lib/http/parser.rb +1 -0
- data/lib/http_parser.rb +21 -0
- data/midori_http_parser.gemspec +24 -0
- data/spec/parser_spec.rb +376 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/support/requests.json +631 -0
- data/spec/support/responses.json +375 -0
- data/tasks/compile.rake +42 -0
- data/tasks/fixtures.rake +71 -0
- data/tasks/spec.rake +5 -0
- data/tasks/submodules.rake +7 -0
- metadata +206 -0
@@ -0,0 +1,17 @@
|
|
1
|
+
name :utf-8 path request
|
2
|
+
raw :"GET /δ¶/δt/pope?q=1#narf HTTP/1.1\r\nHost: github.com\r\n\r\n"
|
3
|
+
type :HTTP_REQUEST
|
4
|
+
method: HTTP_GET
|
5
|
+
status_code :0
|
6
|
+
request_path:/δ¶/δt/pope
|
7
|
+
request_url :/δ¶/δt/pope?q=1#narf
|
8
|
+
fragment :narf
|
9
|
+
query_string:q=1
|
10
|
+
body :""
|
11
|
+
body_size :0
|
12
|
+
header_0 :{ "Host": "github.com"}
|
13
|
+
should_keep_alive :1
|
14
|
+
upgrade :0
|
15
|
+
http_major :1
|
16
|
+
http_minor :1
|
17
|
+
|
@@ -0,0 +1,33 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
# name : 200 trailing space on chunked body
|
6
|
+
# raw : "HTTP/1.1 200 OK\r\nContent-Type: text/plain\r\nTransfer-Encoding: chunked\r\n\r\n25 \r\nThis is the data in the first chunk\r\n\r\n1C\r\nand this is the second one\r\n\r\n0 \r\n\r\n"
|
7
|
+
# type : HTTP_RESPONSE
|
8
|
+
# method: HTTP_DELETE
|
9
|
+
# status code :200
|
10
|
+
# request_path:
|
11
|
+
# request_url :
|
12
|
+
# fragment :
|
13
|
+
# query_string:
|
14
|
+
# body :"This is the data in the first chunk\r\nand this is the second one\r\n"
|
15
|
+
# body_size :65
|
16
|
+
# header_0 :{ "Content-Type": "text/plain"}
|
17
|
+
# header_1 :{ "Transfer-Encoding": "chunked"}
|
18
|
+
# should_keep_alive :1
|
19
|
+
# upgrade :0
|
20
|
+
# http_major :1
|
21
|
+
# http_minor :1
|
22
|
+
|
23
|
+
|
24
|
+
# Plain value object with one read/write attribute per field of a dumped
# parser test case (see the commented sample record above for the format).
#
# NOTE: the :method accessor shadows Ruby's built-in Object#method on
# instances of this class; the name is kept so existing callers keep working.
class ParserTest
  # :method used to be declared twice; the second declaration only redefined
  # the same reader/writer, so a single declaration is equivalent.
  attr_accessor :name, :raw, :type, :method, :status_code, :request_path
end
|
33
|
+
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# Authors ordered by first contribution.
|
2
|
+
Ryan Dahl <ry@tinyclouds.org>
|
3
|
+
Jeremy Hinegardner <jeremy@hinegardner.org>
|
4
|
+
Sergey Shepelev <temotor@gmail.com>
|
5
|
+
Joe Damato <ice799@gmail.com>
|
6
|
+
tomika <tomika_nospam@freemail.hu>
|
7
|
+
Phoenix Sol <phoenix@burninglabs.com>
|
8
|
+
Cliff Frey <cliff@meraki.com>
|
9
|
+
Ewen Cheslack-Postava <ewencp@cs.stanford.edu>
|
10
|
+
Santiago Gala <sgala@apache.org>
|
11
|
+
Tim Becker <tim.becker@syngenio.de>
|
12
|
+
Jeff Terrace <jterrace@gmail.com>
|
13
|
+
Ben Noordhuis <info@bnoordhuis.nl>
|
14
|
+
Nathan Rajlich <nathan@tootallnate.net>
|
15
|
+
Mark Nottingham <mnot@mnot.net>
|
16
|
+
Aman Gupta <aman@tmm1.net>
|
17
|
+
Tim Becker <tim.becker@kuriositaet.de>
|
18
|
+
Sean Cunningham <sean.cunningham@mandiant.com>
|
19
|
+
Peter Griess <pg@std.in>
|
20
|
+
Salman Haq <salman.haq@asti-usa.com>
|
21
|
+
Cliff Frey <clifffrey@gmail.com>
|
22
|
+
Jon Kolb <jon@b0g.us>
|
23
|
+
Fouad Mardini <f.mardini@gmail.com>
|
24
|
+
Paul Querna <pquerna@apache.org>
|
25
|
+
Felix Geisendörfer <felix@debuggable.com>
|
26
|
+
koichik <koichik@improvement.jp>
|
27
|
+
Andre Caron <andre.l.caron@gmail.com>
|
28
|
+
Ivo Raisr <ivosh@ivosh.net>
|
29
|
+
James McLaughlin <jamie@lacewing-project.org>
|
30
|
+
David Gwynne <loki@animata.net>
|
31
|
+
Thomas LE ROUX <thomas@november-eleven.fr>
|
32
|
+
Randy Rizun <rrizun@ortivawireless.com>
|
33
|
+
Andre Louis Caron <andre.louis.caron@usherbrooke.ca>
|
34
|
+
Simon Zimmermann <simonz05@gmail.com>
|
35
|
+
Erik Dubbelboer <erik@dubbelboer.com>
|
36
|
+
Martell Malone <martellmalone@gmail.com>
|
37
|
+
Bertrand Paquet <bpaquet@octo.com>
|
38
|
+
BogDan Vatra <bogdan@kde.org>
|
39
|
+
Peter Faiman <peter@thepicard.org>
|
40
|
+
Corey Richardson <corey@octayn.net>
|
41
|
+
Tóth Tamás <tomika_nospam@freemail.hu>
|
42
|
+
Cam Swords <cam.swords@gmail.com>
|
43
|
+
Chris Dickinson <christopher.s.dickinson@gmail.com>
|
44
|
+
Uli Köhler <ukoehler@btronik.de>
|
45
|
+
Charlie Somerville <charlie@charliesomerville.com>
|
46
|
+
Patrik Stutz <patrik.stutz@gmail.com>
|
47
|
+
Fedor Indutny <fedor.indutny@gmail.com>
|
48
|
+
runner <runner.mei@gmail.com>
|
49
|
+
Alexis Campailla <alexis@janeasystems.com>
|
50
|
+
David Wragg <david@wragg.org>
|
51
|
+
Vinnie Falco <vinnie.falco@gmail.com>
|
52
|
+
Alex Butum <alexbutum@linux.com>
|
53
|
+
Rex Feng <rexfeng@gmail.com>
|
54
|
+
Alex Kocharin <alex@kocharin.ru>
|
55
|
+
Mark Koopman <markmontymark@yahoo.com>
|
56
|
+
Helge Heß <me@helgehess.eu>
|
57
|
+
Alexis La Goutte <alexis.lagoutte@gmail.com>
|
58
|
+
George Miroshnykov <george.miroshnykov@gmail.com>
|
59
|
+
Maciej Małecki <me@mmalecki.com>
|
60
|
+
Marc O'Morain <github.com@marcomorain.com>
|
61
|
+
Jeff Pinner <jpinner@twitter.com>
|
62
|
+
Timothy J Fontaine <tjfontaine@gmail.com>
|
63
|
+
Akagi201 <akagi201@gmail.com>
|
64
|
+
Romain Giraud <giraud.romain@gmail.com>
|
65
|
+
Jay Satiro <raysatiro@yahoo.com>
|
66
|
+
Arne Steen <Arne.Steen@gmx.de>
|
67
|
+
Kjell Schubert <kjell.schubert@gmail.com>
|
68
|
+
Olivier Mengué <dolmen@cpan.org>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
http_parser.c is based on src/http/ngx_http_parse.c from NGINX copyright
|
2
|
+
Igor Sysoev.
|
3
|
+
|
4
|
+
Additional changes are licensed under the same terms as NGINX and
|
5
|
+
copyright Joyent, Inc. and other Node contributors. All rights reserved.
|
6
|
+
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
8
|
+
of this software and associated documentation files (the "Software"), to
|
9
|
+
deal in the Software without restriction, including without limitation the
|
10
|
+
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
11
|
+
sell copies of the Software, and to permit persons to whom the Software is
|
12
|
+
furnished to do so, subject to the following conditions:
|
13
|
+
|
14
|
+
The above copyright notice and this permission notice shall be included in
|
15
|
+
all copies or substantial portions of the Software.
|
16
|
+
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
18
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
19
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
20
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
21
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
22
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
23
|
+
IN THE SOFTWARE.
|
@@ -0,0 +1,246 @@
|
|
1
|
+
HTTP Parser
|
2
|
+
===========
|
3
|
+
|
4
|
+
[![Build Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser)
|
5
|
+
|
6
|
+
This is a parser for HTTP messages written in C. It parses both requests and
|
7
|
+
responses. The parser is designed to be used in high-performance HTTP
|
8
|
+
applications. It does not make any syscalls nor allocations, it does not
|
9
|
+
buffer data, it can be interrupted at any time. Depending on your
|
10
|
+
architecture, it only requires about 40 bytes of data per message
|
11
|
+
stream (in a web server that is per connection).
|
12
|
+
|
13
|
+
Features:
|
14
|
+
|
15
|
+
* No dependencies
|
16
|
+
* Handles persistent streams (keep-alive).
|
17
|
+
* Decodes chunked encoding.
|
18
|
+
* Upgrade support
|
19
|
+
* Defends against buffer overflow attacks.
|
20
|
+
|
21
|
+
The parser extracts the following information from HTTP messages:
|
22
|
+
|
23
|
+
* Header fields and values
|
24
|
+
* Content-Length
|
25
|
+
* Request method
|
26
|
+
* Response status code
|
27
|
+
* Transfer-Encoding
|
28
|
+
* HTTP version
|
29
|
+
* Request URL
|
30
|
+
* Message body
|
31
|
+
|
32
|
+
|
33
|
+
Usage
|
34
|
+
-----
|
35
|
+
|
36
|
+
One `http_parser` object is used per TCP connection. Initialize the struct
|
37
|
+
using `http_parser_init()` and set the callbacks. That might look something
|
38
|
+
like this for a request parser:
|
39
|
+
```c
|
40
|
+
http_parser_settings settings;
|
41
|
+
settings.on_url = my_url_callback;
|
42
|
+
settings.on_header_field = my_header_field_callback;
|
43
|
+
/* ... */
|
44
|
+
|
45
|
+
http_parser *parser = malloc(sizeof(http_parser));
|
46
|
+
http_parser_init(parser, HTTP_REQUEST);
|
47
|
+
parser->data = my_socket;
|
48
|
+
```
|
49
|
+
|
50
|
+
When data is received on the socket execute the parser and check for errors.
|
51
|
+
|
52
|
+
```c
|
53
|
+
size_t len = 80*1024, nparsed;
|
54
|
+
char buf[len];
|
55
|
+
ssize_t recved;
|
56
|
+
|
57
|
+
recved = recv(fd, buf, len, 0);
|
58
|
+
|
59
|
+
if (recved < 0) {
|
60
|
+
/* Handle error. */
|
61
|
+
}
|
62
|
+
|
63
|
+
/* Start up / continue the parser.
|
64
|
+
* Note we pass recved==0 to signal that EOF has been received.
|
65
|
+
*/
|
66
|
+
nparsed = http_parser_execute(parser, &settings, buf, recved);
|
67
|
+
|
68
|
+
if (parser->upgrade) {
|
69
|
+
/* handle new protocol */
|
70
|
+
} else if (nparsed != recved) {
|
71
|
+
/* Handle error. Usually just close the connection. */
|
72
|
+
}
|
73
|
+
```
|
74
|
+
|
75
|
+
HTTP needs to know where the end of the stream is. For example, sometimes
|
76
|
+
servers send responses without Content-Length and expect the client to
|
77
|
+
consume input (for the body) until EOF. To tell http_parser about EOF, give
|
78
|
+
`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
|
79
|
+
can still be encountered during an EOF, so one must still be prepared
|
80
|
+
to receive them.
|
81
|
+
|
82
|
+
Scalar valued message information such as `status_code`, `method`, and the
|
83
|
+
HTTP version are stored in the parser structure. This data is only
|
84
|
+
temporarily stored in `http_parser` and gets reset on each new message. If
|
85
|
+
this information is needed later, copy it out of the structure during the
|
86
|
+
`headers_complete` callback.
|
87
|
+
|
88
|
+
The parser decodes the transfer-encoding for both requests and responses
|
89
|
+
transparently. That is, a chunked encoding is decoded before being sent to
|
90
|
+
the on_body callback.
|
91
|
+
|
92
|
+
|
93
|
+
The Special Problem of Upgrade
|
94
|
+
------------------------------
|
95
|
+
|
96
|
+
HTTP supports upgrading the connection to a different protocol. An
|
97
|
+
increasingly common example of this is the WebSocket protocol which sends
|
98
|
+
a request like
|
99
|
+
|
100
|
+
GET /demo HTTP/1.1
|
101
|
+
Upgrade: WebSocket
|
102
|
+
Connection: Upgrade
|
103
|
+
Host: example.com
|
104
|
+
Origin: http://example.com
|
105
|
+
WebSocket-Protocol: sample
|
106
|
+
|
107
|
+
followed by non-HTTP data.
|
108
|
+
|
109
|
+
(See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information on the
|
110
|
+
WebSocket protocol.)
|
111
|
+
|
112
|
+
To support this, the parser will treat this as a normal HTTP message without a
|
113
|
+
body, issuing both on_headers_complete and on_message_complete callbacks. However
|
114
|
+
http_parser_execute() will stop parsing at the end of the headers and return.
|
115
|
+
|
116
|
+
The user is expected to check if `parser->upgrade` has been set to 1 after
|
117
|
+
`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
|
118
|
+
offset by the return value of `http_parser_execute()`.
|
119
|
+
|
120
|
+
|
121
|
+
Callbacks
|
122
|
+
---------
|
123
|
+
|
124
|
+
During the `http_parser_execute()` call, the callbacks set in
|
125
|
+
`http_parser_settings` will be executed. The parser maintains state and
|
126
|
+
never looks behind, so buffering the data is not necessary. If you need to
|
127
|
+
save certain data for later usage, you can do that from the callbacks.
|
128
|
+
|
129
|
+
There are two types of callbacks:
|
130
|
+
|
131
|
+
* notification `typedef int (*http_cb) (http_parser*);`
|
132
|
+
Callbacks: on_message_begin, on_headers_complete, on_message_complete.
|
133
|
+
* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
|
134
|
+
Callbacks: (requests only) on_url,
|
135
|
+
(common) on_header_field, on_header_value, on_body;
|
136
|
+
|
137
|
+
Callbacks must return 0 on success. Returning a non-zero value indicates
|
138
|
+
error to the parser, making it exit immediately.
|
139
|
+
|
140
|
+
For cases where it is necessary to pass local information to/from a callback,
|
141
|
+
the `http_parser` object's `data` field can be used.
|
142
|
+
An example of such a case is when using threads to handle a socket connection,
|
143
|
+
parse a request, and then give a response over that socket. By instantiation
|
144
|
+
of a thread-local struct containing relevant data (e.g. accepted socket,
|
145
|
+
allocated memory for callbacks to write into, etc), a parser's callbacks are
|
146
|
+
able to communicate data between the scope of the thread and the scope of the
|
147
|
+
callback in a threadsafe manner. This allows http-parser to be used in
|
148
|
+
multi-threaded contexts.
|
149
|
+
|
150
|
+
Example:
|
151
|
+
```c
|
152
|
+
typedef struct {
|
153
|
+
socket_t sock;
|
154
|
+
void* buffer;
|
155
|
+
int buf_len;
|
156
|
+
} custom_data_t;
|
157
|
+
|
158
|
+
|
159
|
+
int my_url_callback(http_parser* parser, const char *at, size_t length) {
|
160
|
+
/* access to thread local custom_data_t struct.
|
161
|
+
Use this access to save parsed data for later use into a thread-local
|
162
|
+
buffer, or communicate over socket
|
163
|
+
*/
|
164
|
+
parser->data;
|
165
|
+
...
|
166
|
+
return 0;
|
167
|
+
}
|
168
|
+
|
169
|
+
...
|
170
|
+
|
171
|
+
void http_parser_thread(socket_t sock) {
|
172
|
+
int nparsed = 0;
|
173
|
+
/* allocate memory for user data */
|
174
|
+
custom_data_t *my_data = malloc(sizeof(custom_data_t));
|
175
|
+
|
176
|
+
/* some information for use by callbacks.
|
177
|
+
* achieves thread -> callback information flow */
|
178
|
+
my_data->sock = sock;
|
179
|
+
|
180
|
+
/* instantiate a thread-local parser */
|
181
|
+
http_parser *parser = malloc(sizeof(http_parser));
|
182
|
+
http_parser_init(parser, HTTP_REQUEST); /* initialise parser */
|
183
|
+
/* this custom data reference is accessible through the reference to the
|
184
|
+
parser supplied to callback functions */
|
185
|
+
parser->data = my_data;
|
186
|
+
|
187
|
+
http_parser_settings settings; /* set up callbacks */
|
188
|
+
settings.on_url = my_url_callback;
|
189
|
+
|
190
|
+
/* execute parser */
|
191
|
+
nparsed = http_parser_execute(parser, &settings, buf, recved);
|
192
|
+
|
193
|
+
...
|
194
|
+
/* parsed information copied from callback.
|
195
|
+
can now perform action on data copied into thread-local memory from callbacks.
|
196
|
+
achieves callback -> thread information flow */
|
197
|
+
my_data->buffer;
|
198
|
+
...
|
199
|
+
}
|
200
|
+
|
201
|
+
```
|
202
|
+
|
203
|
+
In case you parse an HTTP message in chunks (i.e. `read()` request line
|
204
|
+
from socket, parse, read half headers, parse, etc) your data callbacks
|
205
|
+
may be called more than once. http-parser guarantees that the data pointer is only
|
206
|
+
valid for the lifetime of the callback. You can also `read()` into a heap allocated
|
207
|
+
buffer to avoid copying memory around if this fits your application.
|
208
|
+
|
209
|
+
Reading headers may be a tricky task if you read/parse headers partially.
|
210
|
+
Basically, you need to remember whether the last header callback was a field or a value
|
211
|
+
and apply the following logic:
|
212
|
+
|
213
|
+
(on_header_field and on_header_value shortened to on_h_*)
|
214
|
+
------------------------ ------------ --------------------------------------------
|
215
|
+
| State (prev. callback) | Callback | Description/action |
|
216
|
+
------------------------ ------------ --------------------------------------------
|
217
|
+
| nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
|
218
|
+
| | | into it |
|
219
|
+
------------------------ ------------ --------------------------------------------
|
220
|
+
| value | on_h_field | New header started. |
|
221
|
+
| | | Copy current name,value buffers to headers |
|
222
|
+
| | | list and allocate new buffer for new name |
|
223
|
+
------------------------ ------------ --------------------------------------------
|
224
|
+
| field | on_h_field | Previous name continues. Reallocate name |
|
225
|
+
| | | buffer and append callback data to it |
|
226
|
+
------------------------ ------------ --------------------------------------------
|
227
|
+
| field | on_h_value | Value for current header started. Allocate |
|
228
|
+
| | | new buffer and copy callback data to it |
|
229
|
+
------------------------ ------------ --------------------------------------------
|
230
|
+
| value | on_h_value | Value continues. Reallocate value buffer |
|
231
|
+
| | | and append callback data to it |
|
232
|
+
------------------------ ------------ --------------------------------------------
|
233
|
+
|
234
|
+
|
235
|
+
Parsing URLs
|
236
|
+
------------
|
237
|
+
|
238
|
+
A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`.
|
239
|
+
Users of this library may wish to use it to parse URLs constructed from
|
240
|
+
consecutive `on_url` callbacks.
|
241
|
+
|
242
|
+
See examples of reading in headers:
|
243
|
+
|
244
|
+
* [partial example](http://gist.github.com/155877) in C
|
245
|
+
* [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C
|
246
|
+
* [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript
|
@@ -0,0 +1,111 @@
|
|
1
|
+
/* Copyright Fedor Indutny. All rights reserved.
|
2
|
+
*
|
3
|
+
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
* of this software and associated documentation files (the "Software"), to
|
5
|
+
* deal in the Software without restriction, including without limitation the
|
6
|
+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
7
|
+
* sell copies of the Software, and to permit persons to whom the Software is
|
8
|
+
* furnished to do so, subject to the following conditions:
|
9
|
+
*
|
10
|
+
* The above copyright notice and this permission notice shall be included in
|
11
|
+
* all copies or substantial portions of the Software.
|
12
|
+
*
|
13
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
18
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
19
|
+
* IN THE SOFTWARE.
|
20
|
+
*/
|
21
|
+
#include "http_parser.h"
|
22
|
+
#include <assert.h>
|
23
|
+
#include <stdio.h>
|
24
|
+
#include <string.h>
|
25
|
+
#include <sys/time.h>
|
26
|
+
|
27
|
+
/* Canned benchmark input: one complete HTTP/1.1 POST request using chunked
 * transfer encoding. The body is a single 0xb (11) byte chunk, "hello world",
 * followed by the terminating zero-length chunk. */
static const char data[] =
    "POST /joyent/http-parser HTTP/1.1\r\n"
    "Host: github.com\r\n"
    "DNT: 1\r\n"
    "Accept-Encoding: gzip, deflate, sdch\r\n"
    "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n"
    "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/39.0.2171.65 Safari/537.36\r\n"
    "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,"
    "image/webp,*/*;q=0.8\r\n"
    "Referer: https://github.com/joyent/http-parser\r\n"
    "Connection: keep-alive\r\n"
    "Transfer-Encoding: chunked\r\n"
    "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n\r\n";

/* Size of the request in bytes, not counting the literal's trailing NUL. */
static const size_t data_len = sizeof data - 1;
|
43
|
+
|
44
|
+
/* No-op notification callback: ignores the parser and always returns 0. */
static int on_info(http_parser* p) {
  (void) p;  /* intentionally unused */
  return 0;
}
|
47
|
+
|
48
|
+
|
49
|
+
/* No-op data callback: ignores the parser and the (at, length) span it is
 * handed, and always returns 0. */
static int on_data(http_parser* p, const char *at, size_t length) {
  (void) p;       /* intentionally unused */
  (void) at;      /* intentionally unused */
  (void) length;  /* intentionally unused */
  return 0;
}
|
52
|
+
|
53
|
+
static http_parser_settings settings = {
|
54
|
+
.on_message_begin = on_info,
|
55
|
+
.on_headers_complete = on_info,
|
56
|
+
.on_message_complete = on_info,
|
57
|
+
.on_header_field = on_data,
|
58
|
+
.on_header_value = on_data,
|
59
|
+
.on_url = on_data,
|
60
|
+
.on_status = on_data,
|
61
|
+
.on_body = on_data
|
62
|
+
};
|
63
|
+
|
64
|
+
int bench(int iter_count, int silent) {
|
65
|
+
struct http_parser parser;
|
66
|
+
int i;
|
67
|
+
int err;
|
68
|
+
struct timeval start;
|
69
|
+
struct timeval end;
|
70
|
+
float rps;
|
71
|
+
|
72
|
+
if (!silent) {
|
73
|
+
err = gettimeofday(&start, NULL);
|
74
|
+
assert(err == 0);
|
75
|
+
}
|
76
|
+
|
77
|
+
for (i = 0; i < iter_count; i++) {
|
78
|
+
size_t parsed;
|
79
|
+
http_parser_init(&parser, HTTP_REQUEST);
|
80
|
+
|
81
|
+
parsed = http_parser_execute(&parser, &settings, data, data_len);
|
82
|
+
assert(parsed == data_len);
|
83
|
+
}
|
84
|
+
|
85
|
+
if (!silent) {
|
86
|
+
err = gettimeofday(&end, NULL);
|
87
|
+
assert(err == 0);
|
88
|
+
|
89
|
+
fprintf(stdout, "Benchmark result:\n");
|
90
|
+
|
91
|
+
rps = (float) (end.tv_sec - start.tv_sec) +
|
92
|
+
(end.tv_usec - start.tv_usec) * 1e-6f;
|
93
|
+
fprintf(stdout, "Took %f seconds to run\n", rps);
|
94
|
+
|
95
|
+
rps = (float) iter_count / rps;
|
96
|
+
fprintf(stdout, "%f req/sec\n", rps);
|
97
|
+
fflush(stdout);
|
98
|
+
}
|
99
|
+
|
100
|
+
return 0;
|
101
|
+
}
|
102
|
+
|
103
|
+
/* Entry point.
 *
 * With the single argument "infinite", runs silent benchmark rounds in an
 * endless loop and never returns. With any other arguments, runs one timed
 * round and returns bench()'s result. */
int main(int argc, char** argv) {
  enum { ITERATIONS = 5000000 };  /* parse passes per bench() round */
  const int run_forever = (argc == 2 && strcmp(argv[1], "infinite") == 0);

  if (!run_forever) {
    return bench(ITERATIONS, 0);
  }

  for (;;) {
    bench(ITERATIONS, 1);
  }
}
|