ruby_http_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,27 @@
1
+ # Ruby bindings to Ryan Dahl's http-parser
2
+
3
+ Ruby bindings to http://github.com/ry/http-parser
4
+
5
+ ## Usage
6
+
7
+ require "net/http/parser"
8
+
9
+ parser = Net::HTTP::RequestParser.new
10
+
11
+ parser.on_headers_complete = proc do |env|
12
+ # Rack formatted env hash
13
+ p env
14
+ end
15
+
16
+ parser.on_body = proc do |chunk|
17
+ # One chunk of the body
18
+ p chunk
19
+ end
20
+
21
+ parser.on_message_complete = proc do |env|
22
+ # Headers and body are all parsed
23
+ puts "Done!"
24
+ end
25
+
26
+ # Feed raw data from the socket to the parser
27
+ parser << raw_data
@@ -0,0 +1,11 @@
1
+ require "rake/extensiontask"
2
+ require "spec/rake/spectask"
3
+
4
+ Rake::ExtensionTask.new("ruby_http_parser")
5
+
6
+ task :default => :spec
7
+
8
+ Spec::Rake::SpecTask.new do |t|
9
+ t.spec_opts = %w(-fs -c)
10
+ t.spec_files = FileList["spec/**/*_spec.rb"]
11
+ end
@@ -0,0 +1,57 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+ require "rubygems"
3
+ require "thin_parser"
4
+ require "net/http/parser"
5
+ require "benchmark"
6
+
7
+ data = "POST /postit HTTP/1.1\r\n" +
8
+ "Host: localhost:3000\r\n" +
9
+ "User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9\r\n" +
10
+ "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\r\n" +
11
+ "Accept-Language: en-us,en;q=0.5\r\n" +
12
+ "Accept-Encoding: gzip,deflate\r\n" +
13
+ "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n" +
14
+ "Keep-Alive: 300\r\n" +
15
+ "Connection: keep-alive\r\n" +
16
+ "Content-Type: text/html\r\n" +
17
+ "Content-Length: 37\r\n" +
18
+ "\r\n" +
19
+ "name=marc&email=macournoyer@gmail.com"
20
+
21
+ def thin(data) # Benchmark subject: feed +data+ to Thin's parser and return the env hash it filled in
22
+ env = {"rack.input" => StringIO.new}
23
+ Thin::HttpParser.new.execute(env, data, 0) # NOTE(review): the trailing 0 is presumably the start offset -- confirm against Thin
24
+ env
25
+ end
26
+
27
+ def http_parser(data)
28
+ body = StringIO.new
29
+ env = nil
30
+
31
+ parser = Net::HTTP::RequestParser.new
32
+ parser.on_headers_complete = proc { |e| env = e }
33
+ parser.on_body = proc { |c| body << c }
34
+ parser << data
35
+
36
+ env["rack-input"] = body
37
+ env
38
+ end
39
+
40
+ # p thin(data)
41
+ # p http_parser(data)
42
+
43
+ TESTS = 30_000
44
+ Benchmark.bmbm do |results|
45
+ results.report("thin:") { TESTS.times { thin data } }
46
+ results.report("http-parser:") { TESTS.times { http_parser data } }
47
+ end
48
+
49
+ # On my MBP core duo 2.2Ghz
50
+ # Rehearsal ------------------------------------------------
51
+ # thin: 1.470000 0.000000 1.470000 ( 1.474737)
52
+ # http-parser: 1.270000 0.020000 1.290000 ( 1.292758)
53
+ # --------------------------------------- total: 2.760000sec
54
+ #
55
+ # user system total real
56
+ # thin: 1.150000 0.030000 1.180000 ( 1.173767)
57
+ # http-parser: 1.250000 0.010000 1.260000 ( 1.263796)
@@ -0,0 +1,19 @@
1
+ Copyright 2009 Ryan Dahl <ry@tinyclouds.org>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to
5
+ deal in the Software without restriction, including without limitation the
6
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
+ sell copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
+ IN THE SOFTWARE.
@@ -0,0 +1,33 @@
1
+ OPT_DEBUG=-O0 -g -Wall -Wextra -Werror
2
+ OPT_FAST=-O3 -DHTTP_PARSER_STRICT=0
3
+
4
+
5
+ test: test_debug
6
+ ./test_debug
7
+
8
+ test_debug: http_parser_debug.o test.c # link against the debug object this rule depends on
9
+ gcc $(OPT_DEBUG) http_parser_debug.o test.c -o $@
10
+
11
+ http_parser_debug.o: http_parser.c http_parser.h Makefile # write to $@ so the fast http_parser.o is not clobbered
12
+ gcc $(OPT_DEBUG) -c http_parser.c -o $@
13
+
14
+ test-valgrind: test_debug
15
+ valgrind ./test_debug
16
+
17
+ http_parser.o: http_parser.c http_parser.h Makefile
18
+ gcc $(OPT_FAST) -c http_parser.c
19
+
20
+ test_fast: http_parser.o test.c
21
+ gcc $(OPT_FAST) http_parser.o test.c -o $@
22
+
23
+ test-run-timed: test_fast
24
+ while(true) do time ./test_fast > /dev/null; done
25
+
26
+
27
+ tags: http_parser.c http_parser.h test.c
28
+ ctags $^
29
+
30
+ clean:
31
+ rm -f *.o test test_fast test_debug http_parser.tar tags
32
+
33
+ .PHONY: clean package test test-run test-run-timed test-valgrind # 'test' builds no file named test, so it must be phony
@@ -0,0 +1,136 @@
1
+ HTTP Parser
2
+ ===========
3
+
4
+ This is a parser for HTTP messages written in C. It parses both requests
5
+ and responses. The parser is designed to be used in high-performance HTTP
6
+ applications. It does not make any allocations, it does not buffer data, and
7
+ it can be interrupted at any time. It only requires about 136 bytes of data
8
+ per message stream (in a web server that is per connection).
9
+
10
+ Features:
11
+
12
+ * No dependencies
13
+ * Parses both requests and responses.
14
+ * Handles persistent streams.
15
+ * Decodes chunked encoding.
16
+ * Extracts the following data from a message
17
+ * header fields and values
18
+ * content-length
19
+ * request method
20
+ * response status code
21
+ * transfer-encoding
22
+ * http version
23
+ * request path, query string, fragment
24
+ * message body
25
+ * Defends against buffer overflow attacks.
26
+
27
+ Usage
28
+ -----
29
+
30
+ One `http_parser` object is used per TCP connection. Initialize the struct
31
+ using `http_parser_init()` and set the callbacks. That might look something
32
+ like this:
33
+
34
+ http_parser *parser = malloc(sizeof(http_parser));
35
+ http_parser_init(parser);
36
+ parser->on_path = my_path_callback;
37
+ parser->on_header_field = my_header_field_callback;
38
+ /* ... */
39
+ parser->data = my_socket;
40
+
41
+ When data is received on the socket execute the parser and check for errors.
42
+
43
+ size_t len = 80*1024, nparsed;
44
+ char buf[len];
45
+ ssize_t recved;
46
+
47
+ recved = recv(fd, buf, len, 0);
48
+
49
+ if (recved < 0) {
50
+ /* Handle error. */
51
+ }
52
+
53
+ /* Start up / continue the parser.
54
+ * Note we pass the recved==0 to http_parse_requests to signal
55
+ * that EOF has been received.
56
+ */
57
+ nparsed = http_parse_requests(parser, buf, recved);
58
+
59
+ if (nparsed != recved) {
60
+ /* Handle error. Usually just close the connection. */
61
+ }
62
+
63
+ HTTP needs to know where the end of the stream is. For example, sometimes
64
+ servers send responses without Content-Length and expect the client to
65
+ consume input (for the body) until EOF. To tell http_parser about EOF, give
66
+ `0` as the third parameter to `http_parse_requests()`. Callbacks and errors
67
+ can still be encountered during an EOF, so one must still be prepared
68
+ to receive them.
69
+
70
+ Scalar valued message information such as `status_code`, `method`, and the
71
+ HTTP version are stored in the parser structure. This data is only
72
+ temporarily stored in `http_parser` and gets reset on each new message. If
73
+ this information is needed later, copy it out of the structure during the
74
+ `headers_complete` callback.
75
+
76
+ The parser decodes the transfer-encoding for both requests and responses
77
+ transparently. That is, a chunked encoding is decoded before being sent to
78
+ the on_body callback.
79
+
80
+ It does not decode the content-encoding (gzip). Not all HTTP applications
81
+ need to inspect the body. Decoding gzip is a non-negligible amount of
82
+ processing (and requires making allocations). HTTP proxies using this
83
+ parser, for example, would not want such a feature.
84
+
85
+ Callbacks
86
+ ---------
87
+
88
+ During the `http_parse_requests()` call, the callbacks set in `http_parser`
89
+ will be executed. The parser maintains state and never looks behind, so
90
+ buffering the data is not necessary. If you need to save certain data for
91
+ later usage, you can do that from the callbacks.
92
+
93
+ There are two types of callbacks:
94
+
95
+ * notification `typedef int (*http_cb) (http_parser*);`
96
+ Callbacks: on_message_begin, on_headers_complete, on_message_complete.
97
+ * data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
98
+ Callbacks: (requests only) on_path, on_query_string, on_uri, on_fragment,
99
+ (common) on_header_field, on_header_value, on_body;
100
+
101
+ If you parse an HTTP message in chunks (e.g. `read()` the request line
102
+ from socket, parse, read half headers, parse, etc) your data callbacks
103
+ may be called more than once. http-parser guarantees only that the data pointer is
104
+ valid for the lifetime of the callback. You can also `read()` into a heap-allocated
105
+ buffer to avoid copying memory around if this fits your application.
106
+
107
+ Reading headers may be a tricky task if you read/parse headers partially.
108
+ Basically, you need to remember whether the last header callback was field or value
109
+ and apply the following logic:
110
+
111
+ (on_header_field and on_header_value shortened to on_h_*)
112
+ ------------------------ ------------ --------------------------------------------
113
+ | State (prev. callback) | Callback | Description/action |
114
+ ------------------------ ------------ --------------------------------------------
115
+ | nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
116
+ | | | into it |
117
+ ------------------------ ------------ --------------------------------------------
118
+ | value | on_h_field | New header started. |
119
+ | | | Copy current name,value buffers to headers |
120
+ | | | list and allocate new buffer for new name |
121
+ ------------------------ ------------ --------------------------------------------
122
+ | field | on_h_field | Previous name continues. Reallocate name |
123
+ | | | buffer and append callback data to it |
124
+ ------------------------ ------------ --------------------------------------------
125
+ | field | on_h_value | Value for current header started. Allocate |
126
+ | | | new buffer and copy callback data to it |
127
+ ------------------------ ------------ --------------------------------------------
128
+ | value | on_h_value | Value continues. Reallocate value buffer |
129
+ | | | and append callback data to it |
130
+ ------------------------ ------------ --------------------------------------------
131
+
132
+ See examples of reading in headers:
133
+
134
+ * [partial example](http://gist.github.com/155877) in C
135
+ * [from http-parser tests](http://github.com/ry/http-parser/blob/37a0ff8928fb0d83cec0d0d8909c5a4abcd221af/test.c#L403) in C
136
+ * [from Node library](http://github.com/ry/node/blob/842eaf446d2fdcb33b296c67c911c32a0dabc747/src/http.js#L284) in Javascript
@@ -0,0 +1,1468 @@
1
+ /* Copyright 2009 Ryan Dahl <ry@tinyclouds.org>
2
+ *
3
+ * Some parts of this source file were taken from NGINX
4
+ * (src/http/ngx_http_parser.c) copyright (C) 2002-2009 Igor Sysoev.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to
8
+ * deal in the Software without restriction, including without limitation the
9
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ * sell copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ * IN THE SOFTWARE.
23
+ */
24
+ #include <http_parser.h>
25
+ #include <stdint.h>
26
+ #include <assert.h>
27
+ #include <string.h> /* strncmp */
28
+
29
+ #ifndef NULL
30
+ # define NULL ((void*)0)
31
+ #endif
32
+
33
+ #ifndef MIN
34
+ # define MIN(a,b) ((a) < (b) ? (a) : (b))
35
+ #endif
36
+
37
+ #define MAX_FIELD_SIZE (80*1024)
38
+
39
+
40
+ #define MARK(FOR) \
41
+ do { \
42
+ parser->FOR##_mark = p; \
43
+ parser->FOR##_size = 0; \
44
+ } while (0)
45
+
46
+ #define CALLBACK(FOR) \
47
+ do { \
48
+ if (0 != FOR##_callback(parser, p)) return (p - data); \
49
+ parser->FOR##_mark = NULL; \
50
+ } while (0)
51
+
52
+ #define CALLBACK_NOCLEAR(FOR) \
53
+ do { \
54
+ if (0 != FOR##_callback(parser, p)) return (p - data); \
55
+ } while (0)
56
+
57
+ #define CALLBACK2(FOR) \
58
+ do { \
59
+ if (0 != FOR##_callback(parser)) return (p - data); \
60
+ } while (0)
61
+
62
+ #define DEFINE_CALLBACK(FOR) /* generates FOR_callback(): hands the bytes from FOR_mark up to p to on_FOR */ \
63
+ static inline int FOR##_callback (http_parser *parser, const char *p) \
64
+ { \
65
+ if (!parser->FOR##_mark) return 0; /* no mark set: nothing to report */ \
66
+ assert(parser->FOR##_mark); \
67
+ const char *mark = parser->FOR##_mark; \
68
+ parser->FOR##_size += p - mark; /* running total of bytes seen for this field */ \
69
+ if (parser->FOR##_size > MAX_FIELD_SIZE) return -1; /* field too large: report failure */ \
70
+ int r = 0; \
71
+ if (parser->on_##FOR) r = parser->on_##FOR(parser, mark, p - mark); /* handler is optional */ \
72
+ return r; \
73
+ }
74
+
75
+ DEFINE_CALLBACK(url)
76
+ DEFINE_CALLBACK(path)
77
+ DEFINE_CALLBACK(query_string)
78
+ DEFINE_CALLBACK(fragment)
79
+ DEFINE_CALLBACK(header_field)
80
+ DEFINE_CALLBACK(header_value)
81
+
82
+ static inline int headers_complete_callback (http_parser *parser)
83
+ { /* Invoke the user's on_headers_complete handler; an unset handler yields 0. */
84
+ if (parser->on_headers_complete != NULL) return parser->on_headers_complete(parser);
85
+ return 0;
86
+ }
87
+
88
+ static inline int message_begin_callback (http_parser *parser)
89
+ { /* Invoke the user's on_message_begin handler; an unset handler yields 0. */
90
+ if (parser->on_message_begin != NULL) return parser->on_message_begin(parser);
91
+ return 0;
92
+ }
93
+
94
+ static inline int message_complete_callback (http_parser *parser)
95
+ { /* Invoke the user's on_message_complete handler; an unset handler yields 0. */
96
+ if (parser->on_message_complete != NULL) return parser->on_message_complete(parser);
97
+ return 0;
98
+ }
99
+
100
+ #define CONNECTION "connection"
101
+ #define CONTENT_LENGTH "content-length"
102
+ #define TRANSFER_ENCODING "transfer-encoding"
103
+
104
+ #define CHUNKED "chunked"
105
+ #define KEEP_ALIVE "keep-alive"
106
+ #define CLOSE "close"
107
+
108
+
109
+ static const unsigned char lowcase[] =
110
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
111
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789\0\0\0\0\0\0"
112
+ "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
113
+ "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
114
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
115
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
116
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
117
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
118
+
119
+ static const int unhex[] =
120
+ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
121
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
122
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
123
+ , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
124
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
125
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
126
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
127
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
128
+ };
129
+
130
+
131
+ static const uint32_t usual[] = {
132
+ 0xffffdbfe, /* 1111 1111 1111 1111 1101 1011 1111 1110 */
133
+
134
+ /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
135
+ 0x7ffffff6, /* 0111 1111 1111 1111 1111 1111 1111 0110 */
136
+
137
+ /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
138
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
139
+
140
+ /* ~}| {zyx wvut srqp onml kjih gfed cba` */
141
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
142
+
143
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
144
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
145
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
146
+ 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
147
+ };
148
+
149
+ #define USUAL(c) (usual[(unsigned char)(c) >> 5] & (1u << ((unsigned char)(c) & 0x1f))) /* cast first: plain char may be signed, so a byte >= 0x80 would index usual[] negatively; 1u keeps the bit-31 shift defined */
150
+
151
+ enum state
152
+ { s_dead = 1 /* important that this is > 0 */
153
+
154
+ , s_start_res
155
+ , s_res_H
156
+ , s_res_HT
157
+ , s_res_HTT
158
+ , s_res_HTTP
159
+ , s_res_first_http_major
160
+ , s_res_http_major
161
+ , s_res_first_http_minor
162
+ , s_res_http_minor
163
+ , s_res_first_status_code
164
+ , s_res_status_code
165
+ , s_res_status
166
+ , s_res_line_almost_done
167
+
168
+ , s_start_req
169
+
170
+ , s_req_method
171
+ , s_req_spaces_before_url
172
+ , s_req_schema
173
+ , s_req_schema_slash
174
+ , s_req_schema_slash_slash
175
+ , s_req_host
176
+ , s_req_port
177
+ , s_req_path
178
+ , s_req_query_string_start
179
+ , s_req_query_string
180
+ , s_req_fragment_start
181
+ , s_req_fragment
182
+ , s_req_http_start
183
+ , s_req_http_H
184
+ , s_req_http_HT
185
+ , s_req_http_HTT
186
+ , s_req_http_HTTP
187
+ , s_req_first_http_major
188
+ , s_req_http_major
189
+ , s_req_first_http_minor
190
+ , s_req_http_minor
191
+ , s_req_line_almost_done
192
+
193
+ , s_header_field_start
194
+ , s_header_field
195
+ , s_header_value_start
196
+ , s_header_value
197
+
198
+ , s_header_almost_done
199
+
200
+ , s_headers_almost_done
201
+ , s_headers_done
202
+
203
+ , s_chunk_size_start
204
+ , s_chunk_size
205
+ , s_chunk_size_almost_done
206
+ , s_chunk_parameters
207
+ , s_chunk_data
208
+ , s_chunk_data_almost_done
209
+ , s_chunk_data_done
210
+
211
+ , s_body_identity
212
+ , s_body_identity_eof
213
+ };
214
+
215
+ enum header_states
216
+ { h_general = 0
217
+ , h_C
218
+ , h_CO
219
+ , h_CON
220
+
221
+ , h_matching_connection
222
+ , h_matching_content_length
223
+ , h_matching_transfer_encoding
224
+
225
+ , h_connection
226
+ , h_content_length
227
+ , h_transfer_encoding
228
+
229
+ , h_matching_transfer_encoding_chunked
230
+ , h_matching_connection_keep_alive
231
+ , h_matching_connection_close
232
+
233
+ , h_transfer_encoding_chunked
234
+ , h_connection_keep_alive
235
+ , h_connection_close
236
+ };
237
+
238
+ enum flags
239
+ { F_CHUNKED = 0x0001
240
+ , F_CONNECTION_KEEP_ALIVE = 0x0002
241
+ , F_CONNECTION_CLOSE = 0x0004
242
+ , F_TRAILING = 0x0010
243
+ };
244
+
245
+ #define CR '\r'
246
+ #define LF '\n'
247
+ #define LOWER(c) (unsigned char)(c | 0x20)
248
+
249
+ #if HTTP_PARSER_STRICT
250
+ # define STRICT_CHECK(cond) if (cond) goto error
251
+ # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
252
+ #else
253
+ # define STRICT_CHECK(cond)
254
+ # define NEW_MESSAGE() start_state
255
+ #endif
256
+
257
+ static inline
258
+ size_t parse (http_parser *parser, const char *data, size_t len, int start_state)
259
+ {
260
+ char c, ch;
261
+ const char *p, *pe;
262
+ ssize_t to_read;
263
+
264
+ enum state state = parser->state;
265
+ enum header_states header_state = parser->header_state;
266
+ size_t index = parser->index;
267
+
268
+ if (len == 0) {
269
+ if (state == s_body_identity_eof) {
270
+ CALLBACK2(message_complete);
271
+ }
272
+ return 0;
273
+ }
274
+
275
+ if (parser->header_field_mark) parser->header_field_mark = data;
276
+ if (parser->header_value_mark) parser->header_value_mark = data;
277
+ if (parser->fragment_mark) parser->fragment_mark = data;
278
+ if (parser->query_string_mark) parser->query_string_mark = data;
279
+ if (parser->path_mark) parser->path_mark = data;
280
+ if (parser->url_mark) parser->url_mark = data;
281
+
282
+ for (p=data, pe=data+len; p != pe; p++) {
283
+ ch = *p;
284
+ switch (state) {
285
+
286
+ case s_dead:
287
+ /* this state is used after a 'Connection: close' message
288
+ * the parser will error out if it reads another message
289
+ */
290
+ goto error;
291
+
292
+ case s_start_res:
293
+ {
294
+ parser->flags = 0;
295
+ parser->content_length = -1;
296
+
297
+ CALLBACK2(message_begin);
298
+
299
+ switch (ch) {
300
+ case 'H':
301
+ state = s_res_H;
302
+ break;
303
+
304
+ case CR:
305
+ case LF:
306
+ break;
307
+
308
+ default:
309
+ goto error;
310
+ }
311
+ break;
312
+ }
313
+
314
+ case s_res_H:
315
+ STRICT_CHECK(ch != 'T');
316
+ state = s_res_HT;
317
+ break;
318
+
319
+ case s_res_HT:
320
+ STRICT_CHECK(ch != 'T');
321
+ state = s_res_HTT;
322
+ break;
323
+
324
+ case s_res_HTT:
325
+ STRICT_CHECK(ch != 'P');
326
+ state = s_res_HTTP;
327
+ break;
328
+
329
+ case s_res_HTTP:
330
+ STRICT_CHECK(ch != '/');
331
+ state = s_res_first_http_major;
332
+ break;
333
+
334
+ case s_res_first_http_major:
335
+ if (ch < '1' || ch > '9') goto error;
336
+ parser->http_major = ch - '0';
337
+ state = s_res_http_major;
338
+ break;
339
+
340
+ /* major HTTP version or dot */
341
+ case s_res_http_major:
342
+ {
343
+ if (ch == '.') {
344
+ state = s_res_first_http_minor;
345
+ break;
346
+ }
347
+
348
+ if (ch < '0' || ch > '9') goto error;
349
+
350
+ parser->http_major *= 10;
351
+ parser->http_major += ch - '0';
352
+
353
+ if (parser->http_major > 999) goto error;
354
+ break;
355
+ }
356
+
357
+ /* first digit of minor HTTP version */
358
+ case s_res_first_http_minor:
359
+ if (ch < '0' || ch > '9') goto error;
360
+ parser->http_minor = ch - '0';
361
+ state = s_res_http_minor;
362
+ break;
363
+
364
+ /* minor HTTP version or end of request line */
365
+ case s_res_http_minor:
366
+ {
367
+ if (ch == ' ') {
368
+ state = s_res_first_status_code;
369
+ break;
370
+ }
371
+
372
+ if (ch < '0' || ch > '9') goto error;
373
+
374
+ parser->http_minor *= 10;
375
+ parser->http_minor += ch - '0';
376
+
377
+ if (parser->http_minor > 999) goto error;
378
+ break;
379
+ }
380
+
381
+ case s_res_first_status_code:
382
+ {
383
+ if (ch < '0' || ch > '9') {
384
+ if (ch == ' ') {
385
+ break;
386
+ }
387
+ goto error;
388
+ }
389
+ parser->status_code = ch - '0';
390
+ state = s_res_status_code;
391
+ break;
392
+ }
393
+
394
+ case s_res_status_code:
395
+ {
396
+ if (ch < '0' || ch > '9') {
397
+ switch (ch) {
398
+ case ' ':
399
+ state = s_res_status;
400
+ break;
401
+ case CR:
402
+ state = s_res_line_almost_done;
403
+ break;
404
+ case LF:
405
+ state = s_header_field_start;
406
+ break;
407
+ default:
408
+ goto error;
409
+ }
410
+ break;
411
+ }
412
+
413
+ parser->status_code *= 10;
414
+ parser->status_code += ch - '0';
415
+
416
+ if (parser->status_code > 999) goto error;
417
+ break;
418
+ }
419
+
420
+ case s_res_status:
421
+ /* the human readable status. e.g. "NOT FOUND"
422
+ * we are not humans so just ignore this */
423
+ if (ch == CR) {
424
+ state = s_res_line_almost_done;
425
+ break;
426
+ }
427
+
428
+ if (ch == LF) {
429
+ state = s_header_field_start;
430
+ break;
431
+ }
432
+ break;
433
+
434
+ case s_res_line_almost_done:
435
+ STRICT_CHECK(ch != LF);
436
+ state = s_header_field_start;
437
+ break;
438
+
439
+ case s_start_req:
440
+ {
441
+ parser->flags = 0;
442
+ parser->content_length = -1;
443
+
444
+ CALLBACK2(message_begin);
445
+
446
+ if (ch < 'A' || 'Z' < ch) goto error;
447
+
448
+ parser->method = 0;
449
+ index = 0;
450
+ parser->buffer[0] = ch;
451
+ state = s_req_method;
452
+ break;
453
+ }
454
+
455
+ case s_req_method:
456
+ if (ch == ' ') {
457
+ assert(index+1 < HTTP_PARSER_MAX_METHOD_LEN);
458
+ parser->buffer[index+1] = '\0';
459
+
460
+ /* TODO Instead of using strncmp() use NGINX's ngx_str3Ocmp() */
461
+
462
+ switch (index+1) {
463
+ case 3:
464
+ if (strncmp(parser->buffer, "GET", 3) == 0) {
465
+ parser->method = HTTP_GET;
466
+ break;
467
+ }
468
+
469
+ if (strncmp(parser->buffer, "PUT", 3) == 0) {
470
+ parser->method = HTTP_PUT;
471
+ break;
472
+ }
473
+
474
+ break;
475
+
476
+ case 4:
477
+ if (strncmp(parser->buffer, "POST", 4) == 0) {
478
+ parser->method = HTTP_POST;
479
+ break;
480
+ }
481
+
482
+ if (strncmp(parser->buffer, "HEAD", 4) == 0) {
483
+ parser->method = HTTP_HEAD;
484
+ break;
485
+ }
486
+
487
+ if (strncmp(parser->buffer, "COPY", 4) == 0) {
488
+ parser->method = HTTP_COPY;
489
+ break;
490
+ }
491
+
492
+ if (strncmp(parser->buffer, "MOVE", 4) == 0) {
493
+ parser->method = HTTP_MOVE;
494
+ break;
495
+ }
496
+
497
+ break;
498
+
499
+ case 5:
500
+ if (strncmp(parser->buffer, "MKCOL", 5) == 0) {
501
+ parser->method = HTTP_MKCOL;
502
+ break;
503
+ }
504
+
505
+ if (strncmp(parser->buffer, "TRACE", 5) == 0) {
506
+ parser->method = HTTP_TRACE;
507
+ break;
508
+ }
509
+
510
+ break;
511
+
512
+ case 6:
513
+ if (strncmp(parser->buffer, "DELETE", 6) == 0) {
514
+ parser->method = HTTP_DELETE;
515
+ break;
516
+ }
517
+
518
+ if (strncmp(parser->buffer, "UNLOCK", 6) == 0) {
519
+ parser->method = HTTP_UNLOCK;
520
+ break;
521
+ }
522
+
523
+ break;
524
+
525
+ case 7:
526
+ if (strncmp(parser->buffer, "OPTIONS", 7) == 0) {
527
+ parser->method = HTTP_OPTIONS;
528
+ break;
529
+ }
530
+
531
+ if (strncmp(parser->buffer, "CONNECT", 7) == 0) {
532
+ parser->method = HTTP_CONNECT;
533
+ break;
534
+ }
535
+
536
+ break;
537
+
538
+ case 8:
539
+ if (strncmp(parser->buffer, "PROPFIND", 8) == 0) {
540
+ parser->method = HTTP_PROPFIND;
541
+ break;
542
+ }
543
+
544
+ break;
545
+
546
+ case 9:
547
+ if (strncmp(parser->buffer, "PROPPATCH", 9) == 0) {
548
+ parser->method = HTTP_PROPPATCH;
549
+ break;
550
+ }
551
+
552
+ break;
553
+ }
554
+ state = s_req_spaces_before_url;
555
+ break;
556
+ }
557
+
558
+ if (ch < 'A' || 'Z' < ch) goto error;
559
+
560
+ if (++index >= HTTP_PARSER_MAX_METHOD_LEN - 1) {
561
+ goto error;
562
+ }
563
+
564
+ parser->buffer[index] = ch;
565
+
566
+ break;
567
+
568
+ case s_req_spaces_before_url:
569
+ {
570
+ if (ch == ' ') break;
571
+
572
+ if (ch == '/') {
573
+ MARK(url);
574
+ MARK(path);
575
+ state = s_req_path;
576
+ break;
577
+ }
578
+
579
+ c = LOWER(ch);
580
+
581
+ if (c >= 'a' && c <= 'z') {
582
+ MARK(url);
583
+ state = s_req_schema;
584
+ break;
585
+ }
586
+
587
+ goto error;
588
+ }
589
+
590
+ case s_req_schema:
591
+ {
592
+ c = LOWER(ch);
593
+
594
+ if (c >= 'a' && c <= 'z') break;
595
+
596
+ if (ch == ':') {
597
+ state = s_req_schema_slash;
598
+ break;
599
+ }
600
+
601
+ goto error;
602
+ }
603
+
604
+ case s_req_schema_slash:
605
+ STRICT_CHECK(ch != '/');
606
+ state = s_req_schema_slash_slash;
607
+ break;
608
+
609
+ case s_req_schema_slash_slash:
610
+ STRICT_CHECK(ch != '/');
611
+ state = s_req_host;
612
+ break;
613
+
614
+ case s_req_host:
615
+ {
616
+ c = LOWER(ch);
617
+ if (c >= 'a' && c <= 'z') break;
618
+ if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
619
+ switch (ch) {
620
+ case ':':
621
+ state = s_req_port;
622
+ break;
623
+ case '/':
624
+ MARK(path);
625
+ state = s_req_path;
626
+ break;
627
+ case ' ':
628
+ /* The request line looks like:
629
+ * "GET http://foo.bar.com HTTP/1.1"
630
+ * That is, there is no path.
631
+ */
632
+ CALLBACK(url);
633
+ state = s_req_http_start;
634
+ break;
635
+ default:
636
+ goto error;
637
+ }
638
+ break;
639
+ }
640
+
641
+ case s_req_port:
642
+ {
643
+ if (ch >= '0' && ch <= '9') break;
644
+ switch (ch) {
645
+ case '/':
646
+ MARK(path);
647
+ state = s_req_path;
648
+ break;
649
+ case ' ':
650
+ /* The request line looks like:
651
+ * "GET http://foo.bar.com:1234 HTTP/1.1"
652
+ * That is, there is no path.
653
+ */
654
+ CALLBACK(url);
655
+ state = s_req_http_start;
656
+ break;
657
+ default:
658
+ goto error;
659
+ }
660
+ break;
661
+ }
662
+
663
+ case s_req_path:
664
+ {
665
+ if (USUAL(ch)) break;
666
+
667
+ switch (ch) {
668
+ case ' ':
669
+ CALLBACK(url);
670
+ CALLBACK(path);
671
+ state = s_req_http_start;
672
+ break;
673
+ case CR:
674
+ CALLBACK(url);
675
+ CALLBACK(path);
676
+ parser->http_minor = 9;
677
+ state = s_req_line_almost_done;
678
+ break;
679
+ case LF:
680
+ CALLBACK(url);
681
+ CALLBACK(path);
682
+ parser->http_minor = 9;
683
+ state = s_header_field_start;
684
+ break;
685
+ case '?':
686
+ CALLBACK(path);
687
+ state = s_req_query_string_start;
688
+ break;
689
+ case '#':
690
+ CALLBACK(path);
691
+ state = s_req_fragment_start;
692
+ break;
693
+ default:
694
+ goto error;
695
+ }
696
+ break;
697
+ }
698
+
699
+ case s_req_query_string_start:
700
+ {
701
+ if (USUAL(ch)) {
702
+ MARK(query_string);
703
+ state = s_req_query_string;
704
+ break;
705
+ }
706
+
707
+ switch (ch) {
708
+ case '?':
709
+ break; // XXX ignore extra '?' ... is this right?
710
+ case ' ':
711
+ CALLBACK(url);
712
+ state = s_req_http_start;
713
+ break;
714
+ case CR:
715
+ CALLBACK(url);
716
+ parser->http_minor = 9;
717
+ state = s_req_line_almost_done;
718
+ break;
719
+ case LF:
720
+ CALLBACK(url);
721
+ parser->http_minor = 9;
722
+ state = s_header_field_start;
723
+ break;
724
+ case '#':
725
+ state = s_req_fragment_start;
726
+ break;
727
+ default:
728
+ goto error;
729
+ }
730
+ break;
731
+ }
732
+
733
+ case s_req_query_string:
734
+ {
735
+ if (USUAL(ch)) break;
736
+
737
+ switch (ch) {
738
+ case ' ':
739
+ CALLBACK(url);
740
+ CALLBACK(query_string);
741
+ state = s_req_http_start;
742
+ break;
743
+ case CR:
744
+ CALLBACK(url);
745
+ CALLBACK(query_string);
746
+ parser->http_minor = 9;
747
+ state = s_req_line_almost_done;
748
+ break;
749
+ case LF:
750
+ CALLBACK(url);
751
+ CALLBACK(query_string);
752
+ parser->http_minor = 9;
753
+ state = s_header_field_start;
754
+ break;
755
+ case '#':
756
+ CALLBACK(query_string);
757
+ state = s_req_fragment_start;
758
+ break;
759
+ default:
760
+ goto error;
761
+ }
762
+ break;
763
+ }
764
+
765
+ case s_req_fragment_start:
766
+ {
767
+ if (USUAL(ch)) {
768
+ MARK(fragment);
769
+ state = s_req_fragment;
770
+ break;
771
+ }
772
+
773
+ switch (ch) {
774
+ case ' ':
775
+ CALLBACK(url);
776
+ state = s_req_http_start;
777
+ break;
778
+ case CR:
779
+ CALLBACK(url);
780
+ parser->http_minor = 9;
781
+ state = s_req_line_almost_done;
782
+ break;
783
+ case LF:
784
+ CALLBACK(url);
785
+ parser->http_minor = 9;
786
+ state = s_header_field_start;
787
+ break;
788
+ case '?':
789
+ MARK(fragment);
790
+ state = s_req_fragment;
791
+ break;
792
+ case '#':
793
+ break;
794
+ default:
795
+ goto error;
796
+ }
797
+ break;
798
+ }
799
+
800
+ case s_req_fragment:
801
+ {
802
+ if (USUAL(ch)) break;
803
+
804
+ switch (ch) {
805
+ case ' ':
806
+ CALLBACK(url);
807
+ CALLBACK(fragment);
808
+ state = s_req_http_start;
809
+ break;
810
+ case CR:
811
+ CALLBACK(url);
812
+ CALLBACK(fragment);
813
+ parser->http_minor = 9;
814
+ state = s_req_line_almost_done;
815
+ break;
816
+ case LF:
817
+ CALLBACK(url);
818
+ CALLBACK(fragment);
819
+ parser->http_minor = 9;
820
+ state = s_header_field_start;
821
+ break;
822
+ case '?':
823
+ case '#':
824
+ break;
825
+ default:
826
+ goto error;
827
+ }
828
+ break;
829
+ }
830
+
831
+ case s_req_http_start:
832
+ switch (ch) {
833
+ case 'H':
834
+ state = s_req_http_H;
835
+ break;
836
+ case ' ':
837
+ break;
838
+ default:
839
+ goto error;
840
+ }
841
+ break;
842
+
843
+ case s_req_http_H:
844
+ STRICT_CHECK(ch != 'T');
845
+ state = s_req_http_HT;
846
+ break;
847
+
848
+ case s_req_http_HT:
849
+ STRICT_CHECK(ch != 'T');
850
+ state = s_req_http_HTT;
851
+ break;
852
+
853
+ case s_req_http_HTT:
854
+ STRICT_CHECK(ch != 'P');
855
+ state = s_req_http_HTTP;
856
+ break;
857
+
858
+ case s_req_http_HTTP:
859
+ STRICT_CHECK(ch != '/');
860
+ state = s_req_first_http_major;
861
+ break;
862
+
863
+ /* first digit of major HTTP version */
864
+ case s_req_first_http_major:
865
+ if (ch < '1' || ch > '9') goto error;
866
+ parser->http_major = ch - '0';
867
+ state = s_req_http_major;
868
+ break;
869
+
870
+ /* major HTTP version or dot */
871
+ case s_req_http_major:
872
+ {
873
+ if (ch == '.') {
874
+ state = s_req_first_http_minor;
875
+ break;
876
+ }
877
+
878
+ if (ch < '0' || ch > '9') goto error;
879
+
880
+ parser->http_major *= 10;
881
+ parser->http_major += ch - '0';
882
+
883
+ if (parser->http_major > 999) goto error;
884
+ break;
885
+ }
886
+
887
+ /* first digit of minor HTTP version */
888
+ case s_req_first_http_minor:
889
+ if (ch < '0' || ch > '9') goto error;
890
+ parser->http_minor = ch - '0';
891
+ state = s_req_http_minor;
892
+ break;
893
+
894
+ /* minor HTTP version or end of request line */
895
+ case s_req_http_minor:
896
+ {
897
+ if (ch == CR) {
898
+ state = s_req_line_almost_done;
899
+ break;
900
+ }
901
+
902
+ if (ch == LF) {
903
+ state = s_header_field_start;
904
+ break;
905
+ }
906
+
907
+ /* XXX allow spaces after digit? */
908
+
909
+ if (ch < '0' || ch > '9') goto error;
910
+
911
+ parser->http_minor *= 10;
912
+ parser->http_minor += ch - '0';
913
+
914
+ if (parser->http_minor > 999) goto error;
915
+ break;
916
+ }
917
+
918
+ /* end of request line */
919
+ case s_req_line_almost_done:
920
+ {
921
+ if (ch != LF) goto error;
922
+ state = s_header_field_start;
923
+ break;
924
+ }
925
+
926
+ case s_header_field_start:
927
+ {
928
+ if (ch == CR) {
929
+ state = s_headers_almost_done;
930
+ break;
931
+ }
932
+
933
+ if (ch == LF) {
934
+ state = s_headers_done;
935
+ break;
936
+ }
937
+
938
+ c = LOWER(ch);
939
+
940
+ if (c < 'a' || 'z' < c) goto error;
941
+
942
+ MARK(header_field);
943
+
944
+ index = 0;
945
+ state = s_header_field;
946
+
947
+ switch (c) {
948
+ case 'c':
949
+ header_state = h_C;
950
+ break;
951
+
952
+ case 't':
953
+ header_state = h_matching_transfer_encoding;
954
+ break;
955
+
956
+ default:
957
+ header_state = h_general;
958
+ break;
959
+ }
960
+ break;
961
+ }
962
+
963
+ case s_header_field:
964
+ {
965
+ c = lowcase[(int)ch];
966
+
967
+ if (c) {
968
+ switch (header_state) {
969
+ case h_general:
970
+ break;
971
+
972
+ case h_C:
973
+ index++;
974
+ header_state = (c == 'o' ? h_CO : h_general);
975
+ break;
976
+
977
+ case h_CO:
978
+ index++;
979
+ header_state = (c == 'n' ? h_CON : h_general);
980
+ break;
981
+
982
+ case h_CON:
983
+ index++;
984
+ switch (c) {
985
+ case 'n':
986
+ header_state = h_matching_connection;
987
+ break;
988
+ case 't':
989
+ header_state = h_matching_content_length;
990
+ break;
991
+ default:
992
+ header_state = h_general;
993
+ break;
994
+ }
995
+ break;
996
+
997
+ /* connection */
998
+
999
+ case h_matching_connection:
1000
+ index++;
1001
+ if (index > sizeof(CONNECTION)-1
1002
+ || c != CONNECTION[index]) {
1003
+ header_state = h_general;
1004
+ } else if (index == sizeof(CONNECTION)-2) {
1005
+ header_state = h_connection;
1006
+ }
1007
+ break;
1008
+
1009
+ /* content-length */
1010
+
1011
+ case h_matching_content_length:
1012
+ index++;
1013
+ if (index > sizeof(CONTENT_LENGTH)-1
1014
+ || c != CONTENT_LENGTH[index]) {
1015
+ header_state = h_general;
1016
+ } else if (index == sizeof(CONTENT_LENGTH)-2) {
1017
+ header_state = h_content_length;
1018
+ }
1019
+ break;
1020
+
1021
+ /* transfer-encoding */
1022
+
1023
+ case h_matching_transfer_encoding:
1024
+ index++;
1025
+ if (index > sizeof(TRANSFER_ENCODING)-1
1026
+ || c != TRANSFER_ENCODING[index]) {
1027
+ header_state = h_general;
1028
+ } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1029
+ header_state = h_transfer_encoding;
1030
+ }
1031
+ break;
1032
+
1033
+ case h_connection:
1034
+ case h_content_length:
1035
+ case h_transfer_encoding:
1036
+ if (ch != ' ') header_state = h_general;
1037
+ break;
1038
+
1039
+ default:
1040
+ assert(0 && "Unknown header_state");
1041
+ break;
1042
+ }
1043
+ break;
1044
+ }
1045
+
1046
+ if (ch == ':') {
1047
+ CALLBACK(header_field);
1048
+ state = s_header_value_start;
1049
+ break;
1050
+ }
1051
+
1052
+ if (ch == CR) {
1053
+ state = s_header_almost_done;
1054
+ CALLBACK(header_field);
1055
+ break;
1056
+ }
1057
+
1058
+ if (ch == LF) {
1059
+ CALLBACK(header_field);
1060
+ state = s_header_field_start;
1061
+ break;
1062
+ }
1063
+
1064
+ goto error;
1065
+ }
1066
+
1067
+ case s_header_value_start:
1068
+ {
1069
+ if (ch == ' ') break;
1070
+
1071
+ MARK(header_value);
1072
+
1073
+ state = s_header_value;
1074
+ index = 0;
1075
+
1076
+ c = lowcase[(int)ch];
1077
+
1078
+ if (!c) {
1079
+ if (ch == CR) {
1080
+ header_state = h_general;
1081
+ state = s_header_almost_done;
1082
+ break;
1083
+ }
1084
+
1085
+ if (ch == LF) {
1086
+ state = s_header_field_start;
1087
+ break;
1088
+ }
1089
+
1090
+ header_state = h_general;
1091
+ break;
1092
+ }
1093
+
1094
+ switch (header_state) {
1095
+ case h_transfer_encoding:
1096
+ /* looking for 'Transfer-Encoding: chunked' */
1097
+ if ('c' == c) {
1098
+ header_state = h_matching_transfer_encoding_chunked;
1099
+ } else {
1100
+ header_state = h_general;
1101
+ }
1102
+ break;
1103
+
1104
+ case h_content_length:
1105
+ if (ch < '0' || ch > '9') goto error;
1106
+ parser->content_length = ch - '0';
1107
+ break;
1108
+
1109
+ case h_connection:
1110
+ /* looking for 'Connection: keep-alive' */
1111
+ if (c == 'k') {
1112
+ header_state = h_matching_connection_keep_alive;
1113
+ /* looking for 'Connection: close' */
1114
+ } else if (c == 'c') {
1115
+ header_state = h_matching_connection_close;
1116
+ } else {
1117
+ header_state = h_general;
1118
+ }
1119
+ break;
1120
+
1121
+ default:
1122
+ header_state = h_general;
1123
+ break;
1124
+ }
1125
+ break;
1126
+ }
1127
+
1128
+ case s_header_value:
1129
+ {
1130
+ c = lowcase[(int)ch];
1131
+
1132
+ if (!c) {
1133
+ if (ch == CR) {
1134
+ CALLBACK(header_value);
1135
+ state = s_header_almost_done;
1136
+ break;
1137
+ }
1138
+
1139
+ if (ch == LF) {
1140
+ CALLBACK(header_value);
1141
+ state = s_header_field_start;
1142
+ break;
1143
+ }
1144
+ break;
1145
+ }
1146
+
1147
+ switch (header_state) {
1148
+ case h_general:
1149
+ break;
1150
+
1151
+ case h_connection:
1152
+ case h_transfer_encoding:
1153
+ assert(0 && "Shouldn't get here.");
1154
+ break;
1155
+
1156
+ case h_content_length:
1157
+ if (ch < '0' || ch > '9') goto error;
1158
+ parser->content_length *= 10;
1159
+ parser->content_length += ch - '0';
1160
+ break;
1161
+
1162
+ /* Transfer-Encoding: chunked */
1163
+ case h_matching_transfer_encoding_chunked:
1164
+ index++;
1165
+ if (index > sizeof(CHUNKED)-1
1166
+ || c != CHUNKED[index]) {
1167
+ header_state = h_general;
1168
+ } else if (index == sizeof(CHUNKED)-2) {
1169
+ header_state = h_transfer_encoding_chunked;
1170
+ }
1171
+ break;
1172
+
1173
+ /* looking for 'Connection: keep-alive' */
1174
+ case h_matching_connection_keep_alive:
1175
+ index++;
1176
+ if (index > sizeof(KEEP_ALIVE)-1
1177
+ || c != KEEP_ALIVE[index]) {
1178
+ header_state = h_general;
1179
+ } else if (index == sizeof(KEEP_ALIVE)-2) {
1180
+ header_state = h_connection_keep_alive;
1181
+ }
1182
+ break;
1183
+
1184
+ /* looking for 'Connection: close' */
1185
+ case h_matching_connection_close:
1186
+ index++;
1187
+ if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1188
+ header_state = h_general;
1189
+ } else if (index == sizeof(CLOSE)-2) {
1190
+ header_state = h_connection_close;
1191
+ }
1192
+ break;
1193
+
1194
+ case h_transfer_encoding_chunked:
1195
+ case h_connection_keep_alive:
1196
+ case h_connection_close:
1197
+ if (ch != ' ') header_state = h_general;
1198
+ break;
1199
+
1200
+ default:
1201
+ state = s_header_value;
1202
+ header_state = h_general;
1203
+ break;
1204
+ }
1205
+ break;
1206
+ }
1207
+
1208
+ case s_header_almost_done:
1209
+ {
1210
+ STRICT_CHECK(ch != LF);
1211
+
1212
+ state = s_header_field_start;
1213
+
1214
+ switch (header_state) {
1215
+ case h_connection_keep_alive:
1216
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1217
+ break;
1218
+ case h_connection_close:
1219
+ parser->flags |= F_CONNECTION_CLOSE;
1220
+ break;
1221
+ case h_transfer_encoding_chunked:
1222
+ parser->flags |= F_CHUNKED;
1223
+ break;
1224
+ default:
1225
+ break;
1226
+ }
1227
+ break;
1228
+ }
1229
+
1230
+ case s_headers_almost_done:
1231
+ {
1232
+ STRICT_CHECK(ch != LF);
1233
+
1234
+ if (parser->flags & F_TRAILING) {
1235
+ /* End of a chunked request */
1236
+ CALLBACK2(message_complete);
1237
+ state = NEW_MESSAGE();
1238
+ break;
1239
+ }
1240
+
1241
+ parser->body_read = 0;
1242
+
1243
+ CALLBACK2(headers_complete);
1244
+
1245
+ if (parser->flags & F_CHUNKED) {
1246
+ /* chunked encoding - ignore Content-Length header */
1247
+ state = s_chunk_size_start;
1248
+ } else {
1249
+ if (parser->content_length == 0) {
1250
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1251
+ CALLBACK2(message_complete);
1252
+ state = NEW_MESSAGE();
1253
+ } else if (parser->content_length > 0) {
1254
+ /* Content-Length header given and non-zero */
1255
+ state = s_body_identity;
1256
+ } else {
1257
+ if (start_state == s_start_req || http_should_keep_alive(parser)) {
1258
+ /* Assume content-length 0 - read the next */
1259
+ CALLBACK2(message_complete);
1260
+ state = NEW_MESSAGE();
1261
+ } else {
1262
+ /* Read body until EOF */
1263
+ state = s_body_identity_eof;
1264
+ }
1265
+ }
1266
+ }
1267
+
1268
+ break;
1269
+ }
1270
+
1271
+ case s_body_identity:
1272
+ to_read = MIN(pe - p, (ssize_t)(parser->content_length - parser->body_read));
1273
+ if (to_read > 0) {
1274
+ if (parser->on_body) parser->on_body(parser, p, to_read);
1275
+ p += to_read - 1;
1276
+ parser->body_read += to_read;
1277
+ if (parser->body_read == parser->content_length) {
1278
+ CALLBACK2(message_complete);
1279
+ state = NEW_MESSAGE();
1280
+ }
1281
+ }
1282
+ break;
1283
+
1284
+ /* read until EOF */
1285
+ case s_body_identity_eof:
1286
+ to_read = pe - p;
1287
+ if (to_read > 0) {
1288
+ if (parser->on_body) parser->on_body(parser, p, to_read);
1289
+ p += to_read - 1;
1290
+ parser->body_read += to_read;
1291
+ }
1292
+ break;
1293
+
1294
+ case s_chunk_size_start:
1295
+ {
1296
+ assert(parser->flags & F_CHUNKED);
1297
+
1298
+ c = unhex[(int)ch];
1299
+ if (c == -1) goto error;
1300
+ parser->content_length = c;
1301
+ state = s_chunk_size;
1302
+ break;
1303
+ }
1304
+
1305
+ case s_chunk_size:
1306
+ {
1307
+ assert(parser->flags & F_CHUNKED);
1308
+
1309
+ if (ch == CR) {
1310
+ state = s_chunk_size_almost_done;
1311
+ break;
1312
+ }
1313
+
1314
+ c = unhex[(int)ch];
1315
+
1316
+ if (c == -1) {
1317
+ if (ch == ';' || ch == ' ') {
1318
+ state = s_chunk_parameters;
1319
+ break;
1320
+ }
1321
+ goto error;
1322
+ }
1323
+
1324
+ parser->content_length *= 16;
1325
+ parser->content_length += c;
1326
+ break;
1327
+ }
1328
+
1329
+ case s_chunk_parameters:
1330
+ {
1331
+ assert(parser->flags & F_CHUNKED);
1332
+ /* just ignore this shit. TODO check for overflow */
1333
+ if (ch == CR) {
1334
+ state = s_chunk_size_almost_done;
1335
+ break;
1336
+ }
1337
+ break;
1338
+ }
1339
+
1340
+ case s_chunk_size_almost_done:
1341
+ {
1342
+ assert(parser->flags & F_CHUNKED);
1343
+ STRICT_CHECK(ch != LF);
1344
+
1345
+ if (parser->content_length == 0) {
1346
+ parser->flags |= F_TRAILING;
1347
+ state = s_header_field_start;
1348
+ } else {
1349
+ state = s_chunk_data;
1350
+ }
1351
+ break;
1352
+ }
1353
+
1354
+ case s_chunk_data:
1355
+ {
1356
+ assert(parser->flags & F_CHUNKED);
1357
+
1358
+ to_read = MIN(pe - p, (ssize_t)(parser->content_length));
1359
+
1360
+ if (to_read > 0) {
1361
+ if (parser->on_body) parser->on_body(parser, p, to_read);
1362
+ p += to_read - 1;
1363
+ }
1364
+
1365
+ if (to_read == parser->content_length) {
1366
+ state = s_chunk_data_almost_done;
1367
+ }
1368
+
1369
+ parser->content_length -= to_read;
1370
+ break;
1371
+ }
1372
+
1373
+ case s_chunk_data_almost_done:
1374
+ assert(parser->flags & F_CHUNKED);
1375
+ STRICT_CHECK(ch != CR);
1376
+ state = s_chunk_data_done;
1377
+ break;
1378
+
1379
+ case s_chunk_data_done:
1380
+ assert(parser->flags & F_CHUNKED);
1381
+ STRICT_CHECK(ch != LF);
1382
+ state = s_chunk_size_start;
1383
+ break;
1384
+
1385
+ default:
1386
+ assert(0 && "unhandled state");
1387
+ goto error;
1388
+ }
1389
+ }
1390
+
1391
+ CALLBACK_NOCLEAR(header_field);
1392
+ CALLBACK_NOCLEAR(header_value);
1393
+ CALLBACK_NOCLEAR(fragment);
1394
+ CALLBACK_NOCLEAR(query_string);
1395
+ CALLBACK_NOCLEAR(path);
1396
+ CALLBACK_NOCLEAR(url);
1397
+
1398
+ parser->state = state;
1399
+ parser->header_state = header_state;
1400
+ parser->index = index;
1401
+
1402
+ return len;
1403
+
1404
+ error:
1405
+ return (p - data);
1406
+ }
1407
+
1408
+
1409
+ size_t
1410
+ http_parse_requests (http_parser *parser, const char *data, size_t len)
1411
+ {
1412
+ if (!parser->state) parser->state = s_start_req;
1413
+ return parse(parser, data, len, s_start_req);
1414
+ }
1415
+
1416
+
1417
+ size_t
1418
+ http_parse_responses (http_parser *parser, const char *data, size_t len)
1419
+ {
1420
+ if (!parser->state) parser->state = s_start_res;
1421
+ return parse(parser, data, len, s_start_res);
1422
+ }
1423
+
1424
+
1425
+ int
1426
+ http_should_keep_alive (http_parser *parser)
1427
+ {
1428
+ if (parser->http_major > 0 && parser->http_minor > 0) {
1429
+ /* HTTP/1.1 */
1430
+ if (parser->flags & F_CONNECTION_CLOSE) {
1431
+ return 0;
1432
+ } else {
1433
+ return 1;
1434
+ }
1435
+ } else {
1436
+ /* HTTP/1.0 or earlier */
1437
+ if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1438
+ return 1;
1439
+ } else {
1440
+ return 0;
1441
+ }
1442
+ }
1443
+ }
1444
+
1445
+
1446
+ void
1447
+ http_parser_init (http_parser *parser)
1448
+ {
1449
+ parser->state = 0;
1450
+ parser->on_message_begin = NULL;
1451
+ parser->on_path = NULL;
1452
+ parser->on_query_string = NULL;
1453
+ parser->on_url = NULL;
1454
+ parser->on_fragment = NULL;
1455
+ parser->on_header_field = NULL;
1456
+ parser->on_header_value = NULL;
1457
+ parser->on_headers_complete = NULL;
1458
+ parser->on_body = NULL;
1459
+ parser->on_message_complete = NULL;
1460
+
1461
+ parser->header_field_mark = NULL;
1462
+ parser->header_value_mark = NULL;
1463
+ parser->query_string_mark = NULL;
1464
+ parser->path_mark = NULL;
1465
+ parser->url_mark = NULL;
1466
+ parser->fragment_mark = NULL;
1467
+ }
1468
+