ruby_http_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
+ # Ruby bindings to Ryan Dahl's http-parser
2
+
3
+ Ruby bindings to http://github.com/ry/http-parser
4
+
5
+ ## Usage
6
+
7
+ require "net/http/parser"
8
+
9
+ parser = Net::HTTP::RequestParser.new
10
+
11
+ parser.on_headers_complete = proc do |env|
12
+ # Rack formatted env hash
13
+ p env
14
+ end
15
+
16
+ parser.on_body = proc do |chunk|
17
+ # One chunk of the body
18
+ p chunk
19
+ end
20
+
21
+ parser.on_message_complete = proc do |env|
22
+ # Headers and body are all parsed
23
+ puts "Done!"
24
+ end
25
+
26
+ # Feed raw data from the socket to the parser
27
+ parser << raw_data
@@ -0,0 +1,11 @@
1
+ require "rake/extensiontask"
2
+ require "spec/rake/spectask"
3
+
4
+ Rake::ExtensionTask.new("ruby_http_parser") # class comes from "rake/extensiontask" required above; presumably defines the build tasks for the "ruby_http_parser" extension -- confirm
5
+
6
+ task :default => :spec # a bare `rake` runs the :spec task
7
+
8
+ Spec::Rake::SpecTask.new do |t| # class comes from "spec/rake/spectask" required above; presumably registers the :spec task -- confirm
9
+ t.spec_opts = %w(-fs -c) # options handed to the spec runner; presumably "specdoc format" and "color" -- confirm
10
+ t.spec_files = FileList["spec/**/*_spec.rb"] # every *_spec.rb file at any depth under spec/
11
+ end
@@ -0,0 +1,57 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+ require "rubygems"
3
+ require "thin_parser"
4
+ require "net/http/parser"
5
+ require "benchmark"
6
+
7
+ data = "POST /postit HTTP/1.1\r\n" + # sample request fed to both parsers by the benchmark below
8
+ "Host: localhost:3000\r\n" +
9
+ "User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.9) Gecko/20071025 Firefox/2.0.0.9\r\n" +
10
+ "Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\r\n" +
11
+ "Accept-Language: en-us,en;q=0.5\r\n" +
12
+ "Accept-Encoding: gzip,deflate\r\n" +
13
+ "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n" +
14
+ "Keep-Alive: 300\r\n" +
15
+ "Connection: keep-alive\r\n" +
16
+ "Content-Type: text/html\r\n" +
17
+ "Content-Length: 37\r\n" + # 37 == length of the body string on the last line
18
+ "\r\n" + # blank line separating the headers from the body
19
+ "name=marc&email=macournoyer@gmail.com"
20
+
21
+ def thin(data)
22
+ env = {"rack.input" => StringIO.new}
23
+ Thin::HttpParser.new.execute(env, data, 0)
24
+ env
25
+ end
26
+
27
+ def http_parser(data)
28
+ body = StringIO.new
29
+ env = nil
30
+
31
+ parser = Net::HTTP::RequestParser.new
32
+ parser.on_headers_complete = proc { |e| env = e }
33
+ parser.on_body = proc { |c| body << c }
34
+ parser << data
35
+
36
+ env["rack-input"] = body
37
+ env
38
+ end
39
+
40
+ # p thin(data)
41
+ # p http_parser(data)
42
+
43
+ TESTS = 30_000 # how many times each parser handles the sample request per report
44
+ Benchmark.bmbm do |results| # bmbm prints a "Rehearsal" pass before the measured pass (see the sample output below)
45
+ results.report("thin:") { TESTS.times { thin data } }
46
+ results.report("http-parser:") { TESTS.times { http_parser data } }
47
+ end
48
+
49
+ # On my MBP core duo 2.2Ghz
50
+ # Rehearsal ------------------------------------------------
51
+ # thin: 1.470000 0.000000 1.470000 ( 1.474737)
52
+ # http-parser: 1.270000 0.020000 1.290000 ( 1.292758)
53
+ # --------------------------------------- total: 2.760000sec
54
+ #
55
+ # user system total real
56
+ # thin: 1.150000 0.030000 1.180000 ( 1.173767)
57
+ # http-parser: 1.250000 0.010000 1.260000 ( 1.263796)
@@ -0,0 +1,19 @@
1
+ Copyright 2009 Ryan Dahl <ry@tinyclouds.org>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to
5
+ deal in the Software without restriction, including without limitation the
6
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7
+ sell copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19
+ IN THE SOFTWARE.
@@ -0,0 +1,33 @@
1
+ OPT_DEBUG=-O0 -g -Wall -Wextra -Werror
2
+ OPT_FAST=-O3 -DHTTP_PARSER_STRICT=0
3
+
4
+
5
+ test: test_debug
6
+ ./test_debug
7
+
8
+ test_debug: http_parser_debug.o test.c # debug test binary; links the debug object it depends on
9
+ gcc $(OPT_DEBUG) http_parser_debug.o test.c -o $@
10
+
11
+ http_parser_debug.o: http_parser.c http_parser.h Makefile # -o $@ keeps the debug object separate from the optimized http_parser.o
12
+ gcc $(OPT_DEBUG) -c http_parser.c -o $@
13
+
14
+ test-valgrind: test_debug
15
+ valgrind ./test_debug
16
+
17
+ http_parser.o: http_parser.c http_parser.h Makefile
18
+ gcc $(OPT_FAST) -c http_parser.c
19
+
20
+ test_fast: http_parser.o test.c
21
+ gcc $(OPT_FAST) http_parser.o test.c -o $@
22
+
23
+ test-run-timed: test_fast
24
+ while(true) do time ./test_fast > /dev/null; done
25
+
26
+
27
+ tags: http_parser.c http_parser.h test.c
28
+ ctags $^
29
+
30
+ clean:
31
+ rm -f *.o test test_fast test_debug http_parser.tar tags
32
+
33
+ .PHONY: clean package test test-run test-run-timed test-valgrind # "test" runs a command and must not be satisfied by a stale file named test
@@ -0,0 +1,136 @@
1
+ HTTP Parser
2
+ ===========
3
+
4
+ This is a parser for HTTP messages written in C. It parses both requests
5
+ and responses. The parser is designed to be used in high-performance HTTP
6
+ applications. It does not make any allocations, it does not buffer data, and
7
+ it can be interrupted at any time. It only requires about 136 bytes of data
8
+ per message stream (in a web server that is per connection).
9
+
10
+ Features:
11
+
12
+ * No dependencies
13
+ * Parses both requests and responses.
14
+ * Handles persistent streams.
15
+ * Decodes chunked encoding.
16
+ * Extracts the following data from a message
17
+ * header fields and values
18
+ * content-length
19
+ * request method
20
+ * response status code
21
+ * transfer-encoding
22
+ * http version
23
+ * request path, query string, fragment
24
+ * message body
25
+ * Defends against buffer overflow attacks.
26
+
27
+ Usage
28
+ -----
29
+
30
+ One `http_parser` object is used per TCP connection. Initialize the struct
31
+ using `http_parser_init()` and set the callbacks. That might look something
32
+ like this:
33
+
34
+ http_parser *parser = malloc(sizeof(http_parser));
35
+ http_parser_init(parser);
36
+ parser->on_path = my_path_callback;
37
+ parser->on_header_field = my_header_field_callback;
38
+ /* ... */
39
+ parser->data = my_socket;
40
+
41
+ When data is received on the socket execute the parser and check for errors.
42
+
43
+ size_t len = 80*1024, nparsed;
44
+ char buf[len];
45
+ ssize_t recved;
46
+
47
+ recved = recv(fd, buf, len, 0);
48
+
49
+ if (recved < 0) {
50
+ /* Handle error. */
51
+ }
52
+
53
+ /* Start up / continue the parser.
54
+ * Note that passing recved==0 to http_parse_requests signals
55
+ * that EOF has been received.
56
+ */
57
+ nparsed = http_parse_requests(parser, buf, recved);
58
+
59
+ if (nparsed != recved) {
60
+ /* Handle error. Usually just close the connection. */
61
+ }
62
+
63
+ HTTP needs to know where the end of the stream is. For example, sometimes
64
+ servers send responses without Content-Length and expect the client to
65
+ consume input (for the body) until EOF. To tell http_parser about EOF, give
66
+ `0` as the third parameter to `http_parse_requests()`. Callbacks and errors
67
+ can still be encountered during an EOF, so one must still be prepared
68
+ to receive them.
69
+
70
+ Scalar valued message information such as `status_code`, `method`, and the
71
+ HTTP version are stored in the parser structure. This data is only
72
+ temporarily stored in `http_parser` and gets reset on each new message. If
73
+ this information is needed later, copy it out of the structure during the
74
+ `headers_complete` callback.
75
+
76
+ The parser decodes the transfer-encoding for both requests and responses
77
+ transparently. That is, a chunked encoding is decoded before being sent to
78
+ the on_body callback.
79
+
80
+ It does not decode the content-encoding (gzip). Not all HTTP applications
81
+ need to inspect the body. Decoding gzip is a non-negligible amount of
82
+ processing (and requires making allocations). HTTP proxies using this
83
+ parser, for example, would not want such a feature.
84
+
85
+ Callbacks
86
+ ---------
87
+
88
+ During the `http_parse_requests()` call, the callbacks set in `http_parser`
89
+ will be executed. The parser maintains state and never looks behind, so
90
+ buffering the data is not necessary. If you need to save certain data for
91
+ later usage, you can do that from the callbacks.
92
+
93
+ There are two types of callbacks:
94
+
95
+ * notification `typedef int (*http_cb) (http_parser*);`
96
+ Callbacks: on_message_begin, on_headers_complete, on_message_complete.
97
+ * data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
98
+ Callbacks: (requests only) on_path, on_query_string, on_url, on_fragment,
99
+ (common) on_header_field, on_header_value, on_body.
100
+
101
+ In case you parse an HTTP message in chunks (e.g. `read()` the request line
102
+ from the socket, parse, read half of the headers, parse, etc.) your data callbacks
103
+ may be called more than once. Http-parser guarantees that the data pointer is only
104
+ valid for the lifetime of the callback. You can also `read()` into a heap-allocated
105
+ buffer to avoid copying memory around if this fits your application.
106
+
107
+ Reading headers may be a tricky task if you read/parse headers partially.
108
+ Basically, you need to remember whether the last header callback was a field or a value
109
+ and apply the following logic:
110
+
111
+ (on_header_field and on_header_value shortened to on_h_*)
112
+ ------------------------ ------------ --------------------------------------------
113
+ | State (prev. callback) | Callback | Description/action |
114
+ ------------------------ ------------ --------------------------------------------
115
+ | nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
116
+ | | | into it |
117
+ ------------------------ ------------ --------------------------------------------
118
+ | value | on_h_field | New header started. |
119
+ | | | Copy current name,value buffers to headers |
120
+ | | | list and allocate new buffer for new name |
121
+ ------------------------ ------------ --------------------------------------------
122
+ | field | on_h_field | Previous name continues. Reallocate name |
123
+ | | | buffer and append callback data to it |
124
+ ------------------------ ------------ --------------------------------------------
125
+ | field | on_h_value | Value for current header started. Allocate |
126
+ | | | new buffer and copy callback data to it |
127
+ ------------------------ ------------ --------------------------------------------
128
+ | value | on_h_value | Value continues. Reallocate value buffer |
129
+ | | | and append callback data to it |
130
+ ------------------------ ------------ --------------------------------------------
131
+
132
+ See examples of reading in headers:
133
+
134
+ * [partial example](http://gist.github.com/155877) in C
135
+ * [from http-parser tests](http://github.com/ry/http-parser/blob/37a0ff8928fb0d83cec0d0d8909c5a4abcd221af/test.c#L403) in C
136
+ * [from Node library](http://github.com/ry/node/blob/842eaf446d2fdcb33b296c67c911c32a0dabc747/src/http.js#L284) in Javascript
@@ -0,0 +1,1468 @@
1
+ /* Copyright 2009 Ryan Dahl <ry@tinyclouds.org>
2
+ *
3
+ * Some parts of this source file were taken from NGINX
4
+ * (src/http/ngx_http_parser.c) copyright (C) 2002-2009 Igor Sysoev.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to
8
+ * deal in the Software without restriction, including without limitation the
9
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ * sell copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ * IN THE SOFTWARE.
23
+ */
24
+ #include <http_parser.h>
25
+ #include <stdint.h>
26
+ #include <assert.h>
27
+ #include <string.h> /* strncmp */
28
+
29
+ #ifndef NULL
30
+ # define NULL ((void*)0)
31
+ #endif
32
+
33
+ #ifndef MIN
34
+ # define MIN(a,b) ((a) < (b) ? (a) : (b))
35
+ #endif
36
+
37
+ #define MAX_FIELD_SIZE (80*1024)
38
+
39
+
40
+ #define MARK(FOR) /* begin tracking field FOR at the current byte p */ \
41
+ do { \
42
+ parser->FOR##_mark = p; /* where the field's data starts */ \
43
+ parser->FOR##_size = 0; /* restart the running size that FOR##_callback checks against MAX_FIELD_SIZE */ \
44
+ } while (0)
45
+
46
+ #define CALLBACK(FOR) /* report field FOR up to p, then stop tracking it */ \
47
+ do { \
48
+ if (0 != FOR##_callback(parser, p)) return (p - data); /* non-zero result: leave the enclosing function, returning the offset of p */ \
49
+ parser->FOR##_mark = NULL; /* field is finished; FOR##_callback returns early while the mark is NULL */ \
50
+ } while (0)
51
+
52
+ #define CALLBACK_NOCLEAR(FOR) /* like CALLBACK(FOR) but leaves FOR##_mark set */ \
53
+ do { \
54
+ if (0 != FOR##_callback(parser, p)) return (p - data); /* non-zero result: leave the enclosing function, returning the offset of p */ \
55
+ } while (0)
56
+
57
+ #define CALLBACK2(FOR) /* invoke the notification helper FOR##_callback(parser), which takes no data pointer */ \
58
+ do { \
59
+ if (0 != FOR##_callback(parser)) return (p - data); /* non-zero result: leave the enclosing function, returning the offset of p */ \
60
+ } while (0)
61
+
62
+ #define DEFINE_CALLBACK(FOR) /* generates FOR##_callback(), the helper invoked by CALLBACK / CALLBACK_NOCLEAR */ \
63
+ static inline int FOR##_callback (http_parser *parser, const char *p) \
64
+ { \
65
+ if (!parser->FOR##_mark) return 0; /* field is not being tracked: nothing to report */ \
66
+ assert(parser->FOR##_mark); /* always true here because of the early return above */ \
67
+ const char *mark = parser->FOR##_mark; \
68
+ parser->FOR##_size += p - mark; /* size accumulates across calls until MARK resets it */ \
69
+ if (parser->FOR##_size > MAX_FIELD_SIZE) return -1; /* oversized field: fail without calling the user hook */ \
70
+ int r = 0; \
71
+ if (parser->on_##FOR) r = parser->on_##FOR(parser, mark, p - mark); /* user hook is optional; it receives the bytes from mark up to p */ \
72
+ return r; \
73
+ }
74
+
75
+ DEFINE_CALLBACK(url)
76
+ DEFINE_CALLBACK(path)
77
+ DEFINE_CALLBACK(query_string)
78
+ DEFINE_CALLBACK(fragment)
79
+ DEFINE_CALLBACK(header_field)
80
+ DEFINE_CALLBACK(header_value)
81
+
82
+ static inline int headers_complete_callback (http_parser *parser)
83
+ { /* Runs the optional on_headers_complete hook and returns its result; an unset hook yields 0. */
84
+ if (parser->on_headers_complete != NULL) return parser->on_headers_complete(parser);
85
+ return 0;
86
+ }
87
+
88
+ static inline int message_begin_callback (http_parser *parser)
89
+ { /* Runs the optional on_message_begin hook and returns its result; an unset hook yields 0. */
90
+ if (parser->on_message_begin != NULL) return parser->on_message_begin(parser);
91
+ return 0;
92
+ }
93
+
94
+ static inline int message_complete_callback (http_parser *parser)
95
+ { /* Runs the optional on_message_complete hook and returns its result; an unset hook yields 0. */
96
+ if (parser->on_message_complete != NULL) return parser->on_message_complete(parser);
97
+ return 0;
98
+ }
99
+
100
+ #define CONNECTION "connection"
101
+ #define CONTENT_LENGTH "content-length"
102
+ #define TRANSFER_ENCODING "transfer-encoding"
103
+
104
+ #define CHUNKED "chunked"
105
+ #define KEEP_ALIVE "keep-alive"
106
+ #define CLOSE "close"
107
+
108
+
109
+ static const unsigned char lowcase[] = /* byte -> lower-case letter, digit or '-' (kept as-is); every other byte maps to 0 */
110
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
111
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789\0\0\0\0\0\0" /* literal is split so "\0" is not read together with the following digits as one octal escape */
112
+ "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0" /* row for '@'..'_': upper-case positions hold the lower-case letters */
113
+ "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0" /* row for '`'..DEL */
114
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
115
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
116
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
117
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
118
+
119
+ static const int unhex[] = /* ASCII code -> value of that hex digit, or -1 when the character is not a hex digit; only 128 entries (codes 0..127) */
120
+ {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
121
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
122
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
123
+ , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 /* '0'..'9' */
124
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 'A'..'F' */
125
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
126
+ ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 /* 'a'..'f' */
127
+ ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
128
+ };
129
+
130
+
131
+ static const uint32_t usual[] = { /* 256-bit map read by USUAL(c): word c>>5, bit c&0x1f; a set bit lets the URL states keep consuming the byte */
132
+ 0xffffdbfe, /* 1111 1111 1111 1111 1101 1011 1111 1110 */ /* bytes 0..31: NUL, LF and CR are cleared */
133
+
134
+ /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
135
+ 0x7ffffff6, /* 0111 1111 1111 1111 1111 1111 1111 0110 */ /* space, '#' and '?' are cleared */
136
+
137
+ /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
138
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
139
+
140
+ /* ~}| {zyx wvut srqp onml kjih gfed cba` */
141
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
142
+
143
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ /* bytes 128..255: all set */
144
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
145
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
146
+ 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
147
+ };
148
+
149
+ #define USUAL(c) (usual[(unsigned char)(c) >> 5] & (1U << ((unsigned char)(c) & 0x1f))) /* non-zero when byte c has its bit set in usual[]; the unsigned char cast keeps the index in 0..7 even where plain char is signed, and 1U avoids shifting into the sign bit */
150
+
151
+ enum state /* states of the byte-at-a-time machine in parse() */
152
+ { s_dead = 1 /* important that this is > 0 */ /* any byte seen in this state is an error; strict-mode NEW_MESSAGE() selects it when keep-alive is off */
153
+
154
+ , s_start_res /* first byte of a response: 'H', or a skipped CR/LF */
155
+ , s_res_H
156
+ , s_res_HT
157
+ , s_res_HTT
158
+ , s_res_HTTP
159
+ , s_res_first_http_major
160
+ , s_res_http_major
161
+ , s_res_first_http_minor
162
+ , s_res_http_minor
163
+ , s_res_first_status_code
164
+ , s_res_status_code
165
+ , s_res_status /* human-readable status text; ignored */
166
+ , s_res_line_almost_done
167
+
168
+ , s_start_req /* first byte of a request: must be 'A'..'Z', the start of the method */
169
+
170
+ , s_req_method
171
+ , s_req_spaces_before_url
172
+ , s_req_schema
173
+ , s_req_schema_slash
174
+ , s_req_schema_slash_slash
175
+ , s_req_host
176
+ , s_req_port
177
+ , s_req_path
178
+ , s_req_query_string_start
179
+ , s_req_query_string
180
+ , s_req_fragment_start
181
+ , s_req_fragment
182
+ , s_req_http_start
183
+ , s_req_http_H
184
+ , s_req_http_HT
185
+ , s_req_http_HTT
186
+ , s_req_http_HTTP
187
+ , s_req_first_http_major
188
+ , s_req_http_major
189
+ , s_req_first_http_minor
190
+ , s_req_http_minor
191
+ , s_req_line_almost_done /* CR seen at the end of the request line; LF must follow */
192
+
193
+ , s_header_field_start /* start of a header line; CR or LF here ends the header section */
194
+ , s_header_field
195
+ , s_header_value_start
196
+ , s_header_value
197
+
198
+ , s_header_almost_done
199
+
200
+ , s_headers_almost_done
201
+ , s_headers_done
202
+
203
+ , s_chunk_size_start
204
+ , s_chunk_size
205
+ , s_chunk_size_almost_done
206
+ , s_chunk_parameters
207
+ , s_chunk_data
208
+ , s_chunk_data_almost_done
209
+ , s_chunk_data_done
210
+
211
+ , s_body_identity
212
+ , s_body_identity_eof /* a zero-length parse() call in this state fires message_complete */
213
+ };
214
+
215
+ enum header_states /* value kept in parser->header_state; NOTE(review): the code that drives these values is outside this excerpt, so the notes below are assumptions to confirm */
216
+ { h_general = 0
217
+ , h_C /* presumably prefix states while a header name starting "c", "co", "con" is still ambiguous -- confirm */
218
+ , h_CO
219
+ , h_CON
220
+
221
+ , h_matching_connection /* presumably: name still matches the CONNECTION / CONTENT_LENGTH / TRANSFER_ENCODING strings -- confirm */
222
+ , h_matching_content_length
223
+ , h_matching_transfer_encoding
224
+
225
+ , h_connection /* presumably: header name fully matched -- confirm */
226
+ , h_content_length
227
+ , h_transfer_encoding
228
+
229
+ , h_matching_transfer_encoding_chunked /* presumably: value still matches CHUNKED / KEEP_ALIVE / CLOSE -- confirm */
230
+ , h_matching_connection_keep_alive
231
+ , h_matching_connection_close
232
+
233
+ , h_transfer_encoding_chunked /* presumably: value fully matched -- confirm */
234
+ , h_connection_keep_alive
235
+ , h_connection_close
236
+ };
237
+
238
+ enum flags /* bit values for parser->flags, which parse() zeroes at the start of each message; NOTE(review): where each bit is set is outside this excerpt */
239
+ { F_CHUNKED = 0x0001 /* presumably "Transfer-Encoding: chunked" was seen -- confirm */
240
+ , F_CONNECTION_KEEP_ALIVE = 0x0002 /* presumably "Connection: keep-alive" was seen -- confirm */
241
+ , F_CONNECTION_CLOSE = 0x0004 /* presumably "Connection: close" was seen -- confirm */
242
+ , F_TRAILING = 0x0010 /* note the gap: 0x0008 is unused */
243
+ };
244
+
245
+ #define CR '\r'
246
+ #define LF '\n'
247
+ #define LOWER(c) (unsigned char)(c | 0x20) /* sets bit 0x20, folding 'A'..'Z' onto 'a'..'z'; other bytes change too, so callers range-check the result against 'a'..'z' */
248
+
249
+ #if HTTP_PARSER_STRICT
250
+ # define STRICT_CHECK(cond) if (cond) goto error /* strict builds: a failed check jumps to the `error` label of the enclosing function */
251
+ # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead) /* strict builds: go to s_dead unless http_should_keep_alive(parser) is true */
252
+ #else
253
+ # define STRICT_CHECK(cond) /* lenient builds (the Makefile's OPT_FAST sets HTTP_PARSER_STRICT=0): the check expands to nothing */
254
+ # define NEW_MESSAGE() start_state /* lenient builds: always accept another message */
255
+ #endif
256
+
257
+ static inline
258
+ size_t parse (http_parser *parser, const char *data, size_t len, int start_state)
259
+ {
260
+ char c, ch;
261
+ const char *p, *pe;
262
+ ssize_t to_read;
263
+
264
+ enum state state = parser->state;
265
+ enum header_states header_state = parser->header_state;
266
+ size_t index = parser->index;
267
+
268
+ if (len == 0) {
269
+ if (state == s_body_identity_eof) {
270
+ CALLBACK2(message_complete);
271
+ }
272
+ return 0;
273
+ }
274
+
275
+ if (parser->header_field_mark) parser->header_field_mark = data;
276
+ if (parser->header_value_mark) parser->header_value_mark = data;
277
+ if (parser->fragment_mark) parser->fragment_mark = data;
278
+ if (parser->query_string_mark) parser->query_string_mark = data;
279
+ if (parser->path_mark) parser->path_mark = data;
280
+ if (parser->url_mark) parser->url_mark = data;
281
+
282
+ for (p=data, pe=data+len; p != pe; p++) {
283
+ ch = *p;
284
+ switch (state) {
285
+
286
+ case s_dead:
287
+ /* this state is used after a 'Connection: close' message
288
+ * the parser will error out if it reads another message
289
+ */
290
+ goto error;
291
+
292
+ case s_start_res:
293
+ {
294
+ parser->flags = 0;
295
+ parser->content_length = -1;
296
+
297
+ CALLBACK2(message_begin);
298
+
299
+ switch (ch) {
300
+ case 'H':
301
+ state = s_res_H;
302
+ break;
303
+
304
+ case CR:
305
+ case LF:
306
+ break;
307
+
308
+ default:
309
+ goto error;
310
+ }
311
+ break;
312
+ }
313
+
314
+ case s_res_H:
315
+ STRICT_CHECK(ch != 'T');
316
+ state = s_res_HT;
317
+ break;
318
+
319
+ case s_res_HT:
320
+ STRICT_CHECK(ch != 'T');
321
+ state = s_res_HTT;
322
+ break;
323
+
324
+ case s_res_HTT:
325
+ STRICT_CHECK(ch != 'P');
326
+ state = s_res_HTTP;
327
+ break;
328
+
329
+ case s_res_HTTP:
330
+ STRICT_CHECK(ch != '/');
331
+ state = s_res_first_http_major;
332
+ break;
333
+
334
+ case s_res_first_http_major:
335
+ if (ch < '1' || ch > '9') goto error;
336
+ parser->http_major = ch - '0';
337
+ state = s_res_http_major;
338
+ break;
339
+
340
+ /* major HTTP version or dot */
341
+ case s_res_http_major:
342
+ {
343
+ if (ch == '.') {
344
+ state = s_res_first_http_minor;
345
+ break;
346
+ }
347
+
348
+ if (ch < '0' || ch > '9') goto error;
349
+
350
+ parser->http_major *= 10;
351
+ parser->http_major += ch - '0';
352
+
353
+ if (parser->http_major > 999) goto error;
354
+ break;
355
+ }
356
+
357
+ /* first digit of minor HTTP version */
358
+ case s_res_first_http_minor:
359
+ if (ch < '0' || ch > '9') goto error;
360
+ parser->http_minor = ch - '0';
361
+ state = s_res_http_minor;
362
+ break;
363
+
364
+ /* minor HTTP version or end of request line */
365
+ case s_res_http_minor:
366
+ {
367
+ if (ch == ' ') {
368
+ state = s_res_first_status_code;
369
+ break;
370
+ }
371
+
372
+ if (ch < '0' || ch > '9') goto error;
373
+
374
+ parser->http_minor *= 10;
375
+ parser->http_minor += ch - '0';
376
+
377
+ if (parser->http_minor > 999) goto error;
378
+ break;
379
+ }
380
+
381
+ case s_res_first_status_code:
382
+ {
383
+ if (ch < '0' || ch > '9') {
384
+ if (ch == ' ') {
385
+ break;
386
+ }
387
+ goto error;
388
+ }
389
+ parser->status_code = ch - '0';
390
+ state = s_res_status_code;
391
+ break;
392
+ }
393
+
394
+ case s_res_status_code:
395
+ {
396
+ if (ch < '0' || ch > '9') {
397
+ switch (ch) {
398
+ case ' ':
399
+ state = s_res_status;
400
+ break;
401
+ case CR:
402
+ state = s_res_line_almost_done;
403
+ break;
404
+ case LF:
405
+ state = s_header_field_start;
406
+ break;
407
+ default:
408
+ goto error;
409
+ }
410
+ break;
411
+ }
412
+
413
+ parser->status_code *= 10;
414
+ parser->status_code += ch - '0';
415
+
416
+ if (parser->status_code > 999) goto error;
417
+ break;
418
+ }
419
+
420
+ case s_res_status:
421
+ /* the human readable status. e.g. "NOT FOUND"
422
+ * we are not humans so just ignore this */
423
+ if (ch == CR) {
424
+ state = s_res_line_almost_done;
425
+ break;
426
+ }
427
+
428
+ if (ch == LF) {
429
+ state = s_header_field_start;
430
+ break;
431
+ }
432
+ break;
433
+
434
+ case s_res_line_almost_done:
435
+ STRICT_CHECK(ch != LF);
436
+ state = s_header_field_start;
437
+ break;
438
+
439
+ case s_start_req:
440
+ {
441
+ parser->flags = 0;
442
+ parser->content_length = -1;
443
+
444
+ CALLBACK2(message_begin);
445
+
446
+ if (ch < 'A' || 'Z' < ch) goto error;
447
+
448
+ parser->method = 0;
449
+ index = 0;
450
+ parser->buffer[0] = ch;
451
+ state = s_req_method;
452
+ break;
453
+ }
454
+
455
+ case s_req_method:
456
+ if (ch == ' ') {
457
+ assert(index+1 < HTTP_PARSER_MAX_METHOD_LEN);
458
+ parser->buffer[index+1] = '\0';
459
+
460
+ /* TODO Instead of using strncmp() use NGINX's ngx_str3Ocmp() */
461
+
462
+ switch (index+1) {
463
+ case 3:
464
+ if (strncmp(parser->buffer, "GET", 3) == 0) {
465
+ parser->method = HTTP_GET;
466
+ break;
467
+ }
468
+
469
+ if (strncmp(parser->buffer, "PUT", 3) == 0) {
470
+ parser->method = HTTP_PUT;
471
+ break;
472
+ }
473
+
474
+ break;
475
+
476
+ case 4:
477
+ if (strncmp(parser->buffer, "POST", 4) == 0) {
478
+ parser->method = HTTP_POST;
479
+ break;
480
+ }
481
+
482
+ if (strncmp(parser->buffer, "HEAD", 4) == 0) {
483
+ parser->method = HTTP_HEAD;
484
+ break;
485
+ }
486
+
487
+ if (strncmp(parser->buffer, "COPY", 4) == 0) {
488
+ parser->method = HTTP_COPY;
489
+ break;
490
+ }
491
+
492
+ if (strncmp(parser->buffer, "MOVE", 4) == 0) {
493
+ parser->method = HTTP_MOVE;
494
+ break;
495
+ }
496
+
497
+ break;
498
+
499
+ case 5:
500
+ if (strncmp(parser->buffer, "MKCOL", 5) == 0) {
501
+ parser->method = HTTP_MKCOL;
502
+ break;
503
+ }
504
+
505
+ if (strncmp(parser->buffer, "TRACE", 5) == 0) {
506
+ parser->method = HTTP_TRACE;
507
+ break;
508
+ }
509
+
510
+ break;
511
+
512
+ case 6:
513
+ if (strncmp(parser->buffer, "DELETE", 6) == 0) {
514
+ parser->method = HTTP_DELETE;
515
+ break;
516
+ }
517
+
518
+ if (strncmp(parser->buffer, "UNLOCK", 6) == 0) {
519
+ parser->method = HTTP_UNLOCK;
520
+ break;
521
+ }
522
+
523
+ break;
524
+
525
+ case 7:
526
+ if (strncmp(parser->buffer, "OPTIONS", 7) == 0) {
527
+ parser->method = HTTP_OPTIONS;
528
+ break;
529
+ }
530
+
531
+ if (strncmp(parser->buffer, "CONNECT", 7) == 0) {
532
+ parser->method = HTTP_CONNECT;
533
+ break;
534
+ }
535
+
536
+ break;
537
+
538
+ case 8:
539
+ if (strncmp(parser->buffer, "PROPFIND", 8) == 0) {
540
+ parser->method = HTTP_PROPFIND;
541
+ break;
542
+ }
543
+
544
+ break;
545
+
546
+ case 9:
547
+ if (strncmp(parser->buffer, "PROPPATCH", 9) == 0) {
548
+ parser->method = HTTP_PROPPATCH;
549
+ break;
550
+ }
551
+
552
+ break;
553
+ }
554
+ state = s_req_spaces_before_url;
555
+ break;
556
+ }
557
+
558
+ if (ch < 'A' || 'Z' < ch) goto error;
559
+
560
+ if (++index >= HTTP_PARSER_MAX_METHOD_LEN - 1) {
561
+ goto error;
562
+ }
563
+
564
+ parser->buffer[index] = ch;
565
+
566
+ break;
567
+
568
+ case s_req_spaces_before_url:
569
+ {
570
+ if (ch == ' ') break;
571
+
572
+ if (ch == '/') {
573
+ MARK(url);
574
+ MARK(path);
575
+ state = s_req_path;
576
+ break;
577
+ }
578
+
579
+ c = LOWER(ch);
580
+
581
+ if (c >= 'a' && c <= 'z') {
582
+ MARK(url);
583
+ state = s_req_schema;
584
+ break;
585
+ }
586
+
587
+ goto error;
588
+ }
589
+
590
+ case s_req_schema:
591
+ {
592
+ c = LOWER(ch);
593
+
594
+ if (c >= 'a' && c <= 'z') break;
595
+
596
+ if (ch == ':') {
597
+ state = s_req_schema_slash;
598
+ break;
599
+ }
600
+
601
+ goto error;
602
+ }
603
+
604
+ case s_req_schema_slash:
605
+ STRICT_CHECK(ch != '/');
606
+ state = s_req_schema_slash_slash;
607
+ break;
608
+
609
+ case s_req_schema_slash_slash:
610
+ STRICT_CHECK(ch != '/');
611
+ state = s_req_host;
612
+ break;
613
+
614
+ case s_req_host:
615
+ {
616
+ c = LOWER(ch);
617
+ if (c >= 'a' && c <= 'z') break;
618
+ if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
619
+ switch (ch) {
620
+ case ':':
621
+ state = s_req_port;
622
+ break;
623
+ case '/':
624
+ MARK(path);
625
+ state = s_req_path;
626
+ break;
627
+ case ' ':
628
+ /* The request line looks like:
629
+ * "GET http://foo.bar.com HTTP/1.1"
630
+ * That is, there is no path.
631
+ */
632
+ CALLBACK(url);
633
+ state = s_req_http_start;
634
+ break;
635
+ default:
636
+ goto error;
637
+ }
638
+ break;
639
+ }
640
+
641
+ case s_req_port:
642
+ {
643
+ if (ch >= '0' && ch <= '9') break;
644
+ switch (ch) {
645
+ case '/':
646
+ MARK(path);
647
+ state = s_req_path;
648
+ break;
649
+ case ' ':
650
+ /* The request line looks like:
651
+ * "GET http://foo.bar.com:1234 HTTP/1.1"
652
+ * That is, there is no path.
653
+ */
654
+ CALLBACK(url);
655
+ state = s_req_http_start;
656
+ break;
657
+ default:
658
+ goto error;
659
+ }
660
+ break;
661
+ }
662
+
663
+ case s_req_path:
664
+ {
665
+ if (USUAL(ch)) break;
666
+
667
+ switch (ch) {
668
+ case ' ':
669
+ CALLBACK(url);
670
+ CALLBACK(path);
671
+ state = s_req_http_start;
672
+ break;
673
+ case CR:
674
+ CALLBACK(url);
675
+ CALLBACK(path);
676
+ parser->http_minor = 9;
677
+ state = s_req_line_almost_done;
678
+ break;
679
+ case LF:
680
+ CALLBACK(url);
681
+ CALLBACK(path);
682
+ parser->http_minor = 9;
683
+ state = s_header_field_start;
684
+ break;
685
+ case '?':
686
+ CALLBACK(path);
687
+ state = s_req_query_string_start;
688
+ break;
689
+ case '#':
690
+ CALLBACK(path);
691
+ state = s_req_fragment_start;
692
+ break;
693
+ default:
694
+ goto error;
695
+ }
696
+ break;
697
+ }
698
+
699
+ case s_req_query_string_start:
700
+ {
701
+ if (USUAL(ch)) {
702
+ MARK(query_string);
703
+ state = s_req_query_string;
704
+ break;
705
+ }
706
+
707
+ switch (ch) {
708
+ case '?':
709
+ break; // XXX ignore extra '?' ... is this right?
710
+ case ' ':
711
+ CALLBACK(url);
712
+ state = s_req_http_start;
713
+ break;
714
+ case CR:
715
+ CALLBACK(url);
716
+ parser->http_minor = 9;
717
+ state = s_req_line_almost_done;
718
+ break;
719
+ case LF:
720
+ CALLBACK(url);
721
+ parser->http_minor = 9;
722
+ state = s_header_field_start;
723
+ break;
724
+ case '#':
725
+ state = s_req_fragment_start;
726
+ break;
727
+ default:
728
+ goto error;
729
+ }
730
+ break;
731
+ }
732
+
733
+ case s_req_query_string:
734
+ {
735
+ if (USUAL(ch)) break;
736
+
737
+ switch (ch) {
738
+ case ' ':
739
+ CALLBACK(url);
740
+ CALLBACK(query_string);
741
+ state = s_req_http_start;
742
+ break;
743
+ case CR:
744
+ CALLBACK(url);
745
+ CALLBACK(query_string);
746
+ parser->http_minor = 9;
747
+ state = s_req_line_almost_done;
748
+ break;
749
+ case LF:
750
+ CALLBACK(url);
751
+ CALLBACK(query_string);
752
+ parser->http_minor = 9;
753
+ state = s_header_field_start;
754
+ break;
755
+ case '#':
756
+ CALLBACK(query_string);
757
+ state = s_req_fragment_start;
758
+ break;
759
+ default:
760
+ goto error;
761
+ }
762
+ break;
763
+ }
764
+
765
+ case s_req_fragment_start:
766
+ {
767
+ if (USUAL(ch)) {
768
+ MARK(fragment);
769
+ state = s_req_fragment;
770
+ break;
771
+ }
772
+
773
+ switch (ch) {
774
+ case ' ':
775
+ CALLBACK(url);
776
+ state = s_req_http_start;
777
+ break;
778
+ case CR:
779
+ CALLBACK(url);
780
+ parser->http_minor = 9;
781
+ state = s_req_line_almost_done;
782
+ break;
783
+ case LF:
784
+ CALLBACK(url);
785
+ parser->http_minor = 9;
786
+ state = s_header_field_start;
787
+ break;
788
+ case '?':
789
+ MARK(fragment);
790
+ state = s_req_fragment;
791
+ break;
792
+ case '#':
793
+ break;
794
+ default:
795
+ goto error;
796
+ }
797
+ break;
798
+ }
799
+
800
+ case s_req_fragment:
801
+ {
802
+ if (USUAL(ch)) break;
803
+
804
+ switch (ch) {
805
+ case ' ':
806
+ CALLBACK(url);
807
+ CALLBACK(fragment);
808
+ state = s_req_http_start;
809
+ break;
810
+ case CR:
811
+ CALLBACK(url);
812
+ CALLBACK(fragment);
813
+ parser->http_minor = 9;
814
+ state = s_req_line_almost_done;
815
+ break;
816
+ case LF:
817
+ CALLBACK(url);
818
+ CALLBACK(fragment);
819
+ parser->http_minor = 9;
820
+ state = s_header_field_start;
821
+ break;
822
+ case '?':
823
+ case '#':
824
+ break;
825
+ default:
826
+ goto error;
827
+ }
828
+ break;
829
+ }
830
+
831
+ case s_req_http_start:
832
+ switch (ch) {
833
+ case 'H':
834
+ state = s_req_http_H;
835
+ break;
836
+ case ' ':
837
+ break;
838
+ default:
839
+ goto error;
840
+ }
841
+ break;
842
+
843
+ case s_req_http_H:
844
+ STRICT_CHECK(ch != 'T');
845
+ state = s_req_http_HT;
846
+ break;
847
+
848
+ case s_req_http_HT:
849
+ STRICT_CHECK(ch != 'T');
850
+ state = s_req_http_HTT;
851
+ break;
852
+
853
+ case s_req_http_HTT:
854
+ STRICT_CHECK(ch != 'P');
855
+ state = s_req_http_HTTP;
856
+ break;
857
+
858
+ case s_req_http_HTTP:
859
+ STRICT_CHECK(ch != '/');
860
+ state = s_req_first_http_major;
861
+ break;
862
+
863
+ /* first digit of major HTTP version */
864
+ case s_req_first_http_major:
865
+ if (ch < '1' || ch > '9') goto error;
866
+ parser->http_major = ch - '0';
867
+ state = s_req_http_major;
868
+ break;
869
+
870
+ /* major HTTP version or dot */
871
+ case s_req_http_major:
872
+ {
873
+ if (ch == '.') {
874
+ state = s_req_first_http_minor;
875
+ break;
876
+ }
877
+
878
+ if (ch < '0' || ch > '9') goto error;
879
+
880
+ parser->http_major *= 10;
881
+ parser->http_major += ch - '0';
882
+
883
+ if (parser->http_major > 999) goto error;
884
+ break;
885
+ }
886
+
887
+ /* first digit of minor HTTP version */
888
+ case s_req_first_http_minor:
889
+ if (ch < '0' || ch > '9') goto error;
890
+ parser->http_minor = ch - '0';
891
+ state = s_req_http_minor;
892
+ break;
893
+
894
+ /* minor HTTP version or end of request line */
895
+ case s_req_http_minor:
896
+ {
897
+ if (ch == CR) {
898
+ state = s_req_line_almost_done;
899
+ break;
900
+ }
901
+
902
+ if (ch == LF) {
903
+ state = s_header_field_start;
904
+ break;
905
+ }
906
+
907
+ /* XXX allow spaces after digit? */
908
+
909
+ if (ch < '0' || ch > '9') goto error;
910
+
911
+ parser->http_minor *= 10;
912
+ parser->http_minor += ch - '0';
913
+
914
+ if (parser->http_minor > 999) goto error;
915
+ break;
916
+ }
917
+
918
+ /* end of request line */
919
+ case s_req_line_almost_done:
920
+ {
921
+ if (ch != LF) goto error;
922
+ state = s_header_field_start;
923
+ break;
924
+ }
925
+
926
+ case s_header_field_start:
927
+ {
928
+ if (ch == CR) {
929
+ state = s_headers_almost_done;
930
+ break;
931
+ }
932
+
933
+ if (ch == LF) {
934
+ state = s_headers_done;
935
+ break;
936
+ }
937
+
938
+ c = LOWER(ch);
939
+
940
+ if (c < 'a' || 'z' < c) goto error;
941
+
942
+ MARK(header_field);
943
+
944
+ index = 0;
945
+ state = s_header_field;
946
+
947
+ switch (c) {
948
+ case 'c':
949
+ header_state = h_C;
950
+ break;
951
+
952
+ case 't':
953
+ header_state = h_matching_transfer_encoding;
954
+ break;
955
+
956
+ default:
957
+ header_state = h_general;
958
+ break;
959
+ }
960
+ break;
961
+ }
962
+
963
+ case s_header_field:
964
+ {
965
+ c = lowcase[(int)ch];
966
+
967
+ if (c) {
968
+ switch (header_state) {
969
+ case h_general:
970
+ break;
971
+
972
+ case h_C:
973
+ index++;
974
+ header_state = (c == 'o' ? h_CO : h_general);
975
+ break;
976
+
977
+ case h_CO:
978
+ index++;
979
+ header_state = (c == 'n' ? h_CON : h_general);
980
+ break;
981
+
982
+ case h_CON:
983
+ index++;
984
+ switch (c) {
985
+ case 'n':
986
+ header_state = h_matching_connection;
987
+ break;
988
+ case 't':
989
+ header_state = h_matching_content_length;
990
+ break;
991
+ default:
992
+ header_state = h_general;
993
+ break;
994
+ }
995
+ break;
996
+
997
+ /* connection */
998
+
999
+ case h_matching_connection:
1000
+ index++;
1001
+ if (index > sizeof(CONNECTION)-1
1002
+ || c != CONNECTION[index]) {
1003
+ header_state = h_general;
1004
+ } else if (index == sizeof(CONNECTION)-2) {
1005
+ header_state = h_connection;
1006
+ }
1007
+ break;
1008
+
1009
+ /* content-length */
1010
+
1011
+ case h_matching_content_length:
1012
+ index++;
1013
+ if (index > sizeof(CONTENT_LENGTH)-1
1014
+ || c != CONTENT_LENGTH[index]) {
1015
+ header_state = h_general;
1016
+ } else if (index == sizeof(CONTENT_LENGTH)-2) {
1017
+ header_state = h_content_length;
1018
+ }
1019
+ break;
1020
+
1021
+ /* transfer-encoding */
1022
+
1023
+ case h_matching_transfer_encoding:
1024
+ index++;
1025
+ if (index > sizeof(TRANSFER_ENCODING)-1
1026
+ || c != TRANSFER_ENCODING[index]) {
1027
+ header_state = h_general;
1028
+ } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1029
+ header_state = h_transfer_encoding;
1030
+ }
1031
+ break;
1032
+
1033
+ case h_connection:
1034
+ case h_content_length:
1035
+ case h_transfer_encoding:
1036
+ if (ch != ' ') header_state = h_general;
1037
+ break;
1038
+
1039
+ default:
1040
+ assert(0 && "Unknown header_state");
1041
+ break;
1042
+ }
1043
+ break;
1044
+ }
1045
+
1046
+ if (ch == ':') {
1047
+ CALLBACK(header_field);
1048
+ state = s_header_value_start;
1049
+ break;
1050
+ }
1051
+
1052
+ if (ch == CR) {
1053
+ state = s_header_almost_done;
1054
+ CALLBACK(header_field);
1055
+ break;
1056
+ }
1057
+
1058
+ if (ch == LF) {
1059
+ CALLBACK(header_field);
1060
+ state = s_header_field_start;
1061
+ break;
1062
+ }
1063
+
1064
+ goto error;
1065
+ }
1066
+
1067
+ case s_header_value_start:
1068
+ {
1069
+ if (ch == ' ') break;
1070
+
1071
+ MARK(header_value);
1072
+
1073
+ state = s_header_value;
1074
+ index = 0;
1075
+
1076
+ c = lowcase[(int)ch];
1077
+
1078
+ if (!c) {
1079
+ if (ch == CR) {
1080
+ header_state = h_general;
1081
+ state = s_header_almost_done;
1082
+ break;
1083
+ }
1084
+
1085
+ if (ch == LF) {
1086
+ state = s_header_field_start;
1087
+ break;
1088
+ }
1089
+
1090
+ header_state = h_general;
1091
+ break;
1092
+ }
1093
+
1094
+ switch (header_state) {
1095
+ case h_transfer_encoding:
1096
+ /* looking for 'Transfer-Encoding: chunked' */
1097
+ if ('c' == c) {
1098
+ header_state = h_matching_transfer_encoding_chunked;
1099
+ } else {
1100
+ header_state = h_general;
1101
+ }
1102
+ break;
1103
+
1104
+ case h_content_length:
1105
+ if (ch < '0' || ch > '9') goto error;
1106
+ parser->content_length = ch - '0';
1107
+ break;
1108
+
1109
+ case h_connection:
1110
+ /* looking for 'Connection: keep-alive' */
1111
+ if (c == 'k') {
1112
+ header_state = h_matching_connection_keep_alive;
1113
+ /* looking for 'Connection: close' */
1114
+ } else if (c == 'c') {
1115
+ header_state = h_matching_connection_close;
1116
+ } else {
1117
+ header_state = h_general;
1118
+ }
1119
+ break;
1120
+
1121
+ default:
1122
+ header_state = h_general;
1123
+ break;
1124
+ }
1125
+ break;
1126
+ }
1127
+
1128
+ case s_header_value:
1129
+ {
1130
+ c = lowcase[(int)ch];
1131
+
1132
+ if (!c) {
1133
+ if (ch == CR) {
1134
+ CALLBACK(header_value);
1135
+ state = s_header_almost_done;
1136
+ break;
1137
+ }
1138
+
1139
+ if (ch == LF) {
1140
+ CALLBACK(header_value);
1141
+ state = s_header_field_start;
1142
+ break;
1143
+ }
1144
+ break;
1145
+ }
1146
+
1147
+ switch (header_state) {
1148
+ case h_general:
1149
+ break;
1150
+
1151
+ case h_connection:
1152
+ case h_transfer_encoding:
1153
+ assert(0 && "Shouldn't get here.");
1154
+ break;
1155
+
1156
+ case h_content_length:
1157
+ if (ch < '0' || ch > '9') goto error;
1158
+ parser->content_length *= 10;
1159
+ parser->content_length += ch - '0';
1160
+ break;
1161
+
1162
+ /* Transfer-Encoding: chunked */
1163
+ case h_matching_transfer_encoding_chunked:
1164
+ index++;
1165
+ if (index > sizeof(CHUNKED)-1
1166
+ || c != CHUNKED[index]) {
1167
+ header_state = h_general;
1168
+ } else if (index == sizeof(CHUNKED)-2) {
1169
+ header_state = h_transfer_encoding_chunked;
1170
+ }
1171
+ break;
1172
+
1173
+ /* looking for 'Connection: keep-alive' */
1174
+ case h_matching_connection_keep_alive:
1175
+ index++;
1176
+ if (index > sizeof(KEEP_ALIVE)-1
1177
+ || c != KEEP_ALIVE[index]) {
1178
+ header_state = h_general;
1179
+ } else if (index == sizeof(KEEP_ALIVE)-2) {
1180
+ header_state = h_connection_keep_alive;
1181
+ }
1182
+ break;
1183
+
1184
+ /* looking for 'Connection: close' */
1185
+ case h_matching_connection_close:
1186
+ index++;
1187
+ if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1188
+ header_state = h_general;
1189
+ } else if (index == sizeof(CLOSE)-2) {
1190
+ header_state = h_connection_close;
1191
+ }
1192
+ break;
1193
+
1194
+ case h_transfer_encoding_chunked:
1195
+ case h_connection_keep_alive:
1196
+ case h_connection_close:
1197
+ if (ch != ' ') header_state = h_general;
1198
+ break;
1199
+
1200
+ default:
1201
+ state = s_header_value;
1202
+ header_state = h_general;
1203
+ break;
1204
+ }
1205
+ break;
1206
+ }
1207
+
1208
+ case s_header_almost_done:
1209
+ {
1210
+ STRICT_CHECK(ch != LF);
1211
+
1212
+ state = s_header_field_start;
1213
+
1214
+ switch (header_state) {
1215
+ case h_connection_keep_alive:
1216
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1217
+ break;
1218
+ case h_connection_close:
1219
+ parser->flags |= F_CONNECTION_CLOSE;
1220
+ break;
1221
+ case h_transfer_encoding_chunked:
1222
+ parser->flags |= F_CHUNKED;
1223
+ break;
1224
+ default:
1225
+ break;
1226
+ }
1227
+ break;
1228
+ }
1229
+
1230
+ case s_headers_almost_done:
1231
+ {
1232
+ STRICT_CHECK(ch != LF);
1233
+
1234
+ if (parser->flags & F_TRAILING) {
1235
+ /* End of a chunked request */
1236
+ CALLBACK2(message_complete);
1237
+ state = NEW_MESSAGE();
1238
+ break;
1239
+ }
1240
+
1241
+ parser->body_read = 0;
1242
+
1243
+ CALLBACK2(headers_complete);
1244
+
1245
+ if (parser->flags & F_CHUNKED) {
1246
+ /* chunked encoding - ignore Content-Length header */
1247
+ state = s_chunk_size_start;
1248
+ } else {
1249
+ if (parser->content_length == 0) {
1250
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1251
+ CALLBACK2(message_complete);
1252
+ state = NEW_MESSAGE();
1253
+ } else if (parser->content_length > 0) {
1254
+ /* Content-Length header given and non-zero */
1255
+ state = s_body_identity;
1256
+ } else {
1257
+ if (start_state == s_start_req || http_should_keep_alive(parser)) {
1258
+ /* Assume content-length 0 - read the next */
1259
+ CALLBACK2(message_complete);
1260
+ state = NEW_MESSAGE();
1261
+ } else {
1262
+ /* Read body until EOF */
1263
+ state = s_body_identity_eof;
1264
+ }
1265
+ }
1266
+ }
1267
+
1268
+ break;
1269
+ }
1270
+
1271
+ case s_body_identity:
1272
+ to_read = MIN(pe - p, (ssize_t)(parser->content_length - parser->body_read));
1273
+ if (to_read > 0) {
1274
+ if (parser->on_body) parser->on_body(parser, p, to_read);
1275
+ p += to_read - 1;
1276
+ parser->body_read += to_read;
1277
+ if (parser->body_read == parser->content_length) {
1278
+ CALLBACK2(message_complete);
1279
+ state = NEW_MESSAGE();
1280
+ }
1281
+ }
1282
+ break;
1283
+
1284
+ /* read until EOF */
1285
+ case s_body_identity_eof:
1286
+ to_read = pe - p;
1287
+ if (to_read > 0) {
1288
+ if (parser->on_body) parser->on_body(parser, p, to_read);
1289
+ p += to_read - 1;
1290
+ parser->body_read += to_read;
1291
+ }
1292
+ break;
1293
+
1294
+ case s_chunk_size_start:
1295
+ {
1296
+ assert(parser->flags & F_CHUNKED);
1297
+
1298
+ c = unhex[(int)ch];
1299
+ if (c == -1) goto error;
1300
+ parser->content_length = c;
1301
+ state = s_chunk_size;
1302
+ break;
1303
+ }
1304
+
1305
+ case s_chunk_size:
1306
+ {
1307
+ assert(parser->flags & F_CHUNKED);
1308
+
1309
+ if (ch == CR) {
1310
+ state = s_chunk_size_almost_done;
1311
+ break;
1312
+ }
1313
+
1314
+ c = unhex[(int)ch];
1315
+
1316
+ if (c == -1) {
1317
+ if (ch == ';' || ch == ' ') {
1318
+ state = s_chunk_parameters;
1319
+ break;
1320
+ }
1321
+ goto error;
1322
+ }
1323
+
1324
+ parser->content_length *= 16;
1325
+ parser->content_length += c;
1326
+ break;
1327
+ }
1328
+
1329
+ case s_chunk_parameters:
1330
+ {
1331
+ assert(parser->flags & F_CHUNKED);
1332
+ /* just ignore this shit. TODO check for overflow */
1333
+ if (ch == CR) {
1334
+ state = s_chunk_size_almost_done;
1335
+ break;
1336
+ }
1337
+ break;
1338
+ }
1339
+
1340
+ case s_chunk_size_almost_done:
1341
+ {
1342
+ assert(parser->flags & F_CHUNKED);
1343
+ STRICT_CHECK(ch != LF);
1344
+
1345
+ if (parser->content_length == 0) {
1346
+ parser->flags |= F_TRAILING;
1347
+ state = s_header_field_start;
1348
+ } else {
1349
+ state = s_chunk_data;
1350
+ }
1351
+ break;
1352
+ }
1353
+
1354
+ case s_chunk_data:
1355
+ {
1356
+ assert(parser->flags & F_CHUNKED);
1357
+
1358
+ to_read = MIN(pe - p, (ssize_t)(parser->content_length));
1359
+
1360
+ if (to_read > 0) {
1361
+ if (parser->on_body) parser->on_body(parser, p, to_read);
1362
+ p += to_read - 1;
1363
+ }
1364
+
1365
+ if (to_read == parser->content_length) {
1366
+ state = s_chunk_data_almost_done;
1367
+ }
1368
+
1369
+ parser->content_length -= to_read;
1370
+ break;
1371
+ }
1372
+
1373
+ case s_chunk_data_almost_done:
1374
+ assert(parser->flags & F_CHUNKED);
1375
+ STRICT_CHECK(ch != CR);
1376
+ state = s_chunk_data_done;
1377
+ break;
1378
+
1379
+ case s_chunk_data_done:
1380
+ assert(parser->flags & F_CHUNKED);
1381
+ STRICT_CHECK(ch != LF);
1382
+ state = s_chunk_size_start;
1383
+ break;
1384
+
1385
+ default:
1386
+ assert(0 && "unhandled state");
1387
+ goto error;
1388
+ }
1389
+ }
1390
+
1391
+ CALLBACK_NOCLEAR(header_field);
1392
+ CALLBACK_NOCLEAR(header_value);
1393
+ CALLBACK_NOCLEAR(fragment);
1394
+ CALLBACK_NOCLEAR(query_string);
1395
+ CALLBACK_NOCLEAR(path);
1396
+ CALLBACK_NOCLEAR(url);
1397
+
1398
+ parser->state = state;
1399
+ parser->header_state = header_state;
1400
+ parser->index = index;
1401
+
1402
+ return len;
1403
+
1404
+ error:
1405
+ return (p - data);
1406
+ }
1407
+
1408
+
1409
+ size_t
1410
+ http_parse_requests (http_parser *parser, const char *data, size_t len)
1411
+ {
1412
+ if (!parser->state) parser->state = s_start_req;
1413
+ return parse(parser, data, len, s_start_req);
1414
+ }
1415
+
1416
+
1417
+ size_t
1418
+ http_parse_responses (http_parser *parser, const char *data, size_t len)
1419
+ {
1420
+ if (!parser->state) parser->state = s_start_res;
1421
+ return parse(parser, data, len, s_start_res);
1422
+ }
1423
+
1424
+
1425
+ int
1426
+ http_should_keep_alive (http_parser *parser)
1427
+ {
1428
+ if (parser->http_major > 0 && parser->http_minor > 0) {
1429
+ /* HTTP/1.1 */
1430
+ if (parser->flags & F_CONNECTION_CLOSE) {
1431
+ return 0;
1432
+ } else {
1433
+ return 1;
1434
+ }
1435
+ } else {
1436
+ /* HTTP/1.0 or earlier */
1437
+ if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1438
+ return 1;
1439
+ } else {
1440
+ return 0;
1441
+ }
1442
+ }
1443
+ }
1444
+
1445
+
1446
+ void
1447
+ http_parser_init (http_parser *parser)
1448
+ {
1449
+ parser->state = 0;
1450
+ parser->on_message_begin = NULL;
1451
+ parser->on_path = NULL;
1452
+ parser->on_query_string = NULL;
1453
+ parser->on_url = NULL;
1454
+ parser->on_fragment = NULL;
1455
+ parser->on_header_field = NULL;
1456
+ parser->on_header_value = NULL;
1457
+ parser->on_headers_complete = NULL;
1458
+ parser->on_body = NULL;
1459
+ parser->on_message_complete = NULL;
1460
+
1461
+ parser->header_field_mark = NULL;
1462
+ parser->header_value_mark = NULL;
1463
+ parser->query_string_mark = NULL;
1464
+ parser->path_mark = NULL;
1465
+ parser->url_mark = NULL;
1466
+ parser->fragment_mark = NULL;
1467
+ }
1468
+