http-parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4294a329a0339635dbcafcf057d23f832b80c8ec
4
+ data.tar.gz: 23d181a796fd7d9aaec50b32aa525fdb03ce2e86
5
+ SHA512:
6
+ metadata.gz: af8092393a33fa87bb0b0182f34ace79e593b6f1672cd4dee0c2a8fa8d8161b3984071c684054451f926c82ce5ee4de29052c899f560a5ea83c8aef1b7044027
7
+ data.tar.gz: dec2187b798145b8f2c670048a8c885d13d27d874c404ce9f9feb391a61336d9516c9e86ed85604887ab6e03f7f19b48b5be3a6f2752a013ad666883b19d6cde
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 CoTag Media
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,70 @@
1
+ # http-parser
2
+
3
+ Ruby FFI bindings to [http-parser](https://github.com/joyent/http-parser)
4
+
5
+ ## Install
6
+
7
+ ```shell
8
+ gem install http-parser
9
+ ```
10
+ This gem will compile a local copy of http-parser
11
+
12
+
13
+ ## Usage
14
+
15
+ ```ruby
16
+ require 'rubygems'
17
+ require 'http-parser'
18
+
19
+ #
20
+ # Create a shared parser
21
+ #
22
+ parser = HttpParser::Parser.new do |parser|
23
+ parser.on_message_begin do |inst|
24
+ puts "message begin"
25
+ end
26
+
27
+ parser.on_message_complete do |inst|
28
+ puts "message end"
29
+ end
30
+
31
+ parser.on_url do |inst, data|
32
+ puts "url: #{data}"
33
+ end
34
+
35
+ parser.on_header_field do |inst, data|
36
+ puts "field: #{data}"
37
+ end
38
+
39
+ parser.on_header_value do |inst, data|
40
+ puts "value: #{data}"
41
+ end
42
+ end
43
+
44
+ #
45
+ # Create state objects to track requests through the parser
46
+ #
47
+ request = HttpParser::Parser.new_instance do |inst|
48
+ inst.type = :request
49
+ end
50
+
51
+ #
52
+ # Parse requests
53
+ #
54
+ parser.parse request, "GET /foo HTTP/1.1\r\n"
55
+ sleep 3
56
+ parser.parse request, "Host: example.com\r\n"
57
+ sleep 3
58
+ parser.parse request, "\r\n"
59
+
60
+ #
61
+ # Re-use the memory for another request
62
+ #
63
+ request.reset!
64
+ ```
65
+
66
+ ## Acknowledgements
67
+
68
+ * https://github.com/joyent/http-parser#readme
69
+ * https://github.com/postmodern/ffi-http-parser#readme
70
+ * https://github.com/deepfryed/http-parser-lite#readme
data/Rakefile ADDED
File without changes
data/ext/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ require 'ffi-compiler/compile_task'
2
+
3
+ FFI::Compiler::CompileTask.new('http-parser-ext') do |t|
4
+ t.cflags << "-Wall -Wextra -O3"
5
+ t.cflags << "-D_GNU_SOURCE=1" if RbConfig::CONFIG["host_os"].downcase =~ /mingw/
6
+ t.cflags << "-arch x86_64 -arch i386" if t.platform.mac?
7
+ t.ldflags << "-arch x86_64 -arch i386" if t.platform.mac?
8
+ t.export '../lib/http-parser/ext.rb'
9
+ end
@@ -0,0 +1,2175 @@
1
+ /* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2
+ *
3
+ * Additional changes are licensed under the same terms as NGINX and
4
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ * of this software and associated documentation files (the "Software"), to
8
+ * deal in the Software without restriction, including without limitation the
9
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10
+ * sell copies of the Software, and to permit persons to whom the Software is
11
+ * furnished to do so, subject to the following conditions:
12
+ *
13
+ * The above copyright notice and this permission notice shall be included in
14
+ * all copies or substantial portions of the Software.
15
+ *
16
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
+ * IN THE SOFTWARE.
23
+ */
24
+ #include "http_parser.h"
25
+ #include <assert.h>
26
+ #include <stddef.h>
27
+ #include <ctype.h>
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <limits.h>
31
+
32
+ #ifndef ULLONG_MAX
33
+ # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34
+ #endif
35
+
36
+ #ifndef MIN
37
+ # define MIN(a,b) ((a) < (b) ? (a) : (b))
38
+ #endif
39
+
40
+ #ifndef ARRAY_SIZE
41
+ # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42
+ #endif
43
+
44
+ #ifndef BIT_AT
45
+ # define BIT_AT(a, i) \
46
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47
+ (1 << ((unsigned int) (i) & 7))))
48
+ #endif
49
+
50
+ #ifndef ELEM_AT
51
+ # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52
+ #endif
53
+
54
+ #define SET_ERRNO(e) \
55
+ do { \
56
+ parser->http_errno = (e); \
57
+ } while(0)
58
+
59
+
60
+ /* Run the notify callback FOR, returning ER if it fails */
61
+ #define CALLBACK_NOTIFY_(FOR, ER) \
62
+ do { \
63
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
64
+ \
65
+ if (settings->on_##FOR) { \
66
+ if (0 != settings->on_##FOR(parser)) { \
67
+ SET_ERRNO(HPE_CB_##FOR); \
68
+ } \
69
+ \
70
+ /* We either errored above or got paused; get out */ \
71
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
72
+ return (ER); \
73
+ } \
74
+ } \
75
+ } while (0)
76
+
77
+ /* Run the notify callback FOR and consume the current byte */
78
+ #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
79
+
80
+ /* Run the notify callback FOR and don't consume the current byte */
81
+ #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
82
+
83
+ /* Run data callback FOR with LEN bytes, returning ER if it fails */
84
+ #define CALLBACK_DATA_(FOR, LEN, ER) \
85
+ do { \
86
+ assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
87
+ \
88
+ if (FOR##_mark) { \
89
+ if (settings->on_##FOR) { \
90
+ if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
91
+ SET_ERRNO(HPE_CB_##FOR); \
92
+ } \
93
+ \
94
+ /* We either errored above or got paused; get out */ \
95
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
96
+ return (ER); \
97
+ } \
98
+ } \
99
+ FOR##_mark = NULL; \
100
+ } \
101
+ } while (0)
102
+
103
+ /* Run the data callback FOR and consume the current byte */
104
+ #define CALLBACK_DATA(FOR) \
105
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
106
+
107
+ /* Run the data callback FOR and don't consume the current byte */
108
+ #define CALLBACK_DATA_NOADVANCE(FOR) \
109
+ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
110
+
111
+ /* Set the mark FOR; non-destructive if mark is already set */
112
+ #define MARK(FOR) \
113
+ do { \
114
+ if (!FOR##_mark) { \
115
+ FOR##_mark = p; \
116
+ } \
117
+ } while (0)
118
+
119
+
120
+ #define PROXY_CONNECTION "proxy-connection"
121
+ #define CONNECTION "connection"
122
+ #define CONTENT_LENGTH "content-length"
123
+ #define TRANSFER_ENCODING "transfer-encoding"
124
+ #define UPGRADE "upgrade"
125
+ #define CHUNKED "chunked"
126
+ #define KEEP_ALIVE "keep-alive"
127
+ #define CLOSE "close"
128
+
129
+
130
+ static const char *method_strings[] =
131
+ {
132
+ #define XX(num, name, string) #string,
133
+ HTTP_METHOD_MAP(XX)
134
+ #undef XX
135
+ };
136
+
137
+
138
+ /* Tokens as defined by rfc 2616. Also lowercases them.
139
+ * token = 1*<any CHAR except CTLs or separators>
140
+ * separators = "(" | ")" | "<" | ">" | "@"
141
+ * | "," | ";" | ":" | "\" | <">
142
+ * | "/" | "[" | "]" | "?" | "="
143
+ * | "{" | "}" | SP | HT
144
+ */
145
+ static const char tokens[256] = {
146
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
147
+ 0, 0, 0, 0, 0, 0, 0, 0,
148
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
149
+ 0, 0, 0, 0, 0, 0, 0, 0,
150
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
151
+ 0, 0, 0, 0, 0, 0, 0, 0,
152
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
153
+ 0, 0, 0, 0, 0, 0, 0, 0,
154
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
155
+ 0, '!', 0, '#', '$', '%', '&', '\'',
156
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
157
+ 0, 0, '*', '+', 0, '-', '.', 0,
158
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
159
+ '0', '1', '2', '3', '4', '5', '6', '7',
160
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
161
+ '8', '9', 0, 0, 0, 0, 0, 0,
162
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
163
+ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
164
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
165
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
166
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
167
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
168
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
169
+ 'x', 'y', 'z', 0, 0, 0, '^', '_',
170
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
171
+ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
172
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
173
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
174
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
175
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
176
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
177
+ 'x', 'y', 'z', 0, '|', 0, '~', 0 };
178
+
179
+
180
/* Maps a byte value to the value of the hexadecimal digit it encodes
 * ('0'-'9', 'a'-'f', 'A'-'F'), or to -1 when the byte is not a hex digit.
 *
 * All 256 entries are written out explicitly. With only the first 128
 * initializers present, the upper half of the table would be implicitly
 * zero-initialized, making every byte >= 0x80 look like the valid digit 0
 * instead of an invalid character.
 *
 * NOTE(review): the lookup site is not visible in this part of the file; it
 * is presumably indexed with an unsigned char and compared against -1 --
 * confirm against the chunk-size parsing states.
 */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 - 0x0f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 - 0x1f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 - 0x2f */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30 - 0x3f: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40 - 0x4f: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 - 0x5f */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60 - 0x6f: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 - 0x7f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 - 0x8f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 - 0x9f */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xa0 - 0xaf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xb0 - 0xbf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xc0 - 0xcf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xd0 - 0xdf */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xe0 - 0xef */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xf0 - 0xff */
  };
190
+
191
+
192
+ #if HTTP_PARSER_STRICT
193
+ # define T(v) 0
194
+ #else
195
+ # define T(v) v
196
+ #endif
197
+
198
+
199
+ static const uint8_t normal_url_char[32] = {
200
+ /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
201
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
202
+ /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
203
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
204
+ /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
205
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
206
+ /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
207
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
208
+ /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
209
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
210
+ /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
211
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
212
+ /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
213
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
214
+ /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
215
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
216
+ /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
217
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
218
+ /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
219
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
220
+ /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
221
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
222
+ /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
223
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
224
+ /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
225
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
226
+ /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
227
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
228
+ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
229
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
230
+ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
231
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
232
+
233
+ #undef T
234
+
235
+ enum state
236
+ { s_dead = 1 /* important that this is > 0 */
237
+
238
+ , s_start_req_or_res
239
+ , s_res_or_resp_H
240
+ , s_start_res
241
+ , s_res_H
242
+ , s_res_HT
243
+ , s_res_HTT
244
+ , s_res_HTTP
245
+ , s_res_first_http_major
246
+ , s_res_http_major
247
+ , s_res_first_http_minor
248
+ , s_res_http_minor
249
+ , s_res_first_status_code
250
+ , s_res_status_code
251
+ , s_res_status
252
+ , s_res_line_almost_done
253
+
254
+ , s_start_req
255
+
256
+ , s_req_method
257
+ , s_req_spaces_before_url
258
+ , s_req_schema
259
+ , s_req_schema_slash
260
+ , s_req_schema_slash_slash
261
+ , s_req_server_start
262
+ , s_req_server
263
+ , s_req_server_with_at
264
+ , s_req_path
265
+ , s_req_query_string_start
266
+ , s_req_query_string
267
+ , s_req_fragment_start
268
+ , s_req_fragment
269
+ , s_req_http_start
270
+ , s_req_http_H
271
+ , s_req_http_HT
272
+ , s_req_http_HTT
273
+ , s_req_http_HTTP
274
+ , s_req_first_http_major
275
+ , s_req_http_major
276
+ , s_req_first_http_minor
277
+ , s_req_http_minor
278
+ , s_req_line_almost_done
279
+
280
+ , s_header_field_start
281
+ , s_header_field
282
+ , s_header_value_start
283
+ , s_header_value
284
+ , s_header_value_lws
285
+
286
+ , s_header_almost_done
287
+
288
+ , s_chunk_size_start
289
+ , s_chunk_size
290
+ , s_chunk_parameters
291
+ , s_chunk_size_almost_done
292
+
293
+ , s_headers_almost_done
294
+ , s_headers_done
295
+
296
+ /* Important: 's_headers_done' must be the last 'header' state. All
297
+ * states beyond this must be 'body' states. It is used for overflow
298
+ * checking. See the PARSING_HEADER() macro.
299
+ */
300
+
301
+ , s_chunk_data
302
+ , s_chunk_data_almost_done
303
+ , s_chunk_data_done
304
+
305
+ , s_body_identity
306
+ , s_body_identity_eof
307
+
308
+ , s_message_done
309
+ };
310
+
311
+
312
/* Non-zero while `state` is one of the header-section states, i.e. any state
 * up to and including s_headers_done. The argument is parenthesized so that
 * expressions containing lower-precedence operators are compared as a whole.
 */
#define PARSING_HEADER(state) ((state) <= s_headers_done)
313
+
314
+
315
+ enum header_states
316
+ { h_general = 0
317
+ , h_C
318
+ , h_CO
319
+ , h_CON
320
+
321
+ , h_matching_connection
322
+ , h_matching_proxy_connection
323
+ , h_matching_content_length
324
+ , h_matching_transfer_encoding
325
+ , h_matching_upgrade
326
+
327
+ , h_connection
328
+ , h_content_length
329
+ , h_transfer_encoding
330
+ , h_upgrade
331
+
332
+ , h_matching_transfer_encoding_chunked
333
+ , h_matching_connection_keep_alive
334
+ , h_matching_connection_close
335
+
336
+ , h_transfer_encoding_chunked
337
+ , h_connection_keep_alive
338
+ , h_connection_close
339
+ };
340
+
341
+ enum http_host_state
342
+ {
343
+ s_http_host_dead = 1
344
+ , s_http_userinfo_start
345
+ , s_http_userinfo
346
+ , s_http_host_start
347
+ , s_http_host_v6_start
348
+ , s_http_host
349
+ , s_http_host_v6
350
+ , s_http_host_v6_end
351
+ , s_http_host_port_start
352
+ , s_http_host_port
353
+ };
354
+
355
+ /* Macros for character classes; depends on strict-mode */
356
/* Line terminators. */
#define CR                  '\r'
#define LF                  '\n'

/* ASCII case folding by setting bit 0x20. Only meaningful for letters; the
 * argument and the whole expansion are parenthesized so the macro is safe to
 * use with arbitrary expressions.
 */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))

/* Punctuation accepted (together with alphanumerics) in the userinfo part. */
#define IS_MARK(c)          ((c) == '-' || (c) == '_' || (c) == '.' || \
  (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
  (c) == ')')
#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
  (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
  (c) == '$' || (c) == ',')

#if HTTP_PARSER_STRICT
/* Strict mode: table lookups only. */
#define TOKEN(c)            (tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)(c)))
#define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
#else
/* Lenient mode: a space is passed through by TOKEN(), bytes with the high
 * bit set count as URL characters, and '_' is accepted in host names.
 */
#define TOKEN(c)            (((c) == ' ') ? ' ' : tokens[(unsigned char)(c)])
#define IS_URL_CHAR(c)                                                         \
  (BIT_AT(normal_url_char, (unsigned char)(c)) || ((c) & 0x80))
#define IS_HOST_CHAR(c)                                                        \
  (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif
381
+
382
+
383
+ #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
384
+
385
+
386
+ #if HTTP_PARSER_STRICT
387
+ # define STRICT_CHECK(cond) \
388
+ do { \
389
+ if (cond) { \
390
+ SET_ERRNO(HPE_STRICT); \
391
+ goto error; \
392
+ } \
393
+ } while (0)
394
+ # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
395
+ #else
396
+ # define STRICT_CHECK(cond)
397
+ # define NEW_MESSAGE() start_state
398
+ #endif
399
+
400
+
401
+ /* Map errno values to strings for human-readable output */
402
+ #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
403
+ static struct {
404
+ const char *name;
405
+ const char *description;
406
+ } http_strerror_tab[] = {
407
+ HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
408
+ };
409
+ #undef HTTP_STRERROR_GEN
410
+
411
+ int http_message_needs_eof(const http_parser *parser);
412
+
413
+ /* Our URL parser.
414
+ *
415
+ * This is designed to be shared by http_parser_execute() for URL validation,
416
+ * hence it has a state transition + byte-for-byte interface. In addition, it
417
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
418
+ * work of turning state transitions into URL components for its API.
419
+ *
420
+ * This function should only be invoked with non-space characters. It is
421
+ * assumed that the caller cares about (and can detect) the transition between
422
+ * URL and non-URL states by looking for these.
423
+ */
424
+ static enum state
425
+ parse_url_char(enum state s, const char ch)
426
+ {
427
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
428
+ return s_dead;
429
+ }
430
+
431
+ #if HTTP_PARSER_STRICT
432
+ if (ch == '\t' || ch == '\f') {
433
+ return s_dead;
434
+ }
435
+ #endif
436
+
437
+ switch (s) {
438
+ case s_req_spaces_before_url:
439
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
440
+ * All methods except CONNECT are followed by '/' or '*'.
441
+ */
442
+
443
+ if (ch == '/' || ch == '*') {
444
+ return s_req_path;
445
+ }
446
+
447
+ if (IS_ALPHA(ch)) {
448
+ return s_req_schema;
449
+ }
450
+
451
+ break;
452
+
453
+ case s_req_schema:
454
+ if (IS_ALPHA(ch)) {
455
+ return s;
456
+ }
457
+
458
+ if (ch == ':') {
459
+ return s_req_schema_slash;
460
+ }
461
+
462
+ break;
463
+
464
+ case s_req_schema_slash:
465
+ if (ch == '/') {
466
+ return s_req_schema_slash_slash;
467
+ }
468
+
469
+ break;
470
+
471
+ case s_req_schema_slash_slash:
472
+ if (ch == '/') {
473
+ return s_req_server_start;
474
+ }
475
+
476
+ break;
477
+
478
+ case s_req_server_with_at:
479
+ if (ch == '@') {
480
+ return s_dead;
481
+ }
482
+
483
+ /* FALLTHROUGH */
484
+ case s_req_server_start:
485
+ case s_req_server:
486
+ if (ch == '/') {
487
+ return s_req_path;
488
+ }
489
+
490
+ if (ch == '?') {
491
+ return s_req_query_string_start;
492
+ }
493
+
494
+ if (ch == '@') {
495
+ return s_req_server_with_at;
496
+ }
497
+
498
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
499
+ return s_req_server;
500
+ }
501
+
502
+ break;
503
+
504
+ case s_req_path:
505
+ if (IS_URL_CHAR(ch)) {
506
+ return s;
507
+ }
508
+
509
+ switch (ch) {
510
+ case '?':
511
+ return s_req_query_string_start;
512
+
513
+ case '#':
514
+ return s_req_fragment_start;
515
+ }
516
+
517
+ break;
518
+
519
+ case s_req_query_string_start:
520
+ case s_req_query_string:
521
+ if (IS_URL_CHAR(ch)) {
522
+ return s_req_query_string;
523
+ }
524
+
525
+ switch (ch) {
526
+ case '?':
527
+ /* allow extra '?' in query string */
528
+ return s_req_query_string;
529
+
530
+ case '#':
531
+ return s_req_fragment_start;
532
+ }
533
+
534
+ break;
535
+
536
+ case s_req_fragment_start:
537
+ if (IS_URL_CHAR(ch)) {
538
+ return s_req_fragment;
539
+ }
540
+
541
+ switch (ch) {
542
+ case '?':
543
+ return s_req_fragment;
544
+
545
+ case '#':
546
+ return s;
547
+ }
548
+
549
+ break;
550
+
551
+ case s_req_fragment:
552
+ if (IS_URL_CHAR(ch)) {
553
+ return s;
554
+ }
555
+
556
+ switch (ch) {
557
+ case '?':
558
+ case '#':
559
+ return s;
560
+ }
561
+
562
+ break;
563
+
564
+ default:
565
+ break;
566
+ }
567
+
568
+ /* We should never fall out of the switch above unless there's an error */
569
+ return s_dead;
570
+ }
571
+
572
+ size_t http_parser_execute (http_parser *parser,
573
+ const http_parser_settings *settings,
574
+ const char *data,
575
+ size_t len)
576
+ {
577
+ char c, ch;
578
+ int8_t unhex_val;
579
+ const char *p = data;
580
+ const char *header_field_mark = 0;
581
+ const char *header_value_mark = 0;
582
+ const char *url_mark = 0;
583
+ const char *body_mark = 0;
584
+
585
+ /* We're in an error state. Don't bother doing anything. */
586
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
587
+ return 0;
588
+ }
589
+
590
+ if (len == 0) {
591
+ switch (parser->state) {
592
+ case s_body_identity_eof:
593
+ /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
594
+ * we got paused.
595
+ */
596
+ CALLBACK_NOTIFY_NOADVANCE(message_complete);
597
+ return 0;
598
+
599
+ case s_dead:
600
+ case s_start_req_or_res:
601
+ case s_start_res:
602
+ case s_start_req:
603
+ return 0;
604
+
605
+ default:
606
+ SET_ERRNO(HPE_INVALID_EOF_STATE);
607
+ return 1;
608
+ }
609
+ }
610
+
611
+
612
+ if (parser->state == s_header_field)
613
+ header_field_mark = data;
614
+ if (parser->state == s_header_value)
615
+ header_value_mark = data;
616
+ switch (parser->state) {
617
+ case s_req_path:
618
+ case s_req_schema:
619
+ case s_req_schema_slash:
620
+ case s_req_schema_slash_slash:
621
+ case s_req_server_start:
622
+ case s_req_server:
623
+ case s_req_server_with_at:
624
+ case s_req_query_string_start:
625
+ case s_req_query_string:
626
+ case s_req_fragment_start:
627
+ case s_req_fragment:
628
+ url_mark = data;
629
+ break;
630
+ }
631
+
632
+ for (p=data; p != data + len; p++) {
633
+ ch = *p;
634
+
635
+ if (PARSING_HEADER(parser->state)) {
636
+ ++parser->nread;
637
+ /* Buffer overflow attack */
638
+ if (parser->nread > HTTP_MAX_HEADER_SIZE) {
639
+ SET_ERRNO(HPE_HEADER_OVERFLOW);
640
+ goto error;
641
+ }
642
+ }
643
+
644
+ reexecute_byte:
645
+ switch (parser->state) {
646
+
647
+ case s_dead:
648
+ /* this state is used after a 'Connection: close' message
649
+ * the parser will error out if it reads another message
650
+ */
651
+ if (ch == CR || ch == LF)
652
+ break;
653
+
654
+ SET_ERRNO(HPE_CLOSED_CONNECTION);
655
+ goto error;
656
+
657
+ case s_start_req_or_res:
658
+ {
659
+ if (ch == CR || ch == LF)
660
+ break;
661
+ parser->flags = 0;
662
+ parser->content_length = ULLONG_MAX;
663
+
664
+ if (ch == 'H') {
665
+ parser->state = s_res_or_resp_H;
666
+
667
+ CALLBACK_NOTIFY(message_begin);
668
+ } else {
669
+ parser->type = HTTP_REQUEST;
670
+ parser->state = s_start_req;
671
+ goto reexecute_byte;
672
+ }
673
+
674
+ break;
675
+ }
676
+
677
+ case s_res_or_resp_H:
678
+ if (ch == 'T') {
679
+ parser->type = HTTP_RESPONSE;
680
+ parser->state = s_res_HT;
681
+ } else {
682
+ if (ch != 'E') {
683
+ SET_ERRNO(HPE_INVALID_CONSTANT);
684
+ goto error;
685
+ }
686
+
687
+ parser->type = HTTP_REQUEST;
688
+ parser->method = HTTP_HEAD;
689
+ parser->index = 2;
690
+ parser->state = s_req_method;
691
+ }
692
+ break;
693
+
694
+ case s_start_res:
695
+ {
696
+ parser->flags = 0;
697
+ parser->content_length = ULLONG_MAX;
698
+
699
+ switch (ch) {
700
+ case 'H':
701
+ parser->state = s_res_H;
702
+ break;
703
+
704
+ case CR:
705
+ case LF:
706
+ break;
707
+
708
+ default:
709
+ SET_ERRNO(HPE_INVALID_CONSTANT);
710
+ goto error;
711
+ }
712
+
713
+ CALLBACK_NOTIFY(message_begin);
714
+ break;
715
+ }
716
+
717
+ case s_res_H:
718
+ STRICT_CHECK(ch != 'T');
719
+ parser->state = s_res_HT;
720
+ break;
721
+
722
+ case s_res_HT:
723
+ STRICT_CHECK(ch != 'T');
724
+ parser->state = s_res_HTT;
725
+ break;
726
+
727
+ case s_res_HTT:
728
+ STRICT_CHECK(ch != 'P');
729
+ parser->state = s_res_HTTP;
730
+ break;
731
+
732
+ case s_res_HTTP:
733
+ STRICT_CHECK(ch != '/');
734
+ parser->state = s_res_first_http_major;
735
+ break;
736
+
737
+ case s_res_first_http_major:
738
+ if (ch < '0' || ch > '9') {
739
+ SET_ERRNO(HPE_INVALID_VERSION);
740
+ goto error;
741
+ }
742
+
743
+ parser->http_major = ch - '0';
744
+ parser->state = s_res_http_major;
745
+ break;
746
+
747
+ /* major HTTP version or dot */
748
+ case s_res_http_major:
749
+ {
750
+ if (ch == '.') {
751
+ parser->state = s_res_first_http_minor;
752
+ break;
753
+ }
754
+
755
+ if (!IS_NUM(ch)) {
756
+ SET_ERRNO(HPE_INVALID_VERSION);
757
+ goto error;
758
+ }
759
+
760
+ parser->http_major *= 10;
761
+ parser->http_major += ch - '0';
762
+
763
+ if (parser->http_major > 999) {
764
+ SET_ERRNO(HPE_INVALID_VERSION);
765
+ goto error;
766
+ }
767
+
768
+ break;
769
+ }
770
+
771
+ /* first digit of minor HTTP version */
772
+ case s_res_first_http_minor:
773
+ if (!IS_NUM(ch)) {
774
+ SET_ERRNO(HPE_INVALID_VERSION);
775
+ goto error;
776
+ }
777
+
778
+ parser->http_minor = ch - '0';
779
+ parser->state = s_res_http_minor;
780
+ break;
781
+
782
+ /* minor HTTP version or the space that precedes the status code */
783
+ case s_res_http_minor:
784
+ {
785
+ if (ch == ' ') {
786
+ parser->state = s_res_first_status_code;
787
+ break;
788
+ }
789
+
790
+ if (!IS_NUM(ch)) {
791
+ SET_ERRNO(HPE_INVALID_VERSION);
792
+ goto error;
793
+ }
794
+
795
+ parser->http_minor *= 10;
796
+ parser->http_minor += ch - '0';
797
+
798
+ if (parser->http_minor > 999) {
799
+ SET_ERRNO(HPE_INVALID_VERSION);
800
+ goto error;
801
+ }
802
+
803
+ break;
804
+ }
805
+
806
+ case s_res_first_status_code:
807
+ {
808
+ if (!IS_NUM(ch)) {
809
+ if (ch == ' ') {
810
+ break;
811
+ }
812
+
813
+ SET_ERRNO(HPE_INVALID_STATUS);
814
+ goto error;
815
+ }
816
+ parser->status_code = ch - '0';
817
+ parser->state = s_res_status_code;
818
+ break;
819
+ }
820
+
821
+ case s_res_status_code:
822
+ {
823
+ if (!IS_NUM(ch)) {
824
+ switch (ch) {
825
+ case ' ':
826
+ parser->state = s_res_status;
827
+ break;
828
+ case CR:
829
+ parser->state = s_res_line_almost_done;
830
+ break;
831
+ case LF:
832
+ parser->state = s_header_field_start;
833
+ break;
834
+ default:
835
+ SET_ERRNO(HPE_INVALID_STATUS);
836
+ goto error;
837
+ }
838
+ break;
839
+ }
840
+
841
+ parser->status_code *= 10;
842
+ parser->status_code += ch - '0';
843
+
844
+ if (parser->status_code > 999) {
845
+ SET_ERRNO(HPE_INVALID_STATUS);
846
+ goto error;
847
+ }
848
+
849
+ break;
850
+ }
851
+
852
+ case s_res_status:
853
+ /* the human readable status. e.g. "NOT FOUND"
854
+ * we are not humans so just ignore this */
855
+ if (ch == CR) {
856
+ parser->state = s_res_line_almost_done;
857
+ break;
858
+ }
859
+
860
+ if (ch == LF) {
861
+ parser->state = s_header_field_start;
862
+ break;
863
+ }
864
+ break;
865
+
866
+ case s_res_line_almost_done:
867
+ STRICT_CHECK(ch != LF);
868
+ parser->state = s_header_field_start;
869
+ CALLBACK_NOTIFY(status_complete);
870
+ break;
871
+
872
+ case s_start_req:
873
+ {
874
+ if (ch == CR || ch == LF)
875
+ break;
876
+ parser->flags = 0;
877
+ parser->content_length = ULLONG_MAX;
878
+
879
+ if (!IS_ALPHA(ch)) {
880
+ SET_ERRNO(HPE_INVALID_METHOD);
881
+ goto error;
882
+ }
883
+
884
+ parser->method = (enum http_method) 0;
885
+ parser->index = 1;
886
+ switch (ch) {
887
+ case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
888
+ case 'D': parser->method = HTTP_DELETE; break;
889
+ case 'G': parser->method = HTTP_GET; break;
890
+ case 'H': parser->method = HTTP_HEAD; break;
891
+ case 'L': parser->method = HTTP_LOCK; break;
892
+ case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
893
+ case 'N': parser->method = HTTP_NOTIFY; break;
894
+ case 'O': parser->method = HTTP_OPTIONS; break;
895
+ case 'P': parser->method = HTTP_POST;
896
+ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
897
+ break;
898
+ case 'R': parser->method = HTTP_REPORT; break;
899
+ case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
900
+ case 'T': parser->method = HTTP_TRACE; break;
901
+ case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
902
+ default:
903
+ SET_ERRNO(HPE_INVALID_METHOD);
904
+ goto error;
905
+ }
906
+ parser->state = s_req_method;
907
+
908
+ CALLBACK_NOTIFY(message_begin);
909
+
910
+ break;
911
+ }
912
+
913
+ case s_req_method:
914
+ {
915
+ const char *matcher;
916
+ if (ch == '\0') {
917
+ SET_ERRNO(HPE_INVALID_METHOD);
918
+ goto error;
919
+ }
920
+
921
+ matcher = method_strings[parser->method];
922
+ if (ch == ' ' && matcher[parser->index] == '\0') {
923
+ parser->state = s_req_spaces_before_url;
924
+ } else if (ch == matcher[parser->index]) {
925
+ ; /* nada */
926
+ } else if (parser->method == HTTP_CONNECT) {
927
+ if (parser->index == 1 && ch == 'H') {
928
+ parser->method = HTTP_CHECKOUT;
929
+ } else if (parser->index == 2 && ch == 'P') {
930
+ parser->method = HTTP_COPY;
931
+ } else {
932
+ goto error;
933
+ }
934
+ } else if (parser->method == HTTP_MKCOL) {
935
+ if (parser->index == 1 && ch == 'O') {
936
+ parser->method = HTTP_MOVE;
937
+ } else if (parser->index == 1 && ch == 'E') {
938
+ parser->method = HTTP_MERGE;
939
+ } else if (parser->index == 1 && ch == '-') {
940
+ parser->method = HTTP_MSEARCH;
941
+ } else if (parser->index == 2 && ch == 'A') {
942
+ parser->method = HTTP_MKACTIVITY;
943
+ } else {
944
+ goto error;
945
+ }
946
+ } else if (parser->method == HTTP_SUBSCRIBE) {
947
+ if (parser->index == 1 && ch == 'E') {
948
+ parser->method = HTTP_SEARCH;
949
+ } else {
950
+ goto error;
951
+ }
952
+ } else if (parser->index == 1 && parser->method == HTTP_POST) {
953
+ if (ch == 'R') {
954
+ parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
955
+ } else if (ch == 'U') {
956
+ parser->method = HTTP_PUT; /* or HTTP_PURGE */
957
+ } else if (ch == 'A') {
958
+ parser->method = HTTP_PATCH;
959
+ } else {
960
+ goto error;
961
+ }
962
+ } else if (parser->index == 2) {
963
+ if (parser->method == HTTP_PUT) {
964
+ if (ch == 'R') parser->method = HTTP_PURGE;
965
+ } else if (parser->method == HTTP_UNLOCK) {
966
+ if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
967
+ }
968
+ } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
969
+ parser->method = HTTP_PROPPATCH;
970
+ } else {
971
+ SET_ERRNO(HPE_INVALID_METHOD);
972
+ goto error;
973
+ }
974
+
975
+ ++parser->index;
976
+ break;
977
+ }
978
+
979
+ case s_req_spaces_before_url:
980
+ {
981
+ if (ch == ' ') break;
982
+
983
+ MARK(url);
984
+ if (parser->method == HTTP_CONNECT) {
985
+ parser->state = s_req_server_start;
986
+ }
987
+
988
+ parser->state = parse_url_char((enum state)parser->state, ch);
989
+ if (parser->state == s_dead) {
990
+ SET_ERRNO(HPE_INVALID_URL);
991
+ goto error;
992
+ }
993
+
994
+ break;
995
+ }
996
+
997
+ case s_req_schema:
998
+ case s_req_schema_slash:
999
+ case s_req_schema_slash_slash:
1000
+ case s_req_server_start:
1001
+ {
1002
+ switch (ch) {
1003
+ /* No whitespace allowed here */
1004
+ case ' ':
1005
+ case CR:
1006
+ case LF:
1007
+ SET_ERRNO(HPE_INVALID_URL);
1008
+ goto error;
1009
+ default:
1010
+ parser->state = parse_url_char((enum state)parser->state, ch);
1011
+ if (parser->state == s_dead) {
1012
+ SET_ERRNO(HPE_INVALID_URL);
1013
+ goto error;
1014
+ }
1015
+ }
1016
+
1017
+ break;
1018
+ }
1019
+
1020
+ case s_req_server:
1021
+ case s_req_server_with_at:
1022
+ case s_req_path:
1023
+ case s_req_query_string_start:
1024
+ case s_req_query_string:
1025
+ case s_req_fragment_start:
1026
+ case s_req_fragment:
1027
+ {
1028
+ switch (ch) {
1029
+ case ' ':
1030
+ parser->state = s_req_http_start;
1031
+ CALLBACK_DATA(url);
1032
+ break;
1033
+ case CR:
1034
+ case LF:
1035
+ parser->http_major = 0;
1036
+ parser->http_minor = 9;
1037
+ parser->state = (ch == CR) ?
1038
+ s_req_line_almost_done :
1039
+ s_header_field_start;
1040
+ CALLBACK_DATA(url);
1041
+ break;
1042
+ default:
1043
+ parser->state = parse_url_char((enum state)parser->state, ch);
1044
+ if (parser->state == s_dead) {
1045
+ SET_ERRNO(HPE_INVALID_URL);
1046
+ goto error;
1047
+ }
1048
+ }
1049
+ break;
1050
+ }
1051
+
1052
+ case s_req_http_start:
1053
+ switch (ch) {
1054
+ case 'H':
1055
+ parser->state = s_req_http_H;
1056
+ break;
1057
+ case ' ':
1058
+ break;
1059
+ default:
1060
+ SET_ERRNO(HPE_INVALID_CONSTANT);
1061
+ goto error;
1062
+ }
1063
+ break;
1064
+
1065
+ case s_req_http_H:
1066
+ STRICT_CHECK(ch != 'T');
1067
+ parser->state = s_req_http_HT;
1068
+ break;
1069
+
1070
+ case s_req_http_HT:
1071
+ STRICT_CHECK(ch != 'T');
1072
+ parser->state = s_req_http_HTT;
1073
+ break;
1074
+
1075
+ case s_req_http_HTT:
1076
+ STRICT_CHECK(ch != 'P');
1077
+ parser->state = s_req_http_HTTP;
1078
+ break;
1079
+
1080
+ case s_req_http_HTTP:
1081
+ STRICT_CHECK(ch != '/');
1082
+ parser->state = s_req_first_http_major;
1083
+ break;
1084
+
1085
+ /* first digit of major HTTP version */
1086
+ case s_req_first_http_major:
1087
+ if (ch < '1' || ch > '9') {
1088
+ SET_ERRNO(HPE_INVALID_VERSION);
1089
+ goto error;
1090
+ }
1091
+
1092
+ parser->http_major = ch - '0';
1093
+ parser->state = s_req_http_major;
1094
+ break;
1095
+
1096
+ /* major HTTP version or dot */
1097
+ case s_req_http_major:
1098
+ {
1099
+ if (ch == '.') {
1100
+ parser->state = s_req_first_http_minor;
1101
+ break;
1102
+ }
1103
+
1104
+ if (!IS_NUM(ch)) {
1105
+ SET_ERRNO(HPE_INVALID_VERSION);
1106
+ goto error;
1107
+ }
1108
+
1109
+ parser->http_major *= 10;
1110
+ parser->http_major += ch - '0';
1111
+
1112
+ if (parser->http_major > 999) {
1113
+ SET_ERRNO(HPE_INVALID_VERSION);
1114
+ goto error;
1115
+ }
1116
+
1117
+ break;
1118
+ }
1119
+
1120
+ /* first digit of minor HTTP version */
1121
+ case s_req_first_http_minor:
1122
+ if (!IS_NUM(ch)) {
1123
+ SET_ERRNO(HPE_INVALID_VERSION);
1124
+ goto error;
1125
+ }
1126
+
1127
+ parser->http_minor = ch - '0';
1128
+ parser->state = s_req_http_minor;
1129
+ break;
1130
+
1131
+ /* minor HTTP version or end of request line */
1132
+ case s_req_http_minor:
1133
+ {
1134
+ if (ch == CR) {
1135
+ parser->state = s_req_line_almost_done;
1136
+ break;
1137
+ }
1138
+
1139
+ if (ch == LF) {
1140
+ parser->state = s_header_field_start;
1141
+ break;
1142
+ }
1143
+
1144
+ /* XXX allow spaces after digit? */
1145
+
1146
+ if (!IS_NUM(ch)) {
1147
+ SET_ERRNO(HPE_INVALID_VERSION);
1148
+ goto error;
1149
+ }
1150
+
1151
+ parser->http_minor *= 10;
1152
+ parser->http_minor += ch - '0';
1153
+
1154
+ if (parser->http_minor > 999) {
1155
+ SET_ERRNO(HPE_INVALID_VERSION);
1156
+ goto error;
1157
+ }
1158
+
1159
+ break;
1160
+ }
1161
+
1162
+ /* end of request line */
1163
+ case s_req_line_almost_done:
1164
+ {
1165
+ if (ch != LF) {
1166
+ SET_ERRNO(HPE_LF_EXPECTED);
1167
+ goto error;
1168
+ }
1169
+
1170
+ parser->state = s_header_field_start;
1171
+ break;
1172
+ }
1173
+
1174
+ case s_header_field_start:
1175
+ {
1176
+ if (ch == CR) {
1177
+ parser->state = s_headers_almost_done;
1178
+ break;
1179
+ }
1180
+
1181
+ if (ch == LF) {
1182
+ /* they might be just sending \n instead of \r\n so this would be
1183
+ * the second \n to denote the end of headers*/
1184
+ parser->state = s_headers_almost_done;
1185
+ goto reexecute_byte;
1186
+ }
1187
+
1188
+ c = TOKEN(ch);
1189
+
1190
+ if (!c) {
1191
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1192
+ goto error;
1193
+ }
1194
+
1195
+ MARK(header_field);
1196
+
1197
+ parser->index = 0;
1198
+ parser->state = s_header_field;
1199
+
1200
+ switch (c) {
1201
+ case 'c':
1202
+ parser->header_state = h_C;
1203
+ break;
1204
+
1205
+ case 'p':
1206
+ parser->header_state = h_matching_proxy_connection;
1207
+ break;
1208
+
1209
+ case 't':
1210
+ parser->header_state = h_matching_transfer_encoding;
1211
+ break;
1212
+
1213
+ case 'u':
1214
+ parser->header_state = h_matching_upgrade;
1215
+ break;
1216
+
1217
+ default:
1218
+ parser->header_state = h_general;
1219
+ break;
1220
+ }
1221
+ break;
1222
+ }
1223
+
1224
+ case s_header_field:
1225
+ {
1226
+ c = TOKEN(ch);
1227
+
1228
+ if (c) {
1229
+ switch (parser->header_state) {
1230
+ case h_general:
1231
+ break;
1232
+
1233
+ case h_C:
1234
+ parser->index++;
1235
+ parser->header_state = (c == 'o' ? h_CO : h_general);
1236
+ break;
1237
+
1238
+ case h_CO:
1239
+ parser->index++;
1240
+ parser->header_state = (c == 'n' ? h_CON : h_general);
1241
+ break;
1242
+
1243
+ case h_CON:
1244
+ parser->index++;
1245
+ switch (c) {
1246
+ case 'n':
1247
+ parser->header_state = h_matching_connection;
1248
+ break;
1249
+ case 't':
1250
+ parser->header_state = h_matching_content_length;
1251
+ break;
1252
+ default:
1253
+ parser->header_state = h_general;
1254
+ break;
1255
+ }
1256
+ break;
1257
+
1258
+ /* connection */
1259
+
1260
+ case h_matching_connection:
1261
+ parser->index++;
1262
+ if (parser->index > sizeof(CONNECTION)-1
1263
+ || c != CONNECTION[parser->index]) {
1264
+ parser->header_state = h_general;
1265
+ } else if (parser->index == sizeof(CONNECTION)-2) {
1266
+ parser->header_state = h_connection;
1267
+ }
1268
+ break;
1269
+
1270
+ /* proxy-connection */
1271
+
1272
+ case h_matching_proxy_connection:
1273
+ parser->index++;
1274
+ if (parser->index > sizeof(PROXY_CONNECTION)-1
1275
+ || c != PROXY_CONNECTION[parser->index]) {
1276
+ parser->header_state = h_general;
1277
+ } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1278
+ parser->header_state = h_connection;
1279
+ }
1280
+ break;
1281
+
1282
+ /* content-length */
1283
+
1284
+ case h_matching_content_length:
1285
+ parser->index++;
1286
+ if (parser->index > sizeof(CONTENT_LENGTH)-1
1287
+ || c != CONTENT_LENGTH[parser->index]) {
1288
+ parser->header_state = h_general;
1289
+ } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1290
+ parser->header_state = h_content_length;
1291
+ }
1292
+ break;
1293
+
1294
+ /* transfer-encoding */
1295
+
1296
+ case h_matching_transfer_encoding:
1297
+ parser->index++;
1298
+ if (parser->index > sizeof(TRANSFER_ENCODING)-1
1299
+ || c != TRANSFER_ENCODING[parser->index]) {
1300
+ parser->header_state = h_general;
1301
+ } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1302
+ parser->header_state = h_transfer_encoding;
1303
+ }
1304
+ break;
1305
+
1306
+ /* upgrade */
1307
+
1308
+ case h_matching_upgrade:
1309
+ parser->index++;
1310
+ if (parser->index > sizeof(UPGRADE)-1
1311
+ || c != UPGRADE[parser->index]) {
1312
+ parser->header_state = h_general;
1313
+ } else if (parser->index == sizeof(UPGRADE)-2) {
1314
+ parser->header_state = h_upgrade;
1315
+ }
1316
+ break;
1317
+
1318
+ case h_connection:
1319
+ case h_content_length:
1320
+ case h_transfer_encoding:
1321
+ case h_upgrade:
1322
+ if (ch != ' ') parser->header_state = h_general;
1323
+ break;
1324
+
1325
+ default:
1326
+ assert(0 && "Unknown header_state");
1327
+ break;
1328
+ }
1329
+ break;
1330
+ }
1331
+
1332
+ if (ch == ':') {
1333
+ parser->state = s_header_value_start;
1334
+ CALLBACK_DATA(header_field);
1335
+ break;
1336
+ }
1337
+
1338
+ if (ch == CR) {
1339
+ parser->state = s_header_almost_done;
1340
+ CALLBACK_DATA(header_field);
1341
+ break;
1342
+ }
1343
+
1344
+ if (ch == LF) {
1345
+ parser->state = s_header_field_start;
1346
+ CALLBACK_DATA(header_field);
1347
+ break;
1348
+ }
1349
+
1350
+ SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1351
+ goto error;
1352
+ }
1353
+
1354
+ case s_header_value_start:
1355
+ {
1356
+ if (ch == ' ' || ch == '\t') break;
1357
+
1358
+ MARK(header_value);
1359
+
1360
+ parser->state = s_header_value;
1361
+ parser->index = 0;
1362
+
1363
+ if (ch == CR) {
1364
+ parser->header_state = h_general;
1365
+ parser->state = s_header_almost_done;
1366
+ CALLBACK_DATA(header_value);
1367
+ break;
1368
+ }
1369
+
1370
+ if (ch == LF) {
1371
+ parser->state = s_header_field_start;
1372
+ CALLBACK_DATA(header_value);
1373
+ break;
1374
+ }
1375
+
1376
+ c = LOWER(ch);
1377
+
1378
+ switch (parser->header_state) {
1379
+ case h_upgrade:
1380
+ parser->flags |= F_UPGRADE;
1381
+ parser->header_state = h_general;
1382
+ break;
1383
+
1384
+ case h_transfer_encoding:
1385
+ /* looking for 'Transfer-Encoding: chunked' */
1386
+ if ('c' == c) {
1387
+ parser->header_state = h_matching_transfer_encoding_chunked;
1388
+ } else {
1389
+ parser->header_state = h_general;
1390
+ }
1391
+ break;
1392
+
1393
+ case h_content_length:
1394
+ if (!IS_NUM(ch)) {
1395
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1396
+ goto error;
1397
+ }
1398
+
1399
+ parser->content_length = ch - '0';
1400
+ break;
1401
+
1402
+ case h_connection:
1403
+ /* looking for 'Connection: keep-alive' */
1404
+ if (c == 'k') {
1405
+ parser->header_state = h_matching_connection_keep_alive;
1406
+ /* looking for 'Connection: close' */
1407
+ } else if (c == 'c') {
1408
+ parser->header_state = h_matching_connection_close;
1409
+ } else {
1410
+ parser->header_state = h_general;
1411
+ }
1412
+ break;
1413
+
1414
+ default:
1415
+ parser->header_state = h_general;
1416
+ break;
1417
+ }
1418
+ break;
1419
+ }
1420
+
1421
+ case s_header_value:
1422
+ {
1423
+
1424
+ if (ch == CR) {
1425
+ parser->state = s_header_almost_done;
1426
+ CALLBACK_DATA(header_value);
1427
+ break;
1428
+ }
1429
+
1430
+ if (ch == LF) {
1431
+ parser->state = s_header_almost_done;
1432
+ CALLBACK_DATA_NOADVANCE(header_value);
1433
+ goto reexecute_byte;
1434
+ }
1435
+
1436
+ c = LOWER(ch);
1437
+
1438
+ switch (parser->header_state) {
1439
+ case h_general:
1440
+ break;
1441
+
1442
+ case h_connection:
1443
+ case h_transfer_encoding:
1444
+ assert(0 && "Shouldn't get here.");
1445
+ break;
1446
+
1447
+ case h_content_length:
1448
+ {
1449
+ uint64_t t;
1450
+
1451
+ if (ch == ' ') break;
1452
+
1453
+ if (!IS_NUM(ch)) {
1454
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1455
+ goto error;
1456
+ }
1457
+
1458
+ t = parser->content_length;
1459
+ t *= 10;
1460
+ t += ch - '0';
1461
+
1462
+ /* Overflow? */
1463
+ if (t < parser->content_length || t == ULLONG_MAX) {
1464
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1465
+ goto error;
1466
+ }
1467
+
1468
+ parser->content_length = t;
1469
+ break;
1470
+ }
1471
+
1472
+ /* Transfer-Encoding: chunked */
1473
+ case h_matching_transfer_encoding_chunked:
1474
+ parser->index++;
1475
+ if (parser->index > sizeof(CHUNKED)-1
1476
+ || c != CHUNKED[parser->index]) {
1477
+ parser->header_state = h_general;
1478
+ } else if (parser->index == sizeof(CHUNKED)-2) {
1479
+ parser->header_state = h_transfer_encoding_chunked;
1480
+ }
1481
+ break;
1482
+
1483
+ /* looking for 'Connection: keep-alive' */
1484
+ case h_matching_connection_keep_alive:
1485
+ parser->index++;
1486
+ if (parser->index > sizeof(KEEP_ALIVE)-1
1487
+ || c != KEEP_ALIVE[parser->index]) {
1488
+ parser->header_state = h_general;
1489
+ } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1490
+ parser->header_state = h_connection_keep_alive;
1491
+ }
1492
+ break;
1493
+
1494
+ /* looking for 'Connection: close' */
1495
+ case h_matching_connection_close:
1496
+ parser->index++;
1497
+ if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1498
+ parser->header_state = h_general;
1499
+ } else if (parser->index == sizeof(CLOSE)-2) {
1500
+ parser->header_state = h_connection_close;
1501
+ }
1502
+ break;
1503
+
1504
+ case h_transfer_encoding_chunked:
1505
+ case h_connection_keep_alive:
1506
+ case h_connection_close:
1507
+ if (ch != ' ') parser->header_state = h_general;
1508
+ break;
1509
+
1510
+ default:
1511
+ parser->state = s_header_value;
1512
+ parser->header_state = h_general;
1513
+ break;
1514
+ }
1515
+ break;
1516
+ }
1517
+
1518
+ case s_header_almost_done:
1519
+ {
1520
+ STRICT_CHECK(ch != LF);
1521
+
1522
+ parser->state = s_header_value_lws;
1523
+
1524
+ switch (parser->header_state) {
1525
+ case h_connection_keep_alive:
1526
+ parser->flags |= F_CONNECTION_KEEP_ALIVE;
1527
+ break;
1528
+ case h_connection_close:
1529
+ parser->flags |= F_CONNECTION_CLOSE;
1530
+ break;
1531
+ case h_transfer_encoding_chunked:
1532
+ parser->flags |= F_CHUNKED;
1533
+ break;
1534
+ default:
1535
+ break;
1536
+ }
1537
+
1538
+ break;
1539
+ }
1540
+
1541
+ case s_header_value_lws:
1542
+ {
1543
+ if (ch == ' ' || ch == '\t')
1544
+ parser->state = s_header_value_start;
1545
+ else
1546
+ {
1547
+ parser->state = s_header_field_start;
1548
+ goto reexecute_byte;
1549
+ }
1550
+ break;
1551
+ }
1552
+
1553
+ case s_headers_almost_done:
1554
+ {
1555
+ STRICT_CHECK(ch != LF);
1556
+
1557
+ if (parser->flags & F_TRAILING) {
1558
+ /* End of a chunked request */
1559
+ parser->state = NEW_MESSAGE();
1560
+ CALLBACK_NOTIFY(message_complete);
1561
+ break;
1562
+ }
1563
+
1564
+ parser->state = s_headers_done;
1565
+
1566
+ /* Set this here so that on_headers_complete() callbacks can see it */
1567
+ parser->upgrade =
1568
+ (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1569
+
1570
+ /* Here we call the headers_complete callback. This is somewhat
1571
+ * different than other callbacks because if the user returns 1, we
1572
+ * will interpret that as saying that this message has no body. This
1573
+ * is needed for the annoying case of receiving a response to a HEAD
1574
+ * request.
1575
+ *
1576
+ * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1577
+ * we have to simulate it by handling a change in errno below.
1578
+ */
1579
+ if (settings->on_headers_complete) {
1580
+ switch (settings->on_headers_complete(parser)) {
1581
+ case 0:
1582
+ break;
1583
+
1584
+ case 1:
1585
+ parser->flags |= F_SKIPBODY;
1586
+ break;
1587
+
1588
+ default:
1589
+ SET_ERRNO(HPE_CB_headers_complete);
1590
+ return p - data; /* Error */
1591
+ }
1592
+ }
1593
+
1594
+ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1595
+ return p - data;
1596
+ }
1597
+
1598
+ goto reexecute_byte;
1599
+ }
1600
+
1601
+ case s_headers_done:
1602
+ {
1603
+ STRICT_CHECK(ch != LF);
1604
+
1605
+ parser->nread = 0;
1606
+
1607
+ /* Exit, the rest of the connect is in a different protocol. */
1608
+ if (parser->upgrade) {
1609
+ parser->state = NEW_MESSAGE();
1610
+ CALLBACK_NOTIFY(message_complete);
1611
+ return (p - data) + 1;
1612
+ }
1613
+
1614
+ if (parser->flags & F_SKIPBODY) {
1615
+ parser->state = NEW_MESSAGE();
1616
+ CALLBACK_NOTIFY(message_complete);
1617
+ } else if (parser->flags & F_CHUNKED) {
1618
+ /* chunked encoding - ignore Content-Length header */
1619
+ parser->state = s_chunk_size_start;
1620
+ } else {
1621
+ if (parser->content_length == 0) {
1622
+ /* Content-Length header given but zero: Content-Length: 0\r\n */
1623
+ parser->state = NEW_MESSAGE();
1624
+ CALLBACK_NOTIFY(message_complete);
1625
+ } else if (parser->content_length != ULLONG_MAX) {
1626
+ /* Content-Length header given and non-zero */
1627
+ parser->state = s_body_identity;
1628
+ } else {
1629
+ if (parser->type == HTTP_REQUEST ||
1630
+ !http_message_needs_eof(parser)) {
1631
+ /* Assume content-length 0 - read the next */
1632
+ parser->state = NEW_MESSAGE();
1633
+ CALLBACK_NOTIFY(message_complete);
1634
+ } else {
1635
+ /* Read body until EOF */
1636
+ parser->state = s_body_identity_eof;
1637
+ }
1638
+ }
1639
+ }
1640
+
1641
+ break;
1642
+ }
1643
+
1644
+ case s_body_identity:
1645
+ {
1646
+ uint64_t to_read = MIN(parser->content_length,
1647
+ (uint64_t) ((data + len) - p));
1648
+
1649
+ assert(parser->content_length != 0
1650
+ && parser->content_length != ULLONG_MAX);
1651
+
1652
+ /* The difference between advancing content_length and p is because
1653
+ * the latter will automatically advance on the next loop iteration.
1654
+ * Further, if content_length ends up at 0, we want to see the last
1655
+ * byte again for our message complete callback.
1656
+ */
1657
+ MARK(body);
1658
+ parser->content_length -= to_read;
1659
+ p += to_read - 1;
1660
+
1661
+ if (parser->content_length == 0) {
1662
+ parser->state = s_message_done;
1663
+
1664
+ /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1665
+ *
1666
+ * The alternative to doing this is to wait for the next byte to
1667
+ * trigger the data callback, just as in every other case. The
1668
+ * problem with this is that this makes it difficult for the test
1669
+ * harness to distinguish between complete-on-EOF and
1670
+ * complete-on-length. It's not clear that this distinction is
1671
+ * important for applications, but let's keep it for now.
1672
+ */
1673
+ CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1674
+ goto reexecute_byte;
1675
+ }
1676
+
1677
+ break;
1678
+ }
1679
+
1680
+ /* read until EOF */
1681
+ case s_body_identity_eof:
1682
+ MARK(body);
1683
+ p = data + len - 1;
1684
+
1685
+ break;
1686
+
1687
+ case s_message_done:
1688
+ parser->state = NEW_MESSAGE();
1689
+ CALLBACK_NOTIFY(message_complete);
1690
+ break;
1691
+
1692
+ case s_chunk_size_start:
1693
+ {
1694
+ assert(parser->nread == 1);
1695
+ assert(parser->flags & F_CHUNKED);
1696
+
1697
+ unhex_val = unhex[(unsigned char)ch];
1698
+ if (unhex_val == -1) {
1699
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1700
+ goto error;
1701
+ }
1702
+
1703
+ parser->content_length = unhex_val;
1704
+ parser->state = s_chunk_size;
1705
+ break;
1706
+ }
1707
+
1708
+ case s_chunk_size:
1709
+ {
1710
+ uint64_t t;
1711
+
1712
+ assert(parser->flags & F_CHUNKED);
1713
+
1714
+ if (ch == CR) {
1715
+ parser->state = s_chunk_size_almost_done;
1716
+ break;
1717
+ }
1718
+
1719
+ unhex_val = unhex[(unsigned char)ch];
1720
+
1721
+ if (unhex_val == -1) {
1722
+ if (ch == ';' || ch == ' ') {
1723
+ parser->state = s_chunk_parameters;
1724
+ break;
1725
+ }
1726
+
1727
+ SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1728
+ goto error;
1729
+ }
1730
+
1731
+ t = parser->content_length;
1732
+ t *= 16;
1733
+ t += unhex_val;
1734
+
1735
+ /* Overflow? */
1736
+ if (t < parser->content_length || t == ULLONG_MAX) {
1737
+ SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1738
+ goto error;
1739
+ }
1740
+
1741
+ parser->content_length = t;
1742
+ break;
1743
+ }
1744
+
1745
+ case s_chunk_parameters:
1746
+ {
1747
+ assert(parser->flags & F_CHUNKED);
1748
+ /* just ignore this shit. TODO check for overflow */
1749
+ if (ch == CR) {
1750
+ parser->state = s_chunk_size_almost_done;
1751
+ break;
1752
+ }
1753
+ break;
1754
+ }
1755
+
1756
+ case s_chunk_size_almost_done:
1757
+ {
1758
+ assert(parser->flags & F_CHUNKED);
1759
+ STRICT_CHECK(ch != LF);
1760
+
1761
+ parser->nread = 0;
1762
+
1763
+ if (parser->content_length == 0) {
1764
+ parser->flags |= F_TRAILING;
1765
+ parser->state = s_header_field_start;
1766
+ } else {
1767
+ parser->state = s_chunk_data;
1768
+ }
1769
+ break;
1770
+ }
1771
+
1772
+ case s_chunk_data:
1773
+ {
1774
+ uint64_t to_read = MIN(parser->content_length,
1775
+ (uint64_t) ((data + len) - p));
1776
+
1777
+ assert(parser->flags & F_CHUNKED);
1778
+ assert(parser->content_length != 0
1779
+ && parser->content_length != ULLONG_MAX);
1780
+
1781
+ /* See the explanation in s_body_identity for why the content
1782
+ * length and data pointers are managed this way.
1783
+ */
1784
+ MARK(body);
1785
+ parser->content_length -= to_read;
1786
+ p += to_read - 1;
1787
+
1788
+ if (parser->content_length == 0) {
1789
+ parser->state = s_chunk_data_almost_done;
1790
+ }
1791
+
1792
+ break;
1793
+ }
1794
+
1795
+ case s_chunk_data_almost_done:
1796
+ assert(parser->flags & F_CHUNKED);
1797
+ assert(parser->content_length == 0);
1798
+ STRICT_CHECK(ch != CR);
1799
+ parser->state = s_chunk_data_done;
1800
+ CALLBACK_DATA(body);
1801
+ break;
1802
+
1803
+ case s_chunk_data_done:
1804
+ assert(parser->flags & F_CHUNKED);
1805
+ STRICT_CHECK(ch != LF);
1806
+ parser->nread = 0;
1807
+ parser->state = s_chunk_size_start;
1808
+ break;
1809
+
1810
+ default:
1811
+ assert(0 && "unhandled state");
1812
+ SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1813
+ goto error;
1814
+ }
1815
+ }
1816
+
1817
+ /* Run callbacks for any marks that we have leftover after we ran out of
1818
+ * bytes. There should be at most one of these set, so it's OK to invoke
1819
+ * them in series (unset marks will not result in callbacks).
1820
+ *
1821
+ * We use the NOADVANCE() variety of callbacks here because 'p' has already
1822
+ * overflowed 'data' and this allows us to correct for the off-by-one that
1823
+ * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1824
+ * value that's in-bounds).
1825
+ */
1826
+
1827
+ assert(((header_field_mark ? 1 : 0) +
1828
+ (header_value_mark ? 1 : 0) +
1829
+ (url_mark ? 1 : 0) +
1830
+ (body_mark ? 1 : 0)) <= 1);
1831
+
1832
+ CALLBACK_DATA_NOADVANCE(header_field);
1833
+ CALLBACK_DATA_NOADVANCE(header_value);
1834
+ CALLBACK_DATA_NOADVANCE(url);
1835
+ CALLBACK_DATA_NOADVANCE(body);
1836
+
1837
+ return len;
1838
+
1839
+ error:
1840
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1841
+ SET_ERRNO(HPE_UNKNOWN);
1842
+ }
1843
+
1844
+ return (p - data);
1845
+ }
1846
+
1847
+
1848
+ /* Does the parser need to see an EOF to find the end of the message? */
1849
+ int
1850
+ http_message_needs_eof (const http_parser *parser)
1851
+ {
1852
+ if (parser->type == HTTP_REQUEST) {
1853
+ return 0;
1854
+ }
1855
+
1856
+ /* See RFC 2616 section 4.4 */
1857
+ if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1858
+ parser->status_code == 204 || /* No Content */
1859
+ parser->status_code == 304 || /* Not Modified */
1860
+ parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1861
+ return 0;
1862
+ }
1863
+
1864
+ if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1865
+ return 0;
1866
+ }
1867
+
1868
+ return 1;
1869
+ }
1870
+
1871
+
1872
+ int
1873
+ http_should_keep_alive (const http_parser *parser)
1874
+ {
1875
+ if (parser->http_major > 0 && parser->http_minor > 0) {
1876
+ /* HTTP/1.1 */
1877
+ if (parser->flags & F_CONNECTION_CLOSE) {
1878
+ return 0;
1879
+ }
1880
+ } else {
1881
+ /* HTTP/1.0 or earlier */
1882
+ if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1883
+ return 0;
1884
+ }
1885
+ }
1886
+
1887
+ return !http_message_needs_eof(parser);
1888
+ }
1889
+
1890
+
1891
+ const char *
1892
+ http_method_str (enum http_method m)
1893
+ {
1894
+ return ELEM_AT(method_strings, m, "<unknown>");
1895
+ }
1896
+
1897
+
1898
+ void
1899
+ http_parser_init (http_parser *parser, enum http_parser_type t)
1900
+ {
1901
+ void *data = parser->data; /* preserve application data */
1902
+ memset(parser, 0, sizeof(*parser));
1903
+ parser->data = data;
1904
+ parser->type = t;
1905
+ parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1906
+ parser->http_errno = HPE_OK;
1907
+ }
1908
+
1909
+ const char *
1910
+ http_errno_name(enum http_errno err) {
1911
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1912
+ return http_strerror_tab[err].name;
1913
+ }
1914
+
1915
+ const char *
1916
+ http_errno_description(enum http_errno err) {
1917
+ assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1918
+ return http_strerror_tab[err].description;
1919
+ }
1920
+
1921
+ static enum http_host_state
1922
+ http_parse_host_char(enum http_host_state s, const char ch) {
1923
+ switch(s) {
1924
+ case s_http_userinfo:
1925
+ case s_http_userinfo_start:
1926
+ if (ch == '@') {
1927
+ return s_http_host_start;
1928
+ }
1929
+
1930
+ if (IS_USERINFO_CHAR(ch)) {
1931
+ return s_http_userinfo;
1932
+ }
1933
+ break;
1934
+
1935
+ case s_http_host_start:
1936
+ if (ch == '[') {
1937
+ return s_http_host_v6_start;
1938
+ }
1939
+
1940
+ if (IS_HOST_CHAR(ch)) {
1941
+ return s_http_host;
1942
+ }
1943
+
1944
+ break;
1945
+
1946
+ case s_http_host:
1947
+ if (IS_HOST_CHAR(ch)) {
1948
+ return s_http_host;
1949
+ }
1950
+
1951
+ /* FALLTHROUGH */
1952
+ case s_http_host_v6_end:
1953
+ if (ch == ':') {
1954
+ return s_http_host_port_start;
1955
+ }
1956
+
1957
+ break;
1958
+
1959
+ case s_http_host_v6:
1960
+ if (ch == ']') {
1961
+ return s_http_host_v6_end;
1962
+ }
1963
+
1964
+ /* FALLTHROUGH */
1965
+ case s_http_host_v6_start:
1966
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
1967
+ return s_http_host_v6;
1968
+ }
1969
+
1970
+ break;
1971
+
1972
+ case s_http_host_port:
1973
+ case s_http_host_port_start:
1974
+ if (IS_NUM(ch)) {
1975
+ return s_http_host_port;
1976
+ }
1977
+
1978
+ break;
1979
+
1980
+ default:
1981
+ break;
1982
+ }
1983
+ return s_http_host_dead;
1984
+ }
1985
+
1986
+ static int
1987
+ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
1988
+ enum http_host_state s;
1989
+
1990
+ const char *p;
1991
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
1992
+
1993
+ u->field_data[UF_HOST].len = 0;
1994
+
1995
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
1996
+
1997
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
1998
+ enum http_host_state new_s = http_parse_host_char(s, *p);
1999
+
2000
+ if (new_s == s_http_host_dead) {
2001
+ return 1;
2002
+ }
2003
+
2004
+ switch(new_s) {
2005
+ case s_http_host:
2006
+ if (s != s_http_host) {
2007
+ u->field_data[UF_HOST].off = p - buf;
2008
+ }
2009
+ u->field_data[UF_HOST].len++;
2010
+ break;
2011
+
2012
+ case s_http_host_v6:
2013
+ if (s != s_http_host_v6) {
2014
+ u->field_data[UF_HOST].off = p - buf;
2015
+ }
2016
+ u->field_data[UF_HOST].len++;
2017
+ break;
2018
+
2019
+ case s_http_host_port:
2020
+ if (s != s_http_host_port) {
2021
+ u->field_data[UF_PORT].off = p - buf;
2022
+ u->field_data[UF_PORT].len = 0;
2023
+ u->field_set |= (1 << UF_PORT);
2024
+ }
2025
+ u->field_data[UF_PORT].len++;
2026
+ break;
2027
+
2028
+ case s_http_userinfo:
2029
+ if (s != s_http_userinfo) {
2030
+ u->field_data[UF_USERINFO].off = p - buf ;
2031
+ u->field_data[UF_USERINFO].len = 0;
2032
+ u->field_set |= (1 << UF_USERINFO);
2033
+ }
2034
+ u->field_data[UF_USERINFO].len++;
2035
+ break;
2036
+
2037
+ default:
2038
+ break;
2039
+ }
2040
+ s = new_s;
2041
+ }
2042
+
2043
+ /* Make sure we don't end somewhere unexpected */
2044
+ switch (s) {
2045
+ case s_http_host_start:
2046
+ case s_http_host_v6_start:
2047
+ case s_http_host_v6:
2048
+ case s_http_host_port_start:
2049
+ case s_http_userinfo:
2050
+ case s_http_userinfo_start:
2051
+ return 1;
2052
+ default:
2053
+ break;
2054
+ }
2055
+
2056
+ return 0;
2057
+ }
2058
+
2059
+ int
2060
+ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2061
+ struct http_parser_url *u)
2062
+ {
2063
+ enum state s;
2064
+ const char *p;
2065
+ enum http_parser_url_fields uf, old_uf;
2066
+ int found_at = 0;
2067
+
2068
+ u->port = u->field_set = 0;
2069
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2070
+ uf = old_uf = UF_MAX;
2071
+
2072
+ for (p = buf; p < buf + buflen; p++) {
2073
+ s = parse_url_char(s, *p);
2074
+
2075
+ /* Figure out the next field that we're operating on */
2076
+ switch (s) {
2077
+ case s_dead:
2078
+ return 1;
2079
+
2080
+ /* Skip delimeters */
2081
+ case s_req_schema_slash:
2082
+ case s_req_schema_slash_slash:
2083
+ case s_req_server_start:
2084
+ case s_req_query_string_start:
2085
+ case s_req_fragment_start:
2086
+ continue;
2087
+
2088
+ case s_req_schema:
2089
+ uf = UF_SCHEMA;
2090
+ break;
2091
+
2092
+ case s_req_server_with_at:
2093
+ found_at = 1;
2094
+
2095
+ /* FALLTROUGH */
2096
+ case s_req_server:
2097
+ uf = UF_HOST;
2098
+ break;
2099
+
2100
+ case s_req_path:
2101
+ uf = UF_PATH;
2102
+ break;
2103
+
2104
+ case s_req_query_string:
2105
+ uf = UF_QUERY;
2106
+ break;
2107
+
2108
+ case s_req_fragment:
2109
+ uf = UF_FRAGMENT;
2110
+ break;
2111
+
2112
+ default:
2113
+ assert(!"Unexpected state");
2114
+ return 1;
2115
+ }
2116
+
2117
+ /* Nothing's changed; soldier on */
2118
+ if (uf == old_uf) {
2119
+ u->field_data[uf].len++;
2120
+ continue;
2121
+ }
2122
+
2123
+ u->field_data[uf].off = p - buf;
2124
+ u->field_data[uf].len = 1;
2125
+
2126
+ u->field_set |= (1 << uf);
2127
+ old_uf = uf;
2128
+ }
2129
+
2130
+ /* host must be present if there is a schema */
2131
+ /* parsing http:///toto will fail */
2132
+ if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2133
+ if (http_parse_host(buf, u, found_at) != 0) {
2134
+ return 1;
2135
+ }
2136
+ }
2137
+
2138
+ /* CONNECT requests can only contain "hostname:port" */
2139
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2140
+ return 1;
2141
+ }
2142
+
2143
+ if (u->field_set & (1 << UF_PORT)) {
2144
+ /* Don't bother with endp; we've already validated the string */
2145
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2146
+
2147
+ /* Ports have a max value of 2^16 */
2148
+ if (v > 0xffff) {
2149
+ return 1;
2150
+ }
2151
+
2152
+ u->port = (uint16_t) v;
2153
+ }
2154
+
2155
+ return 0;
2156
+ }
2157
+
2158
+ void
2159
+ http_parser_pause(http_parser *parser, int paused) {
2160
+ /* Users should only be pausing/unpausing a parser that is not in an error
2161
+ * state. In non-debug builds, there's not much that we can do about this
2162
+ * other than ignore it.
2163
+ */
2164
+ if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2165
+ HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2166
+ SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2167
+ } else {
2168
+ assert(0 && "Attempting to pause parser in error state");
2169
+ }
2170
+ }
2171
+
2172
+ int
2173
+ http_body_is_final(const struct http_parser *parser) {
2174
+ return parser->state == s_message_done;
2175
+ }