pico_http_parser 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/benchmark/benchmark.rb +13 -5
- data/ext/pico_http_parser/pico_http_parser.c +20 -7
- data/ext/pico_http_parser/picohttpparser/.clang-format +6 -0
- data/ext/pico_http_parser/picohttpparser/Jamfile +7 -0
- data/ext/pico_http_parser/picohttpparser/README.md +92 -0
- data/ext/pico_http_parser/picohttpparser/bench.c +32 -19
- data/ext/pico_http_parser/picohttpparser/picohttpparser.c +509 -342
- data/ext/pico_http_parser/picohttpparser/picohttpparser.h +37 -14
- data/ext/pico_http_parser/picohttpparser/test.c +366 -204
- data/lib/pico_http_parser/version.rb +1 -1
- data/spec/01_simple_spec.rb +0 -3
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 984780783c1c9bfce3fdcc058187bfe3fd553ceb
|
4
|
+
data.tar.gz: 2ad841f14ab2c12a79211634ae6758e22009e0f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c3dca6150fc78bd0946f4787b7f91dad54a7942ff7bc0f301a7580e277851fed9256008f740d930c4e757055230066af2c0d5ea72c3283f4d99db2f09a8ef1c4
|
7
|
+
data.tar.gz: 623cdcfc79c5c5cec7293e72996a92826a64b1e3d6fa05b1449631067b8cd99c2e80685b6c648e898042aa9813af5c7cb117ef80134c09ba32f28f942ebbefae
|
data/benchmark/benchmark.rb
CHANGED
@@ -2,13 +2,19 @@ require File.expand_path(File.dirname(__FILE__) + '/bench_helper')
|
|
2
2
|
|
3
3
|
require 'pico_http_parser'
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
request_bodys = [
|
6
|
+
"GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\n\r\n",
|
7
|
+
"GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nUser-Agent: Mozilla/5.0\r\n\r\n",
|
8
|
+
"GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nCookie: foobar\r\nX-Forwarded-For: 127.0.0.1\r\nUser-Agent: Mozilla/5.0\r\n\r\n",
|
9
|
+
"GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nCookie: foobar\r\nX-Forwarded-For: 127.0.0.1\r\nUser-Agent: Mozilla/5.0\r\nAccept: X-5\r\nConnection: XXXXXX-6\r\nReferer: XXXXXXXX-7\r\nAccept-Encoding: XXXXXXX8\r\nCache-Control: XXXXXXXX9\r\nIf-Modified-Since: XXXXXXXXXXXXXXX10\r\n\r\n",
|
10
|
+
"GET /foo/bar/baz.html?key=value HTTP/1.0\r\n\r\n"
|
11
|
+
];
|
8
12
|
|
13
|
+
request_bodys.each do |request_body|
|
14
|
+
puts("benchmark #{request_body}");
|
9
15
|
Benchmark.ips do |x|
|
10
16
|
x.time = 5
|
11
|
-
x.warmup =
|
17
|
+
x.warmup = 1
|
12
18
|
|
13
19
|
x.report("PicoHTTPParser") {
|
14
20
|
env = {}
|
@@ -21,7 +27,7 @@ Benchmark.ips do |x|
|
|
21
27
|
x.report("Unicorn's HttpParser") {
|
22
28
|
parser = HttpParser.new
|
23
29
|
parser.buf << request_body
|
24
|
-
parser.parse
|
30
|
+
env = parser.parse
|
25
31
|
}
|
26
32
|
rescue LoadError
|
27
33
|
puts("Can't benchmark unicorn as it couldn't be loaded.")
|
@@ -29,3 +35,5 @@ Benchmark.ips do |x|
|
|
29
35
|
|
30
36
|
x.compare!
|
31
37
|
end
|
38
|
+
end
|
39
|
+
|
@@ -14,6 +14,11 @@ static VALUE script_name_key;
|
|
14
14
|
static VALUE server_protocol_key;
|
15
15
|
static VALUE query_string_key;
|
16
16
|
|
17
|
+
static VALUE vacant_string_val;
|
18
|
+
|
19
|
+
static VALUE http10_val;
|
20
|
+
static VALUE http11_val;
|
21
|
+
|
17
22
|
struct common_header {
|
18
23
|
const char * name;
|
19
24
|
size_t name_len;
|
@@ -147,8 +152,8 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
|
|
147
152
|
char tmp[MAX_HEADER_NAME_LEN + sizeof("HTTP_") - 1];
|
148
153
|
VALUE last_value;
|
149
154
|
|
150
|
-
buf_str =
|
151
|
-
buf_len =
|
155
|
+
buf_str = RSTRING_PTR(buf);
|
156
|
+
buf_len = RSTRING_LEN(buf);
|
152
157
|
num_headers = MAX_HEADERS;
|
153
158
|
ret = phr_parse_request(buf_str, buf_len, &method, &method_len, &path,
|
154
159
|
&path_len, &minor_version, headers, &num_headers, 0);
|
@@ -157,10 +162,8 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
|
|
157
162
|
|
158
163
|
rb_hash_aset(envref, request_method_key, rb_str_new(method,method_len));
|
159
164
|
rb_hash_aset(envref, request_uri_key, rb_str_new(path, path_len));
|
160
|
-
rb_hash_aset(envref, script_name_key,
|
161
|
-
|
162
|
-
tmp[7] = 48 + ((minor_version > 1 || minor_version < 0 ) ? 0 : minor_version);
|
163
|
-
rb_hash_aset(envref, server_protocol_key, rb_str_new(tmp, sizeof("HTTP/1.0") - 1));
|
165
|
+
rb_hash_aset(envref, script_name_key, vacant_string_val);
|
166
|
+
rb_hash_aset(envref, server_protocol_key, (minor_version == 1) ? http11_val : http10_val);
|
164
167
|
|
165
168
|
/* PATH_INFO QUERY_STRING */
|
166
169
|
path_len = find_ch(path, path_len, '#'); /* strip off all text after # after storing request_uri */
|
@@ -172,7 +175,7 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
|
|
172
175
|
}
|
173
176
|
if (question_at != path_len) ++question_at;
|
174
177
|
rb_hash_aset(envref, query_string_key, rb_str_new(path + question_at, path_len - question_at));
|
175
|
-
|
178
|
+
|
176
179
|
last_value = Qnil;
|
177
180
|
for (i = 0; i < num_headers; ++i) {
|
178
181
|
if (headers[i].name != NULL) {
|
@@ -200,6 +203,7 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
|
|
200
203
|
env_key = rb_str_new(name, name_len);
|
201
204
|
}
|
202
205
|
}
|
206
|
+
|
203
207
|
slot = rb_hash_aref(envref, env_key);
|
204
208
|
if ( slot != Qnil ) {
|
205
209
|
rb_str_cat2(slot, ", ");
|
@@ -209,11 +213,13 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
|
|
209
213
|
rb_hash_aset(envref, env_key, slot);
|
210
214
|
last_value = slot;
|
211
215
|
}
|
216
|
+
|
212
217
|
} else {
|
213
218
|
/* continuing lines of a multiline header */
|
214
219
|
if ( last_value != Qnil )
|
215
220
|
rb_str_cat(last_value, headers[i].value, headers[i].value_len);
|
216
221
|
}
|
222
|
+
|
217
223
|
}
|
218
224
|
|
219
225
|
done:
|
@@ -247,6 +253,13 @@ void Init_pico_http_parser()
|
|
247
253
|
set_common_header("USER-AGENT",sizeof("USER-AGENT") - 1, 0);
|
248
254
|
set_common_header("X-FORWARDED-FOR",sizeof("X-FORWARDED-FOR") - 1, 0);
|
249
255
|
|
256
|
+
http10_val = rb_obj_freeze(rb_str_new2("HTTP/1.0"));
|
257
|
+
rb_gc_register_address(&http10_val);
|
258
|
+
http11_val = rb_obj_freeze(rb_str_new2("HTTP/1.1"));
|
259
|
+
rb_gc_register_address(&http11_val);
|
260
|
+
vacant_string_val = rb_obj_freeze(rb_str_new("",0));
|
261
|
+
rb_gc_register_address(&vacant_string_val);
|
262
|
+
|
250
263
|
cPicoHTTPParser = rb_const_get(rb_cObject, rb_intern("PicoHTTPParser"));
|
251
264
|
rb_define_module_function(cPicoHTTPParser, "parse_http_request", phr_parse_http_request, 2);
|
252
265
|
}
|
@@ -14,6 +14,98 @@ Check out [test.c] to find out how to use the parser.
|
|
14
14
|
|
15
15
|
The software is dual-licensed under the Perl License or the MIT License.
|
16
16
|
|
17
|
+
Usage
|
18
|
+
-----
|
19
|
+
|
20
|
+
The library exposes four functions: `phr_parse_request`, `phr_parse_response`, `phr_parse_headers`, `phr_decode_chunked`.
|
21
|
+
|
22
|
+
### phr_parse_request
|
23
|
+
|
24
|
+
The example below reads an HTTP request from socket `sock` using `read(2)`, parses it using `phr_parse_request`, and prints the details.
|
25
|
+
|
26
|
+
```
|
27
|
+
char buf[4096], *method, *path;
|
28
|
+
int pret, minor_version;
|
29
|
+
struct phr_header headers[100];
|
30
|
+
size_t buflen = 0, prevbuflen = 0, method_len, path_len, num_headers;
|
31
|
+
ssize_t rret;
|
32
|
+
|
33
|
+
while (1) {
|
34
|
+
/* read the request */
|
35
|
+
while ((rret = read(sock, buf + buflen, sizeof(buf) - buflen)) == -1 && errno == EINTR)
|
36
|
+
;
|
37
|
+
if (rret <= 0)
|
38
|
+
return IOError;
|
39
|
+
prevbuflen = buflen;
|
40
|
+
buflen += rret;
|
41
|
+
/* parse the request */
|
42
|
+
num_headers = sizeof(headers) / sizeof(headers[0]);
|
43
|
+
pret = phr_parse_request(buf, buflen, &method, &method_len, &path, &path_len,
|
44
|
+
&minor_version, headers, &num_headers, prevbuflen);
|
45
|
+
if (pret > 0)
|
46
|
+
break; /* successfully parsed the request */
|
47
|
+
else if (pret == -1)
|
48
|
+
return ParseError;
|
49
|
+
/* request is incomplete, continue the loop */
|
50
|
+
assert(pret == -2);
|
51
|
+
if (buflen == sizeof(buf))
|
52
|
+
return RequestIsTooLongError;
|
53
|
+
}
|
54
|
+
|
55
|
+
printf("request is %d bytes long\n", pret);
|
56
|
+
printf("method is %.*s\n", (int)method_len, method);
|
57
|
+
printf("path is %.*s\n", (int)path_len, path);
|
58
|
+
printf("HTTP version is 1.%d\n", minor_version);
|
59
|
+
printf("headers:\n");
|
60
|
+
for (i = 0; i != num_headers; ++i) {
|
61
|
+
printf("%.*s: %.*s\n", (int)headers[i].name_len, headers[i].name,
|
62
|
+
(int)headers[i].value_len, headers[i].value);
|
63
|
+
}
|
64
|
+
```
|
65
|
+
|
66
|
+
### phr_parse_response, phr_parse_headers
|
67
|
+
|
68
|
+
`phr_parse_response` and `phr_parse_headers` provide interfaces similar to that of `phr_parse_request`. `phr_parse_response` parses an HTTP response, and `phr_parse_headers` parses the headers only.
|
69
|
+
|
70
|
+
### phr_decode_chunked
|
71
|
+
|
72
|
+
The example below decodes incoming data in chunked-encoding. The data is decoded in-place.
|
73
|
+
|
74
|
+
```
|
75
|
+
struct phr_chunked_decoder decoder = {}; /* zero-clear */
|
76
|
+
char *buf = malloc(4096);
|
77
|
+
size_t size = 0, capacity = 4096, rsize;
|
78
|
+
ssize_t rret, pret;
|
79
|
+
|
80
|
+
/* set consume_trailer to 1 to discard the trailing header, or the application
|
81
|
+
* should call phr_parse_headers to parse the trailing header */
|
82
|
+
decoder.consume_trailer = 1;
|
83
|
+
|
84
|
+
do {
|
85
|
+
/* expand the buffer if necessary */
|
86
|
+
if (size == capacity) {
|
87
|
+
capacity *= 2;
|
88
|
+
buf = realloc(buf, capacity);
|
89
|
+
assert(buf != NULL);
|
90
|
+
}
|
91
|
+
/* read */
|
92
|
+
while ((rret = read(sock, buf + size, capacity - size)) == -1 && errno == EINTR)
|
93
|
+
;
|
94
|
+
if (rret <= 0)
|
95
|
+
return IOError;
|
96
|
+
/* decode */
|
97
|
+
rsize = rret;
|
98
|
+
pret = phr_decode_chunked(&decoder, buf + size, &rsize);
|
99
|
+
if (pret == -1)
|
100
|
+
return ParseError;
|
101
|
+
size += rsize;
|
102
|
+
} while (pret == -2);
|
103
|
+
|
104
|
+
/* successfully decoded the chunked data */
|
105
|
+
assert(pret >= 0);
|
106
|
+
printf("decoded data is at %p (%zu bytes)\n", buf, size);
|
107
|
+
```
|
108
|
+
|
17
109
|
Benchmark
|
18
110
|
---------
|
19
111
|
|
@@ -28,26 +28,39 @@
|
|
28
28
|
#include <stdio.h>
|
29
29
|
#include "picohttpparser.h"
|
30
30
|
|
31
|
-
#define REQ
|
31
|
+
#define REQ \
|
32
|
+
"GET /wp-content/uploads/2010/03/hello-kitty-darth-vader-pink.jpg HTTP/1.1\r\n" \
|
33
|
+
"Host: www.kittyhell.com\r\n" \
|
34
|
+
"User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; ja-JP-mac; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 " \
|
35
|
+
"Pathtraq/0.9\r\n" \
|
36
|
+
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" \
|
37
|
+
"Accept-Language: ja,en-us;q=0.7,en;q=0.3\r\n" \
|
38
|
+
"Accept-Encoding: gzip,deflate\r\n" \
|
39
|
+
"Accept-Charset: Shift_JIS,utf-8;q=0.7,*;q=0.7\r\n" \
|
40
|
+
"Keep-Alive: 115\r\n" \
|
41
|
+
"Connection: keep-alive\r\n" \
|
42
|
+
"Cookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; " \
|
43
|
+
"__utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; " \
|
44
|
+
"__utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral\r\n" \
|
45
|
+
"\r\n"
|
32
46
|
|
33
47
|
int main(void)
|
34
48
|
{
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
return 0;
|
49
|
+
const char *method;
|
50
|
+
size_t method_len;
|
51
|
+
const char *path;
|
52
|
+
size_t path_len;
|
53
|
+
int minor_version;
|
54
|
+
struct phr_header headers[32];
|
55
|
+
size_t num_headers;
|
56
|
+
int i, ret;
|
57
|
+
|
58
|
+
for (i = 0; i < 10000000; i++) {
|
59
|
+
num_headers = sizeof(headers) / sizeof(headers[0]);
|
60
|
+
ret = phr_parse_request(REQ, sizeof(REQ) - 1, &method, &method_len, &path, &path_len, &minor_version, headers, &num_headers,
|
61
|
+
0);
|
62
|
+
assert(ret == sizeof(REQ) - 1);
|
63
|
+
}
|
64
|
+
|
65
|
+
return 0;
|
53
66
|
}
|
@@ -24,409 +24,576 @@
|
|
24
24
|
* IN THE SOFTWARE.
|
25
25
|
*/
|
26
26
|
|
27
|
+
#include <assert.h>
|
27
28
|
#include <stddef.h>
|
29
|
+
#include <string.h>
|
28
30
|
#ifdef __SSE4_2__
|
29
|
-
#
|
31
|
+
#ifdef _MSC_VER
|
32
|
+
#include <nmmintrin.h>
|
33
|
+
#else
|
34
|
+
#include <x86intrin.h>
|
35
|
+
#endif
|
30
36
|
#endif
|
31
37
|
#include "picohttpparser.h"
|
32
38
|
|
33
39
|
/* $Id$ */
|
34
40
|
|
35
41
|
#if __GNUC__ >= 3
|
36
|
-
#
|
37
|
-
#
|
42
|
+
#define likely(x) __builtin_expect(!!(x), 1)
|
43
|
+
#define unlikely(x) __builtin_expect(!!(x), 0)
|
44
|
+
#else
|
45
|
+
#define likely(x) (x)
|
46
|
+
#define unlikely(x) (x)
|
47
|
+
#endif
|
48
|
+
|
49
|
+
#ifdef _MSC_VER
|
50
|
+
#define ALIGNED(n) _declspec(align(n))
|
38
51
|
#else
|
39
|
-
#
|
40
|
-
# define unlikely(x) (x)
|
52
|
+
#define ALIGNED(n) __attribute__((aligned(n)))
|
41
53
|
#endif
|
42
54
|
|
43
|
-
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)
|
44
|
-
|
45
|
-
#define CHECK_EOF()
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
#define EXPECT_CHAR(ch)
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
#define ADVANCE_TOKEN(tok, toklen)
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
static const char*
|
55
|
+
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
|
56
|
+
|
57
|
+
#define CHECK_EOF() \
|
58
|
+
if (buf == buf_end) { \
|
59
|
+
*ret = -2; \
|
60
|
+
return NULL; \
|
61
|
+
}
|
62
|
+
|
63
|
+
#define EXPECT_CHAR(ch) \
|
64
|
+
CHECK_EOF(); \
|
65
|
+
if (*buf++ != ch) { \
|
66
|
+
*ret = -1; \
|
67
|
+
return NULL; \
|
68
|
+
}
|
69
|
+
|
70
|
+
#define ADVANCE_TOKEN(tok, toklen) \
|
71
|
+
do { \
|
72
|
+
const char *tok_start = buf; \
|
73
|
+
static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \
|
74
|
+
int found2; \
|
75
|
+
buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
|
76
|
+
if (!found2) { \
|
77
|
+
CHECK_EOF(); \
|
78
|
+
} \
|
79
|
+
while (1) { \
|
80
|
+
if (*buf == ' ') { \
|
81
|
+
break; \
|
82
|
+
} else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
|
83
|
+
if ((unsigned char)*buf < '\040' || *buf == '\177') { \
|
84
|
+
*ret = -1; \
|
85
|
+
return NULL; \
|
86
|
+
} \
|
87
|
+
} \
|
88
|
+
++buf; \
|
89
|
+
CHECK_EOF(); \
|
90
|
+
} \
|
91
|
+
tok = tok_start; \
|
92
|
+
toklen = buf - tok_start; \
|
93
|
+
} while (0)
|
94
|
+
|
95
|
+
static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
96
|
+
"\0\1\1\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
|
97
|
+
"\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
|
98
|
+
"\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
|
99
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
100
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
101
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
102
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
|
103
|
+
|
104
|
+
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
|
93
105
|
{
|
94
|
-
|
106
|
+
*found = 0;
|
95
107
|
#if __SSE4_2__
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
108
|
+
if (likely(buf_end - buf >= 16)) {
|
109
|
+
__m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
|
110
|
+
|
111
|
+
size_t left = (buf_end - buf) & ~15;
|
112
|
+
do {
|
113
|
+
__m128i b16 = _mm_loadu_si128((void *)buf);
|
114
|
+
int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
|
115
|
+
if (unlikely(r != 16)) {
|
116
|
+
buf += r;
|
117
|
+
*found = 1;
|
118
|
+
break;
|
119
|
+
}
|
120
|
+
buf += 16;
|
121
|
+
left -= 16;
|
122
|
+
} while (likely(left != 0));
|
123
|
+
}
|
124
|
+
#else
|
125
|
+
/* suppress unused parameter warning */
|
126
|
+
(void)buf_end;
|
127
|
+
(void)ranges;
|
128
|
+
(void)ranges_size;
|
112
129
|
#endif
|
113
|
-
|
130
|
+
return buf;
|
114
131
|
}
|
115
132
|
|
116
|
-
static const char*
|
117
|
-
const char** token, size_t* token_len,
|
118
|
-
int* ret)
|
133
|
+
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
|
119
134
|
{
|
120
|
-
|
121
|
-
|
135
|
+
const char *token_start = buf;
|
136
|
+
|
122
137
|
#ifdef __SSE4_2__
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
;
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
goto FOUND_CTL;
|
138
|
+
static const char ranges1[] = "\0\010"
|
139
|
+
/* allow HT */
|
140
|
+
"\012\037"
|
141
|
+
/* allow SP and up to but not including DEL */
|
142
|
+
"\177\177"
|
143
|
+
/* allow chars w. MSB set */
|
144
|
+
;
|
145
|
+
int found;
|
146
|
+
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
|
147
|
+
if (found)
|
148
|
+
goto FOUND_CTL;
|
135
149
|
#else
|
136
|
-
|
137
|
-
|
138
|
-
#define DOIT()
|
139
|
-
|
140
|
-
|
150
|
+
/* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
|
151
|
+
while (likely(buf_end - buf >= 8)) {
|
152
|
+
#define DOIT() \
|
153
|
+
do { \
|
154
|
+
if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
|
155
|
+
goto NonPrintable; \
|
156
|
+
++buf; \
|
157
|
+
} while (0)
|
158
|
+
DOIT();
|
159
|
+
DOIT();
|
160
|
+
DOIT();
|
161
|
+
DOIT();
|
162
|
+
DOIT();
|
163
|
+
DOIT();
|
164
|
+
DOIT();
|
165
|
+
DOIT();
|
141
166
|
#undef DOIT
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
167
|
+
continue;
|
168
|
+
NonPrintable:
|
169
|
+
if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
|
170
|
+
goto FOUND_CTL;
|
171
|
+
}
|
172
|
+
++buf;
|
146
173
|
}
|
147
|
-
++buf;
|
148
|
-
}
|
149
174
|
#endif
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
175
|
+
for (;; ++buf) {
|
176
|
+
CHECK_EOF();
|
177
|
+
if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
|
178
|
+
if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
|
179
|
+
goto FOUND_CTL;
|
180
|
+
}
|
181
|
+
}
|
156
182
|
}
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
*token_len = buf - 2 - token_start;
|
163
|
-
} else if (*buf == '\012') {
|
164
|
-
*token_len = buf - token_start;
|
165
|
-
++buf;
|
166
|
-
} else {
|
167
|
-
*ret = -1;
|
168
|
-
return NULL;
|
169
|
-
}
|
170
|
-
*token = token_start;
|
171
|
-
|
172
|
-
return buf;
|
173
|
-
}
|
174
|
-
|
175
|
-
static const char* is_complete(const char* buf, const char* buf_end,
|
176
|
-
size_t last_len, int* ret)
|
177
|
-
{
|
178
|
-
int ret_cnt = 0;
|
179
|
-
buf = last_len < 3 ? buf : buf + last_len - 3;
|
180
|
-
|
181
|
-
while (1) {
|
182
|
-
CHECK_EOF();
|
183
|
-
if (*buf == '\015') {
|
184
|
-
++buf;
|
185
|
-
CHECK_EOF();
|
186
|
-
EXPECT_CHAR('\012');
|
187
|
-
++ret_cnt;
|
183
|
+
FOUND_CTL:
|
184
|
+
if (likely(*buf == '\015')) {
|
185
|
+
++buf;
|
186
|
+
EXPECT_CHAR('\012');
|
187
|
+
*token_len = buf - 2 - token_start;
|
188
188
|
} else if (*buf == '\012') {
|
189
|
-
|
190
|
-
|
189
|
+
*token_len = buf - token_start;
|
190
|
+
++buf;
|
191
191
|
} else {
|
192
|
-
|
193
|
-
|
192
|
+
*ret = -1;
|
193
|
+
return NULL;
|
194
194
|
}
|
195
|
-
|
196
|
-
|
195
|
+
*token = token_start;
|
196
|
+
|
197
|
+
return buf;
|
198
|
+
}
|
199
|
+
|
200
|
+
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
|
201
|
+
{
|
202
|
+
int ret_cnt = 0;
|
203
|
+
buf = last_len < 3 ? buf : buf + last_len - 3;
|
204
|
+
|
205
|
+
while (1) {
|
206
|
+
CHECK_EOF();
|
207
|
+
if (*buf == '\015') {
|
208
|
+
++buf;
|
209
|
+
CHECK_EOF();
|
210
|
+
EXPECT_CHAR('\012');
|
211
|
+
++ret_cnt;
|
212
|
+
} else if (*buf == '\012') {
|
213
|
+
++buf;
|
214
|
+
++ret_cnt;
|
215
|
+
} else {
|
216
|
+
++buf;
|
217
|
+
ret_cnt = 0;
|
218
|
+
}
|
219
|
+
if (ret_cnt == 2) {
|
220
|
+
return buf;
|
221
|
+
}
|
197
222
|
}
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
return NULL;
|
223
|
+
|
224
|
+
*ret = -2;
|
225
|
+
return NULL;
|
202
226
|
}
|
203
227
|
|
204
228
|
/* *_buf is always within [buf, buf_end) upon success */
|
205
|
-
static const char*
|
206
|
-
int* ret)
|
229
|
+
static const char *parse_int(const char *buf, const char *buf_end, int *value, int *ret)
|
207
230
|
{
|
208
|
-
|
209
|
-
CHECK_EOF();
|
210
|
-
if (! ('0' <= *buf && *buf <= '9')) {
|
211
|
-
*ret = -1;
|
212
|
-
return NULL;
|
213
|
-
}
|
214
|
-
v = 0;
|
215
|
-
for (; ; ++buf) {
|
231
|
+
int v;
|
216
232
|
CHECK_EOF();
|
217
|
-
if ('0' <= *buf && *buf <= '9') {
|
218
|
-
|
219
|
-
|
220
|
-
|
233
|
+
if (!('0' <= *buf && *buf <= '9')) {
|
234
|
+
*ret = -1;
|
235
|
+
return NULL;
|
236
|
+
}
|
237
|
+
v = 0;
|
238
|
+
for (;; ++buf) {
|
239
|
+
CHECK_EOF();
|
240
|
+
if ('0' <= *buf && *buf <= '9') {
|
241
|
+
v = v * 10 + *buf - '0';
|
242
|
+
} else {
|
243
|
+
break;
|
244
|
+
}
|
221
245
|
}
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
return buf;
|
246
|
+
|
247
|
+
*value = v;
|
248
|
+
return buf;
|
226
249
|
}
|
227
250
|
|
228
251
|
/* returned pointer is always within [buf, buf_end), or null */
|
229
|
-
static const char*
|
230
|
-
int* minor_version, int* ret)
|
252
|
+
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
|
231
253
|
{
|
232
|
-
|
233
|
-
|
234
|
-
|
254
|
+
EXPECT_CHAR('H');
|
255
|
+
EXPECT_CHAR('T');
|
256
|
+
EXPECT_CHAR('T');
|
257
|
+
EXPECT_CHAR('P');
|
258
|
+
EXPECT_CHAR('/');
|
259
|
+
EXPECT_CHAR('1');
|
260
|
+
EXPECT_CHAR('.');
|
261
|
+
return parse_int(buf, buf_end, minor_version, ret);
|
235
262
|
}
|
236
263
|
|
237
|
-
static const char*
|
238
|
-
|
239
|
-
size_t* num_headers, size_t max_headers,
|
240
|
-
int* ret)
|
264
|
+
static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
|
265
|
+
size_t max_headers, int *ret)
|
241
266
|
{
|
242
|
-
|
267
|
+
for (;; ++*num_headers) {
|
268
|
+
CHECK_EOF();
|
269
|
+
if (*buf == '\015') {
|
270
|
+
++buf;
|
271
|
+
EXPECT_CHAR('\012');
|
272
|
+
break;
|
273
|
+
} else if (*buf == '\012') {
|
274
|
+
++buf;
|
275
|
+
break;
|
276
|
+
}
|
277
|
+
if (*num_headers == max_headers) {
|
278
|
+
*ret = -1;
|
279
|
+
return NULL;
|
280
|
+
}
|
281
|
+
if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
|
282
|
+
static const char ALIGNED(16) ranges1[] = "::\x00\037";
|
283
|
+
int found;
|
284
|
+
if (!token_char_map[(unsigned char)*buf]) {
|
285
|
+
*ret = -1;
|
286
|
+
return NULL;
|
287
|
+
}
|
288
|
+
/* parsing name, but do not discard SP before colon, see
|
289
|
+
* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
|
290
|
+
headers[*num_headers].name = buf;
|
291
|
+
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
|
292
|
+
if (!found) {
|
293
|
+
CHECK_EOF();
|
294
|
+
}
|
295
|
+
while (1) {
|
296
|
+
if (*buf == ':') {
|
297
|
+
break;
|
298
|
+
} else if (*buf < ' ') {
|
299
|
+
*ret = -1;
|
300
|
+
return NULL;
|
301
|
+
}
|
302
|
+
++buf;
|
303
|
+
CHECK_EOF();
|
304
|
+
}
|
305
|
+
headers[*num_headers].name_len = buf - headers[*num_headers].name;
|
306
|
+
++buf;
|
307
|
+
for (;; ++buf) {
|
308
|
+
CHECK_EOF();
|
309
|
+
if (!(*buf == ' ' || *buf == '\t')) {
|
310
|
+
break;
|
311
|
+
}
|
312
|
+
}
|
313
|
+
} else {
|
314
|
+
headers[*num_headers].name = NULL;
|
315
|
+
headers[*num_headers].name_len = 0;
|
316
|
+
}
|
317
|
+
if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) {
|
318
|
+
return NULL;
|
319
|
+
}
|
320
|
+
}
|
321
|
+
return buf;
|
322
|
+
}
|
323
|
+
|
324
|
+
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
|
325
|
+
size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
|
326
|
+
size_t max_headers, int *ret)
|
327
|
+
{
|
328
|
+
/* skip first empty line (some clients add CRLF after POST content) */
|
243
329
|
CHECK_EOF();
|
244
330
|
if (*buf == '\015') {
|
245
|
-
|
246
|
-
|
247
|
-
break;
|
331
|
+
++buf;
|
332
|
+
EXPECT_CHAR('\012');
|
248
333
|
} else if (*buf == '\012') {
|
249
|
-
|
250
|
-
break;
|
251
|
-
}
|
252
|
-
if (*num_headers == max_headers) {
|
253
|
-
*ret = -1;
|
254
|
-
return NULL;
|
334
|
+
++buf;
|
255
335
|
}
|
256
|
-
|
257
|
-
|
258
|
-
|
336
|
+
|
337
|
+
/* parse request line */
|
338
|
+
ADVANCE_TOKEN(*method, *method_len);
|
339
|
+
++buf;
|
340
|
+
ADVANCE_TOKEN(*path, *path_len);
|
341
|
+
++buf;
|
342
|
+
if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
|
259
343
|
return NULL;
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
int found;
|
266
|
-
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
|
267
|
-
if (! found) {
|
268
|
-
CHECK_EOF();
|
269
|
-
}
|
270
|
-
while (1) {
|
271
|
-
if (*buf == ':') {
|
272
|
-
break;
|
273
|
-
} else if (*buf < ' ') {
|
274
|
-
*ret = -1;
|
275
|
-
return NULL;
|
276
|
-
}
|
344
|
+
}
|
345
|
+
if (*buf == '\015') {
|
346
|
+
++buf;
|
347
|
+
EXPECT_CHAR('\012');
|
348
|
+
} else if (*buf == '\012') {
|
277
349
|
++buf;
|
278
|
-
CHECK_EOF();
|
279
|
-
}
|
280
|
-
headers[*num_headers].name_len = buf - headers[*num_headers].name;
|
281
|
-
++buf;
|
282
|
-
for (; ; ++buf) {
|
283
|
-
CHECK_EOF();
|
284
|
-
if (! (*buf == ' ' || *buf == '\t')) {
|
285
|
-
break;
|
286
|
-
}
|
287
|
-
}
|
288
350
|
} else {
|
289
|
-
|
290
|
-
|
351
|
+
*ret = -1;
|
352
|
+
return NULL;
|
353
|
+
}
|
354
|
+
|
355
|
+
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
|
356
|
+
}
|
357
|
+
|
358
|
+
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
|
359
|
+
size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
|
360
|
+
{
|
361
|
+
const char *buf = buf_start, *buf_end = buf_start + len;
|
362
|
+
size_t max_headers = *num_headers;
|
363
|
+
int r;
|
364
|
+
|
365
|
+
*method = NULL;
|
366
|
+
*method_len = 0;
|
367
|
+
*path = NULL;
|
368
|
+
*path_len = 0;
|
369
|
+
*minor_version = -1;
|
370
|
+
*num_headers = 0;
|
371
|
+
|
372
|
+
/* if last_len != 0, check if the request is complete (a fast countermeasure
|
373
|
+
against slowloris) */
|
374
|
+
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
|
375
|
+
return r;
|
291
376
|
}
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
377
|
+
|
378
|
+
if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
|
379
|
+
&r)) == NULL) {
|
380
|
+
return r;
|
296
381
|
}
|
297
|
-
|
298
|
-
|
382
|
+
|
383
|
+
return (int)(buf - buf_start);
|
299
384
|
}
|
300
385
|
|
301
|
-
const char*
|
302
|
-
|
303
|
-
const char** path, size_t* path_len,
|
304
|
-
int* minor_version, struct phr_header* headers,
|
305
|
-
size_t* num_headers, size_t max_headers, int* ret)
|
386
|
+
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
|
387
|
+
size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
|
306
388
|
{
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
return NULL;
|
332
|
-
}
|
333
|
-
|
334
|
-
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
|
389
|
+
/* parse "HTTP/1.x" */
|
390
|
+
if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
|
391
|
+
return NULL;
|
392
|
+
}
|
393
|
+
/* skip space */
|
394
|
+
if (*buf++ != ' ') {
|
395
|
+
*ret = -1;
|
396
|
+
return NULL;
|
397
|
+
}
|
398
|
+
/* parse status code */
|
399
|
+
if ((buf = parse_int(buf, buf_end, status, ret)) == NULL) {
|
400
|
+
return NULL;
|
401
|
+
}
|
402
|
+
/* skip space */
|
403
|
+
if (*buf++ != ' ') {
|
404
|
+
*ret = -1;
|
405
|
+
return NULL;
|
406
|
+
}
|
407
|
+
/* get message */
|
408
|
+
if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
|
409
|
+
return NULL;
|
410
|
+
}
|
411
|
+
|
412
|
+
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
|
335
413
|
}
|
336
414
|
|
337
|
-
int
|
338
|
-
|
339
|
-
int* minor_version, struct phr_header* headers,
|
340
|
-
size_t* num_headers, size_t last_len)
|
415
|
+
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
|
416
|
+
struct phr_header *headers, size_t *num_headers, size_t last_len)
|
341
417
|
{
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
return r;
|
364
|
-
}
|
365
|
-
|
366
|
-
return (int)(buf - buf_start);
|
418
|
+
const char *buf = buf_start, *buf_end = buf + len;
|
419
|
+
size_t max_headers = *num_headers;
|
420
|
+
int r;
|
421
|
+
|
422
|
+
*minor_version = -1;
|
423
|
+
*status = 0;
|
424
|
+
*msg = NULL;
|
425
|
+
*msg_len = 0;
|
426
|
+
*num_headers = 0;
|
427
|
+
|
428
|
+
/* if last_len != 0, check if the response is complete (a fast countermeasure
|
429
|
+
against slowloris) */
|
430
|
+
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
|
431
|
+
return r;
|
432
|
+
}
|
433
|
+
|
434
|
+
if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
|
435
|
+
return r;
|
436
|
+
}
|
437
|
+
|
438
|
+
return (int)(buf - buf_start);
|
367
439
|
}
|
368
440
|
|
369
|
-
|
370
|
-
int* minor_version, int* status,
|
371
|
-
const char** msg, size_t* msg_len,
|
372
|
-
struct phr_header* headers,
|
373
|
-
size_t* num_headers, size_t max_headers,
|
374
|
-
int* ret)
|
441
|
+
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
|
375
442
|
{
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
return
|
393
|
-
}
|
394
|
-
/* get message */
|
395
|
-
if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
|
396
|
-
return NULL;
|
397
|
-
}
|
398
|
-
|
399
|
-
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
|
443
|
+
const char *buf = buf_start, *buf_end = buf + len;
|
444
|
+
size_t max_headers = *num_headers;
|
445
|
+
int r;
|
446
|
+
|
447
|
+
*num_headers = 0;
|
448
|
+
|
449
|
+
/* if last_len != 0, check if the response is complete (a fast countermeasure
|
450
|
+
against slowloris) */
|
451
|
+
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
|
452
|
+
return r;
|
453
|
+
}
|
454
|
+
|
455
|
+
if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
|
456
|
+
return r;
|
457
|
+
}
|
458
|
+
|
459
|
+
return (int)(buf - buf_start);
|
400
460
|
}
|
401
461
|
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
462
|
+
/* States of the chunked decoder state machine (stored in decoder->_state and
 * dispatched on by phr_decode_chunked()). The numeric values are the same as
 * the implicit enumeration order. */
enum {
    CHUNKED_IN_CHUNK_SIZE = 0,          /* reading the hex digits of a chunk size */
    CHUNKED_IN_CHUNK_EXT = 1,           /* skipping the rest of the chunk-size line, up to LF */
    CHUNKED_IN_CHUNK_DATA = 2,          /* copying the bytes of the chunk body */
    CHUNKED_IN_CHUNK_CRLF = 3,          /* expecting the line break that follows chunk data */
    CHUNKED_IN_TRAILERS_LINE_HEAD = 4,  /* at the beginning of a trailer line */
    CHUNKED_IN_TRAILERS_LINE_MIDDLE = 5 /* inside a trailer line, skipping up to LF */
};
|
470
|
+
|
471
|
+
/*
 * Converts one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f'; returns -1 for any other
 * input. The letter cases rely on range comparisons, i.e. on 'A'..'F' and
 * 'a'..'f' being contiguous in the execution character set.
 */
static int decode_hex(int ch)
{
    const int letter_base = 0xa; /* value of 'A' / 'a' */

    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'A' && ch <= 'F') {
        return letter_base + (ch - 'A');
    }
    if (ch >= 'a' && ch <= 'f') {
        return letter_base + (ch - 'a');
    }
    return -1;
}
|
483
|
+
|
484
|
+
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
|
485
|
+
{
|
486
|
+
size_t dst = 0, src = 0, bufsz = *_bufsz;
|
487
|
+
ssize_t ret = -2; /* incomplete */
|
488
|
+
|
489
|
+
while (1) {
|
490
|
+
switch (decoder->_state) {
|
491
|
+
case CHUNKED_IN_CHUNK_SIZE:
|
492
|
+
for (;; ++src) {
|
493
|
+
int v;
|
494
|
+
if (src == bufsz)
|
495
|
+
goto Exit;
|
496
|
+
if ((v = decode_hex(buf[src])) == -1) {
|
497
|
+
if (decoder->_hex_count == 0) {
|
498
|
+
ret = -1;
|
499
|
+
goto Exit;
|
500
|
+
}
|
501
|
+
break;
|
502
|
+
}
|
503
|
+
if (decoder->_hex_count == sizeof(size_t) * 2) {
|
504
|
+
ret = -1;
|
505
|
+
goto Exit;
|
506
|
+
}
|
507
|
+
decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
|
508
|
+
++decoder->_hex_count;
|
509
|
+
}
|
510
|
+
decoder->_hex_count = 0;
|
511
|
+
decoder->_state = CHUNKED_IN_CHUNK_EXT;
|
512
|
+
/* fallthru */
|
513
|
+
case CHUNKED_IN_CHUNK_EXT:
|
514
|
+
/* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
|
515
|
+
for (;; ++src) {
|
516
|
+
if (src == bufsz)
|
517
|
+
goto Exit;
|
518
|
+
if (buf[src] == '\012')
|
519
|
+
break;
|
520
|
+
}
|
521
|
+
++src;
|
522
|
+
if (decoder->bytes_left_in_chunk == 0) {
|
523
|
+
if (decoder->consume_trailer) {
|
524
|
+
decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
|
525
|
+
break;
|
526
|
+
} else {
|
527
|
+
goto Complete;
|
528
|
+
}
|
529
|
+
}
|
530
|
+
decoder->_state = CHUNKED_IN_CHUNK_DATA;
|
531
|
+
/* fallthru */
|
532
|
+
case CHUNKED_IN_CHUNK_DATA: {
|
533
|
+
size_t avail = bufsz - src;
|
534
|
+
if (avail < decoder->bytes_left_in_chunk) {
|
535
|
+
if (dst != src)
|
536
|
+
memmove(buf + dst, buf + src, avail);
|
537
|
+
src += avail;
|
538
|
+
dst += avail;
|
539
|
+
decoder->bytes_left_in_chunk -= avail;
|
540
|
+
goto Exit;
|
541
|
+
}
|
542
|
+
if (dst != src)
|
543
|
+
memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
|
544
|
+
src += decoder->bytes_left_in_chunk;
|
545
|
+
dst += decoder->bytes_left_in_chunk;
|
546
|
+
decoder->bytes_left_in_chunk = 0;
|
547
|
+
decoder->_state = CHUNKED_IN_CHUNK_CRLF;
|
548
|
+
}
|
549
|
+
/* fallthru */
|
550
|
+
case CHUNKED_IN_CHUNK_CRLF:
|
551
|
+
for (;; ++src) {
|
552
|
+
if (src == bufsz)
|
553
|
+
goto Exit;
|
554
|
+
if (buf[src] != '\015')
|
555
|
+
break;
|
556
|
+
}
|
557
|
+
if (buf[src] != '\012') {
|
558
|
+
ret = -1;
|
559
|
+
goto Exit;
|
560
|
+
}
|
561
|
+
++src;
|
562
|
+
decoder->_state = CHUNKED_IN_CHUNK_SIZE;
|
563
|
+
break;
|
564
|
+
case CHUNKED_IN_TRAILERS_LINE_HEAD:
|
565
|
+
for (;; ++src) {
|
566
|
+
if (src == bufsz)
|
567
|
+
goto Exit;
|
568
|
+
if (buf[src] != '\015')
|
569
|
+
break;
|
570
|
+
}
|
571
|
+
if (buf[src++] == '\012')
|
572
|
+
goto Complete;
|
573
|
+
decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
|
574
|
+
/* fallthru */
|
575
|
+
case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
|
576
|
+
for (;; ++src) {
|
577
|
+
if (src == bufsz)
|
578
|
+
goto Exit;
|
579
|
+
if (buf[src] == '\012')
|
580
|
+
break;
|
581
|
+
}
|
582
|
+
++src;
|
583
|
+
decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
|
584
|
+
break;
|
585
|
+
default:
|
586
|
+
assert(!"decoder is corrupt");
|
587
|
+
}
|
588
|
+
}
|
589
|
+
|
590
|
+
Complete:
|
591
|
+
ret = bufsz - src;
|
592
|
+
Exit:
|
593
|
+
if (dst != src)
|
594
|
+
memmove(buf + dst, buf + src, bufsz - src);
|
595
|
+
*_bufsz = dst;
|
596
|
+
return ret;
|
430
597
|
}
|
431
598
|
|
432
599
|
#undef CHECK_EOF
|