pico_http_parser 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/benchmark/benchmark.rb +13 -5
- data/ext/pico_http_parser/pico_http_parser.c +20 -7
- data/ext/pico_http_parser/picohttpparser/.clang-format +6 -0
- data/ext/pico_http_parser/picohttpparser/Jamfile +7 -0
- data/ext/pico_http_parser/picohttpparser/README.md +92 -0
- data/ext/pico_http_parser/picohttpparser/bench.c +32 -19
- data/ext/pico_http_parser/picohttpparser/picohttpparser.c +509 -342
- data/ext/pico_http_parser/picohttpparser/picohttpparser.h +37 -14
- data/ext/pico_http_parser/picohttpparser/test.c +366 -204
- data/lib/pico_http_parser/version.rb +1 -1
- data/spec/01_simple_spec.rb +0 -3
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 984780783c1c9bfce3fdcc058187bfe3fd553ceb
|
4
|
+
data.tar.gz: 2ad841f14ab2c12a79211634ae6758e22009e0f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c3dca6150fc78bd0946f4787b7f91dad54a7942ff7bc0f301a7580e277851fed9256008f740d930c4e757055230066af2c0d5ea72c3283f4d99db2f09a8ef1c4
|
7
|
+
data.tar.gz: 623cdcfc79c5c5cec7293e72996a92826a64b1e3d6fa05b1449631067b8cd99c2e80685b6c648e898042aa9813af5c7cb117ef80134c09ba32f28f942ebbefae
|
data/benchmark/benchmark.rb
CHANGED
@@ -2,13 +2,19 @@ require File.expand_path(File.dirname(__FILE__) + '/bench_helper')
|
|
2
2
|
|
3
3
|
require 'pico_http_parser'
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
request_bodys = [
|
6
|
+
"GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\n\r\n",
|
7
|
+
"GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nUser-Agent: Mozilla/5.0\r\n\r\n",
|
8
|
+
"GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nCookie: foobar\r\nX-Forwarded-For: 127.0.0.1\r\nUser-Agent: Mozilla/5.0\r\n\r\n",
|
9
|
+
"GET /foo/bar/baz.html?key=value HTTP/1.0\r\nHost: blooperblorp\r\nCookie: foobar\r\nX-Forwarded-For: 127.0.0.1\r\nUser-Agent: Mozilla/5.0\r\nAccept: X-5\r\nConnection: XXXXXX-6\r\nReferer: XXXXXXXX-7\r\nAccept-Encoding: XXXXXXX8\r\nCache-Control: XXXXXXXX9\r\nIf-Modified-Since: XXXXXXXXXXXXXXX10\r\n\r\n",
|
10
|
+
"GET /foo/bar/baz.html?key=value HTTP/1.0\r\n\r\n"
|
11
|
+
];
|
8
12
|
|
13
|
+
request_bodys.each do |request_body|
|
14
|
+
puts("benchmark #{request_body}");
|
9
15
|
Benchmark.ips do |x|
|
10
16
|
x.time = 5
|
11
|
-
x.warmup =
|
17
|
+
x.warmup = 1
|
12
18
|
|
13
19
|
x.report("PicoHTTPParser") {
|
14
20
|
env = {}
|
@@ -21,7 +27,7 @@ Benchmark.ips do |x|
|
|
21
27
|
x.report("Unicorn's HttpParser") {
|
22
28
|
parser = HttpParser.new
|
23
29
|
parser.buf << request_body
|
24
|
-
parser.parse
|
30
|
+
env = parser.parse
|
25
31
|
}
|
26
32
|
rescue LoadError
|
27
33
|
puts("Can't benchmark unicorn as it couldn't be loaded.")
|
@@ -29,3 +35,5 @@ Benchmark.ips do |x|
|
|
29
35
|
|
30
36
|
x.compare!
|
31
37
|
end
|
38
|
+
end
|
39
|
+
|
@@ -14,6 +14,11 @@ static VALUE script_name_key;
|
|
14
14
|
static VALUE server_protocol_key;
|
15
15
|
static VALUE query_string_key;
|
16
16
|
|
17
|
+
static VALUE vacant_string_val;
|
18
|
+
|
19
|
+
static VALUE http10_val;
|
20
|
+
static VALUE http11_val;
|
21
|
+
|
17
22
|
struct common_header {
|
18
23
|
const char * name;
|
19
24
|
size_t name_len;
|
@@ -147,8 +152,8 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
|
|
147
152
|
char tmp[MAX_HEADER_NAME_LEN + sizeof("HTTP_") - 1];
|
148
153
|
VALUE last_value;
|
149
154
|
|
150
|
-
buf_str =
|
151
|
-
buf_len =
|
155
|
+
buf_str = RSTRING_PTR(buf);
|
156
|
+
buf_len = RSTRING_LEN(buf);
|
152
157
|
num_headers = MAX_HEADERS;
|
153
158
|
ret = phr_parse_request(buf_str, buf_len, &method, &method_len, &path,
|
154
159
|
&path_len, &minor_version, headers, &num_headers, 0);
|
@@ -157,10 +162,8 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
|
|
157
162
|
|
158
163
|
rb_hash_aset(envref, request_method_key, rb_str_new(method,method_len));
|
159
164
|
rb_hash_aset(envref, request_uri_key, rb_str_new(path, path_len));
|
160
|
-
rb_hash_aset(envref, script_name_key,
|
161
|
-
|
162
|
-
tmp[7] = 48 + ((minor_version > 1 || minor_version < 0 ) ? 0 : minor_version);
|
163
|
-
rb_hash_aset(envref, server_protocol_key, rb_str_new(tmp, sizeof("HTTP/1.0") - 1));
|
165
|
+
rb_hash_aset(envref, script_name_key, vacant_string_val);
|
166
|
+
rb_hash_aset(envref, server_protocol_key, (minor_version == 1) ? http11_val : http10_val);
|
164
167
|
|
165
168
|
/* PATH_INFO QUERY_STRING */
|
166
169
|
path_len = find_ch(path, path_len, '#'); /* strip off all text after # after storing request_uri */
|
@@ -172,7 +175,7 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
|
|
172
175
|
}
|
173
176
|
if (question_at != path_len) ++question_at;
|
174
177
|
rb_hash_aset(envref, query_string_key, rb_str_new(path + question_at, path_len - question_at));
|
175
|
-
|
178
|
+
|
176
179
|
last_value = Qnil;
|
177
180
|
for (i = 0; i < num_headers; ++i) {
|
178
181
|
if (headers[i].name != NULL) {
|
@@ -200,6 +203,7 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
|
|
200
203
|
env_key = rb_str_new(name, name_len);
|
201
204
|
}
|
202
205
|
}
|
206
|
+
|
203
207
|
slot = rb_hash_aref(envref, env_key);
|
204
208
|
if ( slot != Qnil ) {
|
205
209
|
rb_str_cat2(slot, ", ");
|
@@ -209,11 +213,13 @@ VALUE phr_parse_http_request(VALUE self, VALUE buf, VALUE envref)
|
|
209
213
|
rb_hash_aset(envref, env_key, slot);
|
210
214
|
last_value = slot;
|
211
215
|
}
|
216
|
+
|
212
217
|
} else {
|
213
218
|
/* continuing lines of a multiline header */
|
214
219
|
if ( last_value != Qnil )
|
215
220
|
rb_str_cat(last_value, headers[i].value, headers[i].value_len);
|
216
221
|
}
|
222
|
+
|
217
223
|
}
|
218
224
|
|
219
225
|
done:
|
@@ -247,6 +253,13 @@ void Init_pico_http_parser()
|
|
247
253
|
set_common_header("USER-AGENT",sizeof("USER-AGENT") - 1, 0);
|
248
254
|
set_common_header("X-FORWARDED-FOR",sizeof("X-FORWARDED-FOR") - 1, 0);
|
249
255
|
|
256
|
+
http10_val = rb_obj_freeze(rb_str_new2("HTTP/1.0"));
|
257
|
+
rb_gc_register_address(&http10_val);
|
258
|
+
http11_val = rb_obj_freeze(rb_str_new2("HTTP/1.1"));
|
259
|
+
rb_gc_register_address(&http11_val);
|
260
|
+
vacant_string_val = rb_obj_freeze(rb_str_new("",0));
|
261
|
+
rb_gc_register_address(&vacant_string_val);
|
262
|
+
|
250
263
|
cPicoHTTPParser = rb_const_get(rb_cObject, rb_intern("PicoHTTPParser"));
|
251
264
|
rb_define_module_function(cPicoHTTPParser, "parse_http_request", phr_parse_http_request, 2);
|
252
265
|
}
|
@@ -14,6 +14,98 @@ Check out [test.c] to find out how to use the parser.
|
|
14
14
|
|
15
15
|
The software is dual-licensed under the Perl License or the MIT License.
|
16
16
|
|
17
|
+
Usage
|
18
|
+
-----
|
19
|
+
|
20
|
+
The library exposes four functions: `phr_parse_request`, `phr_parse_response`, `phr_parse_headers`, `phr_decode_chunked`.
|
21
|
+
|
22
|
+
### phr_parse_request
|
23
|
+
|
24
|
+
The example below reads an HTTP request from socket `sock` using `read(2)`, parses it using `phr_parse_request`, and prints the details.
|
25
|
+
|
26
|
+
```
|
27
|
+
char buf[4096], *method, *path;
|
28
|
+
int pret, minor_version;
|
29
|
+
struct phr_header headers[100];
|
30
|
+
size_t buflen = 0, prevbuflen = 0, method_len, path_len, num_headers;
|
31
|
+
ssize_t rret;
|
32
|
+
|
33
|
+
while (1) {
|
34
|
+
/* read the request */
|
35
|
+
while ((rret = read(sock, buf + buflen, sizeof(buf) - buflen)) == -1 && errno == EINTR)
|
36
|
+
;
|
37
|
+
if (rret <= 0)
|
38
|
+
return IOError;
|
39
|
+
prevbuflen = buflen;
|
40
|
+
buflen += rret;
|
41
|
+
/* parse the request */
|
42
|
+
num_headers = sizeof(headers) / sizeof(headers[0]);
|
43
|
+
pret = phr_parse_request(buf, buflen, &method, &method_len, &path, &path_len,
|
44
|
+
&minor_version, headers, &num_headers, prevbuflen);
|
45
|
+
if (pret > 0)
|
46
|
+
break; /* successfully parsed the request */
|
47
|
+
else if (pret == -1)
|
48
|
+
return ParseError;
|
49
|
+
/* request is incomplete, continue the loop */
|
50
|
+
assert(pret == -2);
|
51
|
+
if (buflen == sizeof(buf))
|
52
|
+
return RequestIsTooLongError;
|
53
|
+
}
|
54
|
+
|
55
|
+
printf("request is %d bytes long\n", pret);
|
56
|
+
printf("method is %.*s\n", (int)method_len, method);
|
57
|
+
printf("path is %.*s\n", (int)path_len, path);
|
58
|
+
printf("HTTP version is 1.%d\n", minor_version);
|
59
|
+
printf("headers:\n");
|
60
|
+
for (i = 0; i != num_headers; ++i) {
|
61
|
+
printf("%.*s: %.*s\n", (int)headers[i].name_len, headers[i].name,
|
62
|
+
(int)headers[i].value_len, headers[i].value);
|
63
|
+
}
|
64
|
+
```
|
65
|
+
|
66
|
+
### phr_parse_response, phr_parse_headers
|
67
|
+
|
68
|
+
`phr_parse_response` and `phr_parse_headers` provide interfaces similar to that of `phr_parse_request`. `phr_parse_response` parses an HTTP response, and `phr_parse_headers` parses the headers only.
|
69
|
+
|
70
|
+
### phr_decode_chunked
|
71
|
+
|
72
|
+
The example below decodes incoming data in chunked-encoding. The data is decoded in-place.
|
73
|
+
|
74
|
+
```
|
75
|
+
struct phr_chunked_decoder decoder = {}; /* zero-clear */
|
76
|
+
char *buf = malloc(4096);
|
77
|
+
size_t size = 0, capacity = 4096, rsize;
|
78
|
+
ssize_t rret, pret;
|
79
|
+
|
80
|
+
/* set consume_trailer to 1 to discard the trailing header, or the application
|
81
|
+
* should call phr_parse_headers to parse the trailing header */
|
82
|
+
decoder.consume_trailer = 1;
|
83
|
+
|
84
|
+
do {
|
85
|
+
/* expand the buffer if necessary */
|
86
|
+
if (size == capacity) {
|
87
|
+
capacity *= 2;
|
88
|
+
buf = realloc(buf, capacity);
|
89
|
+
assert(buf != NULL);
|
90
|
+
}
|
91
|
+
/* read */
|
92
|
+
while ((rret = read(sock, buf + size, capacity - size)) == -1 && errno == EINTR)
|
93
|
+
;
|
94
|
+
if (rret <= 0)
|
95
|
+
return IOError;
|
96
|
+
/* decode */
|
97
|
+
rsize = rret;
|
98
|
+
pret = phr_decode_chunked(&decoder, buf + size, &rsize);
|
99
|
+
if (pret == -1)
|
100
|
+
return ParseError;
|
101
|
+
size += rsize;
|
102
|
+
} while (pret == -2);
|
103
|
+
|
104
|
+
/* successfully decoded the chunked data */
|
105
|
+
assert(pret >= 0);
|
106
|
+
printf("decoded data is at %p (%zu bytes)\n", buf, size);
|
107
|
+
```
|
108
|
+
|
17
109
|
Benchmark
|
18
110
|
---------
|
19
111
|
|
@@ -28,26 +28,39 @@
|
|
28
28
|
#include <stdio.h>
|
29
29
|
#include "picohttpparser.h"
|
30
30
|
|
31
|
-
#define REQ
|
31
|
+
#define REQ \
|
32
|
+
"GET /wp-content/uploads/2010/03/hello-kitty-darth-vader-pink.jpg HTTP/1.1\r\n" \
|
33
|
+
"Host: www.kittyhell.com\r\n" \
|
34
|
+
"User-Agent: Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; ja-JP-mac; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 " \
|
35
|
+
"Pathtraq/0.9\r\n" \
|
36
|
+
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" \
|
37
|
+
"Accept-Language: ja,en-us;q=0.7,en;q=0.3\r\n" \
|
38
|
+
"Accept-Encoding: gzip,deflate\r\n" \
|
39
|
+
"Accept-Charset: Shift_JIS,utf-8;q=0.7,*;q=0.7\r\n" \
|
40
|
+
"Keep-Alive: 115\r\n" \
|
41
|
+
"Connection: keep-alive\r\n" \
|
42
|
+
"Cookie: wp_ozh_wsa_visits=2; wp_ozh_wsa_visit_lasttime=xxxxxxxxxx; " \
|
43
|
+
"__utma=xxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.xxxxxxxxxx.x; " \
|
44
|
+
"__utmz=xxxxxxxxx.xxxxxxxxxx.x.x.utmccn=(referral)|utmcsr=reader.livedoor.com|utmcct=/reader/|utmcmd=referral\r\n" \
|
45
|
+
"\r\n"
|
32
46
|
|
33
47
|
int main(void)
|
34
48
|
{
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
return 0;
|
49
|
+
const char *method;
|
50
|
+
size_t method_len;
|
51
|
+
const char *path;
|
52
|
+
size_t path_len;
|
53
|
+
int minor_version;
|
54
|
+
struct phr_header headers[32];
|
55
|
+
size_t num_headers;
|
56
|
+
int i, ret;
|
57
|
+
|
58
|
+
for (i = 0; i < 10000000; i++) {
|
59
|
+
num_headers = sizeof(headers) / sizeof(headers[0]);
|
60
|
+
ret = phr_parse_request(REQ, sizeof(REQ) - 1, &method, &method_len, &path, &path_len, &minor_version, headers, &num_headers,
|
61
|
+
0);
|
62
|
+
assert(ret == sizeof(REQ) - 1);
|
63
|
+
}
|
64
|
+
|
65
|
+
return 0;
|
53
66
|
}
|
@@ -24,409 +24,576 @@
|
|
24
24
|
* IN THE SOFTWARE.
|
25
25
|
*/
|
26
26
|
|
27
|
+
#include <assert.h>
|
27
28
|
#include <stddef.h>
|
29
|
+
#include <string.h>
|
28
30
|
#ifdef __SSE4_2__
|
29
|
-
#
|
31
|
+
#ifdef _MSC_VER
|
32
|
+
#include <nmmintrin.h>
|
33
|
+
#else
|
34
|
+
#include <x86intrin.h>
|
35
|
+
#endif
|
30
36
|
#endif
|
31
37
|
#include "picohttpparser.h"
|
32
38
|
|
33
39
|
/* $Id$ */
|
34
40
|
|
35
41
|
#if __GNUC__ >= 3
|
36
|
-
#
|
37
|
-
#
|
42
|
+
#define likely(x) __builtin_expect(!!(x), 1)
|
43
|
+
#define unlikely(x) __builtin_expect(!!(x), 0)
|
44
|
+
#else
|
45
|
+
#define likely(x) (x)
|
46
|
+
#define unlikely(x) (x)
|
47
|
+
#endif
|
48
|
+
|
49
|
+
#ifdef _MSC_VER
|
50
|
+
#define ALIGNED(n) _declspec(align(n))
|
38
51
|
#else
|
39
|
-
#
|
40
|
-
# define unlikely(x) (x)
|
52
|
+
#define ALIGNED(n) __attribute__((aligned(n)))
|
41
53
|
#endif
|
42
54
|
|
43
|
-
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)
|
44
|
-
|
45
|
-
#define CHECK_EOF()
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
#define EXPECT_CHAR(ch)
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
#define ADVANCE_TOKEN(tok, toklen)
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
static const char*
|
55
|
+
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
|
56
|
+
|
57
|
+
#define CHECK_EOF() \
|
58
|
+
if (buf == buf_end) { \
|
59
|
+
*ret = -2; \
|
60
|
+
return NULL; \
|
61
|
+
}
|
62
|
+
|
63
|
+
#define EXPECT_CHAR(ch) \
|
64
|
+
CHECK_EOF(); \
|
65
|
+
if (*buf++ != ch) { \
|
66
|
+
*ret = -1; \
|
67
|
+
return NULL; \
|
68
|
+
}
|
69
|
+
|
70
|
+
#define ADVANCE_TOKEN(tok, toklen) \
|
71
|
+
do { \
|
72
|
+
const char *tok_start = buf; \
|
73
|
+
static const char ALIGNED(16) ranges2[] = "\000\040\177\177"; \
|
74
|
+
int found2; \
|
75
|
+
buf = findchar_fast(buf, buf_end, ranges2, sizeof(ranges2) - 1, &found2); \
|
76
|
+
if (!found2) { \
|
77
|
+
CHECK_EOF(); \
|
78
|
+
} \
|
79
|
+
while (1) { \
|
80
|
+
if (*buf == ' ') { \
|
81
|
+
break; \
|
82
|
+
} else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
|
83
|
+
if ((unsigned char)*buf < '\040' || *buf == '\177') { \
|
84
|
+
*ret = -1; \
|
85
|
+
return NULL; \
|
86
|
+
} \
|
87
|
+
} \
|
88
|
+
++buf; \
|
89
|
+
CHECK_EOF(); \
|
90
|
+
} \
|
91
|
+
tok = tok_start; \
|
92
|
+
toklen = buf - tok_start; \
|
93
|
+
} while (0)
|
94
|
+
|
95
|
+
static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
96
|
+
"\0\1\1\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
|
97
|
+
"\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
|
98
|
+
"\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
|
99
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
100
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
101
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
|
102
|
+
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
|
103
|
+
|
104
|
+
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
|
93
105
|
{
|
94
|
-
|
106
|
+
*found = 0;
|
95
107
|
#if __SSE4_2__
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
108
|
+
if (likely(buf_end - buf >= 16)) {
|
109
|
+
__m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
|
110
|
+
|
111
|
+
size_t left = (buf_end - buf) & ~15;
|
112
|
+
do {
|
113
|
+
__m128i b16 = _mm_loadu_si128((void *)buf);
|
114
|
+
int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
|
115
|
+
if (unlikely(r != 16)) {
|
116
|
+
buf += r;
|
117
|
+
*found = 1;
|
118
|
+
break;
|
119
|
+
}
|
120
|
+
buf += 16;
|
121
|
+
left -= 16;
|
122
|
+
} while (likely(left != 0));
|
123
|
+
}
|
124
|
+
#else
|
125
|
+
/* suppress unused parameter warning */
|
126
|
+
(void)buf_end;
|
127
|
+
(void)ranges;
|
128
|
+
(void)ranges_size;
|
112
129
|
#endif
|
113
|
-
|
130
|
+
return buf;
|
114
131
|
}
|
115
132
|
|
116
|
-
static const char*
|
117
|
-
const char** token, size_t* token_len,
|
118
|
-
int* ret)
|
133
|
+
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
|
119
134
|
{
|
120
|
-
|
121
|
-
|
135
|
+
const char *token_start = buf;
|
136
|
+
|
122
137
|
#ifdef __SSE4_2__
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
;
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
goto FOUND_CTL;
|
138
|
+
static const char ranges1[] = "\0\010"
|
139
|
+
/* allow HT */
|
140
|
+
"\012\037"
|
141
|
+
/* allow SP and up to but not including DEL */
|
142
|
+
"\177\177"
|
143
|
+
/* allow chars w. MSB set */
|
144
|
+
;
|
145
|
+
int found;
|
146
|
+
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
|
147
|
+
if (found)
|
148
|
+
goto FOUND_CTL;
|
135
149
|
#else
|
136
|
-
|
137
|
-
|
138
|
-
#define DOIT()
|
139
|
-
|
140
|
-
|
150
|
+
/* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
|
151
|
+
while (likely(buf_end - buf >= 8)) {
|
152
|
+
#define DOIT() \
|
153
|
+
do { \
|
154
|
+
if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
|
155
|
+
goto NonPrintable; \
|
156
|
+
++buf; \
|
157
|
+
} while (0)
|
158
|
+
DOIT();
|
159
|
+
DOIT();
|
160
|
+
DOIT();
|
161
|
+
DOIT();
|
162
|
+
DOIT();
|
163
|
+
DOIT();
|
164
|
+
DOIT();
|
165
|
+
DOIT();
|
141
166
|
#undef DOIT
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
167
|
+
continue;
|
168
|
+
NonPrintable:
|
169
|
+
if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
|
170
|
+
goto FOUND_CTL;
|
171
|
+
}
|
172
|
+
++buf;
|
146
173
|
}
|
147
|
-
++buf;
|
148
|
-
}
|
149
174
|
#endif
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
175
|
+
for (;; ++buf) {
|
176
|
+
CHECK_EOF();
|
177
|
+
if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
|
178
|
+
if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
|
179
|
+
goto FOUND_CTL;
|
180
|
+
}
|
181
|
+
}
|
156
182
|
}
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
*token_len = buf - 2 - token_start;
|
163
|
-
} else if (*buf == '\012') {
|
164
|
-
*token_len = buf - token_start;
|
165
|
-
++buf;
|
166
|
-
} else {
|
167
|
-
*ret = -1;
|
168
|
-
return NULL;
|
169
|
-
}
|
170
|
-
*token = token_start;
|
171
|
-
|
172
|
-
return buf;
|
173
|
-
}
|
174
|
-
|
175
|
-
static const char* is_complete(const char* buf, const char* buf_end,
|
176
|
-
size_t last_len, int* ret)
|
177
|
-
{
|
178
|
-
int ret_cnt = 0;
|
179
|
-
buf = last_len < 3 ? buf : buf + last_len - 3;
|
180
|
-
|
181
|
-
while (1) {
|
182
|
-
CHECK_EOF();
|
183
|
-
if (*buf == '\015') {
|
184
|
-
++buf;
|
185
|
-
CHECK_EOF();
|
186
|
-
EXPECT_CHAR('\012');
|
187
|
-
++ret_cnt;
|
183
|
+
FOUND_CTL:
|
184
|
+
if (likely(*buf == '\015')) {
|
185
|
+
++buf;
|
186
|
+
EXPECT_CHAR('\012');
|
187
|
+
*token_len = buf - 2 - token_start;
|
188
188
|
} else if (*buf == '\012') {
|
189
|
-
|
190
|
-
|
189
|
+
*token_len = buf - token_start;
|
190
|
+
++buf;
|
191
191
|
} else {
|
192
|
-
|
193
|
-
|
192
|
+
*ret = -1;
|
193
|
+
return NULL;
|
194
194
|
}
|
195
|
-
|
196
|
-
|
195
|
+
*token = token_start;
|
196
|
+
|
197
|
+
return buf;
|
198
|
+
}
|
199
|
+
|
200
|
+
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
|
201
|
+
{
|
202
|
+
int ret_cnt = 0;
|
203
|
+
buf = last_len < 3 ? buf : buf + last_len - 3;
|
204
|
+
|
205
|
+
while (1) {
|
206
|
+
CHECK_EOF();
|
207
|
+
if (*buf == '\015') {
|
208
|
+
++buf;
|
209
|
+
CHECK_EOF();
|
210
|
+
EXPECT_CHAR('\012');
|
211
|
+
++ret_cnt;
|
212
|
+
} else if (*buf == '\012') {
|
213
|
+
++buf;
|
214
|
+
++ret_cnt;
|
215
|
+
} else {
|
216
|
+
++buf;
|
217
|
+
ret_cnt = 0;
|
218
|
+
}
|
219
|
+
if (ret_cnt == 2) {
|
220
|
+
return buf;
|
221
|
+
}
|
197
222
|
}
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
return NULL;
|
223
|
+
|
224
|
+
*ret = -2;
|
225
|
+
return NULL;
|
202
226
|
}
|
203
227
|
|
204
228
|
/* *_buf is always within [buf, buf_end) upon success */
|
205
|
-
static const char*
|
206
|
-
int* ret)
|
229
|
+
static const char *parse_int(const char *buf, const char *buf_end, int *value, int *ret)
|
207
230
|
{
|
208
|
-
|
209
|
-
CHECK_EOF();
|
210
|
-
if (! ('0' <= *buf && *buf <= '9')) {
|
211
|
-
*ret = -1;
|
212
|
-
return NULL;
|
213
|
-
}
|
214
|
-
v = 0;
|
215
|
-
for (; ; ++buf) {
|
231
|
+
int v;
|
216
232
|
CHECK_EOF();
|
217
|
-
if ('0' <= *buf && *buf <= '9') {
|
218
|
-
|
219
|
-
|
220
|
-
|
233
|
+
if (!('0' <= *buf && *buf <= '9')) {
|
234
|
+
*ret = -1;
|
235
|
+
return NULL;
|
236
|
+
}
|
237
|
+
v = 0;
|
238
|
+
for (;; ++buf) {
|
239
|
+
CHECK_EOF();
|
240
|
+
if ('0' <= *buf && *buf <= '9') {
|
241
|
+
v = v * 10 + *buf - '0';
|
242
|
+
} else {
|
243
|
+
break;
|
244
|
+
}
|
221
245
|
}
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
return buf;
|
246
|
+
|
247
|
+
*value = v;
|
248
|
+
return buf;
|
226
249
|
}
|
227
250
|
|
228
251
|
/* returned pointer is always within [buf, buf_end), or null */
|
229
|
-
static const char*
|
230
|
-
int* minor_version, int* ret)
|
252
|
+
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
|
231
253
|
{
|
232
|
-
|
233
|
-
|
234
|
-
|
254
|
+
EXPECT_CHAR('H');
|
255
|
+
EXPECT_CHAR('T');
|
256
|
+
EXPECT_CHAR('T');
|
257
|
+
EXPECT_CHAR('P');
|
258
|
+
EXPECT_CHAR('/');
|
259
|
+
EXPECT_CHAR('1');
|
260
|
+
EXPECT_CHAR('.');
|
261
|
+
return parse_int(buf, buf_end, minor_version, ret);
|
235
262
|
}
|
236
263
|
|
237
|
-
static const char*
|
238
|
-
|
239
|
-
size_t* num_headers, size_t max_headers,
|
240
|
-
int* ret)
|
264
|
+
static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
|
265
|
+
size_t max_headers, int *ret)
|
241
266
|
{
|
242
|
-
|
267
|
+
for (;; ++*num_headers) {
|
268
|
+
CHECK_EOF();
|
269
|
+
if (*buf == '\015') {
|
270
|
+
++buf;
|
271
|
+
EXPECT_CHAR('\012');
|
272
|
+
break;
|
273
|
+
} else if (*buf == '\012') {
|
274
|
+
++buf;
|
275
|
+
break;
|
276
|
+
}
|
277
|
+
if (*num_headers == max_headers) {
|
278
|
+
*ret = -1;
|
279
|
+
return NULL;
|
280
|
+
}
|
281
|
+
if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
|
282
|
+
static const char ALIGNED(16) ranges1[] = "::\x00\037";
|
283
|
+
int found;
|
284
|
+
if (!token_char_map[(unsigned char)*buf]) {
|
285
|
+
*ret = -1;
|
286
|
+
return NULL;
|
287
|
+
}
|
288
|
+
/* parsing name, but do not discard SP before colon, see
|
289
|
+
* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
|
290
|
+
headers[*num_headers].name = buf;
|
291
|
+
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
|
292
|
+
if (!found) {
|
293
|
+
CHECK_EOF();
|
294
|
+
}
|
295
|
+
while (1) {
|
296
|
+
if (*buf == ':') {
|
297
|
+
break;
|
298
|
+
} else if (*buf < ' ') {
|
299
|
+
*ret = -1;
|
300
|
+
return NULL;
|
301
|
+
}
|
302
|
+
++buf;
|
303
|
+
CHECK_EOF();
|
304
|
+
}
|
305
|
+
headers[*num_headers].name_len = buf - headers[*num_headers].name;
|
306
|
+
++buf;
|
307
|
+
for (;; ++buf) {
|
308
|
+
CHECK_EOF();
|
309
|
+
if (!(*buf == ' ' || *buf == '\t')) {
|
310
|
+
break;
|
311
|
+
}
|
312
|
+
}
|
313
|
+
} else {
|
314
|
+
headers[*num_headers].name = NULL;
|
315
|
+
headers[*num_headers].name_len = 0;
|
316
|
+
}
|
317
|
+
if ((buf = get_token_to_eol(buf, buf_end, &headers[*num_headers].value, &headers[*num_headers].value_len, ret)) == NULL) {
|
318
|
+
return NULL;
|
319
|
+
}
|
320
|
+
}
|
321
|
+
return buf;
|
322
|
+
}
|
323
|
+
|
324
|
+
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
|
325
|
+
size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
|
326
|
+
size_t max_headers, int *ret)
|
327
|
+
{
|
328
|
+
/* skip first empty line (some clients add CRLF after POST content) */
|
243
329
|
CHECK_EOF();
|
244
330
|
if (*buf == '\015') {
|
245
|
-
|
246
|
-
|
247
|
-
break;
|
331
|
+
++buf;
|
332
|
+
EXPECT_CHAR('\012');
|
248
333
|
} else if (*buf == '\012') {
|
249
|
-
|
250
|
-
break;
|
251
|
-
}
|
252
|
-
if (*num_headers == max_headers) {
|
253
|
-
*ret = -1;
|
254
|
-
return NULL;
|
334
|
+
++buf;
|
255
335
|
}
|
256
|
-
|
257
|
-
|
258
|
-
|
336
|
+
|
337
|
+
/* parse request line */
|
338
|
+
ADVANCE_TOKEN(*method, *method_len);
|
339
|
+
++buf;
|
340
|
+
ADVANCE_TOKEN(*path, *path_len);
|
341
|
+
++buf;
|
342
|
+
if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
|
259
343
|
return NULL;
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
int found;
|
266
|
-
buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
|
267
|
-
if (! found) {
|
268
|
-
CHECK_EOF();
|
269
|
-
}
|
270
|
-
while (1) {
|
271
|
-
if (*buf == ':') {
|
272
|
-
break;
|
273
|
-
} else if (*buf < ' ') {
|
274
|
-
*ret = -1;
|
275
|
-
return NULL;
|
276
|
-
}
|
344
|
+
}
|
345
|
+
if (*buf == '\015') {
|
346
|
+
++buf;
|
347
|
+
EXPECT_CHAR('\012');
|
348
|
+
} else if (*buf == '\012') {
|
277
349
|
++buf;
|
278
|
-
CHECK_EOF();
|
279
|
-
}
|
280
|
-
headers[*num_headers].name_len = buf - headers[*num_headers].name;
|
281
|
-
++buf;
|
282
|
-
for (; ; ++buf) {
|
283
|
-
CHECK_EOF();
|
284
|
-
if (! (*buf == ' ' || *buf == '\t')) {
|
285
|
-
break;
|
286
|
-
}
|
287
|
-
}
|
288
350
|
} else {
|
289
|
-
|
290
|
-
|
351
|
+
*ret = -1;
|
352
|
+
return NULL;
|
353
|
+
}
|
354
|
+
|
355
|
+
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
|
356
|
+
}
|
357
|
+
|
358
|
+
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
|
359
|
+
size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
|
360
|
+
{
|
361
|
+
const char *buf = buf_start, *buf_end = buf_start + len;
|
362
|
+
size_t max_headers = *num_headers;
|
363
|
+
int r;
|
364
|
+
|
365
|
+
*method = NULL;
|
366
|
+
*method_len = 0;
|
367
|
+
*path = NULL;
|
368
|
+
*path_len = 0;
|
369
|
+
*minor_version = -1;
|
370
|
+
*num_headers = 0;
|
371
|
+
|
372
|
+
/* if last_len != 0, check if the request is complete (a fast countermeasure
|
373
|
+
against slowloris) */
|
374
|
+
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
|
375
|
+
return r;
|
291
376
|
}
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
377
|
+
|
378
|
+
if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers,
|
379
|
+
&r)) == NULL) {
|
380
|
+
return r;
|
296
381
|
}
|
297
|
-
|
298
|
-
|
382
|
+
|
383
|
+
return (int)(buf - buf_start);
|
299
384
|
}
|
300
385
|
|
301
|
-
const char*
|
302
|
-
|
303
|
-
const char** path, size_t* path_len,
|
304
|
-
int* minor_version, struct phr_header* headers,
|
305
|
-
size_t* num_headers, size_t max_headers, int* ret)
|
386
|
+
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
|
387
|
+
size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
|
306
388
|
{
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
return NULL;
|
332
|
-
}
|
333
|
-
|
334
|
-
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
|
389
|
+
/* parse "HTTP/1.x" */
|
390
|
+
if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
|
391
|
+
return NULL;
|
392
|
+
}
|
393
|
+
/* skip space */
|
394
|
+
if (*buf++ != ' ') {
|
395
|
+
*ret = -1;
|
396
|
+
return NULL;
|
397
|
+
}
|
398
|
+
/* parse status code */
|
399
|
+
if ((buf = parse_int(buf, buf_end, status, ret)) == NULL) {
|
400
|
+
return NULL;
|
401
|
+
}
|
402
|
+
/* skip space */
|
403
|
+
if (*buf++ != ' ') {
|
404
|
+
*ret = -1;
|
405
|
+
return NULL;
|
406
|
+
}
|
407
|
+
/* get message */
|
408
|
+
if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
|
409
|
+
return NULL;
|
410
|
+
}
|
411
|
+
|
412
|
+
return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
|
335
413
|
}
|
336
414
|
|
337
|
-
int
|
338
|
-
|
339
|
-
int* minor_version, struct phr_header* headers,
|
340
|
-
size_t* num_headers, size_t last_len)
|
415
|
+
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
|
416
|
+
struct phr_header *headers, size_t *num_headers, size_t last_len)
|
341
417
|
{
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
return r;
|
364
|
-
}
|
365
|
-
|
366
|
-
return (int)(buf - buf_start);
|
418
|
+
const char *buf = buf_start, *buf_end = buf + len;
|
419
|
+
size_t max_headers = *num_headers;
|
420
|
+
int r;
|
421
|
+
|
422
|
+
*minor_version = -1;
|
423
|
+
*status = 0;
|
424
|
+
*msg = NULL;
|
425
|
+
*msg_len = 0;
|
426
|
+
*num_headers = 0;
|
427
|
+
|
428
|
+
/* if last_len != 0, check if the response is complete (a fast countermeasure
|
429
|
+
against slowloris) */
|
430
|
+
if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
|
431
|
+
return r;
|
432
|
+
}
|
433
|
+
|
434
|
+
if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) {
|
435
|
+
return r;
|
436
|
+
}
|
437
|
+
|
438
|
+
return (int)(buf - buf_start);
|
367
439
|
}
|
368
440
|
|
369
|
-
|
370
|
-
int* minor_version, int* status,
|
371
|
-
const char** msg, size_t* msg_len,
|
372
|
-
struct phr_header* headers,
|
373
|
-
size_t* num_headers, size_t max_headers,
|
374
|
-
int* ret)
|
441
|
+
/*
 * Parses the header lines found in the `len` bytes starting at `buf_start`.
 *
 * On entry `*num_headers` is the capacity of the `headers` array; it is reset
 * to zero before parsing and then handed to parse_headers(), which fills it in.
 * `last_len` is forwarded to is_complete(); when it is zero that pre-check is
 * skipped entirely.
 *
 * Returns the number of bytes consumed (the distance from `buf_start` to the
 * position returned by parse_headers()) on success. When either helper returns
 * NULL, the status value that helper stored is returned unchanged.
 */
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const end = buf_start + len;
    const size_t capacity = *num_headers;
    const char *cursor;
    int status;

    *num_headers = 0;

    /* Cheap completeness pre-check for callers that re-invoke the parser as
     * more data arrives (a fast countermeasure against slowloris). */
    if (last_len != 0) {
        if (is_complete(buf_start, end, last_len, &status) == NULL) {
            return status;
        }
    }

    cursor = parse_headers(buf_start, end, headers, num_headers, capacity, &status);
    if (cursor == NULL) {
        return status;
    }

    return (int)(cursor - buf_start);
}
|
401
461
|
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
462
|
+
/* States of the chunked-transfer decoder, stored in the decoder's `_state` field. */
enum {
    /* accumulating the hexadecimal chunk-size digits */
    CHUNKED_IN_CHUNK_SIZE = 0,
    /* skipping the rest of the chunk-size line up to and including LF */
    CHUNKED_IN_CHUNK_EXT = 1,
    /* copying chunk payload bytes */
    CHUNKED_IN_CHUNK_DATA = 2,
    /* expecting the line terminator that follows a chunk's payload */
    CHUNKED_IN_CHUNK_CRLF = 3,
    /* at the start of a trailer line (an empty line ends the body) */
    CHUNKED_IN_TRAILERS_LINE_HEAD = 4,
    /* inside a trailer line, skipping to its LF */
    CHUNKED_IN_TRAILERS_LINE_MIDDLE = 5
};
|
470
|
+
|
471
|
+
/*
 * Converts one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'; returns the value 0..15, or -1 when
 * `ch` is not a hexadecimal digit.
 */
static int decode_hex(int ch)
{
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'a' && ch <= 'f') {
        return 0xa + (ch - 'a');
    }
    if (ch >= 'A' && ch <= 'F') {
        return 0xa + (ch - 'A');
    }
    return -1;
}
|
483
|
+
|
484
|
+
ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
|
485
|
+
{
|
486
|
+
size_t dst = 0, src = 0, bufsz = *_bufsz;
|
487
|
+
ssize_t ret = -2; /* incomplete */
|
488
|
+
|
489
|
+
while (1) {
|
490
|
+
switch (decoder->_state) {
|
491
|
+
case CHUNKED_IN_CHUNK_SIZE:
|
492
|
+
for (;; ++src) {
|
493
|
+
int v;
|
494
|
+
if (src == bufsz)
|
495
|
+
goto Exit;
|
496
|
+
if ((v = decode_hex(buf[src])) == -1) {
|
497
|
+
if (decoder->_hex_count == 0) {
|
498
|
+
ret = -1;
|
499
|
+
goto Exit;
|
500
|
+
}
|
501
|
+
break;
|
502
|
+
}
|
503
|
+
if (decoder->_hex_count == sizeof(size_t) * 2) {
|
504
|
+
ret = -1;
|
505
|
+
goto Exit;
|
506
|
+
}
|
507
|
+
decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
|
508
|
+
++decoder->_hex_count;
|
509
|
+
}
|
510
|
+
decoder->_hex_count = 0;
|
511
|
+
decoder->_state = CHUNKED_IN_CHUNK_EXT;
|
512
|
+
/* fallthru */
|
513
|
+
case CHUNKED_IN_CHUNK_EXT:
|
514
|
+
/* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
|
515
|
+
for (;; ++src) {
|
516
|
+
if (src == bufsz)
|
517
|
+
goto Exit;
|
518
|
+
if (buf[src] == '\012')
|
519
|
+
break;
|
520
|
+
}
|
521
|
+
++src;
|
522
|
+
if (decoder->bytes_left_in_chunk == 0) {
|
523
|
+
if (decoder->consume_trailer) {
|
524
|
+
decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
|
525
|
+
break;
|
526
|
+
} else {
|
527
|
+
goto Complete;
|
528
|
+
}
|
529
|
+
}
|
530
|
+
decoder->_state = CHUNKED_IN_CHUNK_DATA;
|
531
|
+
/* fallthru */
|
532
|
+
case CHUNKED_IN_CHUNK_DATA: {
|
533
|
+
size_t avail = bufsz - src;
|
534
|
+
if (avail < decoder->bytes_left_in_chunk) {
|
535
|
+
if (dst != src)
|
536
|
+
memmove(buf + dst, buf + src, avail);
|
537
|
+
src += avail;
|
538
|
+
dst += avail;
|
539
|
+
decoder->bytes_left_in_chunk -= avail;
|
540
|
+
goto Exit;
|
541
|
+
}
|
542
|
+
if (dst != src)
|
543
|
+
memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
|
544
|
+
src += decoder->bytes_left_in_chunk;
|
545
|
+
dst += decoder->bytes_left_in_chunk;
|
546
|
+
decoder->bytes_left_in_chunk = 0;
|
547
|
+
decoder->_state = CHUNKED_IN_CHUNK_CRLF;
|
548
|
+
}
|
549
|
+
/* fallthru */
|
550
|
+
case CHUNKED_IN_CHUNK_CRLF:
|
551
|
+
for (;; ++src) {
|
552
|
+
if (src == bufsz)
|
553
|
+
goto Exit;
|
554
|
+
if (buf[src] != '\015')
|
555
|
+
break;
|
556
|
+
}
|
557
|
+
if (buf[src] != '\012') {
|
558
|
+
ret = -1;
|
559
|
+
goto Exit;
|
560
|
+
}
|
561
|
+
++src;
|
562
|
+
decoder->_state = CHUNKED_IN_CHUNK_SIZE;
|
563
|
+
break;
|
564
|
+
case CHUNKED_IN_TRAILERS_LINE_HEAD:
|
565
|
+
for (;; ++src) {
|
566
|
+
if (src == bufsz)
|
567
|
+
goto Exit;
|
568
|
+
if (buf[src] != '\015')
|
569
|
+
break;
|
570
|
+
}
|
571
|
+
if (buf[src++] == '\012')
|
572
|
+
goto Complete;
|
573
|
+
decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
|
574
|
+
/* fallthru */
|
575
|
+
case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
|
576
|
+
for (;; ++src) {
|
577
|
+
if (src == bufsz)
|
578
|
+
goto Exit;
|
579
|
+
if (buf[src] == '\012')
|
580
|
+
break;
|
581
|
+
}
|
582
|
+
++src;
|
583
|
+
decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
|
584
|
+
break;
|
585
|
+
default:
|
586
|
+
assert(!"decoder is corrupt");
|
587
|
+
}
|
588
|
+
}
|
589
|
+
|
590
|
+
Complete:
|
591
|
+
ret = bufsz - src;
|
592
|
+
Exit:
|
593
|
+
if (dst != src)
|
594
|
+
memmove(buf + dst, buf + src, bufsz - src);
|
595
|
+
*_bufsz = dst;
|
596
|
+
return ret;
|
430
597
|
}
|
431
598
|
|
432
599
|
#undef CHECK_EOF
|